{ "base": { "dir": "/ai/text-generation/models/MiniMaxAI_MiniMax-M2.5-4.0bpw-h6-exl3", "bpw": 4.018129277476331 }, "alts": [ { "dir": "/ai/text-generation/models/MiniMaxAI_MiniMax-M2.5-5.0bpw-h6-exl3", "bpw": 5.0179149152040905 } ], "groups": [ { "idx": 0, "layers": [ "model.layers.0.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0011206239461898776, "dbits": 18874368 } ] }, { "idx": 1, "layers": [ "model.layers.0.self_attn.k_proj", "model.layers.0.self_attn.v_proj" ], "candidates": [ { "dkld": -0.009157107584178453, "dbits": 6291456 } ] }, { "idx": 2, "layers": [ "model.layers.0.self_attn.o_proj" ], "candidates": [ { "dkld": -0.007582003250718117, "dbits": 18874368 } ] }, { "idx": 3, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w1", "model.layers.0.block_sparse_moe.experts.1.w1", "model.layers.0.block_sparse_moe.experts.2.w1", "model.layers.0.block_sparse_moe.experts.3.w1", "model.layers.0.block_sparse_moe.experts.4.w1", "model.layers.0.block_sparse_moe.experts.5.w1", "model.layers.0.block_sparse_moe.experts.6.w1", "model.layers.0.block_sparse_moe.experts.7.w1", "model.layers.0.block_sparse_moe.experts.8.w1", "model.layers.0.block_sparse_moe.experts.9.w1", "model.layers.0.block_sparse_moe.experts.10.w1", "model.layers.0.block_sparse_moe.experts.11.w1", "model.layers.0.block_sparse_moe.experts.12.w1", "model.layers.0.block_sparse_moe.experts.13.w1", "model.layers.0.block_sparse_moe.experts.14.w1", "model.layers.0.block_sparse_moe.experts.15.w1", "model.layers.0.block_sparse_moe.experts.16.w1", "model.layers.0.block_sparse_moe.experts.17.w1", "model.layers.0.block_sparse_moe.experts.18.w1", "model.layers.0.block_sparse_moe.experts.19.w1", "model.layers.0.block_sparse_moe.experts.20.w1", "model.layers.0.block_sparse_moe.experts.21.w1", "model.layers.0.block_sparse_moe.experts.22.w1", "model.layers.0.block_sparse_moe.experts.23.w1", "model.layers.0.block_sparse_moe.experts.24.w1", "model.layers.0.block_sparse_moe.experts.25.w1", "model.layers.0.block_sparse_moe.experts.26.w1", "model.layers.0.block_sparse_moe.experts.27.w1", "model.layers.0.block_sparse_moe.experts.28.w1", "model.layers.0.block_sparse_moe.experts.29.w1", "model.layers.0.block_sparse_moe.experts.30.w1", "model.layers.0.block_sparse_moe.experts.31.w1", "model.layers.0.block_sparse_moe.experts.32.w1", "model.layers.0.block_sparse_moe.experts.33.w1", "model.layers.0.block_sparse_moe.experts.34.w1", "model.layers.0.block_sparse_moe.experts.35.w1", "model.layers.0.block_sparse_moe.experts.36.w1", "model.layers.0.block_sparse_moe.experts.37.w1", "model.layers.0.block_sparse_moe.experts.38.w1", "model.layers.0.block_sparse_moe.experts.39.w1", "model.layers.0.block_sparse_moe.experts.40.w1", "model.layers.0.block_sparse_moe.experts.41.w1", "model.layers.0.block_sparse_moe.experts.42.w1", "model.layers.0.block_sparse_moe.experts.43.w1", "model.layers.0.block_sparse_moe.experts.44.w1", "model.layers.0.block_sparse_moe.experts.45.w1", "model.layers.0.block_sparse_moe.experts.46.w1", "model.layers.0.block_sparse_moe.experts.47.w1", "model.layers.0.block_sparse_moe.experts.48.w1", "model.layers.0.block_sparse_moe.experts.49.w1", "model.layers.0.block_sparse_moe.experts.50.w1", "model.layers.0.block_sparse_moe.experts.51.w1", "model.layers.0.block_sparse_moe.experts.52.w1", "model.layers.0.block_sparse_moe.experts.53.w1", "model.layers.0.block_sparse_moe.experts.54.w1", "model.layers.0.block_sparse_moe.experts.55.w1", "model.layers.0.block_sparse_moe.experts.56.w1", "model.layers.0.block_sparse_moe.experts.57.w1", "model.layers.0.block_sparse_moe.experts.58.w1", "model.layers.0.block_sparse_moe.experts.59.w1", "model.layers.0.block_sparse_moe.experts.60.w1", "model.layers.0.block_sparse_moe.experts.61.w1", "model.layers.0.block_sparse_moe.experts.62.w1", "model.layers.0.block_sparse_moe.experts.63.w1", "model.layers.0.block_sparse_moe.experts.64.w1", "model.layers.0.block_sparse_moe.experts.65.w1", "model.layers.0.block_sparse_moe.experts.66.w1", "model.layers.0.block_sparse_moe.experts.67.w1", "model.layers.0.block_sparse_moe.experts.68.w1", "model.layers.0.block_sparse_moe.experts.69.w1", "model.layers.0.block_sparse_moe.experts.70.w1", "model.layers.0.block_sparse_moe.experts.71.w1", "model.layers.0.block_sparse_moe.experts.72.w1", "model.layers.0.block_sparse_moe.experts.73.w1", "model.layers.0.block_sparse_moe.experts.74.w1", "model.layers.0.block_sparse_moe.experts.75.w1", "model.layers.0.block_sparse_moe.experts.76.w1", "model.layers.0.block_sparse_moe.experts.77.w1", "model.layers.0.block_sparse_moe.experts.78.w1", "model.layers.0.block_sparse_moe.experts.79.w1", "model.layers.0.block_sparse_moe.experts.80.w1", "model.layers.0.block_sparse_moe.experts.81.w1", "model.layers.0.block_sparse_moe.experts.82.w1", "model.layers.0.block_sparse_moe.experts.83.w1", "model.layers.0.block_sparse_moe.experts.84.w1", "model.layers.0.block_sparse_moe.experts.85.w1", "model.layers.0.block_sparse_moe.experts.86.w1", "model.layers.0.block_sparse_moe.experts.87.w1", "model.layers.0.block_sparse_moe.experts.88.w1", "model.layers.0.block_sparse_moe.experts.89.w1", "model.layers.0.block_sparse_moe.experts.90.w1", "model.layers.0.block_sparse_moe.experts.91.w1", "model.layers.0.block_sparse_moe.experts.92.w1", "model.layers.0.block_sparse_moe.experts.93.w1", "model.layers.0.block_sparse_moe.experts.94.w1", "model.layers.0.block_sparse_moe.experts.95.w1", "model.layers.0.block_sparse_moe.experts.96.w1", "model.layers.0.block_sparse_moe.experts.97.w1", "model.layers.0.block_sparse_moe.experts.98.w1", "model.layers.0.block_sparse_moe.experts.99.w1", "model.layers.0.block_sparse_moe.experts.100.w1", "model.layers.0.block_sparse_moe.experts.101.w1", "model.layers.0.block_sparse_moe.experts.102.w1", "model.layers.0.block_sparse_moe.experts.103.w1", "model.layers.0.block_sparse_moe.experts.104.w1", "model.layers.0.block_sparse_moe.experts.105.w1", "model.layers.0.block_sparse_moe.experts.106.w1", "model.layers.0.block_sparse_moe.experts.107.w1", "model.layers.0.block_sparse_moe.experts.108.w1", "model.layers.0.block_sparse_moe.experts.109.w1", "model.layers.0.block_sparse_moe.experts.110.w1", "model.layers.0.block_sparse_moe.experts.111.w1", "model.layers.0.block_sparse_moe.experts.112.w1", "model.layers.0.block_sparse_moe.experts.113.w1", "model.layers.0.block_sparse_moe.experts.114.w1", "model.layers.0.block_sparse_moe.experts.115.w1", "model.layers.0.block_sparse_moe.experts.116.w1", "model.layers.0.block_sparse_moe.experts.117.w1", "model.layers.0.block_sparse_moe.experts.118.w1", "model.layers.0.block_sparse_moe.experts.119.w1", "model.layers.0.block_sparse_moe.experts.120.w1", "model.layers.0.block_sparse_moe.experts.121.w1", "model.layers.0.block_sparse_moe.experts.122.w1", "model.layers.0.block_sparse_moe.experts.123.w1", "model.layers.0.block_sparse_moe.experts.124.w1", "model.layers.0.block_sparse_moe.experts.125.w1", "model.layers.0.block_sparse_moe.experts.126.w1", "model.layers.0.block_sparse_moe.experts.127.w1", "model.layers.0.block_sparse_moe.experts.128.w1", "model.layers.0.block_sparse_moe.experts.129.w1", "model.layers.0.block_sparse_moe.experts.130.w1", "model.layers.0.block_sparse_moe.experts.131.w1", "model.layers.0.block_sparse_moe.experts.132.w1", "model.layers.0.block_sparse_moe.experts.133.w1", "model.layers.0.block_sparse_moe.experts.134.w1", "model.layers.0.block_sparse_moe.experts.135.w1", "model.layers.0.block_sparse_moe.experts.136.w1", "model.layers.0.block_sparse_moe.experts.137.w1", "model.layers.0.block_sparse_moe.experts.138.w1", "model.layers.0.block_sparse_moe.experts.139.w1", "model.layers.0.block_sparse_moe.experts.140.w1", "model.layers.0.block_sparse_moe.experts.141.w1", "model.layers.0.block_sparse_moe.experts.142.w1", "model.layers.0.block_sparse_moe.experts.143.w1", "model.layers.0.block_sparse_moe.experts.144.w1", "model.layers.0.block_sparse_moe.experts.145.w1", "model.layers.0.block_sparse_moe.experts.146.w1", "model.layers.0.block_sparse_moe.experts.147.w1", "model.layers.0.block_sparse_moe.experts.148.w1", "model.layers.0.block_sparse_moe.experts.149.w1", "model.layers.0.block_sparse_moe.experts.150.w1", "model.layers.0.block_sparse_moe.experts.151.w1", "model.layers.0.block_sparse_moe.experts.152.w1", "model.layers.0.block_sparse_moe.experts.153.w1", "model.layers.0.block_sparse_moe.experts.154.w1", "model.layers.0.block_sparse_moe.experts.155.w1", "model.layers.0.block_sparse_moe.experts.156.w1", "model.layers.0.block_sparse_moe.experts.157.w1", "model.layers.0.block_sparse_moe.experts.158.w1", "model.layers.0.block_sparse_moe.experts.159.w1", "model.layers.0.block_sparse_moe.experts.160.w1", "model.layers.0.block_sparse_moe.experts.161.w1", "model.layers.0.block_sparse_moe.experts.162.w1", "model.layers.0.block_sparse_moe.experts.163.w1", "model.layers.0.block_sparse_moe.experts.164.w1", "model.layers.0.block_sparse_moe.experts.165.w1", "model.layers.0.block_sparse_moe.experts.166.w1", "model.layers.0.block_sparse_moe.experts.167.w1", "model.layers.0.block_sparse_moe.experts.168.w1", "model.layers.0.block_sparse_moe.experts.169.w1", "model.layers.0.block_sparse_moe.experts.170.w1", "model.layers.0.block_sparse_moe.experts.171.w1", "model.layers.0.block_sparse_moe.experts.172.w1", "model.layers.0.block_sparse_moe.experts.173.w1", "model.layers.0.block_sparse_moe.experts.174.w1", "model.layers.0.block_sparse_moe.experts.175.w1", "model.layers.0.block_sparse_moe.experts.176.w1", "model.layers.0.block_sparse_moe.experts.177.w1", "model.layers.0.block_sparse_moe.experts.178.w1", "model.layers.0.block_sparse_moe.experts.179.w1", "model.layers.0.block_sparse_moe.experts.180.w1", "model.layers.0.block_sparse_moe.experts.181.w1", "model.layers.0.block_sparse_moe.experts.182.w1", "model.layers.0.block_sparse_moe.experts.183.w1", "model.layers.0.block_sparse_moe.experts.184.w1", "model.layers.0.block_sparse_moe.experts.185.w1", "model.layers.0.block_sparse_moe.experts.186.w1", "model.layers.0.block_sparse_moe.experts.187.w1", "model.layers.0.block_sparse_moe.experts.188.w1", "model.layers.0.block_sparse_moe.experts.189.w1", "model.layers.0.block_sparse_moe.experts.190.w1", "model.layers.0.block_sparse_moe.experts.191.w1", "model.layers.0.block_sparse_moe.experts.192.w1", "model.layers.0.block_sparse_moe.experts.193.w1", "model.layers.0.block_sparse_moe.experts.194.w1", "model.layers.0.block_sparse_moe.experts.195.w1", "model.layers.0.block_sparse_moe.experts.196.w1", "model.layers.0.block_sparse_moe.experts.197.w1", "model.layers.0.block_sparse_moe.experts.198.w1", "model.layers.0.block_sparse_moe.experts.199.w1", "model.layers.0.block_sparse_moe.experts.200.w1", "model.layers.0.block_sparse_moe.experts.201.w1", "model.layers.0.block_sparse_moe.experts.202.w1", "model.layers.0.block_sparse_moe.experts.203.w1", "model.layers.0.block_sparse_moe.experts.204.w1", "model.layers.0.block_sparse_moe.experts.205.w1", "model.layers.0.block_sparse_moe.experts.206.w1", "model.layers.0.block_sparse_moe.experts.207.w1", "model.layers.0.block_sparse_moe.experts.208.w1", "model.layers.0.block_sparse_moe.experts.209.w1", "model.layers.0.block_sparse_moe.experts.210.w1", "model.layers.0.block_sparse_moe.experts.211.w1", "model.layers.0.block_sparse_moe.experts.212.w1", "model.layers.0.block_sparse_moe.experts.213.w1", "model.layers.0.block_sparse_moe.experts.214.w1", "model.layers.0.block_sparse_moe.experts.215.w1", "model.layers.0.block_sparse_moe.experts.216.w1", "model.layers.0.block_sparse_moe.experts.217.w1", "model.layers.0.block_sparse_moe.experts.218.w1", "model.layers.0.block_sparse_moe.experts.219.w1", "model.layers.0.block_sparse_moe.experts.220.w1", "model.layers.0.block_sparse_moe.experts.221.w1", "model.layers.0.block_sparse_moe.experts.222.w1", "model.layers.0.block_sparse_moe.experts.223.w1", "model.layers.0.block_sparse_moe.experts.224.w1", "model.layers.0.block_sparse_moe.experts.225.w1", "model.layers.0.block_sparse_moe.experts.226.w1", "model.layers.0.block_sparse_moe.experts.227.w1", "model.layers.0.block_sparse_moe.experts.228.w1", "model.layers.0.block_sparse_moe.experts.229.w1", "model.layers.0.block_sparse_moe.experts.230.w1", "model.layers.0.block_sparse_moe.experts.231.w1", "model.layers.0.block_sparse_moe.experts.232.w1", "model.layers.0.block_sparse_moe.experts.233.w1", "model.layers.0.block_sparse_moe.experts.234.w1", "model.layers.0.block_sparse_moe.experts.235.w1", "model.layers.0.block_sparse_moe.experts.236.w1", "model.layers.0.block_sparse_moe.experts.237.w1", "model.layers.0.block_sparse_moe.experts.238.w1", "model.layers.0.block_sparse_moe.experts.239.w1", "model.layers.0.block_sparse_moe.experts.240.w1", "model.layers.0.block_sparse_moe.experts.241.w1", "model.layers.0.block_sparse_moe.experts.242.w1", "model.layers.0.block_sparse_moe.experts.243.w1", "model.layers.0.block_sparse_moe.experts.244.w1", "model.layers.0.block_sparse_moe.experts.245.w1", "model.layers.0.block_sparse_moe.experts.246.w1", "model.layers.0.block_sparse_moe.experts.247.w1", "model.layers.0.block_sparse_moe.experts.248.w1", "model.layers.0.block_sparse_moe.experts.249.w1", "model.layers.0.block_sparse_moe.experts.250.w1", "model.layers.0.block_sparse_moe.experts.251.w1", "model.layers.0.block_sparse_moe.experts.252.w1", "model.layers.0.block_sparse_moe.experts.253.w1", "model.layers.0.block_sparse_moe.experts.254.w1", "model.layers.0.block_sparse_moe.experts.255.w1", "model.layers.0.block_sparse_moe.experts.0.w3", "model.layers.0.block_sparse_moe.experts.1.w3", "model.layers.0.block_sparse_moe.experts.2.w3", "model.layers.0.block_sparse_moe.experts.3.w3", "model.layers.0.block_sparse_moe.experts.4.w3", "model.layers.0.block_sparse_moe.experts.5.w3", "model.layers.0.block_sparse_moe.experts.6.w3", "model.layers.0.block_sparse_moe.experts.7.w3", "model.layers.0.block_sparse_moe.experts.8.w3", "model.layers.0.block_sparse_moe.experts.9.w3", "model.layers.0.block_sparse_moe.experts.10.w3", "model.layers.0.block_sparse_moe.experts.11.w3", "model.layers.0.block_sparse_moe.experts.12.w3", "model.layers.0.block_sparse_moe.experts.13.w3", "model.layers.0.block_sparse_moe.experts.14.w3", "model.layers.0.block_sparse_moe.experts.15.w3", "model.layers.0.block_sparse_moe.experts.16.w3", "model.layers.0.block_sparse_moe.experts.17.w3", "model.layers.0.block_sparse_moe.experts.18.w3", "model.layers.0.block_sparse_moe.experts.19.w3", "model.layers.0.block_sparse_moe.experts.20.w3", "model.layers.0.block_sparse_moe.experts.21.w3", "model.layers.0.block_sparse_moe.experts.22.w3", "model.layers.0.block_sparse_moe.experts.23.w3", "model.layers.0.block_sparse_moe.experts.24.w3", "model.layers.0.block_sparse_moe.experts.25.w3", "model.layers.0.block_sparse_moe.experts.26.w3", "model.layers.0.block_sparse_moe.experts.27.w3", "model.layers.0.block_sparse_moe.experts.28.w3", "model.layers.0.block_sparse_moe.experts.29.w3", "model.layers.0.block_sparse_moe.experts.30.w3", "model.layers.0.block_sparse_moe.experts.31.w3", "model.layers.0.block_sparse_moe.experts.32.w3", "model.layers.0.block_sparse_moe.experts.33.w3", "model.layers.0.block_sparse_moe.experts.34.w3", "model.layers.0.block_sparse_moe.experts.35.w3", "model.layers.0.block_sparse_moe.experts.36.w3", "model.layers.0.block_sparse_moe.experts.37.w3", "model.layers.0.block_sparse_moe.experts.38.w3", "model.layers.0.block_sparse_moe.experts.39.w3", "model.layers.0.block_sparse_moe.experts.40.w3", "model.layers.0.block_sparse_moe.experts.41.w3", "model.layers.0.block_sparse_moe.experts.42.w3", "model.layers.0.block_sparse_moe.experts.43.w3", "model.layers.0.block_sparse_moe.experts.44.w3", "model.layers.0.block_sparse_moe.experts.45.w3", "model.layers.0.block_sparse_moe.experts.46.w3", "model.layers.0.block_sparse_moe.experts.47.w3", "model.layers.0.block_sparse_moe.experts.48.w3", "model.layers.0.block_sparse_moe.experts.49.w3", "model.layers.0.block_sparse_moe.experts.50.w3", "model.layers.0.block_sparse_moe.experts.51.w3", "model.layers.0.block_sparse_moe.experts.52.w3", "model.layers.0.block_sparse_moe.experts.53.w3", "model.layers.0.block_sparse_moe.experts.54.w3", "model.layers.0.block_sparse_moe.experts.55.w3", "model.layers.0.block_sparse_moe.experts.56.w3", "model.layers.0.block_sparse_moe.experts.57.w3", "model.layers.0.block_sparse_moe.experts.58.w3", "model.layers.0.block_sparse_moe.experts.59.w3", "model.layers.0.block_sparse_moe.experts.60.w3", "model.layers.0.block_sparse_moe.experts.61.w3", "model.layers.0.block_sparse_moe.experts.62.w3", "model.layers.0.block_sparse_moe.experts.63.w3", "model.layers.0.block_sparse_moe.experts.64.w3", "model.layers.0.block_sparse_moe.experts.65.w3", "model.layers.0.block_sparse_moe.experts.66.w3", "model.layers.0.block_sparse_moe.experts.67.w3", "model.layers.0.block_sparse_moe.experts.68.w3", "model.layers.0.block_sparse_moe.experts.69.w3", "model.layers.0.block_sparse_moe.experts.70.w3", "model.layers.0.block_sparse_moe.experts.71.w3", "model.layers.0.block_sparse_moe.experts.72.w3", "model.layers.0.block_sparse_moe.experts.73.w3", "model.layers.0.block_sparse_moe.experts.74.w3", "model.layers.0.block_sparse_moe.experts.75.w3", "model.layers.0.block_sparse_moe.experts.76.w3", "model.layers.0.block_sparse_moe.experts.77.w3", "model.layers.0.block_sparse_moe.experts.78.w3", "model.layers.0.block_sparse_moe.experts.79.w3", "model.layers.0.block_sparse_moe.experts.80.w3", "model.layers.0.block_sparse_moe.experts.81.w3", "model.layers.0.block_sparse_moe.experts.82.w3", "model.layers.0.block_sparse_moe.experts.83.w3", "model.layers.0.block_sparse_moe.experts.84.w3", "model.layers.0.block_sparse_moe.experts.85.w3", "model.layers.0.block_sparse_moe.experts.86.w3", "model.layers.0.block_sparse_moe.experts.87.w3", "model.layers.0.block_sparse_moe.experts.88.w3", "model.layers.0.block_sparse_moe.experts.89.w3", "model.layers.0.block_sparse_moe.experts.90.w3", "model.layers.0.block_sparse_moe.experts.91.w3", "model.layers.0.block_sparse_moe.experts.92.w3", "model.layers.0.block_sparse_moe.experts.93.w3", "model.layers.0.block_sparse_moe.experts.94.w3", "model.layers.0.block_sparse_moe.experts.95.w3", "model.layers.0.block_sparse_moe.experts.96.w3", "model.layers.0.block_sparse_moe.experts.97.w3", "model.layers.0.block_sparse_moe.experts.98.w3", "model.layers.0.block_sparse_moe.experts.99.w3", "model.layers.0.block_sparse_moe.experts.100.w3", "model.layers.0.block_sparse_moe.experts.101.w3", "model.layers.0.block_sparse_moe.experts.102.w3", "model.layers.0.block_sparse_moe.experts.103.w3", "model.layers.0.block_sparse_moe.experts.104.w3", "model.layers.0.block_sparse_moe.experts.105.w3", "model.layers.0.block_sparse_moe.experts.106.w3", "model.layers.0.block_sparse_moe.experts.107.w3", "model.layers.0.block_sparse_moe.experts.108.w3", "model.layers.0.block_sparse_moe.experts.109.w3", "model.layers.0.block_sparse_moe.experts.110.w3", "model.layers.0.block_sparse_moe.experts.111.w3", "model.layers.0.block_sparse_moe.experts.112.w3", "model.layers.0.block_sparse_moe.experts.113.w3", "model.layers.0.block_sparse_moe.experts.114.w3", "model.layers.0.block_sparse_moe.experts.115.w3", "model.layers.0.block_sparse_moe.experts.116.w3", "model.layers.0.block_sparse_moe.experts.117.w3", "model.layers.0.block_sparse_moe.experts.118.w3", "model.layers.0.block_sparse_moe.experts.119.w3", "model.layers.0.block_sparse_moe.experts.120.w3", "model.layers.0.block_sparse_moe.experts.121.w3", "model.layers.0.block_sparse_moe.experts.122.w3", "model.layers.0.block_sparse_moe.experts.123.w3", "model.layers.0.block_sparse_moe.experts.124.w3", "model.layers.0.block_sparse_moe.experts.125.w3", "model.layers.0.block_sparse_moe.experts.126.w3", "model.layers.0.block_sparse_moe.experts.127.w3", "model.layers.0.block_sparse_moe.experts.128.w3", "model.layers.0.block_sparse_moe.experts.129.w3", "model.layers.0.block_sparse_moe.experts.130.w3", "model.layers.0.block_sparse_moe.experts.131.w3", "model.layers.0.block_sparse_moe.experts.132.w3", "model.layers.0.block_sparse_moe.experts.133.w3", "model.layers.0.block_sparse_moe.experts.134.w3", "model.layers.0.block_sparse_moe.experts.135.w3", "model.layers.0.block_sparse_moe.experts.136.w3", "model.layers.0.block_sparse_moe.experts.137.w3", "model.layers.0.block_sparse_moe.experts.138.w3", "model.layers.0.block_sparse_moe.experts.139.w3", "model.layers.0.block_sparse_moe.experts.140.w3", "model.layers.0.block_sparse_moe.experts.141.w3", "model.layers.0.block_sparse_moe.experts.142.w3", "model.layers.0.block_sparse_moe.experts.143.w3", "model.layers.0.block_sparse_moe.experts.144.w3", "model.layers.0.block_sparse_moe.experts.145.w3", "model.layers.0.block_sparse_moe.experts.146.w3", "model.layers.0.block_sparse_moe.experts.147.w3", "model.layers.0.block_sparse_moe.experts.148.w3", "model.layers.0.block_sparse_moe.experts.149.w3", "model.layers.0.block_sparse_moe.experts.150.w3", "model.layers.0.block_sparse_moe.experts.151.w3", "model.layers.0.block_sparse_moe.experts.152.w3", "model.layers.0.block_sparse_moe.experts.153.w3", "model.layers.0.block_sparse_moe.experts.154.w3", "model.layers.0.block_sparse_moe.experts.155.w3", "model.layers.0.block_sparse_moe.experts.156.w3", "model.layers.0.block_sparse_moe.experts.157.w3", "model.layers.0.block_sparse_moe.experts.158.w3", "model.layers.0.block_sparse_moe.experts.159.w3", "model.layers.0.block_sparse_moe.experts.160.w3", "model.layers.0.block_sparse_moe.experts.161.w3", "model.layers.0.block_sparse_moe.experts.162.w3", "model.layers.0.block_sparse_moe.experts.163.w3", "model.layers.0.block_sparse_moe.experts.164.w3", "model.layers.0.block_sparse_moe.experts.165.w3", "model.layers.0.block_sparse_moe.experts.166.w3", "model.layers.0.block_sparse_moe.experts.167.w3", "model.layers.0.block_sparse_moe.experts.168.w3", "model.layers.0.block_sparse_moe.experts.169.w3", "model.layers.0.block_sparse_moe.experts.170.w3", "model.layers.0.block_sparse_moe.experts.171.w3", "model.layers.0.block_sparse_moe.experts.172.w3", "model.layers.0.block_sparse_moe.experts.173.w3", "model.layers.0.block_sparse_moe.experts.174.w3", "model.layers.0.block_sparse_moe.experts.175.w3", "model.layers.0.block_sparse_moe.experts.176.w3", "model.layers.0.block_sparse_moe.experts.177.w3", "model.layers.0.block_sparse_moe.experts.178.w3", "model.layers.0.block_sparse_moe.experts.179.w3", "model.layers.0.block_sparse_moe.experts.180.w3", "model.layers.0.block_sparse_moe.experts.181.w3", "model.layers.0.block_sparse_moe.experts.182.w3", "model.layers.0.block_sparse_moe.experts.183.w3", "model.layers.0.block_sparse_moe.experts.184.w3", "model.layers.0.block_sparse_moe.experts.185.w3", "model.layers.0.block_sparse_moe.experts.186.w3", "model.layers.0.block_sparse_moe.experts.187.w3", "model.layers.0.block_sparse_moe.experts.188.w3", "model.layers.0.block_sparse_moe.experts.189.w3", "model.layers.0.block_sparse_moe.experts.190.w3", "model.layers.0.block_sparse_moe.experts.191.w3", "model.layers.0.block_sparse_moe.experts.192.w3", "model.layers.0.block_sparse_moe.experts.193.w3", "model.layers.0.block_sparse_moe.experts.194.w3", "model.layers.0.block_sparse_moe.experts.195.w3", "model.layers.0.block_sparse_moe.experts.196.w3", "model.layers.0.block_sparse_moe.experts.197.w3", "model.layers.0.block_sparse_moe.experts.198.w3", "model.layers.0.block_sparse_moe.experts.199.w3", "model.layers.0.block_sparse_moe.experts.200.w3", "model.layers.0.block_sparse_moe.experts.201.w3", "model.layers.0.block_sparse_moe.experts.202.w3", "model.layers.0.block_sparse_moe.experts.203.w3", "model.layers.0.block_sparse_moe.experts.204.w3", "model.layers.0.block_sparse_moe.experts.205.w3", "model.layers.0.block_sparse_moe.experts.206.w3", "model.layers.0.block_sparse_moe.experts.207.w3", "model.layers.0.block_sparse_moe.experts.208.w3", "model.layers.0.block_sparse_moe.experts.209.w3", "model.layers.0.block_sparse_moe.experts.210.w3", "model.layers.0.block_sparse_moe.experts.211.w3", "model.layers.0.block_sparse_moe.experts.212.w3", "model.layers.0.block_sparse_moe.experts.213.w3", "model.layers.0.block_sparse_moe.experts.214.w3", "model.layers.0.block_sparse_moe.experts.215.w3", "model.layers.0.block_sparse_moe.experts.216.w3", "model.layers.0.block_sparse_moe.experts.217.w3", "model.layers.0.block_sparse_moe.experts.218.w3", "model.layers.0.block_sparse_moe.experts.219.w3", "model.layers.0.block_sparse_moe.experts.220.w3", "model.layers.0.block_sparse_moe.experts.221.w3", "model.layers.0.block_sparse_moe.experts.222.w3", "model.layers.0.block_sparse_moe.experts.223.w3", "model.layers.0.block_sparse_moe.experts.224.w3", "model.layers.0.block_sparse_moe.experts.225.w3", "model.layers.0.block_sparse_moe.experts.226.w3", "model.layers.0.block_sparse_moe.experts.227.w3", "model.layers.0.block_sparse_moe.experts.228.w3", "model.layers.0.block_sparse_moe.experts.229.w3", "model.layers.0.block_sparse_moe.experts.230.w3", "model.layers.0.block_sparse_moe.experts.231.w3", "model.layers.0.block_sparse_moe.experts.232.w3", "model.layers.0.block_sparse_moe.experts.233.w3", "model.layers.0.block_sparse_moe.experts.234.w3", "model.layers.0.block_sparse_moe.experts.235.w3", "model.layers.0.block_sparse_moe.experts.236.w3", "model.layers.0.block_sparse_moe.experts.237.w3", "model.layers.0.block_sparse_moe.experts.238.w3", "model.layers.0.block_sparse_moe.experts.239.w3", "model.layers.0.block_sparse_moe.experts.240.w3", "model.layers.0.block_sparse_moe.experts.241.w3", "model.layers.0.block_sparse_moe.experts.242.w3", "model.layers.0.block_sparse_moe.experts.243.w3", "model.layers.0.block_sparse_moe.experts.244.w3", "model.layers.0.block_sparse_moe.experts.245.w3", "model.layers.0.block_sparse_moe.experts.246.w3", "model.layers.0.block_sparse_moe.experts.247.w3", "model.layers.0.block_sparse_moe.experts.248.w3", "model.layers.0.block_sparse_moe.experts.249.w3", "model.layers.0.block_sparse_moe.experts.250.w3", "model.layers.0.block_sparse_moe.experts.251.w3", "model.layers.0.block_sparse_moe.experts.252.w3", "model.layers.0.block_sparse_moe.experts.253.w3", "model.layers.0.block_sparse_moe.experts.254.w3", "model.layers.0.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0015724407508969362, "dbits": 2415919104 } ] }, { "idx": 4, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w2", "model.layers.0.block_sparse_moe.experts.1.w2", "model.layers.0.block_sparse_moe.experts.2.w2", "model.layers.0.block_sparse_moe.experts.3.w2", "model.layers.0.block_sparse_moe.experts.4.w2", "model.layers.0.block_sparse_moe.experts.5.w2", "model.layers.0.block_sparse_moe.experts.6.w2", "model.layers.0.block_sparse_moe.experts.7.w2", "model.layers.0.block_sparse_moe.experts.8.w2", "model.layers.0.block_sparse_moe.experts.9.w2", "model.layers.0.block_sparse_moe.experts.10.w2", "model.layers.0.block_sparse_moe.experts.11.w2", "model.layers.0.block_sparse_moe.experts.12.w2", "model.layers.0.block_sparse_moe.experts.13.w2", "model.layers.0.block_sparse_moe.experts.14.w2", "model.layers.0.block_sparse_moe.experts.15.w2", "model.layers.0.block_sparse_moe.experts.16.w2", "model.layers.0.block_sparse_moe.experts.17.w2", "model.layers.0.block_sparse_moe.experts.18.w2", "model.layers.0.block_sparse_moe.experts.19.w2", "model.layers.0.block_sparse_moe.experts.20.w2", "model.layers.0.block_sparse_moe.experts.21.w2", "model.layers.0.block_sparse_moe.experts.22.w2", "model.layers.0.block_sparse_moe.experts.23.w2", "model.layers.0.block_sparse_moe.experts.24.w2", "model.layers.0.block_sparse_moe.experts.25.w2", "model.layers.0.block_sparse_moe.experts.26.w2", "model.layers.0.block_sparse_moe.experts.27.w2", "model.layers.0.block_sparse_moe.experts.28.w2", "model.layers.0.block_sparse_moe.experts.29.w2", "model.layers.0.block_sparse_moe.experts.30.w2", "model.layers.0.block_sparse_moe.experts.31.w2", "model.layers.0.block_sparse_moe.experts.32.w2", "model.layers.0.block_sparse_moe.experts.33.w2", "model.layers.0.block_sparse_moe.experts.34.w2", "model.layers.0.block_sparse_moe.experts.35.w2", "model.layers.0.block_sparse_moe.experts.36.w2", "model.layers.0.block_sparse_moe.experts.37.w2", "model.layers.0.block_sparse_moe.experts.38.w2", "model.layers.0.block_sparse_moe.experts.39.w2", "model.layers.0.block_sparse_moe.experts.40.w2", "model.layers.0.block_sparse_moe.experts.41.w2", "model.layers.0.block_sparse_moe.experts.42.w2", "model.layers.0.block_sparse_moe.experts.43.w2", "model.layers.0.block_sparse_moe.experts.44.w2", "model.layers.0.block_sparse_moe.experts.45.w2", "model.layers.0.block_sparse_moe.experts.46.w2", "model.layers.0.block_sparse_moe.experts.47.w2", "model.layers.0.block_sparse_moe.experts.48.w2", "model.layers.0.block_sparse_moe.experts.49.w2", "model.layers.0.block_sparse_moe.experts.50.w2", "model.layers.0.block_sparse_moe.experts.51.w2", "model.layers.0.block_sparse_moe.experts.52.w2", "model.layers.0.block_sparse_moe.experts.53.w2", "model.layers.0.block_sparse_moe.experts.54.w2", "model.layers.0.block_sparse_moe.experts.55.w2", "model.layers.0.block_sparse_moe.experts.56.w2", "model.layers.0.block_sparse_moe.experts.57.w2", "model.layers.0.block_sparse_moe.experts.58.w2", "model.layers.0.block_sparse_moe.experts.59.w2", "model.layers.0.block_sparse_moe.experts.60.w2", "model.layers.0.block_sparse_moe.experts.61.w2", "model.layers.0.block_sparse_moe.experts.62.w2", "model.layers.0.block_sparse_moe.experts.63.w2", "model.layers.0.block_sparse_moe.experts.64.w2", "model.layers.0.block_sparse_moe.experts.65.w2", "model.layers.0.block_sparse_moe.experts.66.w2", "model.layers.0.block_sparse_moe.experts.67.w2", "model.layers.0.block_sparse_moe.experts.68.w2", "model.layers.0.block_sparse_moe.experts.69.w2", "model.layers.0.block_sparse_moe.experts.70.w2", "model.layers.0.block_sparse_moe.experts.71.w2", "model.layers.0.block_sparse_moe.experts.72.w2", "model.layers.0.block_sparse_moe.experts.73.w2", "model.layers.0.block_sparse_moe.experts.74.w2", "model.layers.0.block_sparse_moe.experts.75.w2", "model.layers.0.block_sparse_moe.experts.76.w2", "model.layers.0.block_sparse_moe.experts.77.w2", "model.layers.0.block_sparse_moe.experts.78.w2", "model.layers.0.block_sparse_moe.experts.79.w2", "model.layers.0.block_sparse_moe.experts.80.w2", "model.layers.0.block_sparse_moe.experts.81.w2", "model.layers.0.block_sparse_moe.experts.82.w2", "model.layers.0.block_sparse_moe.experts.83.w2", "model.layers.0.block_sparse_moe.experts.84.w2", "model.layers.0.block_sparse_moe.experts.85.w2", "model.layers.0.block_sparse_moe.experts.86.w2", "model.layers.0.block_sparse_moe.experts.87.w2", "model.layers.0.block_sparse_moe.experts.88.w2", "model.layers.0.block_sparse_moe.experts.89.w2", "model.layers.0.block_sparse_moe.experts.90.w2", "model.layers.0.block_sparse_moe.experts.91.w2", "model.layers.0.block_sparse_moe.experts.92.w2", "model.layers.0.block_sparse_moe.experts.93.w2", "model.layers.0.block_sparse_moe.experts.94.w2", "model.layers.0.block_sparse_moe.experts.95.w2", "model.layers.0.block_sparse_moe.experts.96.w2", "model.layers.0.block_sparse_moe.experts.97.w2", "model.layers.0.block_sparse_moe.experts.98.w2", "model.layers.0.block_sparse_moe.experts.99.w2", "model.layers.0.block_sparse_moe.experts.100.w2", "model.layers.0.block_sparse_moe.experts.101.w2", "model.layers.0.block_sparse_moe.experts.102.w2", "model.layers.0.block_sparse_moe.experts.103.w2", "model.layers.0.block_sparse_moe.experts.104.w2", "model.layers.0.block_sparse_moe.experts.105.w2", "model.layers.0.block_sparse_moe.experts.106.w2", "model.layers.0.block_sparse_moe.experts.107.w2", "model.layers.0.block_sparse_moe.experts.108.w2", "model.layers.0.block_sparse_moe.experts.109.w2", "model.layers.0.block_sparse_moe.experts.110.w2", "model.layers.0.block_sparse_moe.experts.111.w2", "model.layers.0.block_sparse_moe.experts.112.w2", "model.layers.0.block_sparse_moe.experts.113.w2", "model.layers.0.block_sparse_moe.experts.114.w2", "model.layers.0.block_sparse_moe.experts.115.w2", "model.layers.0.block_sparse_moe.experts.116.w2", "model.layers.0.block_sparse_moe.experts.117.w2", "model.layers.0.block_sparse_moe.experts.118.w2", "model.layers.0.block_sparse_moe.experts.119.w2", "model.layers.0.block_sparse_moe.experts.120.w2", "model.layers.0.block_sparse_moe.experts.121.w2", "model.layers.0.block_sparse_moe.experts.122.w2", "model.layers.0.block_sparse_moe.experts.123.w2", "model.layers.0.block_sparse_moe.experts.124.w2", "model.layers.0.block_sparse_moe.experts.125.w2", "model.layers.0.block_sparse_moe.experts.126.w2", "model.layers.0.block_sparse_moe.experts.127.w2", "model.layers.0.block_sparse_moe.experts.128.w2", "model.layers.0.block_sparse_moe.experts.129.w2", "model.layers.0.block_sparse_moe.experts.130.w2", "model.layers.0.block_sparse_moe.experts.131.w2", "model.layers.0.block_sparse_moe.experts.132.w2", "model.layers.0.block_sparse_moe.experts.133.w2", "model.layers.0.block_sparse_moe.experts.134.w2", "model.layers.0.block_sparse_moe.experts.135.w2", "model.layers.0.block_sparse_moe.experts.136.w2", "model.layers.0.block_sparse_moe.experts.137.w2", "model.layers.0.block_sparse_moe.experts.138.w2", "model.layers.0.block_sparse_moe.experts.139.w2", "model.layers.0.block_sparse_moe.experts.140.w2", "model.layers.0.block_sparse_moe.experts.141.w2", "model.layers.0.block_sparse_moe.experts.142.w2", "model.layers.0.block_sparse_moe.experts.143.w2", "model.layers.0.block_sparse_moe.experts.144.w2", "model.layers.0.block_sparse_moe.experts.145.w2", "model.layers.0.block_sparse_moe.experts.146.w2", "model.layers.0.block_sparse_moe.experts.147.w2", "model.layers.0.block_sparse_moe.experts.148.w2", "model.layers.0.block_sparse_moe.experts.149.w2", "model.layers.0.block_sparse_moe.experts.150.w2", "model.layers.0.block_sparse_moe.experts.151.w2", "model.layers.0.block_sparse_moe.experts.152.w2", "model.layers.0.block_sparse_moe.experts.153.w2", "model.layers.0.block_sparse_moe.experts.154.w2", "model.layers.0.block_sparse_moe.experts.155.w2", "model.layers.0.block_sparse_moe.experts.156.w2", "model.layers.0.block_sparse_moe.experts.157.w2", "model.layers.0.block_sparse_moe.experts.158.w2", "model.layers.0.block_sparse_moe.experts.159.w2", "model.layers.0.block_sparse_moe.experts.160.w2", "model.layers.0.block_sparse_moe.experts.161.w2", "model.layers.0.block_sparse_moe.experts.162.w2", "model.layers.0.block_sparse_moe.experts.163.w2", "model.layers.0.block_sparse_moe.experts.164.w2", "model.layers.0.block_sparse_moe.experts.165.w2", "model.layers.0.block_sparse_moe.experts.166.w2", "model.layers.0.block_sparse_moe.experts.167.w2", "model.layers.0.block_sparse_moe.experts.168.w2", "model.layers.0.block_sparse_moe.experts.169.w2", "model.layers.0.block_sparse_moe.experts.170.w2", "model.layers.0.block_sparse_moe.experts.171.w2", "model.layers.0.block_sparse_moe.experts.172.w2", "model.layers.0.block_sparse_moe.experts.173.w2", "model.layers.0.block_sparse_moe.experts.174.w2", "model.layers.0.block_sparse_moe.experts.175.w2", "model.layers.0.block_sparse_moe.experts.176.w2", "model.layers.0.block_sparse_moe.experts.177.w2", "model.layers.0.block_sparse_moe.experts.178.w2", "model.layers.0.block_sparse_moe.experts.179.w2", "model.layers.0.block_sparse_moe.experts.180.w2", "model.layers.0.block_sparse_moe.experts.181.w2", "model.layers.0.block_sparse_moe.experts.182.w2", "model.layers.0.block_sparse_moe.experts.183.w2", "model.layers.0.block_sparse_moe.experts.184.w2", "model.layers.0.block_sparse_moe.experts.185.w2", "model.layers.0.block_sparse_moe.experts.186.w2", "model.layers.0.block_sparse_moe.experts.187.w2", "model.layers.0.block_sparse_moe.experts.188.w2", "model.layers.0.block_sparse_moe.experts.189.w2", "model.layers.0.block_sparse_moe.experts.190.w2", "model.layers.0.block_sparse_moe.experts.191.w2", "model.layers.0.block_sparse_moe.experts.192.w2", "model.layers.0.block_sparse_moe.experts.193.w2", "model.layers.0.block_sparse_moe.experts.194.w2", "model.layers.0.block_sparse_moe.experts.195.w2", "model.layers.0.block_sparse_moe.experts.196.w2", "model.layers.0.block_sparse_moe.experts.197.w2", "model.layers.0.block_sparse_moe.experts.198.w2", "model.layers.0.block_sparse_moe.experts.199.w2", "model.layers.0.block_sparse_moe.experts.200.w2", "model.layers.0.block_sparse_moe.experts.201.w2", "model.layers.0.block_sparse_moe.experts.202.w2", "model.layers.0.block_sparse_moe.experts.203.w2", "model.layers.0.block_sparse_moe.experts.204.w2", "model.layers.0.block_sparse_moe.experts.205.w2", "model.layers.0.block_sparse_moe.experts.206.w2", "model.layers.0.block_sparse_moe.experts.207.w2", "model.layers.0.block_sparse_moe.experts.208.w2", "model.layers.0.block_sparse_moe.experts.209.w2", "model.layers.0.block_sparse_moe.experts.210.w2", "model.layers.0.block_sparse_moe.experts.211.w2", "model.layers.0.block_sparse_moe.experts.212.w2", "model.layers.0.block_sparse_moe.experts.213.w2", "model.layers.0.block_sparse_moe.experts.214.w2", "model.layers.0.block_sparse_moe.experts.215.w2", "model.layers.0.block_sparse_moe.experts.216.w2", "model.layers.0.block_sparse_moe.experts.217.w2", "model.layers.0.block_sparse_moe.experts.218.w2", "model.layers.0.block_sparse_moe.experts.219.w2", "model.layers.0.block_sparse_moe.experts.220.w2", "model.layers.0.block_sparse_moe.experts.221.w2", "model.layers.0.block_sparse_moe.experts.222.w2", "model.layers.0.block_sparse_moe.experts.223.w2", "model.layers.0.block_sparse_moe.experts.224.w2", "model.layers.0.block_sparse_moe.experts.225.w2", "model.layers.0.block_sparse_moe.experts.226.w2", "model.layers.0.block_sparse_moe.experts.227.w2", "model.layers.0.block_sparse_moe.experts.228.w2", "model.layers.0.block_sparse_moe.experts.229.w2", "model.layers.0.block_sparse_moe.experts.230.w2", "model.layers.0.block_sparse_moe.experts.231.w2", "model.layers.0.block_sparse_moe.experts.232.w2", "model.layers.0.block_sparse_moe.experts.233.w2", "model.layers.0.block_sparse_moe.experts.234.w2", "model.layers.0.block_sparse_moe.experts.235.w2", "model.layers.0.block_sparse_moe.experts.236.w2", "model.layers.0.block_sparse_moe.experts.237.w2", "model.layers.0.block_sparse_moe.experts.238.w2", "model.layers.0.block_sparse_moe.experts.239.w2", "model.layers.0.block_sparse_moe.experts.240.w2", "model.layers.0.block_sparse_moe.experts.241.w2", "model.layers.0.block_sparse_moe.experts.242.w2", "model.layers.0.block_sparse_moe.experts.243.w2", "model.layers.0.block_sparse_moe.experts.244.w2", "model.layers.0.block_sparse_moe.experts.245.w2", "model.layers.0.block_sparse_moe.experts.246.w2", "model.layers.0.block_sparse_moe.experts.247.w2", "model.layers.0.block_sparse_moe.experts.248.w2", "model.layers.0.block_sparse_moe.experts.249.w2", "model.layers.0.block_sparse_moe.experts.250.w2", "model.layers.0.block_sparse_moe.experts.251.w2", "model.layers.0.block_sparse_moe.experts.252.w2", "model.layers.0.block_sparse_moe.experts.253.w2", "model.layers.0.block_sparse_moe.experts.254.w2", "model.layers.0.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005157133564352989, "dbits": 1207959552 } ] }, { "idx": 5, "layers": [ "model.layers.1.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0006104845553636523, "dbits": 18874368 } ] }, { "idx": 6, "layers": [ "model.layers.1.self_attn.k_proj", "model.layers.1.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00172465350478887, "dbits": 6291456 } ] }, { "idx": 7, "layers": [ "model.layers.1.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0007539384067058591, "dbits": 18874368 } ] }, { "idx": 8, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w1", "model.layers.1.block_sparse_moe.experts.1.w1", "model.layers.1.block_sparse_moe.experts.2.w1", "model.layers.1.block_sparse_moe.experts.3.w1", "model.layers.1.block_sparse_moe.experts.4.w1", "model.layers.1.block_sparse_moe.experts.5.w1", "model.layers.1.block_sparse_moe.experts.6.w1", "model.layers.1.block_sparse_moe.experts.7.w1", "model.layers.1.block_sparse_moe.experts.8.w1", "model.layers.1.block_sparse_moe.experts.9.w1", "model.layers.1.block_sparse_moe.experts.10.w1", "model.layers.1.block_sparse_moe.experts.11.w1", "model.layers.1.block_sparse_moe.experts.12.w1", "model.layers.1.block_sparse_moe.experts.13.w1", "model.layers.1.block_sparse_moe.experts.14.w1", "model.layers.1.block_sparse_moe.experts.15.w1", "model.layers.1.block_sparse_moe.experts.16.w1", "model.layers.1.block_sparse_moe.experts.17.w1", "model.layers.1.block_sparse_moe.experts.18.w1", "model.layers.1.block_sparse_moe.experts.19.w1", "model.layers.1.block_sparse_moe.experts.20.w1", "model.layers.1.block_sparse_moe.experts.21.w1", "model.layers.1.block_sparse_moe.experts.22.w1", "model.layers.1.block_sparse_moe.experts.23.w1", "model.layers.1.block_sparse_moe.experts.24.w1", "model.layers.1.block_sparse_moe.experts.25.w1", "model.layers.1.block_sparse_moe.experts.26.w1", "model.layers.1.block_sparse_moe.experts.27.w1", "model.layers.1.block_sparse_moe.experts.28.w1", "model.layers.1.block_sparse_moe.experts.29.w1", "model.layers.1.block_sparse_moe.experts.30.w1", "model.layers.1.block_sparse_moe.experts.31.w1", "model.layers.1.block_sparse_moe.experts.32.w1", "model.layers.1.block_sparse_moe.experts.33.w1", "model.layers.1.block_sparse_moe.experts.34.w1", "model.layers.1.block_sparse_moe.experts.35.w1", "model.layers.1.block_sparse_moe.experts.36.w1", "model.layers.1.block_sparse_moe.experts.37.w1", "model.layers.1.block_sparse_moe.experts.38.w1", "model.layers.1.block_sparse_moe.experts.39.w1", "model.layers.1.block_sparse_moe.experts.40.w1", "model.layers.1.block_sparse_moe.experts.41.w1", "model.layers.1.block_sparse_moe.experts.42.w1", "model.layers.1.block_sparse_moe.experts.43.w1", "model.layers.1.block_sparse_moe.experts.44.w1", "model.layers.1.block_sparse_moe.experts.45.w1", "model.layers.1.block_sparse_moe.experts.46.w1", "model.layers.1.block_sparse_moe.experts.47.w1", "model.layers.1.block_sparse_moe.experts.48.w1", "model.layers.1.block_sparse_moe.experts.49.w1", "model.layers.1.block_sparse_moe.experts.50.w1", "model.layers.1.block_sparse_moe.experts.51.w1", "model.layers.1.block_sparse_moe.experts.52.w1", "model.layers.1.block_sparse_moe.experts.53.w1", "model.layers.1.block_sparse_moe.experts.54.w1", "model.layers.1.block_sparse_moe.experts.55.w1", "model.layers.1.block_sparse_moe.experts.56.w1", "model.layers.1.block_sparse_moe.experts.57.w1", "model.layers.1.block_sparse_moe.experts.58.w1", "model.layers.1.block_sparse_moe.experts.59.w1", "model.layers.1.block_sparse_moe.experts.60.w1", "model.layers.1.block_sparse_moe.experts.61.w1", "model.layers.1.block_sparse_moe.experts.62.w1", "model.layers.1.block_sparse_moe.experts.63.w1", "model.layers.1.block_sparse_moe.experts.64.w1", "model.layers.1.block_sparse_moe.experts.65.w1", "model.layers.1.block_sparse_moe.experts.66.w1", "model.layers.1.block_sparse_moe.experts.67.w1", "model.layers.1.block_sparse_moe.experts.68.w1", "model.layers.1.block_sparse_moe.experts.69.w1", "model.layers.1.block_sparse_moe.experts.70.w1", "model.layers.1.block_sparse_moe.experts.71.w1", "model.layers.1.block_sparse_moe.experts.72.w1", "model.layers.1.block_sparse_moe.experts.73.w1", "model.layers.1.block_sparse_moe.experts.74.w1", "model.layers.1.block_sparse_moe.experts.75.w1", "model.layers.1.block_sparse_moe.experts.76.w1", "model.layers.1.block_sparse_moe.experts.77.w1", "model.layers.1.block_sparse_moe.experts.78.w1", "model.layers.1.block_sparse_moe.experts.79.w1", "model.layers.1.block_sparse_moe.experts.80.w1", "model.layers.1.block_sparse_moe.experts.81.w1", "model.layers.1.block_sparse_moe.experts.82.w1", "model.layers.1.block_sparse_moe.experts.83.w1", "model.layers.1.block_sparse_moe.experts.84.w1", "model.layers.1.block_sparse_moe.experts.85.w1", "model.layers.1.block_sparse_moe.experts.86.w1", "model.layers.1.block_sparse_moe.experts.87.w1", "model.layers.1.block_sparse_moe.experts.88.w1", "model.layers.1.block_sparse_moe.experts.89.w1", "model.layers.1.block_sparse_moe.experts.90.w1", "model.layers.1.block_sparse_moe.experts.91.w1", "model.layers.1.block_sparse_moe.experts.92.w1", "model.layers.1.block_sparse_moe.experts.93.w1", "model.layers.1.block_sparse_moe.experts.94.w1", "model.layers.1.block_sparse_moe.experts.95.w1", "model.layers.1.block_sparse_moe.experts.96.w1", "model.layers.1.block_sparse_moe.experts.97.w1", "model.layers.1.block_sparse_moe.experts.98.w1", "model.layers.1.block_sparse_moe.experts.99.w1", "model.layers.1.block_sparse_moe.experts.100.w1", "model.layers.1.block_sparse_moe.experts.101.w1", "model.layers.1.block_sparse_moe.experts.102.w1", "model.layers.1.block_sparse_moe.experts.103.w1", "model.layers.1.block_sparse_moe.experts.104.w1", "model.layers.1.block_sparse_moe.experts.105.w1", "model.layers.1.block_sparse_moe.experts.106.w1", "model.layers.1.block_sparse_moe.experts.107.w1", "model.layers.1.block_sparse_moe.experts.108.w1", "model.layers.1.block_sparse_moe.experts.109.w1", "model.layers.1.block_sparse_moe.experts.110.w1", "model.layers.1.block_sparse_moe.experts.111.w1", "model.layers.1.block_sparse_moe.experts.112.w1", "model.layers.1.block_sparse_moe.experts.113.w1", "model.layers.1.block_sparse_moe.experts.114.w1", "model.layers.1.block_sparse_moe.experts.115.w1", "model.layers.1.block_sparse_moe.experts.116.w1", "model.layers.1.block_sparse_moe.experts.117.w1", "model.layers.1.block_sparse_moe.experts.118.w1", "model.layers.1.block_sparse_moe.experts.119.w1", "model.layers.1.block_sparse_moe.experts.120.w1", "model.layers.1.block_sparse_moe.experts.121.w1", "model.layers.1.block_sparse_moe.experts.122.w1", "model.layers.1.block_sparse_moe.experts.123.w1", "model.layers.1.block_sparse_moe.experts.124.w1", "model.layers.1.block_sparse_moe.experts.125.w1", "model.layers.1.block_sparse_moe.experts.126.w1", "model.layers.1.block_sparse_moe.experts.127.w1", "model.layers.1.block_sparse_moe.experts.128.w1", "model.layers.1.block_sparse_moe.experts.129.w1", "model.layers.1.block_sparse_moe.experts.130.w1", "model.layers.1.block_sparse_moe.experts.131.w1", "model.layers.1.block_sparse_moe.experts.132.w1", "model.layers.1.block_sparse_moe.experts.133.w1", "model.layers.1.block_sparse_moe.experts.134.w1", "model.layers.1.block_sparse_moe.experts.135.w1", "model.layers.1.block_sparse_moe.experts.136.w1", "model.layers.1.block_sparse_moe.experts.137.w1", "model.layers.1.block_sparse_moe.experts.138.w1", "model.layers.1.block_sparse_moe.experts.139.w1", "model.layers.1.block_sparse_moe.experts.140.w1", "model.layers.1.block_sparse_moe.experts.141.w1", "model.layers.1.block_sparse_moe.experts.142.w1", "model.layers.1.block_sparse_moe.experts.143.w1", "model.layers.1.block_sparse_moe.experts.144.w1", "model.layers.1.block_sparse_moe.experts.145.w1", "model.layers.1.block_sparse_moe.experts.146.w1", "model.layers.1.block_sparse_moe.experts.147.w1", "model.layers.1.block_sparse_moe.experts.148.w1", "model.layers.1.block_sparse_moe.experts.149.w1", "model.layers.1.block_sparse_moe.experts.150.w1", "model.layers.1.block_sparse_moe.experts.151.w1", "model.layers.1.block_sparse_moe.experts.152.w1", "model.layers.1.block_sparse_moe.experts.153.w1", "model.layers.1.block_sparse_moe.experts.154.w1", "model.layers.1.block_sparse_moe.experts.155.w1", "model.layers.1.block_sparse_moe.experts.156.w1", "model.layers.1.block_sparse_moe.experts.157.w1", "model.layers.1.block_sparse_moe.experts.158.w1", "model.layers.1.block_sparse_moe.experts.159.w1", "model.layers.1.block_sparse_moe.experts.160.w1", "model.layers.1.block_sparse_moe.experts.161.w1", "model.layers.1.block_sparse_moe.experts.162.w1", "model.layers.1.block_sparse_moe.experts.163.w1", "model.layers.1.block_sparse_moe.experts.164.w1", "model.layers.1.block_sparse_moe.experts.165.w1", "model.layers.1.block_sparse_moe.experts.166.w1", "model.layers.1.block_sparse_moe.experts.167.w1", "model.layers.1.block_sparse_moe.experts.168.w1", "model.layers.1.block_sparse_moe.experts.169.w1", "model.layers.1.block_sparse_moe.experts.170.w1", "model.layers.1.block_sparse_moe.experts.171.w1", "model.layers.1.block_sparse_moe.experts.172.w1", "model.layers.1.block_sparse_moe.experts.173.w1", "model.layers.1.block_sparse_moe.experts.174.w1", "model.layers.1.block_sparse_moe.experts.175.w1", "model.layers.1.block_sparse_moe.experts.176.w1", "model.layers.1.block_sparse_moe.experts.177.w1", "model.layers.1.block_sparse_moe.experts.178.w1", "model.layers.1.block_sparse_moe.experts.179.w1", "model.layers.1.block_sparse_moe.experts.180.w1", "model.layers.1.block_sparse_moe.experts.181.w1", "model.layers.1.block_sparse_moe.experts.182.w1", "model.layers.1.block_sparse_moe.experts.183.w1", "model.layers.1.block_sparse_moe.experts.184.w1", "model.layers.1.block_sparse_moe.experts.185.w1", "model.layers.1.block_sparse_moe.experts.186.w1", "model.layers.1.block_sparse_moe.experts.187.w1", "model.layers.1.block_sparse_moe.experts.188.w1", "model.layers.1.block_sparse_moe.experts.189.w1", "model.layers.1.block_sparse_moe.experts.190.w1", "model.layers.1.block_sparse_moe.experts.191.w1", "model.layers.1.block_sparse_moe.experts.192.w1", "model.layers.1.block_sparse_moe.experts.193.w1", "model.layers.1.block_sparse_moe.experts.194.w1", "model.layers.1.block_sparse_moe.experts.195.w1", "model.layers.1.block_sparse_moe.experts.196.w1", "model.layers.1.block_sparse_moe.experts.197.w1", "model.layers.1.block_sparse_moe.experts.198.w1", "model.layers.1.block_sparse_moe.experts.199.w1", "model.layers.1.block_sparse_moe.experts.200.w1", "model.layers.1.block_sparse_moe.experts.201.w1", "model.layers.1.block_sparse_moe.experts.202.w1", "model.layers.1.block_sparse_moe.experts.203.w1", "model.layers.1.block_sparse_moe.experts.204.w1", "model.layers.1.block_sparse_moe.experts.205.w1", "model.layers.1.block_sparse_moe.experts.206.w1", "model.layers.1.block_sparse_moe.experts.207.w1", "model.layers.1.block_sparse_moe.experts.208.w1", "model.layers.1.block_sparse_moe.experts.209.w1", "model.layers.1.block_sparse_moe.experts.210.w1", "model.layers.1.block_sparse_moe.experts.211.w1", "model.layers.1.block_sparse_moe.experts.212.w1", "model.layers.1.block_sparse_moe.experts.213.w1", "model.layers.1.block_sparse_moe.experts.214.w1", "model.layers.1.block_sparse_moe.experts.215.w1", "model.layers.1.block_sparse_moe.experts.216.w1", "model.layers.1.block_sparse_moe.experts.217.w1", "model.layers.1.block_sparse_moe.experts.218.w1", "model.layers.1.block_sparse_moe.experts.219.w1", "model.layers.1.block_sparse_moe.experts.220.w1", "model.layers.1.block_sparse_moe.experts.221.w1", "model.layers.1.block_sparse_moe.experts.222.w1", "model.layers.1.block_sparse_moe.experts.223.w1", "model.layers.1.block_sparse_moe.experts.224.w1", "model.layers.1.block_sparse_moe.experts.225.w1", "model.layers.1.block_sparse_moe.experts.226.w1", "model.layers.1.block_sparse_moe.experts.227.w1", "model.layers.1.block_sparse_moe.experts.228.w1", "model.layers.1.block_sparse_moe.experts.229.w1", "model.layers.1.block_sparse_moe.experts.230.w1", "model.layers.1.block_sparse_moe.experts.231.w1", "model.layers.1.block_sparse_moe.experts.232.w1", "model.layers.1.block_sparse_moe.experts.233.w1", "model.layers.1.block_sparse_moe.experts.234.w1", "model.layers.1.block_sparse_moe.experts.235.w1", "model.layers.1.block_sparse_moe.experts.236.w1", "model.layers.1.block_sparse_moe.experts.237.w1", "model.layers.1.block_sparse_moe.experts.238.w1", "model.layers.1.block_sparse_moe.experts.239.w1", "model.layers.1.block_sparse_moe.experts.240.w1", "model.layers.1.block_sparse_moe.experts.241.w1", "model.layers.1.block_sparse_moe.experts.242.w1", "model.layers.1.block_sparse_moe.experts.243.w1", "model.layers.1.block_sparse_moe.experts.244.w1", "model.layers.1.block_sparse_moe.experts.245.w1", "model.layers.1.block_sparse_moe.experts.246.w1", "model.layers.1.block_sparse_moe.experts.247.w1", "model.layers.1.block_sparse_moe.experts.248.w1", "model.layers.1.block_sparse_moe.experts.249.w1", "model.layers.1.block_sparse_moe.experts.250.w1", "model.layers.1.block_sparse_moe.experts.251.w1", "model.layers.1.block_sparse_moe.experts.252.w1", "model.layers.1.block_sparse_moe.experts.253.w1", "model.layers.1.block_sparse_moe.experts.254.w1", "model.layers.1.block_sparse_moe.experts.255.w1", "model.layers.1.block_sparse_moe.experts.0.w3", "model.layers.1.block_sparse_moe.experts.1.w3", "model.layers.1.block_sparse_moe.experts.2.w3", "model.layers.1.block_sparse_moe.experts.3.w3", "model.layers.1.block_sparse_moe.experts.4.w3", "model.layers.1.block_sparse_moe.experts.5.w3", "model.layers.1.block_sparse_moe.experts.6.w3", "model.layers.1.block_sparse_moe.experts.7.w3", "model.layers.1.block_sparse_moe.experts.8.w3", "model.layers.1.block_sparse_moe.experts.9.w3", "model.layers.1.block_sparse_moe.experts.10.w3", "model.layers.1.block_sparse_moe.experts.11.w3", "model.layers.1.block_sparse_moe.experts.12.w3", "model.layers.1.block_sparse_moe.experts.13.w3", "model.layers.1.block_sparse_moe.experts.14.w3", "model.layers.1.block_sparse_moe.experts.15.w3", "model.layers.1.block_sparse_moe.experts.16.w3", "model.layers.1.block_sparse_moe.experts.17.w3", "model.layers.1.block_sparse_moe.experts.18.w3", "model.layers.1.block_sparse_moe.experts.19.w3", "model.layers.1.block_sparse_moe.experts.20.w3", "model.layers.1.block_sparse_moe.experts.21.w3", "model.layers.1.block_sparse_moe.experts.22.w3", "model.layers.1.block_sparse_moe.experts.23.w3", "model.layers.1.block_sparse_moe.experts.24.w3", "model.layers.1.block_sparse_moe.experts.25.w3", "model.layers.1.block_sparse_moe.experts.26.w3", "model.layers.1.block_sparse_moe.experts.27.w3", "model.layers.1.block_sparse_moe.experts.28.w3", "model.layers.1.block_sparse_moe.experts.29.w3", "model.layers.1.block_sparse_moe.experts.30.w3", "model.layers.1.block_sparse_moe.experts.31.w3", "model.layers.1.block_sparse_moe.experts.32.w3", "model.layers.1.block_sparse_moe.experts.33.w3", "model.layers.1.block_sparse_moe.experts.34.w3", "model.layers.1.block_sparse_moe.experts.35.w3", "model.layers.1.block_sparse_moe.experts.36.w3", "model.layers.1.block_sparse_moe.experts.37.w3", "model.layers.1.block_sparse_moe.experts.38.w3", "model.layers.1.block_sparse_moe.experts.39.w3", "model.layers.1.block_sparse_moe.experts.40.w3", "model.layers.1.block_sparse_moe.experts.41.w3", "model.layers.1.block_sparse_moe.experts.42.w3", "model.layers.1.block_sparse_moe.experts.43.w3", "model.layers.1.block_sparse_moe.experts.44.w3", "model.layers.1.block_sparse_moe.experts.45.w3", "model.layers.1.block_sparse_moe.experts.46.w3", "model.layers.1.block_sparse_moe.experts.47.w3", "model.layers.1.block_sparse_moe.experts.48.w3", "model.layers.1.block_sparse_moe.experts.49.w3", "model.layers.1.block_sparse_moe.experts.50.w3", "model.layers.1.block_sparse_moe.experts.51.w3", "model.layers.1.block_sparse_moe.experts.52.w3", "model.layers.1.block_sparse_moe.experts.53.w3", "model.layers.1.block_sparse_moe.experts.54.w3", "model.layers.1.block_sparse_moe.experts.55.w3", "model.layers.1.block_sparse_moe.experts.56.w3", "model.layers.1.block_sparse_moe.experts.57.w3", "model.layers.1.block_sparse_moe.experts.58.w3", "model.layers.1.block_sparse_moe.experts.59.w3", "model.layers.1.block_sparse_moe.experts.60.w3", "model.layers.1.block_sparse_moe.experts.61.w3", "model.layers.1.block_sparse_moe.experts.62.w3", "model.layers.1.block_sparse_moe.experts.63.w3", "model.layers.1.block_sparse_moe.experts.64.w3", "model.layers.1.block_sparse_moe.experts.65.w3", "model.layers.1.block_sparse_moe.experts.66.w3", "model.layers.1.block_sparse_moe.experts.67.w3", "model.layers.1.block_sparse_moe.experts.68.w3", "model.layers.1.block_sparse_moe.experts.69.w3", "model.layers.1.block_sparse_moe.experts.70.w3", "model.layers.1.block_sparse_moe.experts.71.w3", "model.layers.1.block_sparse_moe.experts.72.w3", "model.layers.1.block_sparse_moe.experts.73.w3", "model.layers.1.block_sparse_moe.experts.74.w3", "model.layers.1.block_sparse_moe.experts.75.w3", "model.layers.1.block_sparse_moe.experts.76.w3", "model.layers.1.block_sparse_moe.experts.77.w3", "model.layers.1.block_sparse_moe.experts.78.w3", "model.layers.1.block_sparse_moe.experts.79.w3", "model.layers.1.block_sparse_moe.experts.80.w3", "model.layers.1.block_sparse_moe.experts.81.w3", "model.layers.1.block_sparse_moe.experts.82.w3", "model.layers.1.block_sparse_moe.experts.83.w3", "model.layers.1.block_sparse_moe.experts.84.w3", "model.layers.1.block_sparse_moe.experts.85.w3", "model.layers.1.block_sparse_moe.experts.86.w3", "model.layers.1.block_sparse_moe.experts.87.w3", "model.layers.1.block_sparse_moe.experts.88.w3", "model.layers.1.block_sparse_moe.experts.89.w3", "model.layers.1.block_sparse_moe.experts.90.w3", "model.layers.1.block_sparse_moe.experts.91.w3", "model.layers.1.block_sparse_moe.experts.92.w3", "model.layers.1.block_sparse_moe.experts.93.w3", "model.layers.1.block_sparse_moe.experts.94.w3", "model.layers.1.block_sparse_moe.experts.95.w3", "model.layers.1.block_sparse_moe.experts.96.w3", "model.layers.1.block_sparse_moe.experts.97.w3", "model.layers.1.block_sparse_moe.experts.98.w3", "model.layers.1.block_sparse_moe.experts.99.w3", "model.layers.1.block_sparse_moe.experts.100.w3", "model.layers.1.block_sparse_moe.experts.101.w3", "model.layers.1.block_sparse_moe.experts.102.w3", "model.layers.1.block_sparse_moe.experts.103.w3", "model.layers.1.block_sparse_moe.experts.104.w3", "model.layers.1.block_sparse_moe.experts.105.w3", "model.layers.1.block_sparse_moe.experts.106.w3", "model.layers.1.block_sparse_moe.experts.107.w3", "model.layers.1.block_sparse_moe.experts.108.w3", "model.layers.1.block_sparse_moe.experts.109.w3", "model.layers.1.block_sparse_moe.experts.110.w3", "model.layers.1.block_sparse_moe.experts.111.w3", "model.layers.1.block_sparse_moe.experts.112.w3", "model.layers.1.block_sparse_moe.experts.113.w3", "model.layers.1.block_sparse_moe.experts.114.w3", "model.layers.1.block_sparse_moe.experts.115.w3", "model.layers.1.block_sparse_moe.experts.116.w3", "model.layers.1.block_sparse_moe.experts.117.w3", "model.layers.1.block_sparse_moe.experts.118.w3", "model.layers.1.block_sparse_moe.experts.119.w3", "model.layers.1.block_sparse_moe.experts.120.w3", "model.layers.1.block_sparse_moe.experts.121.w3", "model.layers.1.block_sparse_moe.experts.122.w3", "model.layers.1.block_sparse_moe.experts.123.w3", "model.layers.1.block_sparse_moe.experts.124.w3", "model.layers.1.block_sparse_moe.experts.125.w3", "model.layers.1.block_sparse_moe.experts.126.w3", "model.layers.1.block_sparse_moe.experts.127.w3", "model.layers.1.block_sparse_moe.experts.128.w3", "model.layers.1.block_sparse_moe.experts.129.w3", "model.layers.1.block_sparse_moe.experts.130.w3", "model.layers.1.block_sparse_moe.experts.131.w3", "model.layers.1.block_sparse_moe.experts.132.w3", "model.layers.1.block_sparse_moe.experts.133.w3", "model.layers.1.block_sparse_moe.experts.134.w3", "model.layers.1.block_sparse_moe.experts.135.w3", "model.layers.1.block_sparse_moe.experts.136.w3", "model.layers.1.block_sparse_moe.experts.137.w3", "model.layers.1.block_sparse_moe.experts.138.w3", "model.layers.1.block_sparse_moe.experts.139.w3", "model.layers.1.block_sparse_moe.experts.140.w3", "model.layers.1.block_sparse_moe.experts.141.w3", "model.layers.1.block_sparse_moe.experts.142.w3", "model.layers.1.block_sparse_moe.experts.143.w3", "model.layers.1.block_sparse_moe.experts.144.w3", "model.layers.1.block_sparse_moe.experts.145.w3", "model.layers.1.block_sparse_moe.experts.146.w3", "model.layers.1.block_sparse_moe.experts.147.w3", "model.layers.1.block_sparse_moe.experts.148.w3", "model.layers.1.block_sparse_moe.experts.149.w3", "model.layers.1.block_sparse_moe.experts.150.w3", "model.layers.1.block_sparse_moe.experts.151.w3", "model.layers.1.block_sparse_moe.experts.152.w3", "model.layers.1.block_sparse_moe.experts.153.w3", "model.layers.1.block_sparse_moe.experts.154.w3", "model.layers.1.block_sparse_moe.experts.155.w3", "model.layers.1.block_sparse_moe.experts.156.w3", "model.layers.1.block_sparse_moe.experts.157.w3", "model.layers.1.block_sparse_moe.experts.158.w3", "model.layers.1.block_sparse_moe.experts.159.w3", "model.layers.1.block_sparse_moe.experts.160.w3", "model.layers.1.block_sparse_moe.experts.161.w3", "model.layers.1.block_sparse_moe.experts.162.w3", "model.layers.1.block_sparse_moe.experts.163.w3", "model.layers.1.block_sparse_moe.experts.164.w3", "model.layers.1.block_sparse_moe.experts.165.w3", "model.layers.1.block_sparse_moe.experts.166.w3", "model.layers.1.block_sparse_moe.experts.167.w3", "model.layers.1.block_sparse_moe.experts.168.w3", "model.layers.1.block_sparse_moe.experts.169.w3", "model.layers.1.block_sparse_moe.experts.170.w3", "model.layers.1.block_sparse_moe.experts.171.w3", "model.layers.1.block_sparse_moe.experts.172.w3", "model.layers.1.block_sparse_moe.experts.173.w3", "model.layers.1.block_sparse_moe.experts.174.w3", "model.layers.1.block_sparse_moe.experts.175.w3", "model.layers.1.block_sparse_moe.experts.176.w3", "model.layers.1.block_sparse_moe.experts.177.w3", "model.layers.1.block_sparse_moe.experts.178.w3", "model.layers.1.block_sparse_moe.experts.179.w3", "model.layers.1.block_sparse_moe.experts.180.w3", "model.layers.1.block_sparse_moe.experts.181.w3", "model.layers.1.block_sparse_moe.experts.182.w3", "model.layers.1.block_sparse_moe.experts.183.w3", "model.layers.1.block_sparse_moe.experts.184.w3", "model.layers.1.block_sparse_moe.experts.185.w3", "model.layers.1.block_sparse_moe.experts.186.w3", "model.layers.1.block_sparse_moe.experts.187.w3", "model.layers.1.block_sparse_moe.experts.188.w3", "model.layers.1.block_sparse_moe.experts.189.w3", "model.layers.1.block_sparse_moe.experts.190.w3", "model.layers.1.block_sparse_moe.experts.191.w3", "model.layers.1.block_sparse_moe.experts.192.w3", "model.layers.1.block_sparse_moe.experts.193.w3", "model.layers.1.block_sparse_moe.experts.194.w3", "model.layers.1.block_sparse_moe.experts.195.w3", "model.layers.1.block_sparse_moe.experts.196.w3", "model.layers.1.block_sparse_moe.experts.197.w3", "model.layers.1.block_sparse_moe.experts.198.w3", "model.layers.1.block_sparse_moe.experts.199.w3", "model.layers.1.block_sparse_moe.experts.200.w3", "model.layers.1.block_sparse_moe.experts.201.w3", "model.layers.1.block_sparse_moe.experts.202.w3", "model.layers.1.block_sparse_moe.experts.203.w3", "model.layers.1.block_sparse_moe.experts.204.w3", "model.layers.1.block_sparse_moe.experts.205.w3", "model.layers.1.block_sparse_moe.experts.206.w3", "model.layers.1.block_sparse_moe.experts.207.w3", "model.layers.1.block_sparse_moe.experts.208.w3", "model.layers.1.block_sparse_moe.experts.209.w3", "model.layers.1.block_sparse_moe.experts.210.w3", "model.layers.1.block_sparse_moe.experts.211.w3", "model.layers.1.block_sparse_moe.experts.212.w3", "model.layers.1.block_sparse_moe.experts.213.w3", "model.layers.1.block_sparse_moe.experts.214.w3", "model.layers.1.block_sparse_moe.experts.215.w3", "model.layers.1.block_sparse_moe.experts.216.w3", "model.layers.1.block_sparse_moe.experts.217.w3", "model.layers.1.block_sparse_moe.experts.218.w3", "model.layers.1.block_sparse_moe.experts.219.w3", "model.layers.1.block_sparse_moe.experts.220.w3", "model.layers.1.block_sparse_moe.experts.221.w3", "model.layers.1.block_sparse_moe.experts.222.w3", "model.layers.1.block_sparse_moe.experts.223.w3", "model.layers.1.block_sparse_moe.experts.224.w3", "model.layers.1.block_sparse_moe.experts.225.w3", "model.layers.1.block_sparse_moe.experts.226.w3", "model.layers.1.block_sparse_moe.experts.227.w3", "model.layers.1.block_sparse_moe.experts.228.w3", "model.layers.1.block_sparse_moe.experts.229.w3", "model.layers.1.block_sparse_moe.experts.230.w3", "model.layers.1.block_sparse_moe.experts.231.w3", "model.layers.1.block_sparse_moe.experts.232.w3", "model.layers.1.block_sparse_moe.experts.233.w3", "model.layers.1.block_sparse_moe.experts.234.w3", "model.layers.1.block_sparse_moe.experts.235.w3", "model.layers.1.block_sparse_moe.experts.236.w3", "model.layers.1.block_sparse_moe.experts.237.w3", "model.layers.1.block_sparse_moe.experts.238.w3", "model.layers.1.block_sparse_moe.experts.239.w3", "model.layers.1.block_sparse_moe.experts.240.w3", "model.layers.1.block_sparse_moe.experts.241.w3", "model.layers.1.block_sparse_moe.experts.242.w3", "model.layers.1.block_sparse_moe.experts.243.w3", "model.layers.1.block_sparse_moe.experts.244.w3", "model.layers.1.block_sparse_moe.experts.245.w3", "model.layers.1.block_sparse_moe.experts.246.w3", "model.layers.1.block_sparse_moe.experts.247.w3", "model.layers.1.block_sparse_moe.experts.248.w3", "model.layers.1.block_sparse_moe.experts.249.w3", "model.layers.1.block_sparse_moe.experts.250.w3", "model.layers.1.block_sparse_moe.experts.251.w3", "model.layers.1.block_sparse_moe.experts.252.w3", "model.layers.1.block_sparse_moe.experts.253.w3", "model.layers.1.block_sparse_moe.experts.254.w3", "model.layers.1.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0005475036799907629, "dbits": 2415919104 } ] }, { "idx": 9, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w2", "model.layers.1.block_sparse_moe.experts.1.w2", "model.layers.1.block_sparse_moe.experts.2.w2", "model.layers.1.block_sparse_moe.experts.3.w2", "model.layers.1.block_sparse_moe.experts.4.w2", "model.layers.1.block_sparse_moe.experts.5.w2", "model.layers.1.block_sparse_moe.experts.6.w2", "model.layers.1.block_sparse_moe.experts.7.w2", "model.layers.1.block_sparse_moe.experts.8.w2", "model.layers.1.block_sparse_moe.experts.9.w2", "model.layers.1.block_sparse_moe.experts.10.w2", "model.layers.1.block_sparse_moe.experts.11.w2", "model.layers.1.block_sparse_moe.experts.12.w2", "model.layers.1.block_sparse_moe.experts.13.w2", "model.layers.1.block_sparse_moe.experts.14.w2", "model.layers.1.block_sparse_moe.experts.15.w2", "model.layers.1.block_sparse_moe.experts.16.w2", "model.layers.1.block_sparse_moe.experts.17.w2", "model.layers.1.block_sparse_moe.experts.18.w2", "model.layers.1.block_sparse_moe.experts.19.w2", "model.layers.1.block_sparse_moe.experts.20.w2", "model.layers.1.block_sparse_moe.experts.21.w2", "model.layers.1.block_sparse_moe.experts.22.w2", "model.layers.1.block_sparse_moe.experts.23.w2", "model.layers.1.block_sparse_moe.experts.24.w2", "model.layers.1.block_sparse_moe.experts.25.w2", "model.layers.1.block_sparse_moe.experts.26.w2", "model.layers.1.block_sparse_moe.experts.27.w2", "model.layers.1.block_sparse_moe.experts.28.w2", "model.layers.1.block_sparse_moe.experts.29.w2", "model.layers.1.block_sparse_moe.experts.30.w2", "model.layers.1.block_sparse_moe.experts.31.w2", "model.layers.1.block_sparse_moe.experts.32.w2", "model.layers.1.block_sparse_moe.experts.33.w2", "model.layers.1.block_sparse_moe.experts.34.w2", "model.layers.1.block_sparse_moe.experts.35.w2", "model.layers.1.block_sparse_moe.experts.36.w2", "model.layers.1.block_sparse_moe.experts.37.w2", "model.layers.1.block_sparse_moe.experts.38.w2", "model.layers.1.block_sparse_moe.experts.39.w2", "model.layers.1.block_sparse_moe.experts.40.w2", "model.layers.1.block_sparse_moe.experts.41.w2", "model.layers.1.block_sparse_moe.experts.42.w2", "model.layers.1.block_sparse_moe.experts.43.w2", "model.layers.1.block_sparse_moe.experts.44.w2", "model.layers.1.block_sparse_moe.experts.45.w2", "model.layers.1.block_sparse_moe.experts.46.w2", "model.layers.1.block_sparse_moe.experts.47.w2", "model.layers.1.block_sparse_moe.experts.48.w2", "model.layers.1.block_sparse_moe.experts.49.w2", "model.layers.1.block_sparse_moe.experts.50.w2", "model.layers.1.block_sparse_moe.experts.51.w2", "model.layers.1.block_sparse_moe.experts.52.w2", "model.layers.1.block_sparse_moe.experts.53.w2", "model.layers.1.block_sparse_moe.experts.54.w2", "model.layers.1.block_sparse_moe.experts.55.w2", "model.layers.1.block_sparse_moe.experts.56.w2", "model.layers.1.block_sparse_moe.experts.57.w2", "model.layers.1.block_sparse_moe.experts.58.w2", "model.layers.1.block_sparse_moe.experts.59.w2", "model.layers.1.block_sparse_moe.experts.60.w2", "model.layers.1.block_sparse_moe.experts.61.w2", "model.layers.1.block_sparse_moe.experts.62.w2", "model.layers.1.block_sparse_moe.experts.63.w2", "model.layers.1.block_sparse_moe.experts.64.w2", "model.layers.1.block_sparse_moe.experts.65.w2", "model.layers.1.block_sparse_moe.experts.66.w2", "model.layers.1.block_sparse_moe.experts.67.w2", "model.layers.1.block_sparse_moe.experts.68.w2", "model.layers.1.block_sparse_moe.experts.69.w2", "model.layers.1.block_sparse_moe.experts.70.w2", "model.layers.1.block_sparse_moe.experts.71.w2", "model.layers.1.block_sparse_moe.experts.72.w2", "model.layers.1.block_sparse_moe.experts.73.w2", "model.layers.1.block_sparse_moe.experts.74.w2", "model.layers.1.block_sparse_moe.experts.75.w2", "model.layers.1.block_sparse_moe.experts.76.w2", "model.layers.1.block_sparse_moe.experts.77.w2", "model.layers.1.block_sparse_moe.experts.78.w2", "model.layers.1.block_sparse_moe.experts.79.w2", "model.layers.1.block_sparse_moe.experts.80.w2", "model.layers.1.block_sparse_moe.experts.81.w2", "model.layers.1.block_sparse_moe.experts.82.w2", "model.layers.1.block_sparse_moe.experts.83.w2", "model.layers.1.block_sparse_moe.experts.84.w2", "model.layers.1.block_sparse_moe.experts.85.w2", "model.layers.1.block_sparse_moe.experts.86.w2", "model.layers.1.block_sparse_moe.experts.87.w2", "model.layers.1.block_sparse_moe.experts.88.w2", "model.layers.1.block_sparse_moe.experts.89.w2", "model.layers.1.block_sparse_moe.experts.90.w2", "model.layers.1.block_sparse_moe.experts.91.w2", "model.layers.1.block_sparse_moe.experts.92.w2", "model.layers.1.block_sparse_moe.experts.93.w2", "model.layers.1.block_sparse_moe.experts.94.w2", "model.layers.1.block_sparse_moe.experts.95.w2", "model.layers.1.block_sparse_moe.experts.96.w2", "model.layers.1.block_sparse_moe.experts.97.w2", "model.layers.1.block_sparse_moe.experts.98.w2", "model.layers.1.block_sparse_moe.experts.99.w2", "model.layers.1.block_sparse_moe.experts.100.w2", "model.layers.1.block_sparse_moe.experts.101.w2", "model.layers.1.block_sparse_moe.experts.102.w2", "model.layers.1.block_sparse_moe.experts.103.w2", "model.layers.1.block_sparse_moe.experts.104.w2", "model.layers.1.block_sparse_moe.experts.105.w2", "model.layers.1.block_sparse_moe.experts.106.w2", "model.layers.1.block_sparse_moe.experts.107.w2", "model.layers.1.block_sparse_moe.experts.108.w2", "model.layers.1.block_sparse_moe.experts.109.w2", "model.layers.1.block_sparse_moe.experts.110.w2", "model.layers.1.block_sparse_moe.experts.111.w2", "model.layers.1.block_sparse_moe.experts.112.w2", "model.layers.1.block_sparse_moe.experts.113.w2", "model.layers.1.block_sparse_moe.experts.114.w2", "model.layers.1.block_sparse_moe.experts.115.w2", "model.layers.1.block_sparse_moe.experts.116.w2", "model.layers.1.block_sparse_moe.experts.117.w2", "model.layers.1.block_sparse_moe.experts.118.w2", "model.layers.1.block_sparse_moe.experts.119.w2", "model.layers.1.block_sparse_moe.experts.120.w2", "model.layers.1.block_sparse_moe.experts.121.w2", "model.layers.1.block_sparse_moe.experts.122.w2", "model.layers.1.block_sparse_moe.experts.123.w2", "model.layers.1.block_sparse_moe.experts.124.w2", "model.layers.1.block_sparse_moe.experts.125.w2", "model.layers.1.block_sparse_moe.experts.126.w2", "model.layers.1.block_sparse_moe.experts.127.w2", "model.layers.1.block_sparse_moe.experts.128.w2", "model.layers.1.block_sparse_moe.experts.129.w2", "model.layers.1.block_sparse_moe.experts.130.w2", "model.layers.1.block_sparse_moe.experts.131.w2", "model.layers.1.block_sparse_moe.experts.132.w2", "model.layers.1.block_sparse_moe.experts.133.w2", "model.layers.1.block_sparse_moe.experts.134.w2", "model.layers.1.block_sparse_moe.experts.135.w2", "model.layers.1.block_sparse_moe.experts.136.w2", "model.layers.1.block_sparse_moe.experts.137.w2", "model.layers.1.block_sparse_moe.experts.138.w2", "model.layers.1.block_sparse_moe.experts.139.w2", "model.layers.1.block_sparse_moe.experts.140.w2", "model.layers.1.block_sparse_moe.experts.141.w2", "model.layers.1.block_sparse_moe.experts.142.w2", "model.layers.1.block_sparse_moe.experts.143.w2", "model.layers.1.block_sparse_moe.experts.144.w2", "model.layers.1.block_sparse_moe.experts.145.w2", "model.layers.1.block_sparse_moe.experts.146.w2", "model.layers.1.block_sparse_moe.experts.147.w2", "model.layers.1.block_sparse_moe.experts.148.w2", "model.layers.1.block_sparse_moe.experts.149.w2", "model.layers.1.block_sparse_moe.experts.150.w2", "model.layers.1.block_sparse_moe.experts.151.w2", "model.layers.1.block_sparse_moe.experts.152.w2", "model.layers.1.block_sparse_moe.experts.153.w2", "model.layers.1.block_sparse_moe.experts.154.w2", "model.layers.1.block_sparse_moe.experts.155.w2", "model.layers.1.block_sparse_moe.experts.156.w2", "model.layers.1.block_sparse_moe.experts.157.w2", "model.layers.1.block_sparse_moe.experts.158.w2", "model.layers.1.block_sparse_moe.experts.159.w2", "model.layers.1.block_sparse_moe.experts.160.w2", "model.layers.1.block_sparse_moe.experts.161.w2", "model.layers.1.block_sparse_moe.experts.162.w2", "model.layers.1.block_sparse_moe.experts.163.w2", "model.layers.1.block_sparse_moe.experts.164.w2", "model.layers.1.block_sparse_moe.experts.165.w2", "model.layers.1.block_sparse_moe.experts.166.w2", "model.layers.1.block_sparse_moe.experts.167.w2", "model.layers.1.block_sparse_moe.experts.168.w2", "model.layers.1.block_sparse_moe.experts.169.w2", "model.layers.1.block_sparse_moe.experts.170.w2", "model.layers.1.block_sparse_moe.experts.171.w2", "model.layers.1.block_sparse_moe.experts.172.w2", "model.layers.1.block_sparse_moe.experts.173.w2", "model.layers.1.block_sparse_moe.experts.174.w2", "model.layers.1.block_sparse_moe.experts.175.w2", "model.layers.1.block_sparse_moe.experts.176.w2", "model.layers.1.block_sparse_moe.experts.177.w2", "model.layers.1.block_sparse_moe.experts.178.w2", "model.layers.1.block_sparse_moe.experts.179.w2", "model.layers.1.block_sparse_moe.experts.180.w2", "model.layers.1.block_sparse_moe.experts.181.w2", "model.layers.1.block_sparse_moe.experts.182.w2", "model.layers.1.block_sparse_moe.experts.183.w2", "model.layers.1.block_sparse_moe.experts.184.w2", "model.layers.1.block_sparse_moe.experts.185.w2", "model.layers.1.block_sparse_moe.experts.186.w2", "model.layers.1.block_sparse_moe.experts.187.w2", "model.layers.1.block_sparse_moe.experts.188.w2", "model.layers.1.block_sparse_moe.experts.189.w2", "model.layers.1.block_sparse_moe.experts.190.w2", "model.layers.1.block_sparse_moe.experts.191.w2", "model.layers.1.block_sparse_moe.experts.192.w2", "model.layers.1.block_sparse_moe.experts.193.w2", "model.layers.1.block_sparse_moe.experts.194.w2", "model.layers.1.block_sparse_moe.experts.195.w2", "model.layers.1.block_sparse_moe.experts.196.w2", "model.layers.1.block_sparse_moe.experts.197.w2", "model.layers.1.block_sparse_moe.experts.198.w2", "model.layers.1.block_sparse_moe.experts.199.w2", "model.layers.1.block_sparse_moe.experts.200.w2", "model.layers.1.block_sparse_moe.experts.201.w2", "model.layers.1.block_sparse_moe.experts.202.w2", "model.layers.1.block_sparse_moe.experts.203.w2", "model.layers.1.block_sparse_moe.experts.204.w2", "model.layers.1.block_sparse_moe.experts.205.w2", "model.layers.1.block_sparse_moe.experts.206.w2", "model.layers.1.block_sparse_moe.experts.207.w2", "model.layers.1.block_sparse_moe.experts.208.w2", "model.layers.1.block_sparse_moe.experts.209.w2", "model.layers.1.block_sparse_moe.experts.210.w2", "model.layers.1.block_sparse_moe.experts.211.w2", "model.layers.1.block_sparse_moe.experts.212.w2", "model.layers.1.block_sparse_moe.experts.213.w2", "model.layers.1.block_sparse_moe.experts.214.w2", "model.layers.1.block_sparse_moe.experts.215.w2", "model.layers.1.block_sparse_moe.experts.216.w2", "model.layers.1.block_sparse_moe.experts.217.w2", "model.layers.1.block_sparse_moe.experts.218.w2", "model.layers.1.block_sparse_moe.experts.219.w2", "model.layers.1.block_sparse_moe.experts.220.w2", "model.layers.1.block_sparse_moe.experts.221.w2", "model.layers.1.block_sparse_moe.experts.222.w2", "model.layers.1.block_sparse_moe.experts.223.w2", "model.layers.1.block_sparse_moe.experts.224.w2", "model.layers.1.block_sparse_moe.experts.225.w2", "model.layers.1.block_sparse_moe.experts.226.w2", "model.layers.1.block_sparse_moe.experts.227.w2", "model.layers.1.block_sparse_moe.experts.228.w2", "model.layers.1.block_sparse_moe.experts.229.w2", "model.layers.1.block_sparse_moe.experts.230.w2", "model.layers.1.block_sparse_moe.experts.231.w2", "model.layers.1.block_sparse_moe.experts.232.w2", "model.layers.1.block_sparse_moe.experts.233.w2", "model.layers.1.block_sparse_moe.experts.234.w2", "model.layers.1.block_sparse_moe.experts.235.w2", "model.layers.1.block_sparse_moe.experts.236.w2", "model.layers.1.block_sparse_moe.experts.237.w2", "model.layers.1.block_sparse_moe.experts.238.w2", "model.layers.1.block_sparse_moe.experts.239.w2", "model.layers.1.block_sparse_moe.experts.240.w2", "model.layers.1.block_sparse_moe.experts.241.w2", "model.layers.1.block_sparse_moe.experts.242.w2", "model.layers.1.block_sparse_moe.experts.243.w2", "model.layers.1.block_sparse_moe.experts.244.w2", "model.layers.1.block_sparse_moe.experts.245.w2", "model.layers.1.block_sparse_moe.experts.246.w2", "model.layers.1.block_sparse_moe.experts.247.w2", "model.layers.1.block_sparse_moe.experts.248.w2", "model.layers.1.block_sparse_moe.experts.249.w2", "model.layers.1.block_sparse_moe.experts.250.w2", "model.layers.1.block_sparse_moe.experts.251.w2", "model.layers.1.block_sparse_moe.experts.252.w2", "model.layers.1.block_sparse_moe.experts.253.w2", "model.layers.1.block_sparse_moe.experts.254.w2", "model.layers.1.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0001378225162625285, "dbits": 1207959552 } ] }, { "idx": 10, "layers": [ "model.layers.2.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00018764939159154892, "dbits": 18874368 } ] }, { "idx": 11, "layers": [ "model.layers.2.self_attn.k_proj", "model.layers.2.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0005021182820200976, "dbits": 6291456 } ] }, { "idx": 12, "layers": [ "model.layers.2.self_attn.o_proj" ], "candidates": [ { "dkld": 1.5557091683146562e-05, "dbits": 18874368 } ] }, { "idx": 13, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w1", "model.layers.2.block_sparse_moe.experts.1.w1", "model.layers.2.block_sparse_moe.experts.2.w1", "model.layers.2.block_sparse_moe.experts.3.w1", "model.layers.2.block_sparse_moe.experts.4.w1", "model.layers.2.block_sparse_moe.experts.5.w1", "model.layers.2.block_sparse_moe.experts.6.w1", "model.layers.2.block_sparse_moe.experts.7.w1", "model.layers.2.block_sparse_moe.experts.8.w1", "model.layers.2.block_sparse_moe.experts.9.w1", "model.layers.2.block_sparse_moe.experts.10.w1", "model.layers.2.block_sparse_moe.experts.11.w1", "model.layers.2.block_sparse_moe.experts.12.w1", "model.layers.2.block_sparse_moe.experts.13.w1", "model.layers.2.block_sparse_moe.experts.14.w1", "model.layers.2.block_sparse_moe.experts.15.w1", "model.layers.2.block_sparse_moe.experts.16.w1", "model.layers.2.block_sparse_moe.experts.17.w1", "model.layers.2.block_sparse_moe.experts.18.w1", "model.layers.2.block_sparse_moe.experts.19.w1", "model.layers.2.block_sparse_moe.experts.20.w1", "model.layers.2.block_sparse_moe.experts.21.w1", "model.layers.2.block_sparse_moe.experts.22.w1", "model.layers.2.block_sparse_moe.experts.23.w1", "model.layers.2.block_sparse_moe.experts.24.w1", "model.layers.2.block_sparse_moe.experts.25.w1", "model.layers.2.block_sparse_moe.experts.26.w1", "model.layers.2.block_sparse_moe.experts.27.w1", "model.layers.2.block_sparse_moe.experts.28.w1", "model.layers.2.block_sparse_moe.experts.29.w1", "model.layers.2.block_sparse_moe.experts.30.w1", "model.layers.2.block_sparse_moe.experts.31.w1", "model.layers.2.block_sparse_moe.experts.32.w1", "model.layers.2.block_sparse_moe.experts.33.w1", "model.layers.2.block_sparse_moe.experts.34.w1", "model.layers.2.block_sparse_moe.experts.35.w1", "model.layers.2.block_sparse_moe.experts.36.w1", "model.layers.2.block_sparse_moe.experts.37.w1", "model.layers.2.block_sparse_moe.experts.38.w1", "model.layers.2.block_sparse_moe.experts.39.w1", "model.layers.2.block_sparse_moe.experts.40.w1", "model.layers.2.block_sparse_moe.experts.41.w1", "model.layers.2.block_sparse_moe.experts.42.w1", "model.layers.2.block_sparse_moe.experts.43.w1", "model.layers.2.block_sparse_moe.experts.44.w1", "model.layers.2.block_sparse_moe.experts.45.w1", "model.layers.2.block_sparse_moe.experts.46.w1", "model.layers.2.block_sparse_moe.experts.47.w1", "model.layers.2.block_sparse_moe.experts.48.w1", "model.layers.2.block_sparse_moe.experts.49.w1", "model.layers.2.block_sparse_moe.experts.50.w1", "model.layers.2.block_sparse_moe.experts.51.w1", "model.layers.2.block_sparse_moe.experts.52.w1", "model.layers.2.block_sparse_moe.experts.53.w1", "model.layers.2.block_sparse_moe.experts.54.w1", "model.layers.2.block_sparse_moe.experts.55.w1", "model.layers.2.block_sparse_moe.experts.56.w1", "model.layers.2.block_sparse_moe.experts.57.w1", "model.layers.2.block_sparse_moe.experts.58.w1", "model.layers.2.block_sparse_moe.experts.59.w1", "model.layers.2.block_sparse_moe.experts.60.w1", "model.layers.2.block_sparse_moe.experts.61.w1", "model.layers.2.block_sparse_moe.experts.62.w1", "model.layers.2.block_sparse_moe.experts.63.w1", "model.layers.2.block_sparse_moe.experts.64.w1", "model.layers.2.block_sparse_moe.experts.65.w1", "model.layers.2.block_sparse_moe.experts.66.w1", "model.layers.2.block_sparse_moe.experts.67.w1", "model.layers.2.block_sparse_moe.experts.68.w1", "model.layers.2.block_sparse_moe.experts.69.w1", "model.layers.2.block_sparse_moe.experts.70.w1", "model.layers.2.block_sparse_moe.experts.71.w1", "model.layers.2.block_sparse_moe.experts.72.w1", "model.layers.2.block_sparse_moe.experts.73.w1", "model.layers.2.block_sparse_moe.experts.74.w1", "model.layers.2.block_sparse_moe.experts.75.w1", "model.layers.2.block_sparse_moe.experts.76.w1", "model.layers.2.block_sparse_moe.experts.77.w1", "model.layers.2.block_sparse_moe.experts.78.w1", "model.layers.2.block_sparse_moe.experts.79.w1", "model.layers.2.block_sparse_moe.experts.80.w1", "model.layers.2.block_sparse_moe.experts.81.w1", "model.layers.2.block_sparse_moe.experts.82.w1", "model.layers.2.block_sparse_moe.experts.83.w1", "model.layers.2.block_sparse_moe.experts.84.w1", "model.layers.2.block_sparse_moe.experts.85.w1", "model.layers.2.block_sparse_moe.experts.86.w1", "model.layers.2.block_sparse_moe.experts.87.w1", "model.layers.2.block_sparse_moe.experts.88.w1", "model.layers.2.block_sparse_moe.experts.89.w1", "model.layers.2.block_sparse_moe.experts.90.w1", "model.layers.2.block_sparse_moe.experts.91.w1", "model.layers.2.block_sparse_moe.experts.92.w1", "model.layers.2.block_sparse_moe.experts.93.w1", "model.layers.2.block_sparse_moe.experts.94.w1", "model.layers.2.block_sparse_moe.experts.95.w1", "model.layers.2.block_sparse_moe.experts.96.w1", "model.layers.2.block_sparse_moe.experts.97.w1", "model.layers.2.block_sparse_moe.experts.98.w1", "model.layers.2.block_sparse_moe.experts.99.w1", "model.layers.2.block_sparse_moe.experts.100.w1", "model.layers.2.block_sparse_moe.experts.101.w1", "model.layers.2.block_sparse_moe.experts.102.w1", "model.layers.2.block_sparse_moe.experts.103.w1", "model.layers.2.block_sparse_moe.experts.104.w1", "model.layers.2.block_sparse_moe.experts.105.w1", "model.layers.2.block_sparse_moe.experts.106.w1", "model.layers.2.block_sparse_moe.experts.107.w1", "model.layers.2.block_sparse_moe.experts.108.w1", "model.layers.2.block_sparse_moe.experts.109.w1", "model.layers.2.block_sparse_moe.experts.110.w1", "model.layers.2.block_sparse_moe.experts.111.w1", "model.layers.2.block_sparse_moe.experts.112.w1", "model.layers.2.block_sparse_moe.experts.113.w1", "model.layers.2.block_sparse_moe.experts.114.w1", "model.layers.2.block_sparse_moe.experts.115.w1", "model.layers.2.block_sparse_moe.experts.116.w1", "model.layers.2.block_sparse_moe.experts.117.w1", "model.layers.2.block_sparse_moe.experts.118.w1", "model.layers.2.block_sparse_moe.experts.119.w1", "model.layers.2.block_sparse_moe.experts.120.w1", "model.layers.2.block_sparse_moe.experts.121.w1", "model.layers.2.block_sparse_moe.experts.122.w1", "model.layers.2.block_sparse_moe.experts.123.w1", "model.layers.2.block_sparse_moe.experts.124.w1", "model.layers.2.block_sparse_moe.experts.125.w1", "model.layers.2.block_sparse_moe.experts.126.w1", "model.layers.2.block_sparse_moe.experts.127.w1", "model.layers.2.block_sparse_moe.experts.128.w1", "model.layers.2.block_sparse_moe.experts.129.w1", "model.layers.2.block_sparse_moe.experts.130.w1", "model.layers.2.block_sparse_moe.experts.131.w1", "model.layers.2.block_sparse_moe.experts.132.w1", "model.layers.2.block_sparse_moe.experts.133.w1", "model.layers.2.block_sparse_moe.experts.134.w1", "model.layers.2.block_sparse_moe.experts.135.w1", "model.layers.2.block_sparse_moe.experts.136.w1", "model.layers.2.block_sparse_moe.experts.137.w1", "model.layers.2.block_sparse_moe.experts.138.w1", "model.layers.2.block_sparse_moe.experts.139.w1", "model.layers.2.block_sparse_moe.experts.140.w1", "model.layers.2.block_sparse_moe.experts.141.w1", "model.layers.2.block_sparse_moe.experts.142.w1", "model.layers.2.block_sparse_moe.experts.143.w1", "model.layers.2.block_sparse_moe.experts.144.w1", "model.layers.2.block_sparse_moe.experts.145.w1", "model.layers.2.block_sparse_moe.experts.146.w1", "model.layers.2.block_sparse_moe.experts.147.w1", "model.layers.2.block_sparse_moe.experts.148.w1", "model.layers.2.block_sparse_moe.experts.149.w1", "model.layers.2.block_sparse_moe.experts.150.w1", "model.layers.2.block_sparse_moe.experts.151.w1", "model.layers.2.block_sparse_moe.experts.152.w1", "model.layers.2.block_sparse_moe.experts.153.w1", "model.layers.2.block_sparse_moe.experts.154.w1", "model.layers.2.block_sparse_moe.experts.155.w1", "model.layers.2.block_sparse_moe.experts.156.w1", "model.layers.2.block_sparse_moe.experts.157.w1", "model.layers.2.block_sparse_moe.experts.158.w1", "model.layers.2.block_sparse_moe.experts.159.w1", "model.layers.2.block_sparse_moe.experts.160.w1", "model.layers.2.block_sparse_moe.experts.161.w1", "model.layers.2.block_sparse_moe.experts.162.w1", "model.layers.2.block_sparse_moe.experts.163.w1", "model.layers.2.block_sparse_moe.experts.164.w1", "model.layers.2.block_sparse_moe.experts.165.w1", "model.layers.2.block_sparse_moe.experts.166.w1", "model.layers.2.block_sparse_moe.experts.167.w1", "model.layers.2.block_sparse_moe.experts.168.w1", "model.layers.2.block_sparse_moe.experts.169.w1", "model.layers.2.block_sparse_moe.experts.170.w1", "model.layers.2.block_sparse_moe.experts.171.w1", "model.layers.2.block_sparse_moe.experts.172.w1", "model.layers.2.block_sparse_moe.experts.173.w1", "model.layers.2.block_sparse_moe.experts.174.w1", "model.layers.2.block_sparse_moe.experts.175.w1", "model.layers.2.block_sparse_moe.experts.176.w1", "model.layers.2.block_sparse_moe.experts.177.w1", "model.layers.2.block_sparse_moe.experts.178.w1", "model.layers.2.block_sparse_moe.experts.179.w1", "model.layers.2.block_sparse_moe.experts.180.w1", "model.layers.2.block_sparse_moe.experts.181.w1", "model.layers.2.block_sparse_moe.experts.182.w1", "model.layers.2.block_sparse_moe.experts.183.w1", "model.layers.2.block_sparse_moe.experts.184.w1", "model.layers.2.block_sparse_moe.experts.185.w1", "model.layers.2.block_sparse_moe.experts.186.w1", "model.layers.2.block_sparse_moe.experts.187.w1", "model.layers.2.block_sparse_moe.experts.188.w1", "model.layers.2.block_sparse_moe.experts.189.w1", "model.layers.2.block_sparse_moe.experts.190.w1", "model.layers.2.block_sparse_moe.experts.191.w1", "model.layers.2.block_sparse_moe.experts.192.w1", "model.layers.2.block_sparse_moe.experts.193.w1", "model.layers.2.block_sparse_moe.experts.194.w1", "model.layers.2.block_sparse_moe.experts.195.w1", "model.layers.2.block_sparse_moe.experts.196.w1", "model.layers.2.block_sparse_moe.experts.197.w1", "model.layers.2.block_sparse_moe.experts.198.w1", "model.layers.2.block_sparse_moe.experts.199.w1", "model.layers.2.block_sparse_moe.experts.200.w1", "model.layers.2.block_sparse_moe.experts.201.w1", "model.layers.2.block_sparse_moe.experts.202.w1", "model.layers.2.block_sparse_moe.experts.203.w1", "model.layers.2.block_sparse_moe.experts.204.w1", "model.layers.2.block_sparse_moe.experts.205.w1", "model.layers.2.block_sparse_moe.experts.206.w1", "model.layers.2.block_sparse_moe.experts.207.w1", "model.layers.2.block_sparse_moe.experts.208.w1", "model.layers.2.block_sparse_moe.experts.209.w1", "model.layers.2.block_sparse_moe.experts.210.w1", "model.layers.2.block_sparse_moe.experts.211.w1", "model.layers.2.block_sparse_moe.experts.212.w1", "model.layers.2.block_sparse_moe.experts.213.w1", "model.layers.2.block_sparse_moe.experts.214.w1", "model.layers.2.block_sparse_moe.experts.215.w1", "model.layers.2.block_sparse_moe.experts.216.w1", "model.layers.2.block_sparse_moe.experts.217.w1", "model.layers.2.block_sparse_moe.experts.218.w1", "model.layers.2.block_sparse_moe.experts.219.w1", "model.layers.2.block_sparse_moe.experts.220.w1", "model.layers.2.block_sparse_moe.experts.221.w1", "model.layers.2.block_sparse_moe.experts.222.w1", "model.layers.2.block_sparse_moe.experts.223.w1", "model.layers.2.block_sparse_moe.experts.224.w1", "model.layers.2.block_sparse_moe.experts.225.w1", "model.layers.2.block_sparse_moe.experts.226.w1", "model.layers.2.block_sparse_moe.experts.227.w1", "model.layers.2.block_sparse_moe.experts.228.w1", "model.layers.2.block_sparse_moe.experts.229.w1", "model.layers.2.block_sparse_moe.experts.230.w1", "model.layers.2.block_sparse_moe.experts.231.w1", "model.layers.2.block_sparse_moe.experts.232.w1", "model.layers.2.block_sparse_moe.experts.233.w1", "model.layers.2.block_sparse_moe.experts.234.w1", "model.layers.2.block_sparse_moe.experts.235.w1", "model.layers.2.block_sparse_moe.experts.236.w1", "model.layers.2.block_sparse_moe.experts.237.w1", "model.layers.2.block_sparse_moe.experts.238.w1", "model.layers.2.block_sparse_moe.experts.239.w1", "model.layers.2.block_sparse_moe.experts.240.w1", "model.layers.2.block_sparse_moe.experts.241.w1", "model.layers.2.block_sparse_moe.experts.242.w1", "model.layers.2.block_sparse_moe.experts.243.w1", "model.layers.2.block_sparse_moe.experts.244.w1", "model.layers.2.block_sparse_moe.experts.245.w1", "model.layers.2.block_sparse_moe.experts.246.w1", "model.layers.2.block_sparse_moe.experts.247.w1", "model.layers.2.block_sparse_moe.experts.248.w1", "model.layers.2.block_sparse_moe.experts.249.w1", "model.layers.2.block_sparse_moe.experts.250.w1", "model.layers.2.block_sparse_moe.experts.251.w1", "model.layers.2.block_sparse_moe.experts.252.w1", "model.layers.2.block_sparse_moe.experts.253.w1", "model.layers.2.block_sparse_moe.experts.254.w1", "model.layers.2.block_sparse_moe.experts.255.w1", "model.layers.2.block_sparse_moe.experts.0.w3", "model.layers.2.block_sparse_moe.experts.1.w3", "model.layers.2.block_sparse_moe.experts.2.w3", "model.layers.2.block_sparse_moe.experts.3.w3", "model.layers.2.block_sparse_moe.experts.4.w3", "model.layers.2.block_sparse_moe.experts.5.w3", "model.layers.2.block_sparse_moe.experts.6.w3", "model.layers.2.block_sparse_moe.experts.7.w3", "model.layers.2.block_sparse_moe.experts.8.w3", "model.layers.2.block_sparse_moe.experts.9.w3", "model.layers.2.block_sparse_moe.experts.10.w3", "model.layers.2.block_sparse_moe.experts.11.w3", "model.layers.2.block_sparse_moe.experts.12.w3", "model.layers.2.block_sparse_moe.experts.13.w3", "model.layers.2.block_sparse_moe.experts.14.w3", "model.layers.2.block_sparse_moe.experts.15.w3", "model.layers.2.block_sparse_moe.experts.16.w3", "model.layers.2.block_sparse_moe.experts.17.w3", "model.layers.2.block_sparse_moe.experts.18.w3", "model.layers.2.block_sparse_moe.experts.19.w3", "model.layers.2.block_sparse_moe.experts.20.w3", "model.layers.2.block_sparse_moe.experts.21.w3", "model.layers.2.block_sparse_moe.experts.22.w3", "model.layers.2.block_sparse_moe.experts.23.w3", "model.layers.2.block_sparse_moe.experts.24.w3", "model.layers.2.block_sparse_moe.experts.25.w3", "model.layers.2.block_sparse_moe.experts.26.w3", "model.layers.2.block_sparse_moe.experts.27.w3", "model.layers.2.block_sparse_moe.experts.28.w3", "model.layers.2.block_sparse_moe.experts.29.w3", "model.layers.2.block_sparse_moe.experts.30.w3", "model.layers.2.block_sparse_moe.experts.31.w3", "model.layers.2.block_sparse_moe.experts.32.w3", "model.layers.2.block_sparse_moe.experts.33.w3", "model.layers.2.block_sparse_moe.experts.34.w3", "model.layers.2.block_sparse_moe.experts.35.w3", "model.layers.2.block_sparse_moe.experts.36.w3", "model.layers.2.block_sparse_moe.experts.37.w3", "model.layers.2.block_sparse_moe.experts.38.w3", "model.layers.2.block_sparse_moe.experts.39.w3", "model.layers.2.block_sparse_moe.experts.40.w3", "model.layers.2.block_sparse_moe.experts.41.w3", "model.layers.2.block_sparse_moe.experts.42.w3", "model.layers.2.block_sparse_moe.experts.43.w3", "model.layers.2.block_sparse_moe.experts.44.w3", "model.layers.2.block_sparse_moe.experts.45.w3", "model.layers.2.block_sparse_moe.experts.46.w3", "model.layers.2.block_sparse_moe.experts.47.w3", "model.layers.2.block_sparse_moe.experts.48.w3", "model.layers.2.block_sparse_moe.experts.49.w3", "model.layers.2.block_sparse_moe.experts.50.w3", "model.layers.2.block_sparse_moe.experts.51.w3", "model.layers.2.block_sparse_moe.experts.52.w3", "model.layers.2.block_sparse_moe.experts.53.w3", "model.layers.2.block_sparse_moe.experts.54.w3", "model.layers.2.block_sparse_moe.experts.55.w3", "model.layers.2.block_sparse_moe.experts.56.w3", "model.layers.2.block_sparse_moe.experts.57.w3", "model.layers.2.block_sparse_moe.experts.58.w3", "model.layers.2.block_sparse_moe.experts.59.w3", "model.layers.2.block_sparse_moe.experts.60.w3", "model.layers.2.block_sparse_moe.experts.61.w3", "model.layers.2.block_sparse_moe.experts.62.w3", "model.layers.2.block_sparse_moe.experts.63.w3", "model.layers.2.block_sparse_moe.experts.64.w3", "model.layers.2.block_sparse_moe.experts.65.w3", "model.layers.2.block_sparse_moe.experts.66.w3", "model.layers.2.block_sparse_moe.experts.67.w3", "model.layers.2.block_sparse_moe.experts.68.w3", "model.layers.2.block_sparse_moe.experts.69.w3", "model.layers.2.block_sparse_moe.experts.70.w3", "model.layers.2.block_sparse_moe.experts.71.w3", "model.layers.2.block_sparse_moe.experts.72.w3", "model.layers.2.block_sparse_moe.experts.73.w3", "model.layers.2.block_sparse_moe.experts.74.w3", "model.layers.2.block_sparse_moe.experts.75.w3", "model.layers.2.block_sparse_moe.experts.76.w3", "model.layers.2.block_sparse_moe.experts.77.w3", "model.layers.2.block_sparse_moe.experts.78.w3", "model.layers.2.block_sparse_moe.experts.79.w3", "model.layers.2.block_sparse_moe.experts.80.w3", "model.layers.2.block_sparse_moe.experts.81.w3", "model.layers.2.block_sparse_moe.experts.82.w3", "model.layers.2.block_sparse_moe.experts.83.w3", "model.layers.2.block_sparse_moe.experts.84.w3", "model.layers.2.block_sparse_moe.experts.85.w3", "model.layers.2.block_sparse_moe.experts.86.w3", "model.layers.2.block_sparse_moe.experts.87.w3", "model.layers.2.block_sparse_moe.experts.88.w3", "model.layers.2.block_sparse_moe.experts.89.w3", "model.layers.2.block_sparse_moe.experts.90.w3", "model.layers.2.block_sparse_moe.experts.91.w3", "model.layers.2.block_sparse_moe.experts.92.w3", "model.layers.2.block_sparse_moe.experts.93.w3", "model.layers.2.block_sparse_moe.experts.94.w3", "model.layers.2.block_sparse_moe.experts.95.w3", "model.layers.2.block_sparse_moe.experts.96.w3", "model.layers.2.block_sparse_moe.experts.97.w3", "model.layers.2.block_sparse_moe.experts.98.w3", "model.layers.2.block_sparse_moe.experts.99.w3", "model.layers.2.block_sparse_moe.experts.100.w3", "model.layers.2.block_sparse_moe.experts.101.w3", "model.layers.2.block_sparse_moe.experts.102.w3", "model.layers.2.block_sparse_moe.experts.103.w3", "model.layers.2.block_sparse_moe.experts.104.w3", "model.layers.2.block_sparse_moe.experts.105.w3", "model.layers.2.block_sparse_moe.experts.106.w3", "model.layers.2.block_sparse_moe.experts.107.w3", "model.layers.2.block_sparse_moe.experts.108.w3", "model.layers.2.block_sparse_moe.experts.109.w3", "model.layers.2.block_sparse_moe.experts.110.w3", "model.layers.2.block_sparse_moe.experts.111.w3", "model.layers.2.block_sparse_moe.experts.112.w3", "model.layers.2.block_sparse_moe.experts.113.w3", "model.layers.2.block_sparse_moe.experts.114.w3", "model.layers.2.block_sparse_moe.experts.115.w3", "model.layers.2.block_sparse_moe.experts.116.w3", "model.layers.2.block_sparse_moe.experts.117.w3", "model.layers.2.block_sparse_moe.experts.118.w3", "model.layers.2.block_sparse_moe.experts.119.w3", "model.layers.2.block_sparse_moe.experts.120.w3", "model.layers.2.block_sparse_moe.experts.121.w3", "model.layers.2.block_sparse_moe.experts.122.w3", "model.layers.2.block_sparse_moe.experts.123.w3", "model.layers.2.block_sparse_moe.experts.124.w3", "model.layers.2.block_sparse_moe.experts.125.w3", "model.layers.2.block_sparse_moe.experts.126.w3", "model.layers.2.block_sparse_moe.experts.127.w3", "model.layers.2.block_sparse_moe.experts.128.w3", "model.layers.2.block_sparse_moe.experts.129.w3", "model.layers.2.block_sparse_moe.experts.130.w3", "model.layers.2.block_sparse_moe.experts.131.w3", "model.layers.2.block_sparse_moe.experts.132.w3", "model.layers.2.block_sparse_moe.experts.133.w3", "model.layers.2.block_sparse_moe.experts.134.w3", "model.layers.2.block_sparse_moe.experts.135.w3", "model.layers.2.block_sparse_moe.experts.136.w3", "model.layers.2.block_sparse_moe.experts.137.w3", "model.layers.2.block_sparse_moe.experts.138.w3", "model.layers.2.block_sparse_moe.experts.139.w3", "model.layers.2.block_sparse_moe.experts.140.w3", "model.layers.2.block_sparse_moe.experts.141.w3", "model.layers.2.block_sparse_moe.experts.142.w3", "model.layers.2.block_sparse_moe.experts.143.w3", "model.layers.2.block_sparse_moe.experts.144.w3", "model.layers.2.block_sparse_moe.experts.145.w3", "model.layers.2.block_sparse_moe.experts.146.w3", "model.layers.2.block_sparse_moe.experts.147.w3", "model.layers.2.block_sparse_moe.experts.148.w3", "model.layers.2.block_sparse_moe.experts.149.w3", "model.layers.2.block_sparse_moe.experts.150.w3", "model.layers.2.block_sparse_moe.experts.151.w3", "model.layers.2.block_sparse_moe.experts.152.w3", "model.layers.2.block_sparse_moe.experts.153.w3", "model.layers.2.block_sparse_moe.experts.154.w3", "model.layers.2.block_sparse_moe.experts.155.w3", "model.layers.2.block_sparse_moe.experts.156.w3", "model.layers.2.block_sparse_moe.experts.157.w3", "model.layers.2.block_sparse_moe.experts.158.w3", "model.layers.2.block_sparse_moe.experts.159.w3", "model.layers.2.block_sparse_moe.experts.160.w3", "model.layers.2.block_sparse_moe.experts.161.w3", "model.layers.2.block_sparse_moe.experts.162.w3", "model.layers.2.block_sparse_moe.experts.163.w3", "model.layers.2.block_sparse_moe.experts.164.w3", "model.layers.2.block_sparse_moe.experts.165.w3", "model.layers.2.block_sparse_moe.experts.166.w3", "model.layers.2.block_sparse_moe.experts.167.w3", "model.layers.2.block_sparse_moe.experts.168.w3", "model.layers.2.block_sparse_moe.experts.169.w3", "model.layers.2.block_sparse_moe.experts.170.w3", "model.layers.2.block_sparse_moe.experts.171.w3", "model.layers.2.block_sparse_moe.experts.172.w3", "model.layers.2.block_sparse_moe.experts.173.w3", "model.layers.2.block_sparse_moe.experts.174.w3", "model.layers.2.block_sparse_moe.experts.175.w3", "model.layers.2.block_sparse_moe.experts.176.w3", "model.layers.2.block_sparse_moe.experts.177.w3", "model.layers.2.block_sparse_moe.experts.178.w3", "model.layers.2.block_sparse_moe.experts.179.w3", "model.layers.2.block_sparse_moe.experts.180.w3", "model.layers.2.block_sparse_moe.experts.181.w3", "model.layers.2.block_sparse_moe.experts.182.w3", "model.layers.2.block_sparse_moe.experts.183.w3", "model.layers.2.block_sparse_moe.experts.184.w3", "model.layers.2.block_sparse_moe.experts.185.w3", "model.layers.2.block_sparse_moe.experts.186.w3", "model.layers.2.block_sparse_moe.experts.187.w3", "model.layers.2.block_sparse_moe.experts.188.w3", "model.layers.2.block_sparse_moe.experts.189.w3", "model.layers.2.block_sparse_moe.experts.190.w3", "model.layers.2.block_sparse_moe.experts.191.w3", "model.layers.2.block_sparse_moe.experts.192.w3", "model.layers.2.block_sparse_moe.experts.193.w3", "model.layers.2.block_sparse_moe.experts.194.w3", "model.layers.2.block_sparse_moe.experts.195.w3", "model.layers.2.block_sparse_moe.experts.196.w3", "model.layers.2.block_sparse_moe.experts.197.w3", "model.layers.2.block_sparse_moe.experts.198.w3", "model.layers.2.block_sparse_moe.experts.199.w3", "model.layers.2.block_sparse_moe.experts.200.w3", "model.layers.2.block_sparse_moe.experts.201.w3", "model.layers.2.block_sparse_moe.experts.202.w3", "model.layers.2.block_sparse_moe.experts.203.w3", "model.layers.2.block_sparse_moe.experts.204.w3", "model.layers.2.block_sparse_moe.experts.205.w3", "model.layers.2.block_sparse_moe.experts.206.w3", "model.layers.2.block_sparse_moe.experts.207.w3", "model.layers.2.block_sparse_moe.experts.208.w3", "model.layers.2.block_sparse_moe.experts.209.w3", "model.layers.2.block_sparse_moe.experts.210.w3", "model.layers.2.block_sparse_moe.experts.211.w3", "model.layers.2.block_sparse_moe.experts.212.w3", "model.layers.2.block_sparse_moe.experts.213.w3", "model.layers.2.block_sparse_moe.experts.214.w3", "model.layers.2.block_sparse_moe.experts.215.w3", "model.layers.2.block_sparse_moe.experts.216.w3", "model.layers.2.block_sparse_moe.experts.217.w3", "model.layers.2.block_sparse_moe.experts.218.w3", "model.layers.2.block_sparse_moe.experts.219.w3", "model.layers.2.block_sparse_moe.experts.220.w3", "model.layers.2.block_sparse_moe.experts.221.w3", "model.layers.2.block_sparse_moe.experts.222.w3", "model.layers.2.block_sparse_moe.experts.223.w3", "model.layers.2.block_sparse_moe.experts.224.w3", "model.layers.2.block_sparse_moe.experts.225.w3", "model.layers.2.block_sparse_moe.experts.226.w3", "model.layers.2.block_sparse_moe.experts.227.w3", "model.layers.2.block_sparse_moe.experts.228.w3", "model.layers.2.block_sparse_moe.experts.229.w3", "model.layers.2.block_sparse_moe.experts.230.w3", "model.layers.2.block_sparse_moe.experts.231.w3", "model.layers.2.block_sparse_moe.experts.232.w3", "model.layers.2.block_sparse_moe.experts.233.w3", "model.layers.2.block_sparse_moe.experts.234.w3", "model.layers.2.block_sparse_moe.experts.235.w3", "model.layers.2.block_sparse_moe.experts.236.w3", "model.layers.2.block_sparse_moe.experts.237.w3", "model.layers.2.block_sparse_moe.experts.238.w3", "model.layers.2.block_sparse_moe.experts.239.w3", "model.layers.2.block_sparse_moe.experts.240.w3", "model.layers.2.block_sparse_moe.experts.241.w3", "model.layers.2.block_sparse_moe.experts.242.w3", "model.layers.2.block_sparse_moe.experts.243.w3", "model.layers.2.block_sparse_moe.experts.244.w3", "model.layers.2.block_sparse_moe.experts.245.w3", "model.layers.2.block_sparse_moe.experts.246.w3", "model.layers.2.block_sparse_moe.experts.247.w3", "model.layers.2.block_sparse_moe.experts.248.w3", "model.layers.2.block_sparse_moe.experts.249.w3", "model.layers.2.block_sparse_moe.experts.250.w3", "model.layers.2.block_sparse_moe.experts.251.w3", "model.layers.2.block_sparse_moe.experts.252.w3", "model.layers.2.block_sparse_moe.experts.253.w3", "model.layers.2.block_sparse_moe.experts.254.w3", "model.layers.2.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0007368993014097297, "dbits": 2415919104 } ] }, { "idx": 14, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w2", "model.layers.2.block_sparse_moe.experts.1.w2", "model.layers.2.block_sparse_moe.experts.2.w2", "model.layers.2.block_sparse_moe.experts.3.w2", "model.layers.2.block_sparse_moe.experts.4.w2", "model.layers.2.block_sparse_moe.experts.5.w2", "model.layers.2.block_sparse_moe.experts.6.w2", "model.layers.2.block_sparse_moe.experts.7.w2", "model.layers.2.block_sparse_moe.experts.8.w2", "model.layers.2.block_sparse_moe.experts.9.w2", "model.layers.2.block_sparse_moe.experts.10.w2", "model.layers.2.block_sparse_moe.experts.11.w2", "model.layers.2.block_sparse_moe.experts.12.w2", "model.layers.2.block_sparse_moe.experts.13.w2", "model.layers.2.block_sparse_moe.experts.14.w2", "model.layers.2.block_sparse_moe.experts.15.w2", "model.layers.2.block_sparse_moe.experts.16.w2", "model.layers.2.block_sparse_moe.experts.17.w2", "model.layers.2.block_sparse_moe.experts.18.w2", "model.layers.2.block_sparse_moe.experts.19.w2", "model.layers.2.block_sparse_moe.experts.20.w2", "model.layers.2.block_sparse_moe.experts.21.w2", "model.layers.2.block_sparse_moe.experts.22.w2", "model.layers.2.block_sparse_moe.experts.23.w2", "model.layers.2.block_sparse_moe.experts.24.w2", "model.layers.2.block_sparse_moe.experts.25.w2", "model.layers.2.block_sparse_moe.experts.26.w2", "model.layers.2.block_sparse_moe.experts.27.w2", "model.layers.2.block_sparse_moe.experts.28.w2", "model.layers.2.block_sparse_moe.experts.29.w2", "model.layers.2.block_sparse_moe.experts.30.w2", "model.layers.2.block_sparse_moe.experts.31.w2", "model.layers.2.block_sparse_moe.experts.32.w2", "model.layers.2.block_sparse_moe.experts.33.w2", "model.layers.2.block_sparse_moe.experts.34.w2", "model.layers.2.block_sparse_moe.experts.35.w2", "model.layers.2.block_sparse_moe.experts.36.w2", "model.layers.2.block_sparse_moe.experts.37.w2", "model.layers.2.block_sparse_moe.experts.38.w2", "model.layers.2.block_sparse_moe.experts.39.w2", "model.layers.2.block_sparse_moe.experts.40.w2", "model.layers.2.block_sparse_moe.experts.41.w2", "model.layers.2.block_sparse_moe.experts.42.w2", "model.layers.2.block_sparse_moe.experts.43.w2", "model.layers.2.block_sparse_moe.experts.44.w2", "model.layers.2.block_sparse_moe.experts.45.w2", "model.layers.2.block_sparse_moe.experts.46.w2", "model.layers.2.block_sparse_moe.experts.47.w2", "model.layers.2.block_sparse_moe.experts.48.w2", "model.layers.2.block_sparse_moe.experts.49.w2", "model.layers.2.block_sparse_moe.experts.50.w2", "model.layers.2.block_sparse_moe.experts.51.w2", "model.layers.2.block_sparse_moe.experts.52.w2", "model.layers.2.block_sparse_moe.experts.53.w2", "model.layers.2.block_sparse_moe.experts.54.w2", "model.layers.2.block_sparse_moe.experts.55.w2", "model.layers.2.block_sparse_moe.experts.56.w2", "model.layers.2.block_sparse_moe.experts.57.w2", "model.layers.2.block_sparse_moe.experts.58.w2", "model.layers.2.block_sparse_moe.experts.59.w2", "model.layers.2.block_sparse_moe.experts.60.w2", "model.layers.2.block_sparse_moe.experts.61.w2", "model.layers.2.block_sparse_moe.experts.62.w2", "model.layers.2.block_sparse_moe.experts.63.w2", "model.layers.2.block_sparse_moe.experts.64.w2", "model.layers.2.block_sparse_moe.experts.65.w2", "model.layers.2.block_sparse_moe.experts.66.w2", "model.layers.2.block_sparse_moe.experts.67.w2", "model.layers.2.block_sparse_moe.experts.68.w2", "model.layers.2.block_sparse_moe.experts.69.w2", "model.layers.2.block_sparse_moe.experts.70.w2", "model.layers.2.block_sparse_moe.experts.71.w2", "model.layers.2.block_sparse_moe.experts.72.w2", "model.layers.2.block_sparse_moe.experts.73.w2", "model.layers.2.block_sparse_moe.experts.74.w2", "model.layers.2.block_sparse_moe.experts.75.w2", "model.layers.2.block_sparse_moe.experts.76.w2", "model.layers.2.block_sparse_moe.experts.77.w2", "model.layers.2.block_sparse_moe.experts.78.w2", "model.layers.2.block_sparse_moe.experts.79.w2", "model.layers.2.block_sparse_moe.experts.80.w2", "model.layers.2.block_sparse_moe.experts.81.w2", "model.layers.2.block_sparse_moe.experts.82.w2", "model.layers.2.block_sparse_moe.experts.83.w2", "model.layers.2.block_sparse_moe.experts.84.w2", "model.layers.2.block_sparse_moe.experts.85.w2", "model.layers.2.block_sparse_moe.experts.86.w2", "model.layers.2.block_sparse_moe.experts.87.w2", "model.layers.2.block_sparse_moe.experts.88.w2", "model.layers.2.block_sparse_moe.experts.89.w2", "model.layers.2.block_sparse_moe.experts.90.w2", "model.layers.2.block_sparse_moe.experts.91.w2", "model.layers.2.block_sparse_moe.experts.92.w2", "model.layers.2.block_sparse_moe.experts.93.w2", "model.layers.2.block_sparse_moe.experts.94.w2", "model.layers.2.block_sparse_moe.experts.95.w2", "model.layers.2.block_sparse_moe.experts.96.w2", "model.layers.2.block_sparse_moe.experts.97.w2", "model.layers.2.block_sparse_moe.experts.98.w2", "model.layers.2.block_sparse_moe.experts.99.w2", "model.layers.2.block_sparse_moe.experts.100.w2", "model.layers.2.block_sparse_moe.experts.101.w2", "model.layers.2.block_sparse_moe.experts.102.w2", "model.layers.2.block_sparse_moe.experts.103.w2", "model.layers.2.block_sparse_moe.experts.104.w2", "model.layers.2.block_sparse_moe.experts.105.w2", "model.layers.2.block_sparse_moe.experts.106.w2", "model.layers.2.block_sparse_moe.experts.107.w2", "model.layers.2.block_sparse_moe.experts.108.w2", "model.layers.2.block_sparse_moe.experts.109.w2", "model.layers.2.block_sparse_moe.experts.110.w2", "model.layers.2.block_sparse_moe.experts.111.w2", "model.layers.2.block_sparse_moe.experts.112.w2", "model.layers.2.block_sparse_moe.experts.113.w2", "model.layers.2.block_sparse_moe.experts.114.w2", "model.layers.2.block_sparse_moe.experts.115.w2", "model.layers.2.block_sparse_moe.experts.116.w2", "model.layers.2.block_sparse_moe.experts.117.w2", "model.layers.2.block_sparse_moe.experts.118.w2", "model.layers.2.block_sparse_moe.experts.119.w2", "model.layers.2.block_sparse_moe.experts.120.w2", "model.layers.2.block_sparse_moe.experts.121.w2", "model.layers.2.block_sparse_moe.experts.122.w2", "model.layers.2.block_sparse_moe.experts.123.w2", "model.layers.2.block_sparse_moe.experts.124.w2", "model.layers.2.block_sparse_moe.experts.125.w2", "model.layers.2.block_sparse_moe.experts.126.w2", "model.layers.2.block_sparse_moe.experts.127.w2", "model.layers.2.block_sparse_moe.experts.128.w2", "model.layers.2.block_sparse_moe.experts.129.w2", "model.layers.2.block_sparse_moe.experts.130.w2", "model.layers.2.block_sparse_moe.experts.131.w2", "model.layers.2.block_sparse_moe.experts.132.w2", "model.layers.2.block_sparse_moe.experts.133.w2", "model.layers.2.block_sparse_moe.experts.134.w2", "model.layers.2.block_sparse_moe.experts.135.w2", "model.layers.2.block_sparse_moe.experts.136.w2", "model.layers.2.block_sparse_moe.experts.137.w2", "model.layers.2.block_sparse_moe.experts.138.w2", "model.layers.2.block_sparse_moe.experts.139.w2", "model.layers.2.block_sparse_moe.experts.140.w2", "model.layers.2.block_sparse_moe.experts.141.w2", "model.layers.2.block_sparse_moe.experts.142.w2", "model.layers.2.block_sparse_moe.experts.143.w2", "model.layers.2.block_sparse_moe.experts.144.w2", "model.layers.2.block_sparse_moe.experts.145.w2", "model.layers.2.block_sparse_moe.experts.146.w2", "model.layers.2.block_sparse_moe.experts.147.w2", "model.layers.2.block_sparse_moe.experts.148.w2", "model.layers.2.block_sparse_moe.experts.149.w2", "model.layers.2.block_sparse_moe.experts.150.w2", "model.layers.2.block_sparse_moe.experts.151.w2", "model.layers.2.block_sparse_moe.experts.152.w2", "model.layers.2.block_sparse_moe.experts.153.w2", "model.layers.2.block_sparse_moe.experts.154.w2", "model.layers.2.block_sparse_moe.experts.155.w2", "model.layers.2.block_sparse_moe.experts.156.w2", "model.layers.2.block_sparse_moe.experts.157.w2", "model.layers.2.block_sparse_moe.experts.158.w2", "model.layers.2.block_sparse_moe.experts.159.w2", "model.layers.2.block_sparse_moe.experts.160.w2", "model.layers.2.block_sparse_moe.experts.161.w2", "model.layers.2.block_sparse_moe.experts.162.w2", "model.layers.2.block_sparse_moe.experts.163.w2", "model.layers.2.block_sparse_moe.experts.164.w2", "model.layers.2.block_sparse_moe.experts.165.w2", "model.layers.2.block_sparse_moe.experts.166.w2", "model.layers.2.block_sparse_moe.experts.167.w2", "model.layers.2.block_sparse_moe.experts.168.w2", "model.layers.2.block_sparse_moe.experts.169.w2", "model.layers.2.block_sparse_moe.experts.170.w2", "model.layers.2.block_sparse_moe.experts.171.w2", "model.layers.2.block_sparse_moe.experts.172.w2", "model.layers.2.block_sparse_moe.experts.173.w2", "model.layers.2.block_sparse_moe.experts.174.w2", "model.layers.2.block_sparse_moe.experts.175.w2", "model.layers.2.block_sparse_moe.experts.176.w2", "model.layers.2.block_sparse_moe.experts.177.w2", "model.layers.2.block_sparse_moe.experts.178.w2", "model.layers.2.block_sparse_moe.experts.179.w2", "model.layers.2.block_sparse_moe.experts.180.w2", "model.layers.2.block_sparse_moe.experts.181.w2", "model.layers.2.block_sparse_moe.experts.182.w2", "model.layers.2.block_sparse_moe.experts.183.w2", "model.layers.2.block_sparse_moe.experts.184.w2", "model.layers.2.block_sparse_moe.experts.185.w2", "model.layers.2.block_sparse_moe.experts.186.w2", "model.layers.2.block_sparse_moe.experts.187.w2", "model.layers.2.block_sparse_moe.experts.188.w2", "model.layers.2.block_sparse_moe.experts.189.w2", "model.layers.2.block_sparse_moe.experts.190.w2", "model.layers.2.block_sparse_moe.experts.191.w2", "model.layers.2.block_sparse_moe.experts.192.w2", "model.layers.2.block_sparse_moe.experts.193.w2", "model.layers.2.block_sparse_moe.experts.194.w2", "model.layers.2.block_sparse_moe.experts.195.w2", "model.layers.2.block_sparse_moe.experts.196.w2", "model.layers.2.block_sparse_moe.experts.197.w2", "model.layers.2.block_sparse_moe.experts.198.w2", "model.layers.2.block_sparse_moe.experts.199.w2", "model.layers.2.block_sparse_moe.experts.200.w2", "model.layers.2.block_sparse_moe.experts.201.w2", "model.layers.2.block_sparse_moe.experts.202.w2", "model.layers.2.block_sparse_moe.experts.203.w2", "model.layers.2.block_sparse_moe.experts.204.w2", "model.layers.2.block_sparse_moe.experts.205.w2", "model.layers.2.block_sparse_moe.experts.206.w2", "model.layers.2.block_sparse_moe.experts.207.w2", "model.layers.2.block_sparse_moe.experts.208.w2", "model.layers.2.block_sparse_moe.experts.209.w2", "model.layers.2.block_sparse_moe.experts.210.w2", "model.layers.2.block_sparse_moe.experts.211.w2", "model.layers.2.block_sparse_moe.experts.212.w2", "model.layers.2.block_sparse_moe.experts.213.w2", "model.layers.2.block_sparse_moe.experts.214.w2", "model.layers.2.block_sparse_moe.experts.215.w2", "model.layers.2.block_sparse_moe.experts.216.w2", "model.layers.2.block_sparse_moe.experts.217.w2", "model.layers.2.block_sparse_moe.experts.218.w2", "model.layers.2.block_sparse_moe.experts.219.w2", "model.layers.2.block_sparse_moe.experts.220.w2", "model.layers.2.block_sparse_moe.experts.221.w2", "model.layers.2.block_sparse_moe.experts.222.w2", "model.layers.2.block_sparse_moe.experts.223.w2", "model.layers.2.block_sparse_moe.experts.224.w2", "model.layers.2.block_sparse_moe.experts.225.w2", "model.layers.2.block_sparse_moe.experts.226.w2", "model.layers.2.block_sparse_moe.experts.227.w2", "model.layers.2.block_sparse_moe.experts.228.w2", "model.layers.2.block_sparse_moe.experts.229.w2", "model.layers.2.block_sparse_moe.experts.230.w2", "model.layers.2.block_sparse_moe.experts.231.w2", "model.layers.2.block_sparse_moe.experts.232.w2", "model.layers.2.block_sparse_moe.experts.233.w2", "model.layers.2.block_sparse_moe.experts.234.w2", "model.layers.2.block_sparse_moe.experts.235.w2", "model.layers.2.block_sparse_moe.experts.236.w2", "model.layers.2.block_sparse_moe.experts.237.w2", "model.layers.2.block_sparse_moe.experts.238.w2", "model.layers.2.block_sparse_moe.experts.239.w2", "model.layers.2.block_sparse_moe.experts.240.w2", "model.layers.2.block_sparse_moe.experts.241.w2", "model.layers.2.block_sparse_moe.experts.242.w2", "model.layers.2.block_sparse_moe.experts.243.w2", "model.layers.2.block_sparse_moe.experts.244.w2", "model.layers.2.block_sparse_moe.experts.245.w2", "model.layers.2.block_sparse_moe.experts.246.w2", "model.layers.2.block_sparse_moe.experts.247.w2", "model.layers.2.block_sparse_moe.experts.248.w2", "model.layers.2.block_sparse_moe.experts.249.w2", "model.layers.2.block_sparse_moe.experts.250.w2", "model.layers.2.block_sparse_moe.experts.251.w2", "model.layers.2.block_sparse_moe.experts.252.w2", "model.layers.2.block_sparse_moe.experts.253.w2", "model.layers.2.block_sparse_moe.experts.254.w2", "model.layers.2.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0004493514075875338, "dbits": 1207959552 } ] }, { "idx": 15, "layers": [ "model.layers.3.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00028852373361587524, "dbits": 18874368 } ] }, { "idx": 16, "layers": [ "model.layers.3.self_attn.k_proj", "model.layers.3.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00321202352643013, "dbits": 6291456 } ] }, { "idx": 17, "layers": [ "model.layers.3.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0008405266329646055, "dbits": 18874368 } ] }, { "idx": 18, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w1", "model.layers.3.block_sparse_moe.experts.1.w1", "model.layers.3.block_sparse_moe.experts.2.w1", "model.layers.3.block_sparse_moe.experts.3.w1", "model.layers.3.block_sparse_moe.experts.4.w1", "model.layers.3.block_sparse_moe.experts.5.w1", "model.layers.3.block_sparse_moe.experts.6.w1", "model.layers.3.block_sparse_moe.experts.7.w1", "model.layers.3.block_sparse_moe.experts.8.w1", "model.layers.3.block_sparse_moe.experts.9.w1", "model.layers.3.block_sparse_moe.experts.10.w1", "model.layers.3.block_sparse_moe.experts.11.w1", "model.layers.3.block_sparse_moe.experts.12.w1", "model.layers.3.block_sparse_moe.experts.13.w1", "model.layers.3.block_sparse_moe.experts.14.w1", "model.layers.3.block_sparse_moe.experts.15.w1", "model.layers.3.block_sparse_moe.experts.16.w1", "model.layers.3.block_sparse_moe.experts.17.w1", "model.layers.3.block_sparse_moe.experts.18.w1", "model.layers.3.block_sparse_moe.experts.19.w1", "model.layers.3.block_sparse_moe.experts.20.w1", "model.layers.3.block_sparse_moe.experts.21.w1", "model.layers.3.block_sparse_moe.experts.22.w1", "model.layers.3.block_sparse_moe.experts.23.w1", "model.layers.3.block_sparse_moe.experts.24.w1", "model.layers.3.block_sparse_moe.experts.25.w1", "model.layers.3.block_sparse_moe.experts.26.w1", "model.layers.3.block_sparse_moe.experts.27.w1", "model.layers.3.block_sparse_moe.experts.28.w1", "model.layers.3.block_sparse_moe.experts.29.w1", "model.layers.3.block_sparse_moe.experts.30.w1", "model.layers.3.block_sparse_moe.experts.31.w1", "model.layers.3.block_sparse_moe.experts.32.w1", "model.layers.3.block_sparse_moe.experts.33.w1", "model.layers.3.block_sparse_moe.experts.34.w1", "model.layers.3.block_sparse_moe.experts.35.w1", "model.layers.3.block_sparse_moe.experts.36.w1", "model.layers.3.block_sparse_moe.experts.37.w1", "model.layers.3.block_sparse_moe.experts.38.w1", "model.layers.3.block_sparse_moe.experts.39.w1", "model.layers.3.block_sparse_moe.experts.40.w1", "model.layers.3.block_sparse_moe.experts.41.w1", "model.layers.3.block_sparse_moe.experts.42.w1", "model.layers.3.block_sparse_moe.experts.43.w1", "model.layers.3.block_sparse_moe.experts.44.w1", "model.layers.3.block_sparse_moe.experts.45.w1", "model.layers.3.block_sparse_moe.experts.46.w1", "model.layers.3.block_sparse_moe.experts.47.w1", "model.layers.3.block_sparse_moe.experts.48.w1", "model.layers.3.block_sparse_moe.experts.49.w1", "model.layers.3.block_sparse_moe.experts.50.w1", "model.layers.3.block_sparse_moe.experts.51.w1", "model.layers.3.block_sparse_moe.experts.52.w1", "model.layers.3.block_sparse_moe.experts.53.w1", "model.layers.3.block_sparse_moe.experts.54.w1", "model.layers.3.block_sparse_moe.experts.55.w1", "model.layers.3.block_sparse_moe.experts.56.w1", "model.layers.3.block_sparse_moe.experts.57.w1", "model.layers.3.block_sparse_moe.experts.58.w1", "model.layers.3.block_sparse_moe.experts.59.w1", "model.layers.3.block_sparse_moe.experts.60.w1", "model.layers.3.block_sparse_moe.experts.61.w1", "model.layers.3.block_sparse_moe.experts.62.w1", "model.layers.3.block_sparse_moe.experts.63.w1", "model.layers.3.block_sparse_moe.experts.64.w1", "model.layers.3.block_sparse_moe.experts.65.w1", "model.layers.3.block_sparse_moe.experts.66.w1", "model.layers.3.block_sparse_moe.experts.67.w1", "model.layers.3.block_sparse_moe.experts.68.w1", "model.layers.3.block_sparse_moe.experts.69.w1", "model.layers.3.block_sparse_moe.experts.70.w1", "model.layers.3.block_sparse_moe.experts.71.w1", "model.layers.3.block_sparse_moe.experts.72.w1", "model.layers.3.block_sparse_moe.experts.73.w1", "model.layers.3.block_sparse_moe.experts.74.w1", "model.layers.3.block_sparse_moe.experts.75.w1", "model.layers.3.block_sparse_moe.experts.76.w1", "model.layers.3.block_sparse_moe.experts.77.w1", "model.layers.3.block_sparse_moe.experts.78.w1", "model.layers.3.block_sparse_moe.experts.79.w1", "model.layers.3.block_sparse_moe.experts.80.w1", "model.layers.3.block_sparse_moe.experts.81.w1", "model.layers.3.block_sparse_moe.experts.82.w1", "model.layers.3.block_sparse_moe.experts.83.w1", "model.layers.3.block_sparse_moe.experts.84.w1", "model.layers.3.block_sparse_moe.experts.85.w1", "model.layers.3.block_sparse_moe.experts.86.w1", "model.layers.3.block_sparse_moe.experts.87.w1", "model.layers.3.block_sparse_moe.experts.88.w1", "model.layers.3.block_sparse_moe.experts.89.w1", "model.layers.3.block_sparse_moe.experts.90.w1", "model.layers.3.block_sparse_moe.experts.91.w1", "model.layers.3.block_sparse_moe.experts.92.w1", "model.layers.3.block_sparse_moe.experts.93.w1", "model.layers.3.block_sparse_moe.experts.94.w1", "model.layers.3.block_sparse_moe.experts.95.w1", "model.layers.3.block_sparse_moe.experts.96.w1", "model.layers.3.block_sparse_moe.experts.97.w1", "model.layers.3.block_sparse_moe.experts.98.w1", "model.layers.3.block_sparse_moe.experts.99.w1", "model.layers.3.block_sparse_moe.experts.100.w1", "model.layers.3.block_sparse_moe.experts.101.w1", "model.layers.3.block_sparse_moe.experts.102.w1", "model.layers.3.block_sparse_moe.experts.103.w1", "model.layers.3.block_sparse_moe.experts.104.w1", "model.layers.3.block_sparse_moe.experts.105.w1", "model.layers.3.block_sparse_moe.experts.106.w1", "model.layers.3.block_sparse_moe.experts.107.w1", "model.layers.3.block_sparse_moe.experts.108.w1", "model.layers.3.block_sparse_moe.experts.109.w1", "model.layers.3.block_sparse_moe.experts.110.w1", "model.layers.3.block_sparse_moe.experts.111.w1", "model.layers.3.block_sparse_moe.experts.112.w1", "model.layers.3.block_sparse_moe.experts.113.w1", "model.layers.3.block_sparse_moe.experts.114.w1", "model.layers.3.block_sparse_moe.experts.115.w1", "model.layers.3.block_sparse_moe.experts.116.w1", "model.layers.3.block_sparse_moe.experts.117.w1", "model.layers.3.block_sparse_moe.experts.118.w1", "model.layers.3.block_sparse_moe.experts.119.w1", "model.layers.3.block_sparse_moe.experts.120.w1", "model.layers.3.block_sparse_moe.experts.121.w1", "model.layers.3.block_sparse_moe.experts.122.w1", "model.layers.3.block_sparse_moe.experts.123.w1", "model.layers.3.block_sparse_moe.experts.124.w1", "model.layers.3.block_sparse_moe.experts.125.w1", "model.layers.3.block_sparse_moe.experts.126.w1", "model.layers.3.block_sparse_moe.experts.127.w1", "model.layers.3.block_sparse_moe.experts.128.w1", "model.layers.3.block_sparse_moe.experts.129.w1", "model.layers.3.block_sparse_moe.experts.130.w1", "model.layers.3.block_sparse_moe.experts.131.w1", "model.layers.3.block_sparse_moe.experts.132.w1", "model.layers.3.block_sparse_moe.experts.133.w1", "model.layers.3.block_sparse_moe.experts.134.w1", "model.layers.3.block_sparse_moe.experts.135.w1", "model.layers.3.block_sparse_moe.experts.136.w1", "model.layers.3.block_sparse_moe.experts.137.w1", "model.layers.3.block_sparse_moe.experts.138.w1", "model.layers.3.block_sparse_moe.experts.139.w1", "model.layers.3.block_sparse_moe.experts.140.w1", "model.layers.3.block_sparse_moe.experts.141.w1", "model.layers.3.block_sparse_moe.experts.142.w1", "model.layers.3.block_sparse_moe.experts.143.w1", "model.layers.3.block_sparse_moe.experts.144.w1", "model.layers.3.block_sparse_moe.experts.145.w1", "model.layers.3.block_sparse_moe.experts.146.w1", "model.layers.3.block_sparse_moe.experts.147.w1", "model.layers.3.block_sparse_moe.experts.148.w1", "model.layers.3.block_sparse_moe.experts.149.w1", "model.layers.3.block_sparse_moe.experts.150.w1", "model.layers.3.block_sparse_moe.experts.151.w1", "model.layers.3.block_sparse_moe.experts.152.w1", "model.layers.3.block_sparse_moe.experts.153.w1", "model.layers.3.block_sparse_moe.experts.154.w1", "model.layers.3.block_sparse_moe.experts.155.w1", "model.layers.3.block_sparse_moe.experts.156.w1", "model.layers.3.block_sparse_moe.experts.157.w1", "model.layers.3.block_sparse_moe.experts.158.w1", "model.layers.3.block_sparse_moe.experts.159.w1", "model.layers.3.block_sparse_moe.experts.160.w1", "model.layers.3.block_sparse_moe.experts.161.w1", "model.layers.3.block_sparse_moe.experts.162.w1", "model.layers.3.block_sparse_moe.experts.163.w1", "model.layers.3.block_sparse_moe.experts.164.w1", "model.layers.3.block_sparse_moe.experts.165.w1", "model.layers.3.block_sparse_moe.experts.166.w1", "model.layers.3.block_sparse_moe.experts.167.w1", "model.layers.3.block_sparse_moe.experts.168.w1", "model.layers.3.block_sparse_moe.experts.169.w1", "model.layers.3.block_sparse_moe.experts.170.w1", "model.layers.3.block_sparse_moe.experts.171.w1", "model.layers.3.block_sparse_moe.experts.172.w1", "model.layers.3.block_sparse_moe.experts.173.w1", "model.layers.3.block_sparse_moe.experts.174.w1", "model.layers.3.block_sparse_moe.experts.175.w1", "model.layers.3.block_sparse_moe.experts.176.w1", "model.layers.3.block_sparse_moe.experts.177.w1", "model.layers.3.block_sparse_moe.experts.178.w1", "model.layers.3.block_sparse_moe.experts.179.w1", "model.layers.3.block_sparse_moe.experts.180.w1", "model.layers.3.block_sparse_moe.experts.181.w1", "model.layers.3.block_sparse_moe.experts.182.w1", "model.layers.3.block_sparse_moe.experts.183.w1", "model.layers.3.block_sparse_moe.experts.184.w1", "model.layers.3.block_sparse_moe.experts.185.w1", "model.layers.3.block_sparse_moe.experts.186.w1", "model.layers.3.block_sparse_moe.experts.187.w1", "model.layers.3.block_sparse_moe.experts.188.w1", "model.layers.3.block_sparse_moe.experts.189.w1", "model.layers.3.block_sparse_moe.experts.190.w1", "model.layers.3.block_sparse_moe.experts.191.w1", "model.layers.3.block_sparse_moe.experts.192.w1", "model.layers.3.block_sparse_moe.experts.193.w1", "model.layers.3.block_sparse_moe.experts.194.w1", "model.layers.3.block_sparse_moe.experts.195.w1", "model.layers.3.block_sparse_moe.experts.196.w1", "model.layers.3.block_sparse_moe.experts.197.w1", "model.layers.3.block_sparse_moe.experts.198.w1", "model.layers.3.block_sparse_moe.experts.199.w1", "model.layers.3.block_sparse_moe.experts.200.w1", "model.layers.3.block_sparse_moe.experts.201.w1", "model.layers.3.block_sparse_moe.experts.202.w1", "model.layers.3.block_sparse_moe.experts.203.w1", "model.layers.3.block_sparse_moe.experts.204.w1", "model.layers.3.block_sparse_moe.experts.205.w1", "model.layers.3.block_sparse_moe.experts.206.w1", "model.layers.3.block_sparse_moe.experts.207.w1", "model.layers.3.block_sparse_moe.experts.208.w1", "model.layers.3.block_sparse_moe.experts.209.w1", "model.layers.3.block_sparse_moe.experts.210.w1", "model.layers.3.block_sparse_moe.experts.211.w1", "model.layers.3.block_sparse_moe.experts.212.w1", "model.layers.3.block_sparse_moe.experts.213.w1", "model.layers.3.block_sparse_moe.experts.214.w1", "model.layers.3.block_sparse_moe.experts.215.w1", "model.layers.3.block_sparse_moe.experts.216.w1", "model.layers.3.block_sparse_moe.experts.217.w1", "model.layers.3.block_sparse_moe.experts.218.w1", "model.layers.3.block_sparse_moe.experts.219.w1", "model.layers.3.block_sparse_moe.experts.220.w1", "model.layers.3.block_sparse_moe.experts.221.w1", "model.layers.3.block_sparse_moe.experts.222.w1", "model.layers.3.block_sparse_moe.experts.223.w1", "model.layers.3.block_sparse_moe.experts.224.w1", "model.layers.3.block_sparse_moe.experts.225.w1", "model.layers.3.block_sparse_moe.experts.226.w1", "model.layers.3.block_sparse_moe.experts.227.w1", "model.layers.3.block_sparse_moe.experts.228.w1", "model.layers.3.block_sparse_moe.experts.229.w1", "model.layers.3.block_sparse_moe.experts.230.w1", "model.layers.3.block_sparse_moe.experts.231.w1", "model.layers.3.block_sparse_moe.experts.232.w1", "model.layers.3.block_sparse_moe.experts.233.w1", "model.layers.3.block_sparse_moe.experts.234.w1", "model.layers.3.block_sparse_moe.experts.235.w1", "model.layers.3.block_sparse_moe.experts.236.w1", "model.layers.3.block_sparse_moe.experts.237.w1", "model.layers.3.block_sparse_moe.experts.238.w1", "model.layers.3.block_sparse_moe.experts.239.w1", "model.layers.3.block_sparse_moe.experts.240.w1", "model.layers.3.block_sparse_moe.experts.241.w1", "model.layers.3.block_sparse_moe.experts.242.w1", "model.layers.3.block_sparse_moe.experts.243.w1", "model.layers.3.block_sparse_moe.experts.244.w1", "model.layers.3.block_sparse_moe.experts.245.w1", "model.layers.3.block_sparse_moe.experts.246.w1", "model.layers.3.block_sparse_moe.experts.247.w1", "model.layers.3.block_sparse_moe.experts.248.w1", "model.layers.3.block_sparse_moe.experts.249.w1", "model.layers.3.block_sparse_moe.experts.250.w1", "model.layers.3.block_sparse_moe.experts.251.w1", "model.layers.3.block_sparse_moe.experts.252.w1", "model.layers.3.block_sparse_moe.experts.253.w1", "model.layers.3.block_sparse_moe.experts.254.w1", "model.layers.3.block_sparse_moe.experts.255.w1", "model.layers.3.block_sparse_moe.experts.0.w3", "model.layers.3.block_sparse_moe.experts.1.w3", "model.layers.3.block_sparse_moe.experts.2.w3", "model.layers.3.block_sparse_moe.experts.3.w3", "model.layers.3.block_sparse_moe.experts.4.w3", "model.layers.3.block_sparse_moe.experts.5.w3", "model.layers.3.block_sparse_moe.experts.6.w3", "model.layers.3.block_sparse_moe.experts.7.w3", "model.layers.3.block_sparse_moe.experts.8.w3", "model.layers.3.block_sparse_moe.experts.9.w3", "model.layers.3.block_sparse_moe.experts.10.w3", "model.layers.3.block_sparse_moe.experts.11.w3", "model.layers.3.block_sparse_moe.experts.12.w3", "model.layers.3.block_sparse_moe.experts.13.w3", "model.layers.3.block_sparse_moe.experts.14.w3", "model.layers.3.block_sparse_moe.experts.15.w3", "model.layers.3.block_sparse_moe.experts.16.w3", "model.layers.3.block_sparse_moe.experts.17.w3", "model.layers.3.block_sparse_moe.experts.18.w3", "model.layers.3.block_sparse_moe.experts.19.w3", "model.layers.3.block_sparse_moe.experts.20.w3", "model.layers.3.block_sparse_moe.experts.21.w3", "model.layers.3.block_sparse_moe.experts.22.w3", "model.layers.3.block_sparse_moe.experts.23.w3", "model.layers.3.block_sparse_moe.experts.24.w3", "model.layers.3.block_sparse_moe.experts.25.w3", "model.layers.3.block_sparse_moe.experts.26.w3", "model.layers.3.block_sparse_moe.experts.27.w3", "model.layers.3.block_sparse_moe.experts.28.w3", "model.layers.3.block_sparse_moe.experts.29.w3", "model.layers.3.block_sparse_moe.experts.30.w3", "model.layers.3.block_sparse_moe.experts.31.w3", "model.layers.3.block_sparse_moe.experts.32.w3", "model.layers.3.block_sparse_moe.experts.33.w3", "model.layers.3.block_sparse_moe.experts.34.w3", "model.layers.3.block_sparse_moe.experts.35.w3", "model.layers.3.block_sparse_moe.experts.36.w3", "model.layers.3.block_sparse_moe.experts.37.w3", "model.layers.3.block_sparse_moe.experts.38.w3", "model.layers.3.block_sparse_moe.experts.39.w3", "model.layers.3.block_sparse_moe.experts.40.w3", "model.layers.3.block_sparse_moe.experts.41.w3", "model.layers.3.block_sparse_moe.experts.42.w3", "model.layers.3.block_sparse_moe.experts.43.w3", "model.layers.3.block_sparse_moe.experts.44.w3", "model.layers.3.block_sparse_moe.experts.45.w3", "model.layers.3.block_sparse_moe.experts.46.w3", "model.layers.3.block_sparse_moe.experts.47.w3", "model.layers.3.block_sparse_moe.experts.48.w3", "model.layers.3.block_sparse_moe.experts.49.w3", "model.layers.3.block_sparse_moe.experts.50.w3", "model.layers.3.block_sparse_moe.experts.51.w3", "model.layers.3.block_sparse_moe.experts.52.w3", "model.layers.3.block_sparse_moe.experts.53.w3", "model.layers.3.block_sparse_moe.experts.54.w3", "model.layers.3.block_sparse_moe.experts.55.w3", "model.layers.3.block_sparse_moe.experts.56.w3", "model.layers.3.block_sparse_moe.experts.57.w3", "model.layers.3.block_sparse_moe.experts.58.w3", "model.layers.3.block_sparse_moe.experts.59.w3", "model.layers.3.block_sparse_moe.experts.60.w3", "model.layers.3.block_sparse_moe.experts.61.w3", "model.layers.3.block_sparse_moe.experts.62.w3", "model.layers.3.block_sparse_moe.experts.63.w3", "model.layers.3.block_sparse_moe.experts.64.w3", "model.layers.3.block_sparse_moe.experts.65.w3", "model.layers.3.block_sparse_moe.experts.66.w3", "model.layers.3.block_sparse_moe.experts.67.w3", "model.layers.3.block_sparse_moe.experts.68.w3", "model.layers.3.block_sparse_moe.experts.69.w3", "model.layers.3.block_sparse_moe.experts.70.w3", "model.layers.3.block_sparse_moe.experts.71.w3", "model.layers.3.block_sparse_moe.experts.72.w3", "model.layers.3.block_sparse_moe.experts.73.w3", "model.layers.3.block_sparse_moe.experts.74.w3", "model.layers.3.block_sparse_moe.experts.75.w3", "model.layers.3.block_sparse_moe.experts.76.w3", "model.layers.3.block_sparse_moe.experts.77.w3", "model.layers.3.block_sparse_moe.experts.78.w3", "model.layers.3.block_sparse_moe.experts.79.w3", "model.layers.3.block_sparse_moe.experts.80.w3", "model.layers.3.block_sparse_moe.experts.81.w3", "model.layers.3.block_sparse_moe.experts.82.w3", "model.layers.3.block_sparse_moe.experts.83.w3", "model.layers.3.block_sparse_moe.experts.84.w3", "model.layers.3.block_sparse_moe.experts.85.w3", "model.layers.3.block_sparse_moe.experts.86.w3", "model.layers.3.block_sparse_moe.experts.87.w3", "model.layers.3.block_sparse_moe.experts.88.w3", "model.layers.3.block_sparse_moe.experts.89.w3", "model.layers.3.block_sparse_moe.experts.90.w3", "model.layers.3.block_sparse_moe.experts.91.w3", "model.layers.3.block_sparse_moe.experts.92.w3", "model.layers.3.block_sparse_moe.experts.93.w3", "model.layers.3.block_sparse_moe.experts.94.w3", "model.layers.3.block_sparse_moe.experts.95.w3", "model.layers.3.block_sparse_moe.experts.96.w3", "model.layers.3.block_sparse_moe.experts.97.w3", "model.layers.3.block_sparse_moe.experts.98.w3", "model.layers.3.block_sparse_moe.experts.99.w3", "model.layers.3.block_sparse_moe.experts.100.w3", "model.layers.3.block_sparse_moe.experts.101.w3", "model.layers.3.block_sparse_moe.experts.102.w3", "model.layers.3.block_sparse_moe.experts.103.w3", "model.layers.3.block_sparse_moe.experts.104.w3", "model.layers.3.block_sparse_moe.experts.105.w3", "model.layers.3.block_sparse_moe.experts.106.w3", "model.layers.3.block_sparse_moe.experts.107.w3", "model.layers.3.block_sparse_moe.experts.108.w3", "model.layers.3.block_sparse_moe.experts.109.w3", "model.layers.3.block_sparse_moe.experts.110.w3", "model.layers.3.block_sparse_moe.experts.111.w3", "model.layers.3.block_sparse_moe.experts.112.w3", "model.layers.3.block_sparse_moe.experts.113.w3", "model.layers.3.block_sparse_moe.experts.114.w3", "model.layers.3.block_sparse_moe.experts.115.w3", "model.layers.3.block_sparse_moe.experts.116.w3", "model.layers.3.block_sparse_moe.experts.117.w3", "model.layers.3.block_sparse_moe.experts.118.w3", "model.layers.3.block_sparse_moe.experts.119.w3", "model.layers.3.block_sparse_moe.experts.120.w3", "model.layers.3.block_sparse_moe.experts.121.w3", "model.layers.3.block_sparse_moe.experts.122.w3", "model.layers.3.block_sparse_moe.experts.123.w3", "model.layers.3.block_sparse_moe.experts.124.w3", "model.layers.3.block_sparse_moe.experts.125.w3", "model.layers.3.block_sparse_moe.experts.126.w3", "model.layers.3.block_sparse_moe.experts.127.w3", "model.layers.3.block_sparse_moe.experts.128.w3", "model.layers.3.block_sparse_moe.experts.129.w3", "model.layers.3.block_sparse_moe.experts.130.w3", "model.layers.3.block_sparse_moe.experts.131.w3", "model.layers.3.block_sparse_moe.experts.132.w3", "model.layers.3.block_sparse_moe.experts.133.w3", "model.layers.3.block_sparse_moe.experts.134.w3", "model.layers.3.block_sparse_moe.experts.135.w3", "model.layers.3.block_sparse_moe.experts.136.w3", "model.layers.3.block_sparse_moe.experts.137.w3", "model.layers.3.block_sparse_moe.experts.138.w3", "model.layers.3.block_sparse_moe.experts.139.w3", "model.layers.3.block_sparse_moe.experts.140.w3", "model.layers.3.block_sparse_moe.experts.141.w3", "model.layers.3.block_sparse_moe.experts.142.w3", "model.layers.3.block_sparse_moe.experts.143.w3", "model.layers.3.block_sparse_moe.experts.144.w3", "model.layers.3.block_sparse_moe.experts.145.w3", "model.layers.3.block_sparse_moe.experts.146.w3", "model.layers.3.block_sparse_moe.experts.147.w3", "model.layers.3.block_sparse_moe.experts.148.w3", "model.layers.3.block_sparse_moe.experts.149.w3", "model.layers.3.block_sparse_moe.experts.150.w3", "model.layers.3.block_sparse_moe.experts.151.w3", "model.layers.3.block_sparse_moe.experts.152.w3", "model.layers.3.block_sparse_moe.experts.153.w3", "model.layers.3.block_sparse_moe.experts.154.w3", "model.layers.3.block_sparse_moe.experts.155.w3", "model.layers.3.block_sparse_moe.experts.156.w3", "model.layers.3.block_sparse_moe.experts.157.w3", "model.layers.3.block_sparse_moe.experts.158.w3", "model.layers.3.block_sparse_moe.experts.159.w3", "model.layers.3.block_sparse_moe.experts.160.w3", "model.layers.3.block_sparse_moe.experts.161.w3", "model.layers.3.block_sparse_moe.experts.162.w3", "model.layers.3.block_sparse_moe.experts.163.w3", "model.layers.3.block_sparse_moe.experts.164.w3", "model.layers.3.block_sparse_moe.experts.165.w3", "model.layers.3.block_sparse_moe.experts.166.w3", "model.layers.3.block_sparse_moe.experts.167.w3", "model.layers.3.block_sparse_moe.experts.168.w3", "model.layers.3.block_sparse_moe.experts.169.w3", "model.layers.3.block_sparse_moe.experts.170.w3", "model.layers.3.block_sparse_moe.experts.171.w3", "model.layers.3.block_sparse_moe.experts.172.w3", "model.layers.3.block_sparse_moe.experts.173.w3", "model.layers.3.block_sparse_moe.experts.174.w3", "model.layers.3.block_sparse_moe.experts.175.w3", "model.layers.3.block_sparse_moe.experts.176.w3", "model.layers.3.block_sparse_moe.experts.177.w3", "model.layers.3.block_sparse_moe.experts.178.w3", "model.layers.3.block_sparse_moe.experts.179.w3", "model.layers.3.block_sparse_moe.experts.180.w3", "model.layers.3.block_sparse_moe.experts.181.w3", "model.layers.3.block_sparse_moe.experts.182.w3", "model.layers.3.block_sparse_moe.experts.183.w3", "model.layers.3.block_sparse_moe.experts.184.w3", "model.layers.3.block_sparse_moe.experts.185.w3", "model.layers.3.block_sparse_moe.experts.186.w3", "model.layers.3.block_sparse_moe.experts.187.w3", "model.layers.3.block_sparse_moe.experts.188.w3", "model.layers.3.block_sparse_moe.experts.189.w3", "model.layers.3.block_sparse_moe.experts.190.w3", "model.layers.3.block_sparse_moe.experts.191.w3", "model.layers.3.block_sparse_moe.experts.192.w3", "model.layers.3.block_sparse_moe.experts.193.w3", "model.layers.3.block_sparse_moe.experts.194.w3", "model.layers.3.block_sparse_moe.experts.195.w3", "model.layers.3.block_sparse_moe.experts.196.w3", "model.layers.3.block_sparse_moe.experts.197.w3", "model.layers.3.block_sparse_moe.experts.198.w3", "model.layers.3.block_sparse_moe.experts.199.w3", "model.layers.3.block_sparse_moe.experts.200.w3", "model.layers.3.block_sparse_moe.experts.201.w3", "model.layers.3.block_sparse_moe.experts.202.w3", "model.layers.3.block_sparse_moe.experts.203.w3", "model.layers.3.block_sparse_moe.experts.204.w3", "model.layers.3.block_sparse_moe.experts.205.w3", "model.layers.3.block_sparse_moe.experts.206.w3", "model.layers.3.block_sparse_moe.experts.207.w3", "model.layers.3.block_sparse_moe.experts.208.w3", "model.layers.3.block_sparse_moe.experts.209.w3", "model.layers.3.block_sparse_moe.experts.210.w3", "model.layers.3.block_sparse_moe.experts.211.w3", "model.layers.3.block_sparse_moe.experts.212.w3", "model.layers.3.block_sparse_moe.experts.213.w3", "model.layers.3.block_sparse_moe.experts.214.w3", "model.layers.3.block_sparse_moe.experts.215.w3", "model.layers.3.block_sparse_moe.experts.216.w3", "model.layers.3.block_sparse_moe.experts.217.w3", "model.layers.3.block_sparse_moe.experts.218.w3", "model.layers.3.block_sparse_moe.experts.219.w3", "model.layers.3.block_sparse_moe.experts.220.w3", "model.layers.3.block_sparse_moe.experts.221.w3", "model.layers.3.block_sparse_moe.experts.222.w3", "model.layers.3.block_sparse_moe.experts.223.w3", "model.layers.3.block_sparse_moe.experts.224.w3", "model.layers.3.block_sparse_moe.experts.225.w3", "model.layers.3.block_sparse_moe.experts.226.w3", "model.layers.3.block_sparse_moe.experts.227.w3", "model.layers.3.block_sparse_moe.experts.228.w3", "model.layers.3.block_sparse_moe.experts.229.w3", "model.layers.3.block_sparse_moe.experts.230.w3", "model.layers.3.block_sparse_moe.experts.231.w3", "model.layers.3.block_sparse_moe.experts.232.w3", "model.layers.3.block_sparse_moe.experts.233.w3", "model.layers.3.block_sparse_moe.experts.234.w3", "model.layers.3.block_sparse_moe.experts.235.w3", "model.layers.3.block_sparse_moe.experts.236.w3", "model.layers.3.block_sparse_moe.experts.237.w3", "model.layers.3.block_sparse_moe.experts.238.w3", "model.layers.3.block_sparse_moe.experts.239.w3", "model.layers.3.block_sparse_moe.experts.240.w3", "model.layers.3.block_sparse_moe.experts.241.w3", "model.layers.3.block_sparse_moe.experts.242.w3", "model.layers.3.block_sparse_moe.experts.243.w3", "model.layers.3.block_sparse_moe.experts.244.w3", "model.layers.3.block_sparse_moe.experts.245.w3", "model.layers.3.block_sparse_moe.experts.246.w3", "model.layers.3.block_sparse_moe.experts.247.w3", "model.layers.3.block_sparse_moe.experts.248.w3", "model.layers.3.block_sparse_moe.experts.249.w3", "model.layers.3.block_sparse_moe.experts.250.w3", "model.layers.3.block_sparse_moe.experts.251.w3", "model.layers.3.block_sparse_moe.experts.252.w3", "model.layers.3.block_sparse_moe.experts.253.w3", "model.layers.3.block_sparse_moe.experts.254.w3", "model.layers.3.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00024693533778190335, "dbits": 2415919104 } ] }, { "idx": 19, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w2", "model.layers.3.block_sparse_moe.experts.1.w2", "model.layers.3.block_sparse_moe.experts.2.w2", "model.layers.3.block_sparse_moe.experts.3.w2", "model.layers.3.block_sparse_moe.experts.4.w2", "model.layers.3.block_sparse_moe.experts.5.w2", "model.layers.3.block_sparse_moe.experts.6.w2", "model.layers.3.block_sparse_moe.experts.7.w2", "model.layers.3.block_sparse_moe.experts.8.w2", "model.layers.3.block_sparse_moe.experts.9.w2", "model.layers.3.block_sparse_moe.experts.10.w2", "model.layers.3.block_sparse_moe.experts.11.w2", "model.layers.3.block_sparse_moe.experts.12.w2", "model.layers.3.block_sparse_moe.experts.13.w2", "model.layers.3.block_sparse_moe.experts.14.w2", "model.layers.3.block_sparse_moe.experts.15.w2", "model.layers.3.block_sparse_moe.experts.16.w2", "model.layers.3.block_sparse_moe.experts.17.w2", "model.layers.3.block_sparse_moe.experts.18.w2", "model.layers.3.block_sparse_moe.experts.19.w2", "model.layers.3.block_sparse_moe.experts.20.w2", "model.layers.3.block_sparse_moe.experts.21.w2", "model.layers.3.block_sparse_moe.experts.22.w2", "model.layers.3.block_sparse_moe.experts.23.w2", "model.layers.3.block_sparse_moe.experts.24.w2", "model.layers.3.block_sparse_moe.experts.25.w2", "model.layers.3.block_sparse_moe.experts.26.w2", "model.layers.3.block_sparse_moe.experts.27.w2", "model.layers.3.block_sparse_moe.experts.28.w2", "model.layers.3.block_sparse_moe.experts.29.w2", "model.layers.3.block_sparse_moe.experts.30.w2", "model.layers.3.block_sparse_moe.experts.31.w2", "model.layers.3.block_sparse_moe.experts.32.w2", "model.layers.3.block_sparse_moe.experts.33.w2", "model.layers.3.block_sparse_moe.experts.34.w2", "model.layers.3.block_sparse_moe.experts.35.w2", "model.layers.3.block_sparse_moe.experts.36.w2", "model.layers.3.block_sparse_moe.experts.37.w2", "model.layers.3.block_sparse_moe.experts.38.w2", "model.layers.3.block_sparse_moe.experts.39.w2", "model.layers.3.block_sparse_moe.experts.40.w2", "model.layers.3.block_sparse_moe.experts.41.w2", "model.layers.3.block_sparse_moe.experts.42.w2", "model.layers.3.block_sparse_moe.experts.43.w2", "model.layers.3.block_sparse_moe.experts.44.w2", "model.layers.3.block_sparse_moe.experts.45.w2", "model.layers.3.block_sparse_moe.experts.46.w2", "model.layers.3.block_sparse_moe.experts.47.w2", "model.layers.3.block_sparse_moe.experts.48.w2", "model.layers.3.block_sparse_moe.experts.49.w2", "model.layers.3.block_sparse_moe.experts.50.w2", "model.layers.3.block_sparse_moe.experts.51.w2", "model.layers.3.block_sparse_moe.experts.52.w2", "model.layers.3.block_sparse_moe.experts.53.w2", "model.layers.3.block_sparse_moe.experts.54.w2", "model.layers.3.block_sparse_moe.experts.55.w2", "model.layers.3.block_sparse_moe.experts.56.w2", "model.layers.3.block_sparse_moe.experts.57.w2", "model.layers.3.block_sparse_moe.experts.58.w2", "model.layers.3.block_sparse_moe.experts.59.w2", "model.layers.3.block_sparse_moe.experts.60.w2", "model.layers.3.block_sparse_moe.experts.61.w2", "model.layers.3.block_sparse_moe.experts.62.w2", "model.layers.3.block_sparse_moe.experts.63.w2", "model.layers.3.block_sparse_moe.experts.64.w2", "model.layers.3.block_sparse_moe.experts.65.w2", "model.layers.3.block_sparse_moe.experts.66.w2", "model.layers.3.block_sparse_moe.experts.67.w2", "model.layers.3.block_sparse_moe.experts.68.w2", "model.layers.3.block_sparse_moe.experts.69.w2", "model.layers.3.block_sparse_moe.experts.70.w2", "model.layers.3.block_sparse_moe.experts.71.w2", "model.layers.3.block_sparse_moe.experts.72.w2", "model.layers.3.block_sparse_moe.experts.73.w2", "model.layers.3.block_sparse_moe.experts.74.w2", "model.layers.3.block_sparse_moe.experts.75.w2", "model.layers.3.block_sparse_moe.experts.76.w2", "model.layers.3.block_sparse_moe.experts.77.w2", "model.layers.3.block_sparse_moe.experts.78.w2", "model.layers.3.block_sparse_moe.experts.79.w2", "model.layers.3.block_sparse_moe.experts.80.w2", "model.layers.3.block_sparse_moe.experts.81.w2", "model.layers.3.block_sparse_moe.experts.82.w2", "model.layers.3.block_sparse_moe.experts.83.w2", "model.layers.3.block_sparse_moe.experts.84.w2", "model.layers.3.block_sparse_moe.experts.85.w2", "model.layers.3.block_sparse_moe.experts.86.w2", "model.layers.3.block_sparse_moe.experts.87.w2", "model.layers.3.block_sparse_moe.experts.88.w2", "model.layers.3.block_sparse_moe.experts.89.w2", "model.layers.3.block_sparse_moe.experts.90.w2", "model.layers.3.block_sparse_moe.experts.91.w2", "model.layers.3.block_sparse_moe.experts.92.w2", "model.layers.3.block_sparse_moe.experts.93.w2", "model.layers.3.block_sparse_moe.experts.94.w2", "model.layers.3.block_sparse_moe.experts.95.w2", "model.layers.3.block_sparse_moe.experts.96.w2", "model.layers.3.block_sparse_moe.experts.97.w2", "model.layers.3.block_sparse_moe.experts.98.w2", "model.layers.3.block_sparse_moe.experts.99.w2", "model.layers.3.block_sparse_moe.experts.100.w2", "model.layers.3.block_sparse_moe.experts.101.w2", "model.layers.3.block_sparse_moe.experts.102.w2", "model.layers.3.block_sparse_moe.experts.103.w2", "model.layers.3.block_sparse_moe.experts.104.w2", "model.layers.3.block_sparse_moe.experts.105.w2", "model.layers.3.block_sparse_moe.experts.106.w2", "model.layers.3.block_sparse_moe.experts.107.w2", "model.layers.3.block_sparse_moe.experts.108.w2", "model.layers.3.block_sparse_moe.experts.109.w2", "model.layers.3.block_sparse_moe.experts.110.w2", "model.layers.3.block_sparse_moe.experts.111.w2", "model.layers.3.block_sparse_moe.experts.112.w2", "model.layers.3.block_sparse_moe.experts.113.w2", "model.layers.3.block_sparse_moe.experts.114.w2", "model.layers.3.block_sparse_moe.experts.115.w2", "model.layers.3.block_sparse_moe.experts.116.w2", "model.layers.3.block_sparse_moe.experts.117.w2", "model.layers.3.block_sparse_moe.experts.118.w2", "model.layers.3.block_sparse_moe.experts.119.w2", "model.layers.3.block_sparse_moe.experts.120.w2", "model.layers.3.block_sparse_moe.experts.121.w2", "model.layers.3.block_sparse_moe.experts.122.w2", "model.layers.3.block_sparse_moe.experts.123.w2", "model.layers.3.block_sparse_moe.experts.124.w2", "model.layers.3.block_sparse_moe.experts.125.w2", "model.layers.3.block_sparse_moe.experts.126.w2", "model.layers.3.block_sparse_moe.experts.127.w2", "model.layers.3.block_sparse_moe.experts.128.w2", "model.layers.3.block_sparse_moe.experts.129.w2", "model.layers.3.block_sparse_moe.experts.130.w2", "model.layers.3.block_sparse_moe.experts.131.w2", "model.layers.3.block_sparse_moe.experts.132.w2", "model.layers.3.block_sparse_moe.experts.133.w2", "model.layers.3.block_sparse_moe.experts.134.w2", "model.layers.3.block_sparse_moe.experts.135.w2", "model.layers.3.block_sparse_moe.experts.136.w2", "model.layers.3.block_sparse_moe.experts.137.w2", "model.layers.3.block_sparse_moe.experts.138.w2", "model.layers.3.block_sparse_moe.experts.139.w2", "model.layers.3.block_sparse_moe.experts.140.w2", "model.layers.3.block_sparse_moe.experts.141.w2", "model.layers.3.block_sparse_moe.experts.142.w2", "model.layers.3.block_sparse_moe.experts.143.w2", "model.layers.3.block_sparse_moe.experts.144.w2", "model.layers.3.block_sparse_moe.experts.145.w2", "model.layers.3.block_sparse_moe.experts.146.w2", "model.layers.3.block_sparse_moe.experts.147.w2", "model.layers.3.block_sparse_moe.experts.148.w2", "model.layers.3.block_sparse_moe.experts.149.w2", "model.layers.3.block_sparse_moe.experts.150.w2", "model.layers.3.block_sparse_moe.experts.151.w2", "model.layers.3.block_sparse_moe.experts.152.w2", "model.layers.3.block_sparse_moe.experts.153.w2", "model.layers.3.block_sparse_moe.experts.154.w2", "model.layers.3.block_sparse_moe.experts.155.w2", "model.layers.3.block_sparse_moe.experts.156.w2", "model.layers.3.block_sparse_moe.experts.157.w2", "model.layers.3.block_sparse_moe.experts.158.w2", "model.layers.3.block_sparse_moe.experts.159.w2", "model.layers.3.block_sparse_moe.experts.160.w2", "model.layers.3.block_sparse_moe.experts.161.w2", "model.layers.3.block_sparse_moe.experts.162.w2", "model.layers.3.block_sparse_moe.experts.163.w2", "model.layers.3.block_sparse_moe.experts.164.w2", "model.layers.3.block_sparse_moe.experts.165.w2", "model.layers.3.block_sparse_moe.experts.166.w2", "model.layers.3.block_sparse_moe.experts.167.w2", "model.layers.3.block_sparse_moe.experts.168.w2", "model.layers.3.block_sparse_moe.experts.169.w2", "model.layers.3.block_sparse_moe.experts.170.w2", "model.layers.3.block_sparse_moe.experts.171.w2", "model.layers.3.block_sparse_moe.experts.172.w2", "model.layers.3.block_sparse_moe.experts.173.w2", "model.layers.3.block_sparse_moe.experts.174.w2", "model.layers.3.block_sparse_moe.experts.175.w2", "model.layers.3.block_sparse_moe.experts.176.w2", "model.layers.3.block_sparse_moe.experts.177.w2", "model.layers.3.block_sparse_moe.experts.178.w2", "model.layers.3.block_sparse_moe.experts.179.w2", "model.layers.3.block_sparse_moe.experts.180.w2", "model.layers.3.block_sparse_moe.experts.181.w2", "model.layers.3.block_sparse_moe.experts.182.w2", "model.layers.3.block_sparse_moe.experts.183.w2", "model.layers.3.block_sparse_moe.experts.184.w2", "model.layers.3.block_sparse_moe.experts.185.w2", "model.layers.3.block_sparse_moe.experts.186.w2", "model.layers.3.block_sparse_moe.experts.187.w2", "model.layers.3.block_sparse_moe.experts.188.w2", "model.layers.3.block_sparse_moe.experts.189.w2", "model.layers.3.block_sparse_moe.experts.190.w2", "model.layers.3.block_sparse_moe.experts.191.w2", "model.layers.3.block_sparse_moe.experts.192.w2", "model.layers.3.block_sparse_moe.experts.193.w2", "model.layers.3.block_sparse_moe.experts.194.w2", "model.layers.3.block_sparse_moe.experts.195.w2", "model.layers.3.block_sparse_moe.experts.196.w2", "model.layers.3.block_sparse_moe.experts.197.w2", "model.layers.3.block_sparse_moe.experts.198.w2", "model.layers.3.block_sparse_moe.experts.199.w2", "model.layers.3.block_sparse_moe.experts.200.w2", "model.layers.3.block_sparse_moe.experts.201.w2", "model.layers.3.block_sparse_moe.experts.202.w2", "model.layers.3.block_sparse_moe.experts.203.w2", "model.layers.3.block_sparse_moe.experts.204.w2", "model.layers.3.block_sparse_moe.experts.205.w2", "model.layers.3.block_sparse_moe.experts.206.w2", "model.layers.3.block_sparse_moe.experts.207.w2", "model.layers.3.block_sparse_moe.experts.208.w2", "model.layers.3.block_sparse_moe.experts.209.w2", "model.layers.3.block_sparse_moe.experts.210.w2", "model.layers.3.block_sparse_moe.experts.211.w2", "model.layers.3.block_sparse_moe.experts.212.w2", "model.layers.3.block_sparse_moe.experts.213.w2", "model.layers.3.block_sparse_moe.experts.214.w2", "model.layers.3.block_sparse_moe.experts.215.w2", "model.layers.3.block_sparse_moe.experts.216.w2", "model.layers.3.block_sparse_moe.experts.217.w2", "model.layers.3.block_sparse_moe.experts.218.w2", "model.layers.3.block_sparse_moe.experts.219.w2", "model.layers.3.block_sparse_moe.experts.220.w2", "model.layers.3.block_sparse_moe.experts.221.w2", "model.layers.3.block_sparse_moe.experts.222.w2", "model.layers.3.block_sparse_moe.experts.223.w2", "model.layers.3.block_sparse_moe.experts.224.w2", "model.layers.3.block_sparse_moe.experts.225.w2", "model.layers.3.block_sparse_moe.experts.226.w2", "model.layers.3.block_sparse_moe.experts.227.w2", "model.layers.3.block_sparse_moe.experts.228.w2", "model.layers.3.block_sparse_moe.experts.229.w2", "model.layers.3.block_sparse_moe.experts.230.w2", "model.layers.3.block_sparse_moe.experts.231.w2", "model.layers.3.block_sparse_moe.experts.232.w2", "model.layers.3.block_sparse_moe.experts.233.w2", "model.layers.3.block_sparse_moe.experts.234.w2", "model.layers.3.block_sparse_moe.experts.235.w2", "model.layers.3.block_sparse_moe.experts.236.w2", "model.layers.3.block_sparse_moe.experts.237.w2", "model.layers.3.block_sparse_moe.experts.238.w2", "model.layers.3.block_sparse_moe.experts.239.w2", "model.layers.3.block_sparse_moe.experts.240.w2", "model.layers.3.block_sparse_moe.experts.241.w2", "model.layers.3.block_sparse_moe.experts.242.w2", "model.layers.3.block_sparse_moe.experts.243.w2", "model.layers.3.block_sparse_moe.experts.244.w2", "model.layers.3.block_sparse_moe.experts.245.w2", "model.layers.3.block_sparse_moe.experts.246.w2", "model.layers.3.block_sparse_moe.experts.247.w2", "model.layers.3.block_sparse_moe.experts.248.w2", "model.layers.3.block_sparse_moe.experts.249.w2", "model.layers.3.block_sparse_moe.experts.250.w2", "model.layers.3.block_sparse_moe.experts.251.w2", "model.layers.3.block_sparse_moe.experts.252.w2", "model.layers.3.block_sparse_moe.experts.253.w2", "model.layers.3.block_sparse_moe.experts.254.w2", "model.layers.3.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00039850529283284863, "dbits": 1207959552 } ] }, { "idx": 20, "layers": [ "model.layers.4.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0011573772877454813, "dbits": 18874368 } ] }, { "idx": 21, "layers": [ "model.layers.4.self_attn.k_proj", "model.layers.4.self_attn.v_proj" ], "candidates": [ { "dkld": -0.009553419239819047, "dbits": 6291456 } ] }, { "idx": 22, "layers": [ "model.layers.4.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004154526256024846, "dbits": 18874368 } ] }, { "idx": 23, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w1", "model.layers.4.block_sparse_moe.experts.1.w1", "model.layers.4.block_sparse_moe.experts.2.w1", "model.layers.4.block_sparse_moe.experts.3.w1", "model.layers.4.block_sparse_moe.experts.4.w1", "model.layers.4.block_sparse_moe.experts.5.w1", "model.layers.4.block_sparse_moe.experts.6.w1", "model.layers.4.block_sparse_moe.experts.7.w1", "model.layers.4.block_sparse_moe.experts.8.w1", "model.layers.4.block_sparse_moe.experts.9.w1", "model.layers.4.block_sparse_moe.experts.10.w1", "model.layers.4.block_sparse_moe.experts.11.w1", "model.layers.4.block_sparse_moe.experts.12.w1", "model.layers.4.block_sparse_moe.experts.13.w1", "model.layers.4.block_sparse_moe.experts.14.w1", "model.layers.4.block_sparse_moe.experts.15.w1", "model.layers.4.block_sparse_moe.experts.16.w1", "model.layers.4.block_sparse_moe.experts.17.w1", "model.layers.4.block_sparse_moe.experts.18.w1", "model.layers.4.block_sparse_moe.experts.19.w1", "model.layers.4.block_sparse_moe.experts.20.w1", "model.layers.4.block_sparse_moe.experts.21.w1", "model.layers.4.block_sparse_moe.experts.22.w1", "model.layers.4.block_sparse_moe.experts.23.w1", "model.layers.4.block_sparse_moe.experts.24.w1", "model.layers.4.block_sparse_moe.experts.25.w1", "model.layers.4.block_sparse_moe.experts.26.w1", "model.layers.4.block_sparse_moe.experts.27.w1", "model.layers.4.block_sparse_moe.experts.28.w1", "model.layers.4.block_sparse_moe.experts.29.w1", "model.layers.4.block_sparse_moe.experts.30.w1", "model.layers.4.block_sparse_moe.experts.31.w1", "model.layers.4.block_sparse_moe.experts.32.w1", "model.layers.4.block_sparse_moe.experts.33.w1", "model.layers.4.block_sparse_moe.experts.34.w1", "model.layers.4.block_sparse_moe.experts.35.w1", "model.layers.4.block_sparse_moe.experts.36.w1", "model.layers.4.block_sparse_moe.experts.37.w1", "model.layers.4.block_sparse_moe.experts.38.w1", "model.layers.4.block_sparse_moe.experts.39.w1", "model.layers.4.block_sparse_moe.experts.40.w1", "model.layers.4.block_sparse_moe.experts.41.w1", "model.layers.4.block_sparse_moe.experts.42.w1", "model.layers.4.block_sparse_moe.experts.43.w1", "model.layers.4.block_sparse_moe.experts.44.w1", "model.layers.4.block_sparse_moe.experts.45.w1", "model.layers.4.block_sparse_moe.experts.46.w1", "model.layers.4.block_sparse_moe.experts.47.w1", "model.layers.4.block_sparse_moe.experts.48.w1", "model.layers.4.block_sparse_moe.experts.49.w1", "model.layers.4.block_sparse_moe.experts.50.w1", "model.layers.4.block_sparse_moe.experts.51.w1", "model.layers.4.block_sparse_moe.experts.52.w1", "model.layers.4.block_sparse_moe.experts.53.w1", "model.layers.4.block_sparse_moe.experts.54.w1", "model.layers.4.block_sparse_moe.experts.55.w1", "model.layers.4.block_sparse_moe.experts.56.w1", "model.layers.4.block_sparse_moe.experts.57.w1", "model.layers.4.block_sparse_moe.experts.58.w1", "model.layers.4.block_sparse_moe.experts.59.w1", "model.layers.4.block_sparse_moe.experts.60.w1", "model.layers.4.block_sparse_moe.experts.61.w1", "model.layers.4.block_sparse_moe.experts.62.w1", "model.layers.4.block_sparse_moe.experts.63.w1", "model.layers.4.block_sparse_moe.experts.64.w1", "model.layers.4.block_sparse_moe.experts.65.w1", "model.layers.4.block_sparse_moe.experts.66.w1", "model.layers.4.block_sparse_moe.experts.67.w1", "model.layers.4.block_sparse_moe.experts.68.w1", "model.layers.4.block_sparse_moe.experts.69.w1", "model.layers.4.block_sparse_moe.experts.70.w1", "model.layers.4.block_sparse_moe.experts.71.w1", "model.layers.4.block_sparse_moe.experts.72.w1", "model.layers.4.block_sparse_moe.experts.73.w1", "model.layers.4.block_sparse_moe.experts.74.w1", "model.layers.4.block_sparse_moe.experts.75.w1", "model.layers.4.block_sparse_moe.experts.76.w1", "model.layers.4.block_sparse_moe.experts.77.w1", "model.layers.4.block_sparse_moe.experts.78.w1", "model.layers.4.block_sparse_moe.experts.79.w1", "model.layers.4.block_sparse_moe.experts.80.w1", "model.layers.4.block_sparse_moe.experts.81.w1", "model.layers.4.block_sparse_moe.experts.82.w1", "model.layers.4.block_sparse_moe.experts.83.w1", "model.layers.4.block_sparse_moe.experts.84.w1", "model.layers.4.block_sparse_moe.experts.85.w1", "model.layers.4.block_sparse_moe.experts.86.w1", "model.layers.4.block_sparse_moe.experts.87.w1", "model.layers.4.block_sparse_moe.experts.88.w1", "model.layers.4.block_sparse_moe.experts.89.w1", "model.layers.4.block_sparse_moe.experts.90.w1", "model.layers.4.block_sparse_moe.experts.91.w1", "model.layers.4.block_sparse_moe.experts.92.w1", "model.layers.4.block_sparse_moe.experts.93.w1", "model.layers.4.block_sparse_moe.experts.94.w1", "model.layers.4.block_sparse_moe.experts.95.w1", "model.layers.4.block_sparse_moe.experts.96.w1", "model.layers.4.block_sparse_moe.experts.97.w1", "model.layers.4.block_sparse_moe.experts.98.w1", "model.layers.4.block_sparse_moe.experts.99.w1", "model.layers.4.block_sparse_moe.experts.100.w1", "model.layers.4.block_sparse_moe.experts.101.w1", "model.layers.4.block_sparse_moe.experts.102.w1", "model.layers.4.block_sparse_moe.experts.103.w1", "model.layers.4.block_sparse_moe.experts.104.w1", "model.layers.4.block_sparse_moe.experts.105.w1", "model.layers.4.block_sparse_moe.experts.106.w1", "model.layers.4.block_sparse_moe.experts.107.w1", "model.layers.4.block_sparse_moe.experts.108.w1", "model.layers.4.block_sparse_moe.experts.109.w1", "model.layers.4.block_sparse_moe.experts.110.w1", "model.layers.4.block_sparse_moe.experts.111.w1", "model.layers.4.block_sparse_moe.experts.112.w1", "model.layers.4.block_sparse_moe.experts.113.w1", "model.layers.4.block_sparse_moe.experts.114.w1", "model.layers.4.block_sparse_moe.experts.115.w1", "model.layers.4.block_sparse_moe.experts.116.w1", "model.layers.4.block_sparse_moe.experts.117.w1", "model.layers.4.block_sparse_moe.experts.118.w1", "model.layers.4.block_sparse_moe.experts.119.w1", "model.layers.4.block_sparse_moe.experts.120.w1", "model.layers.4.block_sparse_moe.experts.121.w1", "model.layers.4.block_sparse_moe.experts.122.w1", "model.layers.4.block_sparse_moe.experts.123.w1", "model.layers.4.block_sparse_moe.experts.124.w1", "model.layers.4.block_sparse_moe.experts.125.w1", "model.layers.4.block_sparse_moe.experts.126.w1", "model.layers.4.block_sparse_moe.experts.127.w1", "model.layers.4.block_sparse_moe.experts.128.w1", "model.layers.4.block_sparse_moe.experts.129.w1", "model.layers.4.block_sparse_moe.experts.130.w1", "model.layers.4.block_sparse_moe.experts.131.w1", "model.layers.4.block_sparse_moe.experts.132.w1", "model.layers.4.block_sparse_moe.experts.133.w1", "model.layers.4.block_sparse_moe.experts.134.w1", "model.layers.4.block_sparse_moe.experts.135.w1", "model.layers.4.block_sparse_moe.experts.136.w1", "model.layers.4.block_sparse_moe.experts.137.w1", "model.layers.4.block_sparse_moe.experts.138.w1", "model.layers.4.block_sparse_moe.experts.139.w1", "model.layers.4.block_sparse_moe.experts.140.w1", "model.layers.4.block_sparse_moe.experts.141.w1", "model.layers.4.block_sparse_moe.experts.142.w1", "model.layers.4.block_sparse_moe.experts.143.w1", "model.layers.4.block_sparse_moe.experts.144.w1", "model.layers.4.block_sparse_moe.experts.145.w1", "model.layers.4.block_sparse_moe.experts.146.w1", "model.layers.4.block_sparse_moe.experts.147.w1", "model.layers.4.block_sparse_moe.experts.148.w1", "model.layers.4.block_sparse_moe.experts.149.w1", "model.layers.4.block_sparse_moe.experts.150.w1", "model.layers.4.block_sparse_moe.experts.151.w1", "model.layers.4.block_sparse_moe.experts.152.w1", "model.layers.4.block_sparse_moe.experts.153.w1", "model.layers.4.block_sparse_moe.experts.154.w1", "model.layers.4.block_sparse_moe.experts.155.w1", "model.layers.4.block_sparse_moe.experts.156.w1", "model.layers.4.block_sparse_moe.experts.157.w1", "model.layers.4.block_sparse_moe.experts.158.w1", "model.layers.4.block_sparse_moe.experts.159.w1", "model.layers.4.block_sparse_moe.experts.160.w1", "model.layers.4.block_sparse_moe.experts.161.w1", "model.layers.4.block_sparse_moe.experts.162.w1", "model.layers.4.block_sparse_moe.experts.163.w1", "model.layers.4.block_sparse_moe.experts.164.w1", "model.layers.4.block_sparse_moe.experts.165.w1", "model.layers.4.block_sparse_moe.experts.166.w1", "model.layers.4.block_sparse_moe.experts.167.w1", "model.layers.4.block_sparse_moe.experts.168.w1", "model.layers.4.block_sparse_moe.experts.169.w1", "model.layers.4.block_sparse_moe.experts.170.w1", "model.layers.4.block_sparse_moe.experts.171.w1", "model.layers.4.block_sparse_moe.experts.172.w1", "model.layers.4.block_sparse_moe.experts.173.w1", "model.layers.4.block_sparse_moe.experts.174.w1", "model.layers.4.block_sparse_moe.experts.175.w1", "model.layers.4.block_sparse_moe.experts.176.w1", "model.layers.4.block_sparse_moe.experts.177.w1", "model.layers.4.block_sparse_moe.experts.178.w1", "model.layers.4.block_sparse_moe.experts.179.w1", "model.layers.4.block_sparse_moe.experts.180.w1", "model.layers.4.block_sparse_moe.experts.181.w1", "model.layers.4.block_sparse_moe.experts.182.w1", "model.layers.4.block_sparse_moe.experts.183.w1", "model.layers.4.block_sparse_moe.experts.184.w1", "model.layers.4.block_sparse_moe.experts.185.w1", "model.layers.4.block_sparse_moe.experts.186.w1", "model.layers.4.block_sparse_moe.experts.187.w1", "model.layers.4.block_sparse_moe.experts.188.w1", "model.layers.4.block_sparse_moe.experts.189.w1", "model.layers.4.block_sparse_moe.experts.190.w1", "model.layers.4.block_sparse_moe.experts.191.w1", "model.layers.4.block_sparse_moe.experts.192.w1", "model.layers.4.block_sparse_moe.experts.193.w1", "model.layers.4.block_sparse_moe.experts.194.w1", "model.layers.4.block_sparse_moe.experts.195.w1", "model.layers.4.block_sparse_moe.experts.196.w1", "model.layers.4.block_sparse_moe.experts.197.w1", "model.layers.4.block_sparse_moe.experts.198.w1", "model.layers.4.block_sparse_moe.experts.199.w1", "model.layers.4.block_sparse_moe.experts.200.w1", "model.layers.4.block_sparse_moe.experts.201.w1", "model.layers.4.block_sparse_moe.experts.202.w1", "model.layers.4.block_sparse_moe.experts.203.w1", "model.layers.4.block_sparse_moe.experts.204.w1", "model.layers.4.block_sparse_moe.experts.205.w1", "model.layers.4.block_sparse_moe.experts.206.w1", "model.layers.4.block_sparse_moe.experts.207.w1", "model.layers.4.block_sparse_moe.experts.208.w1", "model.layers.4.block_sparse_moe.experts.209.w1", "model.layers.4.block_sparse_moe.experts.210.w1", "model.layers.4.block_sparse_moe.experts.211.w1", "model.layers.4.block_sparse_moe.experts.212.w1", "model.layers.4.block_sparse_moe.experts.213.w1", "model.layers.4.block_sparse_moe.experts.214.w1", "model.layers.4.block_sparse_moe.experts.215.w1", "model.layers.4.block_sparse_moe.experts.216.w1", "model.layers.4.block_sparse_moe.experts.217.w1", "model.layers.4.block_sparse_moe.experts.218.w1", "model.layers.4.block_sparse_moe.experts.219.w1", "model.layers.4.block_sparse_moe.experts.220.w1", "model.layers.4.block_sparse_moe.experts.221.w1", "model.layers.4.block_sparse_moe.experts.222.w1", "model.layers.4.block_sparse_moe.experts.223.w1", "model.layers.4.block_sparse_moe.experts.224.w1", "model.layers.4.block_sparse_moe.experts.225.w1", "model.layers.4.block_sparse_moe.experts.226.w1", "model.layers.4.block_sparse_moe.experts.227.w1", "model.layers.4.block_sparse_moe.experts.228.w1", "model.layers.4.block_sparse_moe.experts.229.w1", "model.layers.4.block_sparse_moe.experts.230.w1", "model.layers.4.block_sparse_moe.experts.231.w1", "model.layers.4.block_sparse_moe.experts.232.w1", "model.layers.4.block_sparse_moe.experts.233.w1", "model.layers.4.block_sparse_moe.experts.234.w1", "model.layers.4.block_sparse_moe.experts.235.w1", "model.layers.4.block_sparse_moe.experts.236.w1", "model.layers.4.block_sparse_moe.experts.237.w1", "model.layers.4.block_sparse_moe.experts.238.w1", "model.layers.4.block_sparse_moe.experts.239.w1", "model.layers.4.block_sparse_moe.experts.240.w1", "model.layers.4.block_sparse_moe.experts.241.w1", "model.layers.4.block_sparse_moe.experts.242.w1", "model.layers.4.block_sparse_moe.experts.243.w1", "model.layers.4.block_sparse_moe.experts.244.w1", "model.layers.4.block_sparse_moe.experts.245.w1", "model.layers.4.block_sparse_moe.experts.246.w1", "model.layers.4.block_sparse_moe.experts.247.w1", "model.layers.4.block_sparse_moe.experts.248.w1", "model.layers.4.block_sparse_moe.experts.249.w1", "model.layers.4.block_sparse_moe.experts.250.w1", "model.layers.4.block_sparse_moe.experts.251.w1", "model.layers.4.block_sparse_moe.experts.252.w1", "model.layers.4.block_sparse_moe.experts.253.w1", "model.layers.4.block_sparse_moe.experts.254.w1", "model.layers.4.block_sparse_moe.experts.255.w1", "model.layers.4.block_sparse_moe.experts.0.w3", "model.layers.4.block_sparse_moe.experts.1.w3", "model.layers.4.block_sparse_moe.experts.2.w3", "model.layers.4.block_sparse_moe.experts.3.w3", "model.layers.4.block_sparse_moe.experts.4.w3", "model.layers.4.block_sparse_moe.experts.5.w3", "model.layers.4.block_sparse_moe.experts.6.w3", "model.layers.4.block_sparse_moe.experts.7.w3", "model.layers.4.block_sparse_moe.experts.8.w3", "model.layers.4.block_sparse_moe.experts.9.w3", "model.layers.4.block_sparse_moe.experts.10.w3", "model.layers.4.block_sparse_moe.experts.11.w3", "model.layers.4.block_sparse_moe.experts.12.w3", "model.layers.4.block_sparse_moe.experts.13.w3", "model.layers.4.block_sparse_moe.experts.14.w3", "model.layers.4.block_sparse_moe.experts.15.w3", "model.layers.4.block_sparse_moe.experts.16.w3", "model.layers.4.block_sparse_moe.experts.17.w3", "model.layers.4.block_sparse_moe.experts.18.w3", "model.layers.4.block_sparse_moe.experts.19.w3", "model.layers.4.block_sparse_moe.experts.20.w3", "model.layers.4.block_sparse_moe.experts.21.w3", "model.layers.4.block_sparse_moe.experts.22.w3", "model.layers.4.block_sparse_moe.experts.23.w3", "model.layers.4.block_sparse_moe.experts.24.w3", "model.layers.4.block_sparse_moe.experts.25.w3", "model.layers.4.block_sparse_moe.experts.26.w3", "model.layers.4.block_sparse_moe.experts.27.w3", "model.layers.4.block_sparse_moe.experts.28.w3", "model.layers.4.block_sparse_moe.experts.29.w3", "model.layers.4.block_sparse_moe.experts.30.w3", "model.layers.4.block_sparse_moe.experts.31.w3", "model.layers.4.block_sparse_moe.experts.32.w3", "model.layers.4.block_sparse_moe.experts.33.w3", "model.layers.4.block_sparse_moe.experts.34.w3", "model.layers.4.block_sparse_moe.experts.35.w3", "model.layers.4.block_sparse_moe.experts.36.w3", "model.layers.4.block_sparse_moe.experts.37.w3", "model.layers.4.block_sparse_moe.experts.38.w3", "model.layers.4.block_sparse_moe.experts.39.w3", "model.layers.4.block_sparse_moe.experts.40.w3", "model.layers.4.block_sparse_moe.experts.41.w3", "model.layers.4.block_sparse_moe.experts.42.w3", "model.layers.4.block_sparse_moe.experts.43.w3", "model.layers.4.block_sparse_moe.experts.44.w3", "model.layers.4.block_sparse_moe.experts.45.w3", "model.layers.4.block_sparse_moe.experts.46.w3", "model.layers.4.block_sparse_moe.experts.47.w3", "model.layers.4.block_sparse_moe.experts.48.w3", "model.layers.4.block_sparse_moe.experts.49.w3", "model.layers.4.block_sparse_moe.experts.50.w3", "model.layers.4.block_sparse_moe.experts.51.w3", "model.layers.4.block_sparse_moe.experts.52.w3", "model.layers.4.block_sparse_moe.experts.53.w3", "model.layers.4.block_sparse_moe.experts.54.w3", "model.layers.4.block_sparse_moe.experts.55.w3", "model.layers.4.block_sparse_moe.experts.56.w3", "model.layers.4.block_sparse_moe.experts.57.w3", "model.layers.4.block_sparse_moe.experts.58.w3", "model.layers.4.block_sparse_moe.experts.59.w3", "model.layers.4.block_sparse_moe.experts.60.w3", "model.layers.4.block_sparse_moe.experts.61.w3", "model.layers.4.block_sparse_moe.experts.62.w3", "model.layers.4.block_sparse_moe.experts.63.w3", "model.layers.4.block_sparse_moe.experts.64.w3", "model.layers.4.block_sparse_moe.experts.65.w3", "model.layers.4.block_sparse_moe.experts.66.w3", "model.layers.4.block_sparse_moe.experts.67.w3", "model.layers.4.block_sparse_moe.experts.68.w3", "model.layers.4.block_sparse_moe.experts.69.w3", "model.layers.4.block_sparse_moe.experts.70.w3", "model.layers.4.block_sparse_moe.experts.71.w3", "model.layers.4.block_sparse_moe.experts.72.w3", "model.layers.4.block_sparse_moe.experts.73.w3", "model.layers.4.block_sparse_moe.experts.74.w3", "model.layers.4.block_sparse_moe.experts.75.w3", "model.layers.4.block_sparse_moe.experts.76.w3", "model.layers.4.block_sparse_moe.experts.77.w3", "model.layers.4.block_sparse_moe.experts.78.w3", "model.layers.4.block_sparse_moe.experts.79.w3", "model.layers.4.block_sparse_moe.experts.80.w3", "model.layers.4.block_sparse_moe.experts.81.w3", "model.layers.4.block_sparse_moe.experts.82.w3", "model.layers.4.block_sparse_moe.experts.83.w3", "model.layers.4.block_sparse_moe.experts.84.w3", "model.layers.4.block_sparse_moe.experts.85.w3", "model.layers.4.block_sparse_moe.experts.86.w3", "model.layers.4.block_sparse_moe.experts.87.w3", "model.layers.4.block_sparse_moe.experts.88.w3", "model.layers.4.block_sparse_moe.experts.89.w3", "model.layers.4.block_sparse_moe.experts.90.w3", "model.layers.4.block_sparse_moe.experts.91.w3", "model.layers.4.block_sparse_moe.experts.92.w3", "model.layers.4.block_sparse_moe.experts.93.w3", "model.layers.4.block_sparse_moe.experts.94.w3", "model.layers.4.block_sparse_moe.experts.95.w3", "model.layers.4.block_sparse_moe.experts.96.w3", "model.layers.4.block_sparse_moe.experts.97.w3", "model.layers.4.block_sparse_moe.experts.98.w3", "model.layers.4.block_sparse_moe.experts.99.w3", "model.layers.4.block_sparse_moe.experts.100.w3", "model.layers.4.block_sparse_moe.experts.101.w3", "model.layers.4.block_sparse_moe.experts.102.w3", "model.layers.4.block_sparse_moe.experts.103.w3", "model.layers.4.block_sparse_moe.experts.104.w3", "model.layers.4.block_sparse_moe.experts.105.w3", "model.layers.4.block_sparse_moe.experts.106.w3", "model.layers.4.block_sparse_moe.experts.107.w3", "model.layers.4.block_sparse_moe.experts.108.w3", "model.layers.4.block_sparse_moe.experts.109.w3", "model.layers.4.block_sparse_moe.experts.110.w3", "model.layers.4.block_sparse_moe.experts.111.w3", "model.layers.4.block_sparse_moe.experts.112.w3", "model.layers.4.block_sparse_moe.experts.113.w3", "model.layers.4.block_sparse_moe.experts.114.w3", "model.layers.4.block_sparse_moe.experts.115.w3", "model.layers.4.block_sparse_moe.experts.116.w3", "model.layers.4.block_sparse_moe.experts.117.w3", "model.layers.4.block_sparse_moe.experts.118.w3", "model.layers.4.block_sparse_moe.experts.119.w3", "model.layers.4.block_sparse_moe.experts.120.w3", "model.layers.4.block_sparse_moe.experts.121.w3", "model.layers.4.block_sparse_moe.experts.122.w3", "model.layers.4.block_sparse_moe.experts.123.w3", "model.layers.4.block_sparse_moe.experts.124.w3", "model.layers.4.block_sparse_moe.experts.125.w3", "model.layers.4.block_sparse_moe.experts.126.w3", "model.layers.4.block_sparse_moe.experts.127.w3", "model.layers.4.block_sparse_moe.experts.128.w3", "model.layers.4.block_sparse_moe.experts.129.w3", "model.layers.4.block_sparse_moe.experts.130.w3", "model.layers.4.block_sparse_moe.experts.131.w3", "model.layers.4.block_sparse_moe.experts.132.w3", "model.layers.4.block_sparse_moe.experts.133.w3", "model.layers.4.block_sparse_moe.experts.134.w3", "model.layers.4.block_sparse_moe.experts.135.w3", "model.layers.4.block_sparse_moe.experts.136.w3", "model.layers.4.block_sparse_moe.experts.137.w3", "model.layers.4.block_sparse_moe.experts.138.w3", "model.layers.4.block_sparse_moe.experts.139.w3", "model.layers.4.block_sparse_moe.experts.140.w3", "model.layers.4.block_sparse_moe.experts.141.w3", "model.layers.4.block_sparse_moe.experts.142.w3", "model.layers.4.block_sparse_moe.experts.143.w3", "model.layers.4.block_sparse_moe.experts.144.w3", "model.layers.4.block_sparse_moe.experts.145.w3", "model.layers.4.block_sparse_moe.experts.146.w3", "model.layers.4.block_sparse_moe.experts.147.w3", "model.layers.4.block_sparse_moe.experts.148.w3", "model.layers.4.block_sparse_moe.experts.149.w3", "model.layers.4.block_sparse_moe.experts.150.w3", "model.layers.4.block_sparse_moe.experts.151.w3", "model.layers.4.block_sparse_moe.experts.152.w3", "model.layers.4.block_sparse_moe.experts.153.w3", "model.layers.4.block_sparse_moe.experts.154.w3", "model.layers.4.block_sparse_moe.experts.155.w3", "model.layers.4.block_sparse_moe.experts.156.w3", "model.layers.4.block_sparse_moe.experts.157.w3", "model.layers.4.block_sparse_moe.experts.158.w3", "model.layers.4.block_sparse_moe.experts.159.w3", "model.layers.4.block_sparse_moe.experts.160.w3", "model.layers.4.block_sparse_moe.experts.161.w3", "model.layers.4.block_sparse_moe.experts.162.w3", "model.layers.4.block_sparse_moe.experts.163.w3", "model.layers.4.block_sparse_moe.experts.164.w3", "model.layers.4.block_sparse_moe.experts.165.w3", "model.layers.4.block_sparse_moe.experts.166.w3", "model.layers.4.block_sparse_moe.experts.167.w3", "model.layers.4.block_sparse_moe.experts.168.w3", "model.layers.4.block_sparse_moe.experts.169.w3", "model.layers.4.block_sparse_moe.experts.170.w3", "model.layers.4.block_sparse_moe.experts.171.w3", "model.layers.4.block_sparse_moe.experts.172.w3", "model.layers.4.block_sparse_moe.experts.173.w3", "model.layers.4.block_sparse_moe.experts.174.w3", "model.layers.4.block_sparse_moe.experts.175.w3", "model.layers.4.block_sparse_moe.experts.176.w3", "model.layers.4.block_sparse_moe.experts.177.w3", "model.layers.4.block_sparse_moe.experts.178.w3", "model.layers.4.block_sparse_moe.experts.179.w3", "model.layers.4.block_sparse_moe.experts.180.w3", "model.layers.4.block_sparse_moe.experts.181.w3", "model.layers.4.block_sparse_moe.experts.182.w3", "model.layers.4.block_sparse_moe.experts.183.w3", "model.layers.4.block_sparse_moe.experts.184.w3", "model.layers.4.block_sparse_moe.experts.185.w3", "model.layers.4.block_sparse_moe.experts.186.w3", "model.layers.4.block_sparse_moe.experts.187.w3", "model.layers.4.block_sparse_moe.experts.188.w3", "model.layers.4.block_sparse_moe.experts.189.w3", "model.layers.4.block_sparse_moe.experts.190.w3", "model.layers.4.block_sparse_moe.experts.191.w3", "model.layers.4.block_sparse_moe.experts.192.w3", "model.layers.4.block_sparse_moe.experts.193.w3", "model.layers.4.block_sparse_moe.experts.194.w3", "model.layers.4.block_sparse_moe.experts.195.w3", "model.layers.4.block_sparse_moe.experts.196.w3", "model.layers.4.block_sparse_moe.experts.197.w3", "model.layers.4.block_sparse_moe.experts.198.w3", "model.layers.4.block_sparse_moe.experts.199.w3", "model.layers.4.block_sparse_moe.experts.200.w3", "model.layers.4.block_sparse_moe.experts.201.w3", "model.layers.4.block_sparse_moe.experts.202.w3", "model.layers.4.block_sparse_moe.experts.203.w3", "model.layers.4.block_sparse_moe.experts.204.w3", "model.layers.4.block_sparse_moe.experts.205.w3", "model.layers.4.block_sparse_moe.experts.206.w3", "model.layers.4.block_sparse_moe.experts.207.w3", "model.layers.4.block_sparse_moe.experts.208.w3", "model.layers.4.block_sparse_moe.experts.209.w3", "model.layers.4.block_sparse_moe.experts.210.w3", "model.layers.4.block_sparse_moe.experts.211.w3", "model.layers.4.block_sparse_moe.experts.212.w3", "model.layers.4.block_sparse_moe.experts.213.w3", "model.layers.4.block_sparse_moe.experts.214.w3", "model.layers.4.block_sparse_moe.experts.215.w3", "model.layers.4.block_sparse_moe.experts.216.w3", "model.layers.4.block_sparse_moe.experts.217.w3", "model.layers.4.block_sparse_moe.experts.218.w3", "model.layers.4.block_sparse_moe.experts.219.w3", "model.layers.4.block_sparse_moe.experts.220.w3", "model.layers.4.block_sparse_moe.experts.221.w3", "model.layers.4.block_sparse_moe.experts.222.w3", "model.layers.4.block_sparse_moe.experts.223.w3", "model.layers.4.block_sparse_moe.experts.224.w3", "model.layers.4.block_sparse_moe.experts.225.w3", "model.layers.4.block_sparse_moe.experts.226.w3", "model.layers.4.block_sparse_moe.experts.227.w3", "model.layers.4.block_sparse_moe.experts.228.w3", "model.layers.4.block_sparse_moe.experts.229.w3", "model.layers.4.block_sparse_moe.experts.230.w3", "model.layers.4.block_sparse_moe.experts.231.w3", "model.layers.4.block_sparse_moe.experts.232.w3", "model.layers.4.block_sparse_moe.experts.233.w3", "model.layers.4.block_sparse_moe.experts.234.w3", "model.layers.4.block_sparse_moe.experts.235.w3", "model.layers.4.block_sparse_moe.experts.236.w3", "model.layers.4.block_sparse_moe.experts.237.w3", "model.layers.4.block_sparse_moe.experts.238.w3", "model.layers.4.block_sparse_moe.experts.239.w3", "model.layers.4.block_sparse_moe.experts.240.w3", "model.layers.4.block_sparse_moe.experts.241.w3", "model.layers.4.block_sparse_moe.experts.242.w3", "model.layers.4.block_sparse_moe.experts.243.w3", "model.layers.4.block_sparse_moe.experts.244.w3", "model.layers.4.block_sparse_moe.experts.245.w3", "model.layers.4.block_sparse_moe.experts.246.w3", "model.layers.4.block_sparse_moe.experts.247.w3", "model.layers.4.block_sparse_moe.experts.248.w3", "model.layers.4.block_sparse_moe.experts.249.w3", "model.layers.4.block_sparse_moe.experts.250.w3", "model.layers.4.block_sparse_moe.experts.251.w3", "model.layers.4.block_sparse_moe.experts.252.w3", "model.layers.4.block_sparse_moe.experts.253.w3", "model.layers.4.block_sparse_moe.experts.254.w3", "model.layers.4.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0003164267167449081, "dbits": 2415919104 } ] }, { "idx": 24, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w2", "model.layers.4.block_sparse_moe.experts.1.w2", "model.layers.4.block_sparse_moe.experts.2.w2", "model.layers.4.block_sparse_moe.experts.3.w2", "model.layers.4.block_sparse_moe.experts.4.w2", "model.layers.4.block_sparse_moe.experts.5.w2", "model.layers.4.block_sparse_moe.experts.6.w2", "model.layers.4.block_sparse_moe.experts.7.w2", "model.layers.4.block_sparse_moe.experts.8.w2", "model.layers.4.block_sparse_moe.experts.9.w2", "model.layers.4.block_sparse_moe.experts.10.w2", "model.layers.4.block_sparse_moe.experts.11.w2", "model.layers.4.block_sparse_moe.experts.12.w2", "model.layers.4.block_sparse_moe.experts.13.w2", "model.layers.4.block_sparse_moe.experts.14.w2", "model.layers.4.block_sparse_moe.experts.15.w2", "model.layers.4.block_sparse_moe.experts.16.w2", "model.layers.4.block_sparse_moe.experts.17.w2", "model.layers.4.block_sparse_moe.experts.18.w2", "model.layers.4.block_sparse_moe.experts.19.w2", "model.layers.4.block_sparse_moe.experts.20.w2", "model.layers.4.block_sparse_moe.experts.21.w2", "model.layers.4.block_sparse_moe.experts.22.w2", "model.layers.4.block_sparse_moe.experts.23.w2", "model.layers.4.block_sparse_moe.experts.24.w2", "model.layers.4.block_sparse_moe.experts.25.w2", "model.layers.4.block_sparse_moe.experts.26.w2", "model.layers.4.block_sparse_moe.experts.27.w2", "model.layers.4.block_sparse_moe.experts.28.w2", "model.layers.4.block_sparse_moe.experts.29.w2", "model.layers.4.block_sparse_moe.experts.30.w2", "model.layers.4.block_sparse_moe.experts.31.w2", "model.layers.4.block_sparse_moe.experts.32.w2", "model.layers.4.block_sparse_moe.experts.33.w2", "model.layers.4.block_sparse_moe.experts.34.w2", "model.layers.4.block_sparse_moe.experts.35.w2", "model.layers.4.block_sparse_moe.experts.36.w2", "model.layers.4.block_sparse_moe.experts.37.w2", "model.layers.4.block_sparse_moe.experts.38.w2", "model.layers.4.block_sparse_moe.experts.39.w2", "model.layers.4.block_sparse_moe.experts.40.w2", "model.layers.4.block_sparse_moe.experts.41.w2", "model.layers.4.block_sparse_moe.experts.42.w2", "model.layers.4.block_sparse_moe.experts.43.w2", "model.layers.4.block_sparse_moe.experts.44.w2", "model.layers.4.block_sparse_moe.experts.45.w2", "model.layers.4.block_sparse_moe.experts.46.w2", "model.layers.4.block_sparse_moe.experts.47.w2", "model.layers.4.block_sparse_moe.experts.48.w2", "model.layers.4.block_sparse_moe.experts.49.w2", "model.layers.4.block_sparse_moe.experts.50.w2", "model.layers.4.block_sparse_moe.experts.51.w2", "model.layers.4.block_sparse_moe.experts.52.w2", "model.layers.4.block_sparse_moe.experts.53.w2", "model.layers.4.block_sparse_moe.experts.54.w2", "model.layers.4.block_sparse_moe.experts.55.w2", "model.layers.4.block_sparse_moe.experts.56.w2", "model.layers.4.block_sparse_moe.experts.57.w2", "model.layers.4.block_sparse_moe.experts.58.w2", "model.layers.4.block_sparse_moe.experts.59.w2", "model.layers.4.block_sparse_moe.experts.60.w2", "model.layers.4.block_sparse_moe.experts.61.w2", "model.layers.4.block_sparse_moe.experts.62.w2", "model.layers.4.block_sparse_moe.experts.63.w2", "model.layers.4.block_sparse_moe.experts.64.w2", "model.layers.4.block_sparse_moe.experts.65.w2", "model.layers.4.block_sparse_moe.experts.66.w2", "model.layers.4.block_sparse_moe.experts.67.w2", "model.layers.4.block_sparse_moe.experts.68.w2", "model.layers.4.block_sparse_moe.experts.69.w2", "model.layers.4.block_sparse_moe.experts.70.w2", "model.layers.4.block_sparse_moe.experts.71.w2", "model.layers.4.block_sparse_moe.experts.72.w2", "model.layers.4.block_sparse_moe.experts.73.w2", "model.layers.4.block_sparse_moe.experts.74.w2", "model.layers.4.block_sparse_moe.experts.75.w2", "model.layers.4.block_sparse_moe.experts.76.w2", "model.layers.4.block_sparse_moe.experts.77.w2", "model.layers.4.block_sparse_moe.experts.78.w2", "model.layers.4.block_sparse_moe.experts.79.w2", "model.layers.4.block_sparse_moe.experts.80.w2", "model.layers.4.block_sparse_moe.experts.81.w2", "model.layers.4.block_sparse_moe.experts.82.w2", "model.layers.4.block_sparse_moe.experts.83.w2", "model.layers.4.block_sparse_moe.experts.84.w2", "model.layers.4.block_sparse_moe.experts.85.w2", "model.layers.4.block_sparse_moe.experts.86.w2", "model.layers.4.block_sparse_moe.experts.87.w2", "model.layers.4.block_sparse_moe.experts.88.w2", "model.layers.4.block_sparse_moe.experts.89.w2", "model.layers.4.block_sparse_moe.experts.90.w2", "model.layers.4.block_sparse_moe.experts.91.w2", "model.layers.4.block_sparse_moe.experts.92.w2", "model.layers.4.block_sparse_moe.experts.93.w2", "model.layers.4.block_sparse_moe.experts.94.w2", "model.layers.4.block_sparse_moe.experts.95.w2", "model.layers.4.block_sparse_moe.experts.96.w2", "model.layers.4.block_sparse_moe.experts.97.w2", "model.layers.4.block_sparse_moe.experts.98.w2", "model.layers.4.block_sparse_moe.experts.99.w2", "model.layers.4.block_sparse_moe.experts.100.w2", "model.layers.4.block_sparse_moe.experts.101.w2", "model.layers.4.block_sparse_moe.experts.102.w2", "model.layers.4.block_sparse_moe.experts.103.w2", "model.layers.4.block_sparse_moe.experts.104.w2", "model.layers.4.block_sparse_moe.experts.105.w2", "model.layers.4.block_sparse_moe.experts.106.w2", "model.layers.4.block_sparse_moe.experts.107.w2", "model.layers.4.block_sparse_moe.experts.108.w2", "model.layers.4.block_sparse_moe.experts.109.w2", "model.layers.4.block_sparse_moe.experts.110.w2", "model.layers.4.block_sparse_moe.experts.111.w2", "model.layers.4.block_sparse_moe.experts.112.w2", "model.layers.4.block_sparse_moe.experts.113.w2", "model.layers.4.block_sparse_moe.experts.114.w2", "model.layers.4.block_sparse_moe.experts.115.w2", "model.layers.4.block_sparse_moe.experts.116.w2", "model.layers.4.block_sparse_moe.experts.117.w2", "model.layers.4.block_sparse_moe.experts.118.w2", "model.layers.4.block_sparse_moe.experts.119.w2", "model.layers.4.block_sparse_moe.experts.120.w2", "model.layers.4.block_sparse_moe.experts.121.w2", "model.layers.4.block_sparse_moe.experts.122.w2", "model.layers.4.block_sparse_moe.experts.123.w2", "model.layers.4.block_sparse_moe.experts.124.w2", "model.layers.4.block_sparse_moe.experts.125.w2", "model.layers.4.block_sparse_moe.experts.126.w2", "model.layers.4.block_sparse_moe.experts.127.w2", "model.layers.4.block_sparse_moe.experts.128.w2", "model.layers.4.block_sparse_moe.experts.129.w2", "model.layers.4.block_sparse_moe.experts.130.w2", "model.layers.4.block_sparse_moe.experts.131.w2", "model.layers.4.block_sparse_moe.experts.132.w2", "model.layers.4.block_sparse_moe.experts.133.w2", "model.layers.4.block_sparse_moe.experts.134.w2", "model.layers.4.block_sparse_moe.experts.135.w2", "model.layers.4.block_sparse_moe.experts.136.w2", "model.layers.4.block_sparse_moe.experts.137.w2", "model.layers.4.block_sparse_moe.experts.138.w2", "model.layers.4.block_sparse_moe.experts.139.w2", "model.layers.4.block_sparse_moe.experts.140.w2", "model.layers.4.block_sparse_moe.experts.141.w2", "model.layers.4.block_sparse_moe.experts.142.w2", "model.layers.4.block_sparse_moe.experts.143.w2", "model.layers.4.block_sparse_moe.experts.144.w2", "model.layers.4.block_sparse_moe.experts.145.w2", "model.layers.4.block_sparse_moe.experts.146.w2", "model.layers.4.block_sparse_moe.experts.147.w2", "model.layers.4.block_sparse_moe.experts.148.w2", "model.layers.4.block_sparse_moe.experts.149.w2", "model.layers.4.block_sparse_moe.experts.150.w2", "model.layers.4.block_sparse_moe.experts.151.w2", "model.layers.4.block_sparse_moe.experts.152.w2", "model.layers.4.block_sparse_moe.experts.153.w2", "model.layers.4.block_sparse_moe.experts.154.w2", "model.layers.4.block_sparse_moe.experts.155.w2", "model.layers.4.block_sparse_moe.experts.156.w2", "model.layers.4.block_sparse_moe.experts.157.w2", "model.layers.4.block_sparse_moe.experts.158.w2", "model.layers.4.block_sparse_moe.experts.159.w2", "model.layers.4.block_sparse_moe.experts.160.w2", "model.layers.4.block_sparse_moe.experts.161.w2", "model.layers.4.block_sparse_moe.experts.162.w2", "model.layers.4.block_sparse_moe.experts.163.w2", "model.layers.4.block_sparse_moe.experts.164.w2", "model.layers.4.block_sparse_moe.experts.165.w2", "model.layers.4.block_sparse_moe.experts.166.w2", "model.layers.4.block_sparse_moe.experts.167.w2", "model.layers.4.block_sparse_moe.experts.168.w2", "model.layers.4.block_sparse_moe.experts.169.w2", "model.layers.4.block_sparse_moe.experts.170.w2", "model.layers.4.block_sparse_moe.experts.171.w2", "model.layers.4.block_sparse_moe.experts.172.w2", "model.layers.4.block_sparse_moe.experts.173.w2", "model.layers.4.block_sparse_moe.experts.174.w2", "model.layers.4.block_sparse_moe.experts.175.w2", "model.layers.4.block_sparse_moe.experts.176.w2", "model.layers.4.block_sparse_moe.experts.177.w2", "model.layers.4.block_sparse_moe.experts.178.w2", "model.layers.4.block_sparse_moe.experts.179.w2", "model.layers.4.block_sparse_moe.experts.180.w2", "model.layers.4.block_sparse_moe.experts.181.w2", "model.layers.4.block_sparse_moe.experts.182.w2", "model.layers.4.block_sparse_moe.experts.183.w2", "model.layers.4.block_sparse_moe.experts.184.w2", "model.layers.4.block_sparse_moe.experts.185.w2", "model.layers.4.block_sparse_moe.experts.186.w2", "model.layers.4.block_sparse_moe.experts.187.w2", "model.layers.4.block_sparse_moe.experts.188.w2", "model.layers.4.block_sparse_moe.experts.189.w2", "model.layers.4.block_sparse_moe.experts.190.w2", "model.layers.4.block_sparse_moe.experts.191.w2", "model.layers.4.block_sparse_moe.experts.192.w2", "model.layers.4.block_sparse_moe.experts.193.w2", "model.layers.4.block_sparse_moe.experts.194.w2", "model.layers.4.block_sparse_moe.experts.195.w2", "model.layers.4.block_sparse_moe.experts.196.w2", "model.layers.4.block_sparse_moe.experts.197.w2", "model.layers.4.block_sparse_moe.experts.198.w2", "model.layers.4.block_sparse_moe.experts.199.w2", "model.layers.4.block_sparse_moe.experts.200.w2", "model.layers.4.block_sparse_moe.experts.201.w2", "model.layers.4.block_sparse_moe.experts.202.w2", "model.layers.4.block_sparse_moe.experts.203.w2", "model.layers.4.block_sparse_moe.experts.204.w2", "model.layers.4.block_sparse_moe.experts.205.w2", "model.layers.4.block_sparse_moe.experts.206.w2", "model.layers.4.block_sparse_moe.experts.207.w2", "model.layers.4.block_sparse_moe.experts.208.w2", "model.layers.4.block_sparse_moe.experts.209.w2", "model.layers.4.block_sparse_moe.experts.210.w2", "model.layers.4.block_sparse_moe.experts.211.w2", "model.layers.4.block_sparse_moe.experts.212.w2", "model.layers.4.block_sparse_moe.experts.213.w2", "model.layers.4.block_sparse_moe.experts.214.w2", "model.layers.4.block_sparse_moe.experts.215.w2", "model.layers.4.block_sparse_moe.experts.216.w2", "model.layers.4.block_sparse_moe.experts.217.w2", "model.layers.4.block_sparse_moe.experts.218.w2", "model.layers.4.block_sparse_moe.experts.219.w2", "model.layers.4.block_sparse_moe.experts.220.w2", "model.layers.4.block_sparse_moe.experts.221.w2", "model.layers.4.block_sparse_moe.experts.222.w2", "model.layers.4.block_sparse_moe.experts.223.w2", "model.layers.4.block_sparse_moe.experts.224.w2", "model.layers.4.block_sparse_moe.experts.225.w2", "model.layers.4.block_sparse_moe.experts.226.w2", "model.layers.4.block_sparse_moe.experts.227.w2", "model.layers.4.block_sparse_moe.experts.228.w2", "model.layers.4.block_sparse_moe.experts.229.w2", "model.layers.4.block_sparse_moe.experts.230.w2", "model.layers.4.block_sparse_moe.experts.231.w2", "model.layers.4.block_sparse_moe.experts.232.w2", "model.layers.4.block_sparse_moe.experts.233.w2", "model.layers.4.block_sparse_moe.experts.234.w2", "model.layers.4.block_sparse_moe.experts.235.w2", "model.layers.4.block_sparse_moe.experts.236.w2", "model.layers.4.block_sparse_moe.experts.237.w2", "model.layers.4.block_sparse_moe.experts.238.w2", "model.layers.4.block_sparse_moe.experts.239.w2", "model.layers.4.block_sparse_moe.experts.240.w2", "model.layers.4.block_sparse_moe.experts.241.w2", "model.layers.4.block_sparse_moe.experts.242.w2", "model.layers.4.block_sparse_moe.experts.243.w2", "model.layers.4.block_sparse_moe.experts.244.w2", "model.layers.4.block_sparse_moe.experts.245.w2", "model.layers.4.block_sparse_moe.experts.246.w2", "model.layers.4.block_sparse_moe.experts.247.w2", "model.layers.4.block_sparse_moe.experts.248.w2", "model.layers.4.block_sparse_moe.experts.249.w2", "model.layers.4.block_sparse_moe.experts.250.w2", "model.layers.4.block_sparse_moe.experts.251.w2", "model.layers.4.block_sparse_moe.experts.252.w2", "model.layers.4.block_sparse_moe.experts.253.w2", "model.layers.4.block_sparse_moe.experts.254.w2", "model.layers.4.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.000524848140776149, "dbits": 1207959552 } ] }, { "idx": 25, "layers": [ "model.layers.5.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0003802554681897191, "dbits": 18874368 } ] }, { "idx": 26, "layers": [ "model.layers.5.self_attn.k_proj", "model.layers.5.self_attn.v_proj" ], "candidates": [ { "dkld": 0.004881759360432628, "dbits": 6291456 } ] }, { "idx": 27, "layers": [ "model.layers.5.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001883195154368869, "dbits": 18874368 } ] }, { "idx": 28, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w1", "model.layers.5.block_sparse_moe.experts.1.w1", "model.layers.5.block_sparse_moe.experts.2.w1", "model.layers.5.block_sparse_moe.experts.3.w1", "model.layers.5.block_sparse_moe.experts.4.w1", "model.layers.5.block_sparse_moe.experts.5.w1", "model.layers.5.block_sparse_moe.experts.6.w1", "model.layers.5.block_sparse_moe.experts.7.w1", "model.layers.5.block_sparse_moe.experts.8.w1", "model.layers.5.block_sparse_moe.experts.9.w1", "model.layers.5.block_sparse_moe.experts.10.w1", "model.layers.5.block_sparse_moe.experts.11.w1", "model.layers.5.block_sparse_moe.experts.12.w1", "model.layers.5.block_sparse_moe.experts.13.w1", "model.layers.5.block_sparse_moe.experts.14.w1", "model.layers.5.block_sparse_moe.experts.15.w1", "model.layers.5.block_sparse_moe.experts.16.w1", "model.layers.5.block_sparse_moe.experts.17.w1", "model.layers.5.block_sparse_moe.experts.18.w1", "model.layers.5.block_sparse_moe.experts.19.w1", "model.layers.5.block_sparse_moe.experts.20.w1", "model.layers.5.block_sparse_moe.experts.21.w1", "model.layers.5.block_sparse_moe.experts.22.w1", "model.layers.5.block_sparse_moe.experts.23.w1", "model.layers.5.block_sparse_moe.experts.24.w1", "model.layers.5.block_sparse_moe.experts.25.w1", "model.layers.5.block_sparse_moe.experts.26.w1", "model.layers.5.block_sparse_moe.experts.27.w1", "model.layers.5.block_sparse_moe.experts.28.w1", "model.layers.5.block_sparse_moe.experts.29.w1", "model.layers.5.block_sparse_moe.experts.30.w1", "model.layers.5.block_sparse_moe.experts.31.w1", "model.layers.5.block_sparse_moe.experts.32.w1", "model.layers.5.block_sparse_moe.experts.33.w1", "model.layers.5.block_sparse_moe.experts.34.w1", "model.layers.5.block_sparse_moe.experts.35.w1", "model.layers.5.block_sparse_moe.experts.36.w1", "model.layers.5.block_sparse_moe.experts.37.w1", "model.layers.5.block_sparse_moe.experts.38.w1", "model.layers.5.block_sparse_moe.experts.39.w1", "model.layers.5.block_sparse_moe.experts.40.w1", "model.layers.5.block_sparse_moe.experts.41.w1", "model.layers.5.block_sparse_moe.experts.42.w1", "model.layers.5.block_sparse_moe.experts.43.w1", "model.layers.5.block_sparse_moe.experts.44.w1", "model.layers.5.block_sparse_moe.experts.45.w1", "model.layers.5.block_sparse_moe.experts.46.w1", "model.layers.5.block_sparse_moe.experts.47.w1", "model.layers.5.block_sparse_moe.experts.48.w1", "model.layers.5.block_sparse_moe.experts.49.w1", "model.layers.5.block_sparse_moe.experts.50.w1", "model.layers.5.block_sparse_moe.experts.51.w1", "model.layers.5.block_sparse_moe.experts.52.w1", "model.layers.5.block_sparse_moe.experts.53.w1", "model.layers.5.block_sparse_moe.experts.54.w1", "model.layers.5.block_sparse_moe.experts.55.w1", "model.layers.5.block_sparse_moe.experts.56.w1", "model.layers.5.block_sparse_moe.experts.57.w1", "model.layers.5.block_sparse_moe.experts.58.w1", "model.layers.5.block_sparse_moe.experts.59.w1", "model.layers.5.block_sparse_moe.experts.60.w1", "model.layers.5.block_sparse_moe.experts.61.w1", "model.layers.5.block_sparse_moe.experts.62.w1", "model.layers.5.block_sparse_moe.experts.63.w1", "model.layers.5.block_sparse_moe.experts.64.w1", "model.layers.5.block_sparse_moe.experts.65.w1", "model.layers.5.block_sparse_moe.experts.66.w1", "model.layers.5.block_sparse_moe.experts.67.w1", "model.layers.5.block_sparse_moe.experts.68.w1", "model.layers.5.block_sparse_moe.experts.69.w1", "model.layers.5.block_sparse_moe.experts.70.w1", "model.layers.5.block_sparse_moe.experts.71.w1", "model.layers.5.block_sparse_moe.experts.72.w1", "model.layers.5.block_sparse_moe.experts.73.w1", "model.layers.5.block_sparse_moe.experts.74.w1", "model.layers.5.block_sparse_moe.experts.75.w1", "model.layers.5.block_sparse_moe.experts.76.w1", "model.layers.5.block_sparse_moe.experts.77.w1", "model.layers.5.block_sparse_moe.experts.78.w1", "model.layers.5.block_sparse_moe.experts.79.w1", "model.layers.5.block_sparse_moe.experts.80.w1", "model.layers.5.block_sparse_moe.experts.81.w1", "model.layers.5.block_sparse_moe.experts.82.w1", "model.layers.5.block_sparse_moe.experts.83.w1", "model.layers.5.block_sparse_moe.experts.84.w1", "model.layers.5.block_sparse_moe.experts.85.w1", "model.layers.5.block_sparse_moe.experts.86.w1", "model.layers.5.block_sparse_moe.experts.87.w1", "model.layers.5.block_sparse_moe.experts.88.w1", "model.layers.5.block_sparse_moe.experts.89.w1", "model.layers.5.block_sparse_moe.experts.90.w1", "model.layers.5.block_sparse_moe.experts.91.w1", "model.layers.5.block_sparse_moe.experts.92.w1", "model.layers.5.block_sparse_moe.experts.93.w1", "model.layers.5.block_sparse_moe.experts.94.w1", "model.layers.5.block_sparse_moe.experts.95.w1", "model.layers.5.block_sparse_moe.experts.96.w1", "model.layers.5.block_sparse_moe.experts.97.w1", "model.layers.5.block_sparse_moe.experts.98.w1", "model.layers.5.block_sparse_moe.experts.99.w1", "model.layers.5.block_sparse_moe.experts.100.w1", "model.layers.5.block_sparse_moe.experts.101.w1", "model.layers.5.block_sparse_moe.experts.102.w1", "model.layers.5.block_sparse_moe.experts.103.w1", "model.layers.5.block_sparse_moe.experts.104.w1", "model.layers.5.block_sparse_moe.experts.105.w1", "model.layers.5.block_sparse_moe.experts.106.w1", "model.layers.5.block_sparse_moe.experts.107.w1", "model.layers.5.block_sparse_moe.experts.108.w1", "model.layers.5.block_sparse_moe.experts.109.w1", "model.layers.5.block_sparse_moe.experts.110.w1", "model.layers.5.block_sparse_moe.experts.111.w1", "model.layers.5.block_sparse_moe.experts.112.w1", "model.layers.5.block_sparse_moe.experts.113.w1", "model.layers.5.block_sparse_moe.experts.114.w1", "model.layers.5.block_sparse_moe.experts.115.w1", "model.layers.5.block_sparse_moe.experts.116.w1", "model.layers.5.block_sparse_moe.experts.117.w1", "model.layers.5.block_sparse_moe.experts.118.w1", "model.layers.5.block_sparse_moe.experts.119.w1", "model.layers.5.block_sparse_moe.experts.120.w1", "model.layers.5.block_sparse_moe.experts.121.w1", "model.layers.5.block_sparse_moe.experts.122.w1", "model.layers.5.block_sparse_moe.experts.123.w1", "model.layers.5.block_sparse_moe.experts.124.w1", "model.layers.5.block_sparse_moe.experts.125.w1", "model.layers.5.block_sparse_moe.experts.126.w1", "model.layers.5.block_sparse_moe.experts.127.w1", "model.layers.5.block_sparse_moe.experts.128.w1", "model.layers.5.block_sparse_moe.experts.129.w1", "model.layers.5.block_sparse_moe.experts.130.w1", "model.layers.5.block_sparse_moe.experts.131.w1", "model.layers.5.block_sparse_moe.experts.132.w1", "model.layers.5.block_sparse_moe.experts.133.w1", "model.layers.5.block_sparse_moe.experts.134.w1", "model.layers.5.block_sparse_moe.experts.135.w1", "model.layers.5.block_sparse_moe.experts.136.w1", "model.layers.5.block_sparse_moe.experts.137.w1", "model.layers.5.block_sparse_moe.experts.138.w1", "model.layers.5.block_sparse_moe.experts.139.w1", "model.layers.5.block_sparse_moe.experts.140.w1", "model.layers.5.block_sparse_moe.experts.141.w1", "model.layers.5.block_sparse_moe.experts.142.w1", "model.layers.5.block_sparse_moe.experts.143.w1", "model.layers.5.block_sparse_moe.experts.144.w1", "model.layers.5.block_sparse_moe.experts.145.w1", "model.layers.5.block_sparse_moe.experts.146.w1", "model.layers.5.block_sparse_moe.experts.147.w1", "model.layers.5.block_sparse_moe.experts.148.w1", "model.layers.5.block_sparse_moe.experts.149.w1", "model.layers.5.block_sparse_moe.experts.150.w1", "model.layers.5.block_sparse_moe.experts.151.w1", "model.layers.5.block_sparse_moe.experts.152.w1", "model.layers.5.block_sparse_moe.experts.153.w1", "model.layers.5.block_sparse_moe.experts.154.w1", "model.layers.5.block_sparse_moe.experts.155.w1", "model.layers.5.block_sparse_moe.experts.156.w1", "model.layers.5.block_sparse_moe.experts.157.w1", "model.layers.5.block_sparse_moe.experts.158.w1", "model.layers.5.block_sparse_moe.experts.159.w1", "model.layers.5.block_sparse_moe.experts.160.w1", "model.layers.5.block_sparse_moe.experts.161.w1", "model.layers.5.block_sparse_moe.experts.162.w1", "model.layers.5.block_sparse_moe.experts.163.w1", "model.layers.5.block_sparse_moe.experts.164.w1", "model.layers.5.block_sparse_moe.experts.165.w1", "model.layers.5.block_sparse_moe.experts.166.w1", "model.layers.5.block_sparse_moe.experts.167.w1", "model.layers.5.block_sparse_moe.experts.168.w1", "model.layers.5.block_sparse_moe.experts.169.w1", "model.layers.5.block_sparse_moe.experts.170.w1", "model.layers.5.block_sparse_moe.experts.171.w1", "model.layers.5.block_sparse_moe.experts.172.w1", "model.layers.5.block_sparse_moe.experts.173.w1", "model.layers.5.block_sparse_moe.experts.174.w1", "model.layers.5.block_sparse_moe.experts.175.w1", "model.layers.5.block_sparse_moe.experts.176.w1", "model.layers.5.block_sparse_moe.experts.177.w1", "model.layers.5.block_sparse_moe.experts.178.w1", "model.layers.5.block_sparse_moe.experts.179.w1", "model.layers.5.block_sparse_moe.experts.180.w1", "model.layers.5.block_sparse_moe.experts.181.w1", "model.layers.5.block_sparse_moe.experts.182.w1", "model.layers.5.block_sparse_moe.experts.183.w1", "model.layers.5.block_sparse_moe.experts.184.w1", "model.layers.5.block_sparse_moe.experts.185.w1", "model.layers.5.block_sparse_moe.experts.186.w1", "model.layers.5.block_sparse_moe.experts.187.w1", "model.layers.5.block_sparse_moe.experts.188.w1", "model.layers.5.block_sparse_moe.experts.189.w1", "model.layers.5.block_sparse_moe.experts.190.w1", "model.layers.5.block_sparse_moe.experts.191.w1", "model.layers.5.block_sparse_moe.experts.192.w1", "model.layers.5.block_sparse_moe.experts.193.w1", "model.layers.5.block_sparse_moe.experts.194.w1", "model.layers.5.block_sparse_moe.experts.195.w1", "model.layers.5.block_sparse_moe.experts.196.w1", "model.layers.5.block_sparse_moe.experts.197.w1", "model.layers.5.block_sparse_moe.experts.198.w1", "model.layers.5.block_sparse_moe.experts.199.w1", "model.layers.5.block_sparse_moe.experts.200.w1", "model.layers.5.block_sparse_moe.experts.201.w1", "model.layers.5.block_sparse_moe.experts.202.w1", "model.layers.5.block_sparse_moe.experts.203.w1", "model.layers.5.block_sparse_moe.experts.204.w1", "model.layers.5.block_sparse_moe.experts.205.w1", "model.layers.5.block_sparse_moe.experts.206.w1", "model.layers.5.block_sparse_moe.experts.207.w1", "model.layers.5.block_sparse_moe.experts.208.w1", "model.layers.5.block_sparse_moe.experts.209.w1", "model.layers.5.block_sparse_moe.experts.210.w1", "model.layers.5.block_sparse_moe.experts.211.w1", "model.layers.5.block_sparse_moe.experts.212.w1", "model.layers.5.block_sparse_moe.experts.213.w1", "model.layers.5.block_sparse_moe.experts.214.w1", "model.layers.5.block_sparse_moe.experts.215.w1", "model.layers.5.block_sparse_moe.experts.216.w1", "model.layers.5.block_sparse_moe.experts.217.w1", "model.layers.5.block_sparse_moe.experts.218.w1", "model.layers.5.block_sparse_moe.experts.219.w1", "model.layers.5.block_sparse_moe.experts.220.w1", "model.layers.5.block_sparse_moe.experts.221.w1", "model.layers.5.block_sparse_moe.experts.222.w1", "model.layers.5.block_sparse_moe.experts.223.w1", "model.layers.5.block_sparse_moe.experts.224.w1", "model.layers.5.block_sparse_moe.experts.225.w1", "model.layers.5.block_sparse_moe.experts.226.w1", "model.layers.5.block_sparse_moe.experts.227.w1", "model.layers.5.block_sparse_moe.experts.228.w1", "model.layers.5.block_sparse_moe.experts.229.w1", "model.layers.5.block_sparse_moe.experts.230.w1", "model.layers.5.block_sparse_moe.experts.231.w1", "model.layers.5.block_sparse_moe.experts.232.w1", "model.layers.5.block_sparse_moe.experts.233.w1", "model.layers.5.block_sparse_moe.experts.234.w1", "model.layers.5.block_sparse_moe.experts.235.w1", "model.layers.5.block_sparse_moe.experts.236.w1", "model.layers.5.block_sparse_moe.experts.237.w1", "model.layers.5.block_sparse_moe.experts.238.w1", "model.layers.5.block_sparse_moe.experts.239.w1", "model.layers.5.block_sparse_moe.experts.240.w1", "model.layers.5.block_sparse_moe.experts.241.w1", "model.layers.5.block_sparse_moe.experts.242.w1", "model.layers.5.block_sparse_moe.experts.243.w1", "model.layers.5.block_sparse_moe.experts.244.w1", "model.layers.5.block_sparse_moe.experts.245.w1", "model.layers.5.block_sparse_moe.experts.246.w1", "model.layers.5.block_sparse_moe.experts.247.w1", "model.layers.5.block_sparse_moe.experts.248.w1", "model.layers.5.block_sparse_moe.experts.249.w1", "model.layers.5.block_sparse_moe.experts.250.w1", "model.layers.5.block_sparse_moe.experts.251.w1", "model.layers.5.block_sparse_moe.experts.252.w1", "model.layers.5.block_sparse_moe.experts.253.w1", "model.layers.5.block_sparse_moe.experts.254.w1", "model.layers.5.block_sparse_moe.experts.255.w1", "model.layers.5.block_sparse_moe.experts.0.w3", "model.layers.5.block_sparse_moe.experts.1.w3", "model.layers.5.block_sparse_moe.experts.2.w3", "model.layers.5.block_sparse_moe.experts.3.w3", "model.layers.5.block_sparse_moe.experts.4.w3", "model.layers.5.block_sparse_moe.experts.5.w3", "model.layers.5.block_sparse_moe.experts.6.w3", "model.layers.5.block_sparse_moe.experts.7.w3", "model.layers.5.block_sparse_moe.experts.8.w3", "model.layers.5.block_sparse_moe.experts.9.w3", "model.layers.5.block_sparse_moe.experts.10.w3", "model.layers.5.block_sparse_moe.experts.11.w3", "model.layers.5.block_sparse_moe.experts.12.w3", "model.layers.5.block_sparse_moe.experts.13.w3", "model.layers.5.block_sparse_moe.experts.14.w3", "model.layers.5.block_sparse_moe.experts.15.w3", "model.layers.5.block_sparse_moe.experts.16.w3", "model.layers.5.block_sparse_moe.experts.17.w3", "model.layers.5.block_sparse_moe.experts.18.w3", "model.layers.5.block_sparse_moe.experts.19.w3", "model.layers.5.block_sparse_moe.experts.20.w3", "model.layers.5.block_sparse_moe.experts.21.w3", "model.layers.5.block_sparse_moe.experts.22.w3", "model.layers.5.block_sparse_moe.experts.23.w3", "model.layers.5.block_sparse_moe.experts.24.w3", "model.layers.5.block_sparse_moe.experts.25.w3", "model.layers.5.block_sparse_moe.experts.26.w3", "model.layers.5.block_sparse_moe.experts.27.w3", "model.layers.5.block_sparse_moe.experts.28.w3", "model.layers.5.block_sparse_moe.experts.29.w3", "model.layers.5.block_sparse_moe.experts.30.w3", "model.layers.5.block_sparse_moe.experts.31.w3", "model.layers.5.block_sparse_moe.experts.32.w3", "model.layers.5.block_sparse_moe.experts.33.w3", "model.layers.5.block_sparse_moe.experts.34.w3", "model.layers.5.block_sparse_moe.experts.35.w3", "model.layers.5.block_sparse_moe.experts.36.w3", "model.layers.5.block_sparse_moe.experts.37.w3", "model.layers.5.block_sparse_moe.experts.38.w3", "model.layers.5.block_sparse_moe.experts.39.w3", "model.layers.5.block_sparse_moe.experts.40.w3", "model.layers.5.block_sparse_moe.experts.41.w3", "model.layers.5.block_sparse_moe.experts.42.w3", "model.layers.5.block_sparse_moe.experts.43.w3", "model.layers.5.block_sparse_moe.experts.44.w3", "model.layers.5.block_sparse_moe.experts.45.w3", "model.layers.5.block_sparse_moe.experts.46.w3", "model.layers.5.block_sparse_moe.experts.47.w3", "model.layers.5.block_sparse_moe.experts.48.w3", "model.layers.5.block_sparse_moe.experts.49.w3", "model.layers.5.block_sparse_moe.experts.50.w3", "model.layers.5.block_sparse_moe.experts.51.w3", "model.layers.5.block_sparse_moe.experts.52.w3", "model.layers.5.block_sparse_moe.experts.53.w3", "model.layers.5.block_sparse_moe.experts.54.w3", "model.layers.5.block_sparse_moe.experts.55.w3", "model.layers.5.block_sparse_moe.experts.56.w3", "model.layers.5.block_sparse_moe.experts.57.w3", "model.layers.5.block_sparse_moe.experts.58.w3", "model.layers.5.block_sparse_moe.experts.59.w3", "model.layers.5.block_sparse_moe.experts.60.w3", "model.layers.5.block_sparse_moe.experts.61.w3", "model.layers.5.block_sparse_moe.experts.62.w3", "model.layers.5.block_sparse_moe.experts.63.w3", "model.layers.5.block_sparse_moe.experts.64.w3", "model.layers.5.block_sparse_moe.experts.65.w3", "model.layers.5.block_sparse_moe.experts.66.w3", "model.layers.5.block_sparse_moe.experts.67.w3", "model.layers.5.block_sparse_moe.experts.68.w3", "model.layers.5.block_sparse_moe.experts.69.w3", "model.layers.5.block_sparse_moe.experts.70.w3", "model.layers.5.block_sparse_moe.experts.71.w3", "model.layers.5.block_sparse_moe.experts.72.w3", "model.layers.5.block_sparse_moe.experts.73.w3", "model.layers.5.block_sparse_moe.experts.74.w3", "model.layers.5.block_sparse_moe.experts.75.w3", "model.layers.5.block_sparse_moe.experts.76.w3", "model.layers.5.block_sparse_moe.experts.77.w3", "model.layers.5.block_sparse_moe.experts.78.w3", "model.layers.5.block_sparse_moe.experts.79.w3", "model.layers.5.block_sparse_moe.experts.80.w3", "model.layers.5.block_sparse_moe.experts.81.w3", "model.layers.5.block_sparse_moe.experts.82.w3", "model.layers.5.block_sparse_moe.experts.83.w3", "model.layers.5.block_sparse_moe.experts.84.w3", "model.layers.5.block_sparse_moe.experts.85.w3", "model.layers.5.block_sparse_moe.experts.86.w3", "model.layers.5.block_sparse_moe.experts.87.w3", "model.layers.5.block_sparse_moe.experts.88.w3", "model.layers.5.block_sparse_moe.experts.89.w3", "model.layers.5.block_sparse_moe.experts.90.w3", "model.layers.5.block_sparse_moe.experts.91.w3", "model.layers.5.block_sparse_moe.experts.92.w3", "model.layers.5.block_sparse_moe.experts.93.w3", "model.layers.5.block_sparse_moe.experts.94.w3", "model.layers.5.block_sparse_moe.experts.95.w3", "model.layers.5.block_sparse_moe.experts.96.w3", "model.layers.5.block_sparse_moe.experts.97.w3", "model.layers.5.block_sparse_moe.experts.98.w3", "model.layers.5.block_sparse_moe.experts.99.w3", "model.layers.5.block_sparse_moe.experts.100.w3", "model.layers.5.block_sparse_moe.experts.101.w3", "model.layers.5.block_sparse_moe.experts.102.w3", "model.layers.5.block_sparse_moe.experts.103.w3", "model.layers.5.block_sparse_moe.experts.104.w3", "model.layers.5.block_sparse_moe.experts.105.w3", "model.layers.5.block_sparse_moe.experts.106.w3", "model.layers.5.block_sparse_moe.experts.107.w3", "model.layers.5.block_sparse_moe.experts.108.w3", "model.layers.5.block_sparse_moe.experts.109.w3", "model.layers.5.block_sparse_moe.experts.110.w3", "model.layers.5.block_sparse_moe.experts.111.w3", "model.layers.5.block_sparse_moe.experts.112.w3", "model.layers.5.block_sparse_moe.experts.113.w3", "model.layers.5.block_sparse_moe.experts.114.w3", "model.layers.5.block_sparse_moe.experts.115.w3", "model.layers.5.block_sparse_moe.experts.116.w3", "model.layers.5.block_sparse_moe.experts.117.w3", "model.layers.5.block_sparse_moe.experts.118.w3", "model.layers.5.block_sparse_moe.experts.119.w3", "model.layers.5.block_sparse_moe.experts.120.w3", "model.layers.5.block_sparse_moe.experts.121.w3", "model.layers.5.block_sparse_moe.experts.122.w3", "model.layers.5.block_sparse_moe.experts.123.w3", "model.layers.5.block_sparse_moe.experts.124.w3", "model.layers.5.block_sparse_moe.experts.125.w3", "model.layers.5.block_sparse_moe.experts.126.w3", "model.layers.5.block_sparse_moe.experts.127.w3", "model.layers.5.block_sparse_moe.experts.128.w3", "model.layers.5.block_sparse_moe.experts.129.w3", "model.layers.5.block_sparse_moe.experts.130.w3", "model.layers.5.block_sparse_moe.experts.131.w3", "model.layers.5.block_sparse_moe.experts.132.w3", "model.layers.5.block_sparse_moe.experts.133.w3", "model.layers.5.block_sparse_moe.experts.134.w3", "model.layers.5.block_sparse_moe.experts.135.w3", "model.layers.5.block_sparse_moe.experts.136.w3", "model.layers.5.block_sparse_moe.experts.137.w3", "model.layers.5.block_sparse_moe.experts.138.w3", "model.layers.5.block_sparse_moe.experts.139.w3", "model.layers.5.block_sparse_moe.experts.140.w3", "model.layers.5.block_sparse_moe.experts.141.w3", "model.layers.5.block_sparse_moe.experts.142.w3", "model.layers.5.block_sparse_moe.experts.143.w3", "model.layers.5.block_sparse_moe.experts.144.w3", "model.layers.5.block_sparse_moe.experts.145.w3", "model.layers.5.block_sparse_moe.experts.146.w3", "model.layers.5.block_sparse_moe.experts.147.w3", "model.layers.5.block_sparse_moe.experts.148.w3", "model.layers.5.block_sparse_moe.experts.149.w3", "model.layers.5.block_sparse_moe.experts.150.w3", "model.layers.5.block_sparse_moe.experts.151.w3", "model.layers.5.block_sparse_moe.experts.152.w3", "model.layers.5.block_sparse_moe.experts.153.w3", "model.layers.5.block_sparse_moe.experts.154.w3", "model.layers.5.block_sparse_moe.experts.155.w3", "model.layers.5.block_sparse_moe.experts.156.w3", "model.layers.5.block_sparse_moe.experts.157.w3", "model.layers.5.block_sparse_moe.experts.158.w3", "model.layers.5.block_sparse_moe.experts.159.w3", "model.layers.5.block_sparse_moe.experts.160.w3", "model.layers.5.block_sparse_moe.experts.161.w3", "model.layers.5.block_sparse_moe.experts.162.w3", "model.layers.5.block_sparse_moe.experts.163.w3", "model.layers.5.block_sparse_moe.experts.164.w3", "model.layers.5.block_sparse_moe.experts.165.w3", "model.layers.5.block_sparse_moe.experts.166.w3", "model.layers.5.block_sparse_moe.experts.167.w3", "model.layers.5.block_sparse_moe.experts.168.w3", "model.layers.5.block_sparse_moe.experts.169.w3", "model.layers.5.block_sparse_moe.experts.170.w3", "model.layers.5.block_sparse_moe.experts.171.w3", "model.layers.5.block_sparse_moe.experts.172.w3", "model.layers.5.block_sparse_moe.experts.173.w3", "model.layers.5.block_sparse_moe.experts.174.w3", "model.layers.5.block_sparse_moe.experts.175.w3", "model.layers.5.block_sparse_moe.experts.176.w3", "model.layers.5.block_sparse_moe.experts.177.w3", "model.layers.5.block_sparse_moe.experts.178.w3", "model.layers.5.block_sparse_moe.experts.179.w3", "model.layers.5.block_sparse_moe.experts.180.w3", "model.layers.5.block_sparse_moe.experts.181.w3", "model.layers.5.block_sparse_moe.experts.182.w3", "model.layers.5.block_sparse_moe.experts.183.w3", "model.layers.5.block_sparse_moe.experts.184.w3", "model.layers.5.block_sparse_moe.experts.185.w3", "model.layers.5.block_sparse_moe.experts.186.w3", "model.layers.5.block_sparse_moe.experts.187.w3", "model.layers.5.block_sparse_moe.experts.188.w3", "model.layers.5.block_sparse_moe.experts.189.w3", "model.layers.5.block_sparse_moe.experts.190.w3", "model.layers.5.block_sparse_moe.experts.191.w3", "model.layers.5.block_sparse_moe.experts.192.w3", "model.layers.5.block_sparse_moe.experts.193.w3", "model.layers.5.block_sparse_moe.experts.194.w3", "model.layers.5.block_sparse_moe.experts.195.w3", "model.layers.5.block_sparse_moe.experts.196.w3", "model.layers.5.block_sparse_moe.experts.197.w3", "model.layers.5.block_sparse_moe.experts.198.w3", "model.layers.5.block_sparse_moe.experts.199.w3", "model.layers.5.block_sparse_moe.experts.200.w3", "model.layers.5.block_sparse_moe.experts.201.w3", "model.layers.5.block_sparse_moe.experts.202.w3", "model.layers.5.block_sparse_moe.experts.203.w3", "model.layers.5.block_sparse_moe.experts.204.w3", "model.layers.5.block_sparse_moe.experts.205.w3", "model.layers.5.block_sparse_moe.experts.206.w3", "model.layers.5.block_sparse_moe.experts.207.w3", "model.layers.5.block_sparse_moe.experts.208.w3", "model.layers.5.block_sparse_moe.experts.209.w3", "model.layers.5.block_sparse_moe.experts.210.w3", "model.layers.5.block_sparse_moe.experts.211.w3", "model.layers.5.block_sparse_moe.experts.212.w3", "model.layers.5.block_sparse_moe.experts.213.w3", "model.layers.5.block_sparse_moe.experts.214.w3", "model.layers.5.block_sparse_moe.experts.215.w3", "model.layers.5.block_sparse_moe.experts.216.w3", "model.layers.5.block_sparse_moe.experts.217.w3", "model.layers.5.block_sparse_moe.experts.218.w3", "model.layers.5.block_sparse_moe.experts.219.w3", "model.layers.5.block_sparse_moe.experts.220.w3", "model.layers.5.block_sparse_moe.experts.221.w3", "model.layers.5.block_sparse_moe.experts.222.w3", "model.layers.5.block_sparse_moe.experts.223.w3", "model.layers.5.block_sparse_moe.experts.224.w3", "model.layers.5.block_sparse_moe.experts.225.w3", "model.layers.5.block_sparse_moe.experts.226.w3", "model.layers.5.block_sparse_moe.experts.227.w3", "model.layers.5.block_sparse_moe.experts.228.w3", "model.layers.5.block_sparse_moe.experts.229.w3", "model.layers.5.block_sparse_moe.experts.230.w3", "model.layers.5.block_sparse_moe.experts.231.w3", "model.layers.5.block_sparse_moe.experts.232.w3", "model.layers.5.block_sparse_moe.experts.233.w3", "model.layers.5.block_sparse_moe.experts.234.w3", "model.layers.5.block_sparse_moe.experts.235.w3", "model.layers.5.block_sparse_moe.experts.236.w3", "model.layers.5.block_sparse_moe.experts.237.w3", "model.layers.5.block_sparse_moe.experts.238.w3", "model.layers.5.block_sparse_moe.experts.239.w3", "model.layers.5.block_sparse_moe.experts.240.w3", "model.layers.5.block_sparse_moe.experts.241.w3", "model.layers.5.block_sparse_moe.experts.242.w3", "model.layers.5.block_sparse_moe.experts.243.w3", "model.layers.5.block_sparse_moe.experts.244.w3", "model.layers.5.block_sparse_moe.experts.245.w3", "model.layers.5.block_sparse_moe.experts.246.w3", "model.layers.5.block_sparse_moe.experts.247.w3", "model.layers.5.block_sparse_moe.experts.248.w3", "model.layers.5.block_sparse_moe.experts.249.w3", "model.layers.5.block_sparse_moe.experts.250.w3", "model.layers.5.block_sparse_moe.experts.251.w3", "model.layers.5.block_sparse_moe.experts.252.w3", "model.layers.5.block_sparse_moe.experts.253.w3", "model.layers.5.block_sparse_moe.experts.254.w3", "model.layers.5.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.002395780012011531, "dbits": 2415919104 } ] }, { "idx": 29, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w2", "model.layers.5.block_sparse_moe.experts.1.w2", "model.layers.5.block_sparse_moe.experts.2.w2", "model.layers.5.block_sparse_moe.experts.3.w2", "model.layers.5.block_sparse_moe.experts.4.w2", "model.layers.5.block_sparse_moe.experts.5.w2", "model.layers.5.block_sparse_moe.experts.6.w2", "model.layers.5.block_sparse_moe.experts.7.w2", "model.layers.5.block_sparse_moe.experts.8.w2", "model.layers.5.block_sparse_moe.experts.9.w2", "model.layers.5.block_sparse_moe.experts.10.w2", "model.layers.5.block_sparse_moe.experts.11.w2", "model.layers.5.block_sparse_moe.experts.12.w2", "model.layers.5.block_sparse_moe.experts.13.w2", "model.layers.5.block_sparse_moe.experts.14.w2", "model.layers.5.block_sparse_moe.experts.15.w2", "model.layers.5.block_sparse_moe.experts.16.w2", "model.layers.5.block_sparse_moe.experts.17.w2", "model.layers.5.block_sparse_moe.experts.18.w2", "model.layers.5.block_sparse_moe.experts.19.w2", "model.layers.5.block_sparse_moe.experts.20.w2", "model.layers.5.block_sparse_moe.experts.21.w2", "model.layers.5.block_sparse_moe.experts.22.w2", "model.layers.5.block_sparse_moe.experts.23.w2", "model.layers.5.block_sparse_moe.experts.24.w2", "model.layers.5.block_sparse_moe.experts.25.w2", "model.layers.5.block_sparse_moe.experts.26.w2", "model.layers.5.block_sparse_moe.experts.27.w2", "model.layers.5.block_sparse_moe.experts.28.w2", "model.layers.5.block_sparse_moe.experts.29.w2", "model.layers.5.block_sparse_moe.experts.30.w2", "model.layers.5.block_sparse_moe.experts.31.w2", "model.layers.5.block_sparse_moe.experts.32.w2", "model.layers.5.block_sparse_moe.experts.33.w2", "model.layers.5.block_sparse_moe.experts.34.w2", "model.layers.5.block_sparse_moe.experts.35.w2", "model.layers.5.block_sparse_moe.experts.36.w2", "model.layers.5.block_sparse_moe.experts.37.w2", "model.layers.5.block_sparse_moe.experts.38.w2", "model.layers.5.block_sparse_moe.experts.39.w2", "model.layers.5.block_sparse_moe.experts.40.w2", "model.layers.5.block_sparse_moe.experts.41.w2", "model.layers.5.block_sparse_moe.experts.42.w2", "model.layers.5.block_sparse_moe.experts.43.w2", "model.layers.5.block_sparse_moe.experts.44.w2", "model.layers.5.block_sparse_moe.experts.45.w2", "model.layers.5.block_sparse_moe.experts.46.w2", "model.layers.5.block_sparse_moe.experts.47.w2", "model.layers.5.block_sparse_moe.experts.48.w2", "model.layers.5.block_sparse_moe.experts.49.w2", "model.layers.5.block_sparse_moe.experts.50.w2", "model.layers.5.block_sparse_moe.experts.51.w2", "model.layers.5.block_sparse_moe.experts.52.w2", "model.layers.5.block_sparse_moe.experts.53.w2", "model.layers.5.block_sparse_moe.experts.54.w2", "model.layers.5.block_sparse_moe.experts.55.w2", "model.layers.5.block_sparse_moe.experts.56.w2", "model.layers.5.block_sparse_moe.experts.57.w2", "model.layers.5.block_sparse_moe.experts.58.w2", "model.layers.5.block_sparse_moe.experts.59.w2", "model.layers.5.block_sparse_moe.experts.60.w2", "model.layers.5.block_sparse_moe.experts.61.w2", "model.layers.5.block_sparse_moe.experts.62.w2", "model.layers.5.block_sparse_moe.experts.63.w2", "model.layers.5.block_sparse_moe.experts.64.w2", "model.layers.5.block_sparse_moe.experts.65.w2", "model.layers.5.block_sparse_moe.experts.66.w2", "model.layers.5.block_sparse_moe.experts.67.w2", "model.layers.5.block_sparse_moe.experts.68.w2", "model.layers.5.block_sparse_moe.experts.69.w2", "model.layers.5.block_sparse_moe.experts.70.w2", "model.layers.5.block_sparse_moe.experts.71.w2", "model.layers.5.block_sparse_moe.experts.72.w2", "model.layers.5.block_sparse_moe.experts.73.w2", "model.layers.5.block_sparse_moe.experts.74.w2", "model.layers.5.block_sparse_moe.experts.75.w2", "model.layers.5.block_sparse_moe.experts.76.w2", "model.layers.5.block_sparse_moe.experts.77.w2", "model.layers.5.block_sparse_moe.experts.78.w2", "model.layers.5.block_sparse_moe.experts.79.w2", "model.layers.5.block_sparse_moe.experts.80.w2", "model.layers.5.block_sparse_moe.experts.81.w2", "model.layers.5.block_sparse_moe.experts.82.w2", "model.layers.5.block_sparse_moe.experts.83.w2", "model.layers.5.block_sparse_moe.experts.84.w2", "model.layers.5.block_sparse_moe.experts.85.w2", "model.layers.5.block_sparse_moe.experts.86.w2", "model.layers.5.block_sparse_moe.experts.87.w2", "model.layers.5.block_sparse_moe.experts.88.w2", "model.layers.5.block_sparse_moe.experts.89.w2", "model.layers.5.block_sparse_moe.experts.90.w2", "model.layers.5.block_sparse_moe.experts.91.w2", "model.layers.5.block_sparse_moe.experts.92.w2", "model.layers.5.block_sparse_moe.experts.93.w2", "model.layers.5.block_sparse_moe.experts.94.w2", "model.layers.5.block_sparse_moe.experts.95.w2", "model.layers.5.block_sparse_moe.experts.96.w2", "model.layers.5.block_sparse_moe.experts.97.w2", "model.layers.5.block_sparse_moe.experts.98.w2", "model.layers.5.block_sparse_moe.experts.99.w2", "model.layers.5.block_sparse_moe.experts.100.w2", "model.layers.5.block_sparse_moe.experts.101.w2", "model.layers.5.block_sparse_moe.experts.102.w2", "model.layers.5.block_sparse_moe.experts.103.w2", "model.layers.5.block_sparse_moe.experts.104.w2", "model.layers.5.block_sparse_moe.experts.105.w2", "model.layers.5.block_sparse_moe.experts.106.w2", "model.layers.5.block_sparse_moe.experts.107.w2", "model.layers.5.block_sparse_moe.experts.108.w2", "model.layers.5.block_sparse_moe.experts.109.w2", "model.layers.5.block_sparse_moe.experts.110.w2", "model.layers.5.block_sparse_moe.experts.111.w2", "model.layers.5.block_sparse_moe.experts.112.w2", "model.layers.5.block_sparse_moe.experts.113.w2", "model.layers.5.block_sparse_moe.experts.114.w2", "model.layers.5.block_sparse_moe.experts.115.w2", "model.layers.5.block_sparse_moe.experts.116.w2", "model.layers.5.block_sparse_moe.experts.117.w2", "model.layers.5.block_sparse_moe.experts.118.w2", "model.layers.5.block_sparse_moe.experts.119.w2", "model.layers.5.block_sparse_moe.experts.120.w2", "model.layers.5.block_sparse_moe.experts.121.w2", "model.layers.5.block_sparse_moe.experts.122.w2", "model.layers.5.block_sparse_moe.experts.123.w2", "model.layers.5.block_sparse_moe.experts.124.w2", "model.layers.5.block_sparse_moe.experts.125.w2", "model.layers.5.block_sparse_moe.experts.126.w2", "model.layers.5.block_sparse_moe.experts.127.w2", "model.layers.5.block_sparse_moe.experts.128.w2", "model.layers.5.block_sparse_moe.experts.129.w2", "model.layers.5.block_sparse_moe.experts.130.w2", "model.layers.5.block_sparse_moe.experts.131.w2", "model.layers.5.block_sparse_moe.experts.132.w2", "model.layers.5.block_sparse_moe.experts.133.w2", "model.layers.5.block_sparse_moe.experts.134.w2", "model.layers.5.block_sparse_moe.experts.135.w2", "model.layers.5.block_sparse_moe.experts.136.w2", "model.layers.5.block_sparse_moe.experts.137.w2", "model.layers.5.block_sparse_moe.experts.138.w2", "model.layers.5.block_sparse_moe.experts.139.w2", "model.layers.5.block_sparse_moe.experts.140.w2", "model.layers.5.block_sparse_moe.experts.141.w2", "model.layers.5.block_sparse_moe.experts.142.w2", "model.layers.5.block_sparse_moe.experts.143.w2", "model.layers.5.block_sparse_moe.experts.144.w2", "model.layers.5.block_sparse_moe.experts.145.w2", "model.layers.5.block_sparse_moe.experts.146.w2", "model.layers.5.block_sparse_moe.experts.147.w2", "model.layers.5.block_sparse_moe.experts.148.w2", "model.layers.5.block_sparse_moe.experts.149.w2", "model.layers.5.block_sparse_moe.experts.150.w2", "model.layers.5.block_sparse_moe.experts.151.w2", "model.layers.5.block_sparse_moe.experts.152.w2", "model.layers.5.block_sparse_moe.experts.153.w2", "model.layers.5.block_sparse_moe.experts.154.w2", "model.layers.5.block_sparse_moe.experts.155.w2", "model.layers.5.block_sparse_moe.experts.156.w2", "model.layers.5.block_sparse_moe.experts.157.w2", "model.layers.5.block_sparse_moe.experts.158.w2", "model.layers.5.block_sparse_moe.experts.159.w2", "model.layers.5.block_sparse_moe.experts.160.w2", "model.layers.5.block_sparse_moe.experts.161.w2", "model.layers.5.block_sparse_moe.experts.162.w2", "model.layers.5.block_sparse_moe.experts.163.w2", "model.layers.5.block_sparse_moe.experts.164.w2", "model.layers.5.block_sparse_moe.experts.165.w2", "model.layers.5.block_sparse_moe.experts.166.w2", "model.layers.5.block_sparse_moe.experts.167.w2", "model.layers.5.block_sparse_moe.experts.168.w2", "model.layers.5.block_sparse_moe.experts.169.w2", "model.layers.5.block_sparse_moe.experts.170.w2", "model.layers.5.block_sparse_moe.experts.171.w2", "model.layers.5.block_sparse_moe.experts.172.w2", "model.layers.5.block_sparse_moe.experts.173.w2", "model.layers.5.block_sparse_moe.experts.174.w2", "model.layers.5.block_sparse_moe.experts.175.w2", "model.layers.5.block_sparse_moe.experts.176.w2", "model.layers.5.block_sparse_moe.experts.177.w2", "model.layers.5.block_sparse_moe.experts.178.w2", "model.layers.5.block_sparse_moe.experts.179.w2", "model.layers.5.block_sparse_moe.experts.180.w2", "model.layers.5.block_sparse_moe.experts.181.w2", "model.layers.5.block_sparse_moe.experts.182.w2", "model.layers.5.block_sparse_moe.experts.183.w2", "model.layers.5.block_sparse_moe.experts.184.w2", "model.layers.5.block_sparse_moe.experts.185.w2", "model.layers.5.block_sparse_moe.experts.186.w2", "model.layers.5.block_sparse_moe.experts.187.w2", "model.layers.5.block_sparse_moe.experts.188.w2", "model.layers.5.block_sparse_moe.experts.189.w2", "model.layers.5.block_sparse_moe.experts.190.w2", "model.layers.5.block_sparse_moe.experts.191.w2", "model.layers.5.block_sparse_moe.experts.192.w2", "model.layers.5.block_sparse_moe.experts.193.w2", "model.layers.5.block_sparse_moe.experts.194.w2", "model.layers.5.block_sparse_moe.experts.195.w2", "model.layers.5.block_sparse_moe.experts.196.w2", "model.layers.5.block_sparse_moe.experts.197.w2", "model.layers.5.block_sparse_moe.experts.198.w2", "model.layers.5.block_sparse_moe.experts.199.w2", "model.layers.5.block_sparse_moe.experts.200.w2", "model.layers.5.block_sparse_moe.experts.201.w2", "model.layers.5.block_sparse_moe.experts.202.w2", "model.layers.5.block_sparse_moe.experts.203.w2", "model.layers.5.block_sparse_moe.experts.204.w2", "model.layers.5.block_sparse_moe.experts.205.w2", "model.layers.5.block_sparse_moe.experts.206.w2", "model.layers.5.block_sparse_moe.experts.207.w2", "model.layers.5.block_sparse_moe.experts.208.w2", "model.layers.5.block_sparse_moe.experts.209.w2", "model.layers.5.block_sparse_moe.experts.210.w2", "model.layers.5.block_sparse_moe.experts.211.w2", "model.layers.5.block_sparse_moe.experts.212.w2", "model.layers.5.block_sparse_moe.experts.213.w2", "model.layers.5.block_sparse_moe.experts.214.w2", "model.layers.5.block_sparse_moe.experts.215.w2", "model.layers.5.block_sparse_moe.experts.216.w2", "model.layers.5.block_sparse_moe.experts.217.w2", "model.layers.5.block_sparse_moe.experts.218.w2", "model.layers.5.block_sparse_moe.experts.219.w2", "model.layers.5.block_sparse_moe.experts.220.w2", "model.layers.5.block_sparse_moe.experts.221.w2", "model.layers.5.block_sparse_moe.experts.222.w2", "model.layers.5.block_sparse_moe.experts.223.w2", "model.layers.5.block_sparse_moe.experts.224.w2", "model.layers.5.block_sparse_moe.experts.225.w2", "model.layers.5.block_sparse_moe.experts.226.w2", "model.layers.5.block_sparse_moe.experts.227.w2", "model.layers.5.block_sparse_moe.experts.228.w2", "model.layers.5.block_sparse_moe.experts.229.w2", "model.layers.5.block_sparse_moe.experts.230.w2", "model.layers.5.block_sparse_moe.experts.231.w2", "model.layers.5.block_sparse_moe.experts.232.w2", "model.layers.5.block_sparse_moe.experts.233.w2", "model.layers.5.block_sparse_moe.experts.234.w2", "model.layers.5.block_sparse_moe.experts.235.w2", "model.layers.5.block_sparse_moe.experts.236.w2", "model.layers.5.block_sparse_moe.experts.237.w2", "model.layers.5.block_sparse_moe.experts.238.w2", "model.layers.5.block_sparse_moe.experts.239.w2", "model.layers.5.block_sparse_moe.experts.240.w2", "model.layers.5.block_sparse_moe.experts.241.w2", "model.layers.5.block_sparse_moe.experts.242.w2", "model.layers.5.block_sparse_moe.experts.243.w2", "model.layers.5.block_sparse_moe.experts.244.w2", "model.layers.5.block_sparse_moe.experts.245.w2", "model.layers.5.block_sparse_moe.experts.246.w2", "model.layers.5.block_sparse_moe.experts.247.w2", "model.layers.5.block_sparse_moe.experts.248.w2", "model.layers.5.block_sparse_moe.experts.249.w2", "model.layers.5.block_sparse_moe.experts.250.w2", "model.layers.5.block_sparse_moe.experts.251.w2", "model.layers.5.block_sparse_moe.experts.252.w2", "model.layers.5.block_sparse_moe.experts.253.w2", "model.layers.5.block_sparse_moe.experts.254.w2", "model.layers.5.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0019345173612237004, "dbits": 1207959552 } ] }, { "idx": 30, "layers": [ "model.layers.6.self_attn.q_proj" ], "candidates": [ { "dkld": -1.087039709093962e-06, "dbits": 18874368 } ] }, { "idx": 31, "layers": [ "model.layers.6.self_attn.k_proj", "model.layers.6.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0006557853892445592, "dbits": 6291456 } ] }, { "idx": 32, "layers": [ "model.layers.6.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0009809574112296104, "dbits": 18874368 } ] }, { "idx": 33, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w1", "model.layers.6.block_sparse_moe.experts.1.w1", "model.layers.6.block_sparse_moe.experts.2.w1", "model.layers.6.block_sparse_moe.experts.3.w1", "model.layers.6.block_sparse_moe.experts.4.w1", "model.layers.6.block_sparse_moe.experts.5.w1", "model.layers.6.block_sparse_moe.experts.6.w1", "model.layers.6.block_sparse_moe.experts.7.w1", "model.layers.6.block_sparse_moe.experts.8.w1", "model.layers.6.block_sparse_moe.experts.9.w1", "model.layers.6.block_sparse_moe.experts.10.w1", "model.layers.6.block_sparse_moe.experts.11.w1", "model.layers.6.block_sparse_moe.experts.12.w1", "model.layers.6.block_sparse_moe.experts.13.w1", "model.layers.6.block_sparse_moe.experts.14.w1", "model.layers.6.block_sparse_moe.experts.15.w1", "model.layers.6.block_sparse_moe.experts.16.w1", "model.layers.6.block_sparse_moe.experts.17.w1", "model.layers.6.block_sparse_moe.experts.18.w1", "model.layers.6.block_sparse_moe.experts.19.w1", "model.layers.6.block_sparse_moe.experts.20.w1", "model.layers.6.block_sparse_moe.experts.21.w1", "model.layers.6.block_sparse_moe.experts.22.w1", "model.layers.6.block_sparse_moe.experts.23.w1", "model.layers.6.block_sparse_moe.experts.24.w1", "model.layers.6.block_sparse_moe.experts.25.w1", "model.layers.6.block_sparse_moe.experts.26.w1", "model.layers.6.block_sparse_moe.experts.27.w1", "model.layers.6.block_sparse_moe.experts.28.w1", "model.layers.6.block_sparse_moe.experts.29.w1", "model.layers.6.block_sparse_moe.experts.30.w1", "model.layers.6.block_sparse_moe.experts.31.w1", "model.layers.6.block_sparse_moe.experts.32.w1", "model.layers.6.block_sparse_moe.experts.33.w1", "model.layers.6.block_sparse_moe.experts.34.w1", "model.layers.6.block_sparse_moe.experts.35.w1", "model.layers.6.block_sparse_moe.experts.36.w1", "model.layers.6.block_sparse_moe.experts.37.w1", "model.layers.6.block_sparse_moe.experts.38.w1", "model.layers.6.block_sparse_moe.experts.39.w1", "model.layers.6.block_sparse_moe.experts.40.w1", "model.layers.6.block_sparse_moe.experts.41.w1", "model.layers.6.block_sparse_moe.experts.42.w1", "model.layers.6.block_sparse_moe.experts.43.w1", "model.layers.6.block_sparse_moe.experts.44.w1", "model.layers.6.block_sparse_moe.experts.45.w1", "model.layers.6.block_sparse_moe.experts.46.w1", "model.layers.6.block_sparse_moe.experts.47.w1", "model.layers.6.block_sparse_moe.experts.48.w1", "model.layers.6.block_sparse_moe.experts.49.w1", "model.layers.6.block_sparse_moe.experts.50.w1", "model.layers.6.block_sparse_moe.experts.51.w1", "model.layers.6.block_sparse_moe.experts.52.w1", "model.layers.6.block_sparse_moe.experts.53.w1", "model.layers.6.block_sparse_moe.experts.54.w1", "model.layers.6.block_sparse_moe.experts.55.w1", "model.layers.6.block_sparse_moe.experts.56.w1", "model.layers.6.block_sparse_moe.experts.57.w1", "model.layers.6.block_sparse_moe.experts.58.w1", "model.layers.6.block_sparse_moe.experts.59.w1", "model.layers.6.block_sparse_moe.experts.60.w1", "model.layers.6.block_sparse_moe.experts.61.w1", "model.layers.6.block_sparse_moe.experts.62.w1", "model.layers.6.block_sparse_moe.experts.63.w1", "model.layers.6.block_sparse_moe.experts.64.w1", "model.layers.6.block_sparse_moe.experts.65.w1", "model.layers.6.block_sparse_moe.experts.66.w1", "model.layers.6.block_sparse_moe.experts.67.w1", "model.layers.6.block_sparse_moe.experts.68.w1", "model.layers.6.block_sparse_moe.experts.69.w1", "model.layers.6.block_sparse_moe.experts.70.w1", "model.layers.6.block_sparse_moe.experts.71.w1", "model.layers.6.block_sparse_moe.experts.72.w1", "model.layers.6.block_sparse_moe.experts.73.w1", "model.layers.6.block_sparse_moe.experts.74.w1", "model.layers.6.block_sparse_moe.experts.75.w1", "model.layers.6.block_sparse_moe.experts.76.w1", "model.layers.6.block_sparse_moe.experts.77.w1", "model.layers.6.block_sparse_moe.experts.78.w1", "model.layers.6.block_sparse_moe.experts.79.w1", "model.layers.6.block_sparse_moe.experts.80.w1", "model.layers.6.block_sparse_moe.experts.81.w1", "model.layers.6.block_sparse_moe.experts.82.w1", "model.layers.6.block_sparse_moe.experts.83.w1", "model.layers.6.block_sparse_moe.experts.84.w1", "model.layers.6.block_sparse_moe.experts.85.w1", "model.layers.6.block_sparse_moe.experts.86.w1", "model.layers.6.block_sparse_moe.experts.87.w1", "model.layers.6.block_sparse_moe.experts.88.w1", "model.layers.6.block_sparse_moe.experts.89.w1", "model.layers.6.block_sparse_moe.experts.90.w1", "model.layers.6.block_sparse_moe.experts.91.w1", "model.layers.6.block_sparse_moe.experts.92.w1", "model.layers.6.block_sparse_moe.experts.93.w1", "model.layers.6.block_sparse_moe.experts.94.w1", "model.layers.6.block_sparse_moe.experts.95.w1", "model.layers.6.block_sparse_moe.experts.96.w1", "model.layers.6.block_sparse_moe.experts.97.w1", "model.layers.6.block_sparse_moe.experts.98.w1", "model.layers.6.block_sparse_moe.experts.99.w1", "model.layers.6.block_sparse_moe.experts.100.w1", "model.layers.6.block_sparse_moe.experts.101.w1", "model.layers.6.block_sparse_moe.experts.102.w1", "model.layers.6.block_sparse_moe.experts.103.w1", "model.layers.6.block_sparse_moe.experts.104.w1", "model.layers.6.block_sparse_moe.experts.105.w1", "model.layers.6.block_sparse_moe.experts.106.w1", "model.layers.6.block_sparse_moe.experts.107.w1", "model.layers.6.block_sparse_moe.experts.108.w1", "model.layers.6.block_sparse_moe.experts.109.w1", "model.layers.6.block_sparse_moe.experts.110.w1", "model.layers.6.block_sparse_moe.experts.111.w1", "model.layers.6.block_sparse_moe.experts.112.w1", "model.layers.6.block_sparse_moe.experts.113.w1", "model.layers.6.block_sparse_moe.experts.114.w1", "model.layers.6.block_sparse_moe.experts.115.w1", "model.layers.6.block_sparse_moe.experts.116.w1", "model.layers.6.block_sparse_moe.experts.117.w1", "model.layers.6.block_sparse_moe.experts.118.w1", "model.layers.6.block_sparse_moe.experts.119.w1", "model.layers.6.block_sparse_moe.experts.120.w1", "model.layers.6.block_sparse_moe.experts.121.w1", "model.layers.6.block_sparse_moe.experts.122.w1", "model.layers.6.block_sparse_moe.experts.123.w1", "model.layers.6.block_sparse_moe.experts.124.w1", "model.layers.6.block_sparse_moe.experts.125.w1", "model.layers.6.block_sparse_moe.experts.126.w1", "model.layers.6.block_sparse_moe.experts.127.w1", "model.layers.6.block_sparse_moe.experts.128.w1", "model.layers.6.block_sparse_moe.experts.129.w1", "model.layers.6.block_sparse_moe.experts.130.w1", "model.layers.6.block_sparse_moe.experts.131.w1", "model.layers.6.block_sparse_moe.experts.132.w1", "model.layers.6.block_sparse_moe.experts.133.w1", "model.layers.6.block_sparse_moe.experts.134.w1", "model.layers.6.block_sparse_moe.experts.135.w1", "model.layers.6.block_sparse_moe.experts.136.w1", "model.layers.6.block_sparse_moe.experts.137.w1", "model.layers.6.block_sparse_moe.experts.138.w1", "model.layers.6.block_sparse_moe.experts.139.w1", "model.layers.6.block_sparse_moe.experts.140.w1", "model.layers.6.block_sparse_moe.experts.141.w1", "model.layers.6.block_sparse_moe.experts.142.w1", "model.layers.6.block_sparse_moe.experts.143.w1", "model.layers.6.block_sparse_moe.experts.144.w1", "model.layers.6.block_sparse_moe.experts.145.w1", "model.layers.6.block_sparse_moe.experts.146.w1", "model.layers.6.block_sparse_moe.experts.147.w1", "model.layers.6.block_sparse_moe.experts.148.w1", "model.layers.6.block_sparse_moe.experts.149.w1", "model.layers.6.block_sparse_moe.experts.150.w1", "model.layers.6.block_sparse_moe.experts.151.w1", "model.layers.6.block_sparse_moe.experts.152.w1", "model.layers.6.block_sparse_moe.experts.153.w1", "model.layers.6.block_sparse_moe.experts.154.w1", "model.layers.6.block_sparse_moe.experts.155.w1", "model.layers.6.block_sparse_moe.experts.156.w1", "model.layers.6.block_sparse_moe.experts.157.w1", "model.layers.6.block_sparse_moe.experts.158.w1", "model.layers.6.block_sparse_moe.experts.159.w1", "model.layers.6.block_sparse_moe.experts.160.w1", "model.layers.6.block_sparse_moe.experts.161.w1", "model.layers.6.block_sparse_moe.experts.162.w1", "model.layers.6.block_sparse_moe.experts.163.w1", "model.layers.6.block_sparse_moe.experts.164.w1", "model.layers.6.block_sparse_moe.experts.165.w1", "model.layers.6.block_sparse_moe.experts.166.w1", "model.layers.6.block_sparse_moe.experts.167.w1", "model.layers.6.block_sparse_moe.experts.168.w1", "model.layers.6.block_sparse_moe.experts.169.w1", "model.layers.6.block_sparse_moe.experts.170.w1", "model.layers.6.block_sparse_moe.experts.171.w1", "model.layers.6.block_sparse_moe.experts.172.w1", "model.layers.6.block_sparse_moe.experts.173.w1", "model.layers.6.block_sparse_moe.experts.174.w1", "model.layers.6.block_sparse_moe.experts.175.w1", "model.layers.6.block_sparse_moe.experts.176.w1", "model.layers.6.block_sparse_moe.experts.177.w1", "model.layers.6.block_sparse_moe.experts.178.w1", "model.layers.6.block_sparse_moe.experts.179.w1", "model.layers.6.block_sparse_moe.experts.180.w1", "model.layers.6.block_sparse_moe.experts.181.w1", "model.layers.6.block_sparse_moe.experts.182.w1", "model.layers.6.block_sparse_moe.experts.183.w1", "model.layers.6.block_sparse_moe.experts.184.w1", "model.layers.6.block_sparse_moe.experts.185.w1", "model.layers.6.block_sparse_moe.experts.186.w1", "model.layers.6.block_sparse_moe.experts.187.w1", "model.layers.6.block_sparse_moe.experts.188.w1", "model.layers.6.block_sparse_moe.experts.189.w1", "model.layers.6.block_sparse_moe.experts.190.w1", "model.layers.6.block_sparse_moe.experts.191.w1", "model.layers.6.block_sparse_moe.experts.192.w1", "model.layers.6.block_sparse_moe.experts.193.w1", "model.layers.6.block_sparse_moe.experts.194.w1", "model.layers.6.block_sparse_moe.experts.195.w1", "model.layers.6.block_sparse_moe.experts.196.w1", "model.layers.6.block_sparse_moe.experts.197.w1", "model.layers.6.block_sparse_moe.experts.198.w1", "model.layers.6.block_sparse_moe.experts.199.w1", "model.layers.6.block_sparse_moe.experts.200.w1", "model.layers.6.block_sparse_moe.experts.201.w1", "model.layers.6.block_sparse_moe.experts.202.w1", "model.layers.6.block_sparse_moe.experts.203.w1", "model.layers.6.block_sparse_moe.experts.204.w1", "model.layers.6.block_sparse_moe.experts.205.w1", "model.layers.6.block_sparse_moe.experts.206.w1", "model.layers.6.block_sparse_moe.experts.207.w1", "model.layers.6.block_sparse_moe.experts.208.w1", "model.layers.6.block_sparse_moe.experts.209.w1", "model.layers.6.block_sparse_moe.experts.210.w1", "model.layers.6.block_sparse_moe.experts.211.w1", "model.layers.6.block_sparse_moe.experts.212.w1", "model.layers.6.block_sparse_moe.experts.213.w1", "model.layers.6.block_sparse_moe.experts.214.w1", "model.layers.6.block_sparse_moe.experts.215.w1", "model.layers.6.block_sparse_moe.experts.216.w1", "model.layers.6.block_sparse_moe.experts.217.w1", "model.layers.6.block_sparse_moe.experts.218.w1", "model.layers.6.block_sparse_moe.experts.219.w1", "model.layers.6.block_sparse_moe.experts.220.w1", "model.layers.6.block_sparse_moe.experts.221.w1", "model.layers.6.block_sparse_moe.experts.222.w1", "model.layers.6.block_sparse_moe.experts.223.w1", "model.layers.6.block_sparse_moe.experts.224.w1", "model.layers.6.block_sparse_moe.experts.225.w1", "model.layers.6.block_sparse_moe.experts.226.w1", "model.layers.6.block_sparse_moe.experts.227.w1", "model.layers.6.block_sparse_moe.experts.228.w1", "model.layers.6.block_sparse_moe.experts.229.w1", "model.layers.6.block_sparse_moe.experts.230.w1", "model.layers.6.block_sparse_moe.experts.231.w1", "model.layers.6.block_sparse_moe.experts.232.w1", "model.layers.6.block_sparse_moe.experts.233.w1", "model.layers.6.block_sparse_moe.experts.234.w1", "model.layers.6.block_sparse_moe.experts.235.w1", "model.layers.6.block_sparse_moe.experts.236.w1", "model.layers.6.block_sparse_moe.experts.237.w1", "model.layers.6.block_sparse_moe.experts.238.w1", "model.layers.6.block_sparse_moe.experts.239.w1", "model.layers.6.block_sparse_moe.experts.240.w1", "model.layers.6.block_sparse_moe.experts.241.w1", "model.layers.6.block_sparse_moe.experts.242.w1", "model.layers.6.block_sparse_moe.experts.243.w1", "model.layers.6.block_sparse_moe.experts.244.w1", "model.layers.6.block_sparse_moe.experts.245.w1", "model.layers.6.block_sparse_moe.experts.246.w1", "model.layers.6.block_sparse_moe.experts.247.w1", "model.layers.6.block_sparse_moe.experts.248.w1", "model.layers.6.block_sparse_moe.experts.249.w1", "model.layers.6.block_sparse_moe.experts.250.w1", "model.layers.6.block_sparse_moe.experts.251.w1", "model.layers.6.block_sparse_moe.experts.252.w1", "model.layers.6.block_sparse_moe.experts.253.w1", "model.layers.6.block_sparse_moe.experts.254.w1", "model.layers.6.block_sparse_moe.experts.255.w1", "model.layers.6.block_sparse_moe.experts.0.w3", "model.layers.6.block_sparse_moe.experts.1.w3", "model.layers.6.block_sparse_moe.experts.2.w3", "model.layers.6.block_sparse_moe.experts.3.w3", "model.layers.6.block_sparse_moe.experts.4.w3", "model.layers.6.block_sparse_moe.experts.5.w3", "model.layers.6.block_sparse_moe.experts.6.w3", "model.layers.6.block_sparse_moe.experts.7.w3", "model.layers.6.block_sparse_moe.experts.8.w3", "model.layers.6.block_sparse_moe.experts.9.w3", "model.layers.6.block_sparse_moe.experts.10.w3", "model.layers.6.block_sparse_moe.experts.11.w3", "model.layers.6.block_sparse_moe.experts.12.w3", "model.layers.6.block_sparse_moe.experts.13.w3", "model.layers.6.block_sparse_moe.experts.14.w3", "model.layers.6.block_sparse_moe.experts.15.w3", "model.layers.6.block_sparse_moe.experts.16.w3", "model.layers.6.block_sparse_moe.experts.17.w3", "model.layers.6.block_sparse_moe.experts.18.w3", "model.layers.6.block_sparse_moe.experts.19.w3", "model.layers.6.block_sparse_moe.experts.20.w3", "model.layers.6.block_sparse_moe.experts.21.w3", "model.layers.6.block_sparse_moe.experts.22.w3", "model.layers.6.block_sparse_moe.experts.23.w3", "model.layers.6.block_sparse_moe.experts.24.w3", "model.layers.6.block_sparse_moe.experts.25.w3", "model.layers.6.block_sparse_moe.experts.26.w3", "model.layers.6.block_sparse_moe.experts.27.w3", "model.layers.6.block_sparse_moe.experts.28.w3", "model.layers.6.block_sparse_moe.experts.29.w3", "model.layers.6.block_sparse_moe.experts.30.w3", "model.layers.6.block_sparse_moe.experts.31.w3", "model.layers.6.block_sparse_moe.experts.32.w3", "model.layers.6.block_sparse_moe.experts.33.w3", "model.layers.6.block_sparse_moe.experts.34.w3", "model.layers.6.block_sparse_moe.experts.35.w3", "model.layers.6.block_sparse_moe.experts.36.w3", "model.layers.6.block_sparse_moe.experts.37.w3", "model.layers.6.block_sparse_moe.experts.38.w3", "model.layers.6.block_sparse_moe.experts.39.w3", "model.layers.6.block_sparse_moe.experts.40.w3", "model.layers.6.block_sparse_moe.experts.41.w3", "model.layers.6.block_sparse_moe.experts.42.w3", "model.layers.6.block_sparse_moe.experts.43.w3", "model.layers.6.block_sparse_moe.experts.44.w3", "model.layers.6.block_sparse_moe.experts.45.w3", "model.layers.6.block_sparse_moe.experts.46.w3", "model.layers.6.block_sparse_moe.experts.47.w3", "model.layers.6.block_sparse_moe.experts.48.w3", "model.layers.6.block_sparse_moe.experts.49.w3", "model.layers.6.block_sparse_moe.experts.50.w3", "model.layers.6.block_sparse_moe.experts.51.w3", "model.layers.6.block_sparse_moe.experts.52.w3", "model.layers.6.block_sparse_moe.experts.53.w3", "model.layers.6.block_sparse_moe.experts.54.w3", "model.layers.6.block_sparse_moe.experts.55.w3", "model.layers.6.block_sparse_moe.experts.56.w3", "model.layers.6.block_sparse_moe.experts.57.w3", "model.layers.6.block_sparse_moe.experts.58.w3", "model.layers.6.block_sparse_moe.experts.59.w3", "model.layers.6.block_sparse_moe.experts.60.w3", "model.layers.6.block_sparse_moe.experts.61.w3", "model.layers.6.block_sparse_moe.experts.62.w3", "model.layers.6.block_sparse_moe.experts.63.w3", "model.layers.6.block_sparse_moe.experts.64.w3", "model.layers.6.block_sparse_moe.experts.65.w3", "model.layers.6.block_sparse_moe.experts.66.w3", "model.layers.6.block_sparse_moe.experts.67.w3", "model.layers.6.block_sparse_moe.experts.68.w3", "model.layers.6.block_sparse_moe.experts.69.w3", "model.layers.6.block_sparse_moe.experts.70.w3", "model.layers.6.block_sparse_moe.experts.71.w3", "model.layers.6.block_sparse_moe.experts.72.w3", "model.layers.6.block_sparse_moe.experts.73.w3", "model.layers.6.block_sparse_moe.experts.74.w3", "model.layers.6.block_sparse_moe.experts.75.w3", "model.layers.6.block_sparse_moe.experts.76.w3", "model.layers.6.block_sparse_moe.experts.77.w3", "model.layers.6.block_sparse_moe.experts.78.w3", "model.layers.6.block_sparse_moe.experts.79.w3", "model.layers.6.block_sparse_moe.experts.80.w3", "model.layers.6.block_sparse_moe.experts.81.w3", "model.layers.6.block_sparse_moe.experts.82.w3", "model.layers.6.block_sparse_moe.experts.83.w3", "model.layers.6.block_sparse_moe.experts.84.w3", "model.layers.6.block_sparse_moe.experts.85.w3", "model.layers.6.block_sparse_moe.experts.86.w3", "model.layers.6.block_sparse_moe.experts.87.w3", "model.layers.6.block_sparse_moe.experts.88.w3", "model.layers.6.block_sparse_moe.experts.89.w3", "model.layers.6.block_sparse_moe.experts.90.w3", "model.layers.6.block_sparse_moe.experts.91.w3", "model.layers.6.block_sparse_moe.experts.92.w3", "model.layers.6.block_sparse_moe.experts.93.w3", "model.layers.6.block_sparse_moe.experts.94.w3", "model.layers.6.block_sparse_moe.experts.95.w3", "model.layers.6.block_sparse_moe.experts.96.w3", "model.layers.6.block_sparse_moe.experts.97.w3", "model.layers.6.block_sparse_moe.experts.98.w3", "model.layers.6.block_sparse_moe.experts.99.w3", "model.layers.6.block_sparse_moe.experts.100.w3", "model.layers.6.block_sparse_moe.experts.101.w3", "model.layers.6.block_sparse_moe.experts.102.w3", "model.layers.6.block_sparse_moe.experts.103.w3", "model.layers.6.block_sparse_moe.experts.104.w3", "model.layers.6.block_sparse_moe.experts.105.w3", "model.layers.6.block_sparse_moe.experts.106.w3", "model.layers.6.block_sparse_moe.experts.107.w3", "model.layers.6.block_sparse_moe.experts.108.w3", "model.layers.6.block_sparse_moe.experts.109.w3", "model.layers.6.block_sparse_moe.experts.110.w3", "model.layers.6.block_sparse_moe.experts.111.w3", "model.layers.6.block_sparse_moe.experts.112.w3", "model.layers.6.block_sparse_moe.experts.113.w3", "model.layers.6.block_sparse_moe.experts.114.w3", "model.layers.6.block_sparse_moe.experts.115.w3", "model.layers.6.block_sparse_moe.experts.116.w3", "model.layers.6.block_sparse_moe.experts.117.w3", "model.layers.6.block_sparse_moe.experts.118.w3", "model.layers.6.block_sparse_moe.experts.119.w3", "model.layers.6.block_sparse_moe.experts.120.w3", "model.layers.6.block_sparse_moe.experts.121.w3", "model.layers.6.block_sparse_moe.experts.122.w3", "model.layers.6.block_sparse_moe.experts.123.w3", "model.layers.6.block_sparse_moe.experts.124.w3", "model.layers.6.block_sparse_moe.experts.125.w3", "model.layers.6.block_sparse_moe.experts.126.w3", "model.layers.6.block_sparse_moe.experts.127.w3", "model.layers.6.block_sparse_moe.experts.128.w3", "model.layers.6.block_sparse_moe.experts.129.w3", "model.layers.6.block_sparse_moe.experts.130.w3", "model.layers.6.block_sparse_moe.experts.131.w3", "model.layers.6.block_sparse_moe.experts.132.w3", "model.layers.6.block_sparse_moe.experts.133.w3", "model.layers.6.block_sparse_moe.experts.134.w3", "model.layers.6.block_sparse_moe.experts.135.w3", "model.layers.6.block_sparse_moe.experts.136.w3", "model.layers.6.block_sparse_moe.experts.137.w3", "model.layers.6.block_sparse_moe.experts.138.w3", "model.layers.6.block_sparse_moe.experts.139.w3", "model.layers.6.block_sparse_moe.experts.140.w3", "model.layers.6.block_sparse_moe.experts.141.w3", "model.layers.6.block_sparse_moe.experts.142.w3", "model.layers.6.block_sparse_moe.experts.143.w3", "model.layers.6.block_sparse_moe.experts.144.w3", "model.layers.6.block_sparse_moe.experts.145.w3", "model.layers.6.block_sparse_moe.experts.146.w3", "model.layers.6.block_sparse_moe.experts.147.w3", "model.layers.6.block_sparse_moe.experts.148.w3", "model.layers.6.block_sparse_moe.experts.149.w3", "model.layers.6.block_sparse_moe.experts.150.w3", "model.layers.6.block_sparse_moe.experts.151.w3", "model.layers.6.block_sparse_moe.experts.152.w3", "model.layers.6.block_sparse_moe.experts.153.w3", "model.layers.6.block_sparse_moe.experts.154.w3", "model.layers.6.block_sparse_moe.experts.155.w3", "model.layers.6.block_sparse_moe.experts.156.w3", "model.layers.6.block_sparse_moe.experts.157.w3", "model.layers.6.block_sparse_moe.experts.158.w3", "model.layers.6.block_sparse_moe.experts.159.w3", "model.layers.6.block_sparse_moe.experts.160.w3", "model.layers.6.block_sparse_moe.experts.161.w3", "model.layers.6.block_sparse_moe.experts.162.w3", "model.layers.6.block_sparse_moe.experts.163.w3", "model.layers.6.block_sparse_moe.experts.164.w3", "model.layers.6.block_sparse_moe.experts.165.w3", "model.layers.6.block_sparse_moe.experts.166.w3", "model.layers.6.block_sparse_moe.experts.167.w3", "model.layers.6.block_sparse_moe.experts.168.w3", "model.layers.6.block_sparse_moe.experts.169.w3", "model.layers.6.block_sparse_moe.experts.170.w3", "model.layers.6.block_sparse_moe.experts.171.w3", "model.layers.6.block_sparse_moe.experts.172.w3", "model.layers.6.block_sparse_moe.experts.173.w3", "model.layers.6.block_sparse_moe.experts.174.w3", "model.layers.6.block_sparse_moe.experts.175.w3", "model.layers.6.block_sparse_moe.experts.176.w3", "model.layers.6.block_sparse_moe.experts.177.w3", "model.layers.6.block_sparse_moe.experts.178.w3", "model.layers.6.block_sparse_moe.experts.179.w3", "model.layers.6.block_sparse_moe.experts.180.w3", "model.layers.6.block_sparse_moe.experts.181.w3", "model.layers.6.block_sparse_moe.experts.182.w3", "model.layers.6.block_sparse_moe.experts.183.w3", "model.layers.6.block_sparse_moe.experts.184.w3", "model.layers.6.block_sparse_moe.experts.185.w3", "model.layers.6.block_sparse_moe.experts.186.w3", "model.layers.6.block_sparse_moe.experts.187.w3", "model.layers.6.block_sparse_moe.experts.188.w3", "model.layers.6.block_sparse_moe.experts.189.w3", "model.layers.6.block_sparse_moe.experts.190.w3", "model.layers.6.block_sparse_moe.experts.191.w3", "model.layers.6.block_sparse_moe.experts.192.w3", "model.layers.6.block_sparse_moe.experts.193.w3", "model.layers.6.block_sparse_moe.experts.194.w3", "model.layers.6.block_sparse_moe.experts.195.w3", "model.layers.6.block_sparse_moe.experts.196.w3", "model.layers.6.block_sparse_moe.experts.197.w3", "model.layers.6.block_sparse_moe.experts.198.w3", "model.layers.6.block_sparse_moe.experts.199.w3", "model.layers.6.block_sparse_moe.experts.200.w3", "model.layers.6.block_sparse_moe.experts.201.w3", "model.layers.6.block_sparse_moe.experts.202.w3", "model.layers.6.block_sparse_moe.experts.203.w3", "model.layers.6.block_sparse_moe.experts.204.w3", "model.layers.6.block_sparse_moe.experts.205.w3", "model.layers.6.block_sparse_moe.experts.206.w3", "model.layers.6.block_sparse_moe.experts.207.w3", "model.layers.6.block_sparse_moe.experts.208.w3", "model.layers.6.block_sparse_moe.experts.209.w3", "model.layers.6.block_sparse_moe.experts.210.w3", "model.layers.6.block_sparse_moe.experts.211.w3", "model.layers.6.block_sparse_moe.experts.212.w3", "model.layers.6.block_sparse_moe.experts.213.w3", "model.layers.6.block_sparse_moe.experts.214.w3", "model.layers.6.block_sparse_moe.experts.215.w3", "model.layers.6.block_sparse_moe.experts.216.w3", "model.layers.6.block_sparse_moe.experts.217.w3", "model.layers.6.block_sparse_moe.experts.218.w3", "model.layers.6.block_sparse_moe.experts.219.w3", "model.layers.6.block_sparse_moe.experts.220.w3", "model.layers.6.block_sparse_moe.experts.221.w3", "model.layers.6.block_sparse_moe.experts.222.w3", "model.layers.6.block_sparse_moe.experts.223.w3", "model.layers.6.block_sparse_moe.experts.224.w3", "model.layers.6.block_sparse_moe.experts.225.w3", "model.layers.6.block_sparse_moe.experts.226.w3", "model.layers.6.block_sparse_moe.experts.227.w3", "model.layers.6.block_sparse_moe.experts.228.w3", "model.layers.6.block_sparse_moe.experts.229.w3", "model.layers.6.block_sparse_moe.experts.230.w3", "model.layers.6.block_sparse_moe.experts.231.w3", "model.layers.6.block_sparse_moe.experts.232.w3", "model.layers.6.block_sparse_moe.experts.233.w3", "model.layers.6.block_sparse_moe.experts.234.w3", "model.layers.6.block_sparse_moe.experts.235.w3", "model.layers.6.block_sparse_moe.experts.236.w3", "model.layers.6.block_sparse_moe.experts.237.w3", "model.layers.6.block_sparse_moe.experts.238.w3", "model.layers.6.block_sparse_moe.experts.239.w3", "model.layers.6.block_sparse_moe.experts.240.w3", "model.layers.6.block_sparse_moe.experts.241.w3", "model.layers.6.block_sparse_moe.experts.242.w3", "model.layers.6.block_sparse_moe.experts.243.w3", "model.layers.6.block_sparse_moe.experts.244.w3", "model.layers.6.block_sparse_moe.experts.245.w3", "model.layers.6.block_sparse_moe.experts.246.w3", "model.layers.6.block_sparse_moe.experts.247.w3", "model.layers.6.block_sparse_moe.experts.248.w3", "model.layers.6.block_sparse_moe.experts.249.w3", "model.layers.6.block_sparse_moe.experts.250.w3", "model.layers.6.block_sparse_moe.experts.251.w3", "model.layers.6.block_sparse_moe.experts.252.w3", "model.layers.6.block_sparse_moe.experts.253.w3", "model.layers.6.block_sparse_moe.experts.254.w3", "model.layers.6.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0005956016480922616, "dbits": 2415919104 } ] }, { "idx": 34, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w2", "model.layers.6.block_sparse_moe.experts.1.w2", "model.layers.6.block_sparse_moe.experts.2.w2", "model.layers.6.block_sparse_moe.experts.3.w2", "model.layers.6.block_sparse_moe.experts.4.w2", "model.layers.6.block_sparse_moe.experts.5.w2", "model.layers.6.block_sparse_moe.experts.6.w2", "model.layers.6.block_sparse_moe.experts.7.w2", "model.layers.6.block_sparse_moe.experts.8.w2", "model.layers.6.block_sparse_moe.experts.9.w2", "model.layers.6.block_sparse_moe.experts.10.w2", "model.layers.6.block_sparse_moe.experts.11.w2", "model.layers.6.block_sparse_moe.experts.12.w2", "model.layers.6.block_sparse_moe.experts.13.w2", "model.layers.6.block_sparse_moe.experts.14.w2", "model.layers.6.block_sparse_moe.experts.15.w2", "model.layers.6.block_sparse_moe.experts.16.w2", "model.layers.6.block_sparse_moe.experts.17.w2", "model.layers.6.block_sparse_moe.experts.18.w2", "model.layers.6.block_sparse_moe.experts.19.w2", "model.layers.6.block_sparse_moe.experts.20.w2", "model.layers.6.block_sparse_moe.experts.21.w2", "model.layers.6.block_sparse_moe.experts.22.w2", "model.layers.6.block_sparse_moe.experts.23.w2", "model.layers.6.block_sparse_moe.experts.24.w2", "model.layers.6.block_sparse_moe.experts.25.w2", "model.layers.6.block_sparse_moe.experts.26.w2", "model.layers.6.block_sparse_moe.experts.27.w2", "model.layers.6.block_sparse_moe.experts.28.w2", "model.layers.6.block_sparse_moe.experts.29.w2", "model.layers.6.block_sparse_moe.experts.30.w2", "model.layers.6.block_sparse_moe.experts.31.w2", "model.layers.6.block_sparse_moe.experts.32.w2", "model.layers.6.block_sparse_moe.experts.33.w2", "model.layers.6.block_sparse_moe.experts.34.w2", "model.layers.6.block_sparse_moe.experts.35.w2", "model.layers.6.block_sparse_moe.experts.36.w2", "model.layers.6.block_sparse_moe.experts.37.w2", "model.layers.6.block_sparse_moe.experts.38.w2", "model.layers.6.block_sparse_moe.experts.39.w2", "model.layers.6.block_sparse_moe.experts.40.w2", "model.layers.6.block_sparse_moe.experts.41.w2", "model.layers.6.block_sparse_moe.experts.42.w2", "model.layers.6.block_sparse_moe.experts.43.w2", "model.layers.6.block_sparse_moe.experts.44.w2", "model.layers.6.block_sparse_moe.experts.45.w2", "model.layers.6.block_sparse_moe.experts.46.w2", "model.layers.6.block_sparse_moe.experts.47.w2", "model.layers.6.block_sparse_moe.experts.48.w2", "model.layers.6.block_sparse_moe.experts.49.w2", "model.layers.6.block_sparse_moe.experts.50.w2", "model.layers.6.block_sparse_moe.experts.51.w2", "model.layers.6.block_sparse_moe.experts.52.w2", "model.layers.6.block_sparse_moe.experts.53.w2", "model.layers.6.block_sparse_moe.experts.54.w2", "model.layers.6.block_sparse_moe.experts.55.w2", "model.layers.6.block_sparse_moe.experts.56.w2", "model.layers.6.block_sparse_moe.experts.57.w2", "model.layers.6.block_sparse_moe.experts.58.w2", "model.layers.6.block_sparse_moe.experts.59.w2", "model.layers.6.block_sparse_moe.experts.60.w2", "model.layers.6.block_sparse_moe.experts.61.w2", "model.layers.6.block_sparse_moe.experts.62.w2", "model.layers.6.block_sparse_moe.experts.63.w2", "model.layers.6.block_sparse_moe.experts.64.w2", "model.layers.6.block_sparse_moe.experts.65.w2", "model.layers.6.block_sparse_moe.experts.66.w2", "model.layers.6.block_sparse_moe.experts.67.w2", "model.layers.6.block_sparse_moe.experts.68.w2", "model.layers.6.block_sparse_moe.experts.69.w2", "model.layers.6.block_sparse_moe.experts.70.w2", "model.layers.6.block_sparse_moe.experts.71.w2", "model.layers.6.block_sparse_moe.experts.72.w2", "model.layers.6.block_sparse_moe.experts.73.w2", "model.layers.6.block_sparse_moe.experts.74.w2", "model.layers.6.block_sparse_moe.experts.75.w2", "model.layers.6.block_sparse_moe.experts.76.w2", "model.layers.6.block_sparse_moe.experts.77.w2", "model.layers.6.block_sparse_moe.experts.78.w2", "model.layers.6.block_sparse_moe.experts.79.w2", "model.layers.6.block_sparse_moe.experts.80.w2", "model.layers.6.block_sparse_moe.experts.81.w2", "model.layers.6.block_sparse_moe.experts.82.w2", "model.layers.6.block_sparse_moe.experts.83.w2", "model.layers.6.block_sparse_moe.experts.84.w2", "model.layers.6.block_sparse_moe.experts.85.w2", "model.layers.6.block_sparse_moe.experts.86.w2", "model.layers.6.block_sparse_moe.experts.87.w2", "model.layers.6.block_sparse_moe.experts.88.w2", "model.layers.6.block_sparse_moe.experts.89.w2", "model.layers.6.block_sparse_moe.experts.90.w2", "model.layers.6.block_sparse_moe.experts.91.w2", "model.layers.6.block_sparse_moe.experts.92.w2", "model.layers.6.block_sparse_moe.experts.93.w2", "model.layers.6.block_sparse_moe.experts.94.w2", "model.layers.6.block_sparse_moe.experts.95.w2", "model.layers.6.block_sparse_moe.experts.96.w2", "model.layers.6.block_sparse_moe.experts.97.w2", "model.layers.6.block_sparse_moe.experts.98.w2", "model.layers.6.block_sparse_moe.experts.99.w2", "model.layers.6.block_sparse_moe.experts.100.w2", "model.layers.6.block_sparse_moe.experts.101.w2", "model.layers.6.block_sparse_moe.experts.102.w2", "model.layers.6.block_sparse_moe.experts.103.w2", "model.layers.6.block_sparse_moe.experts.104.w2", "model.layers.6.block_sparse_moe.experts.105.w2", "model.layers.6.block_sparse_moe.experts.106.w2", "model.layers.6.block_sparse_moe.experts.107.w2", "model.layers.6.block_sparse_moe.experts.108.w2", "model.layers.6.block_sparse_moe.experts.109.w2", "model.layers.6.block_sparse_moe.experts.110.w2", "model.layers.6.block_sparse_moe.experts.111.w2", "model.layers.6.block_sparse_moe.experts.112.w2", "model.layers.6.block_sparse_moe.experts.113.w2", "model.layers.6.block_sparse_moe.experts.114.w2", "model.layers.6.block_sparse_moe.experts.115.w2", "model.layers.6.block_sparse_moe.experts.116.w2", "model.layers.6.block_sparse_moe.experts.117.w2", "model.layers.6.block_sparse_moe.experts.118.w2", "model.layers.6.block_sparse_moe.experts.119.w2", "model.layers.6.block_sparse_moe.experts.120.w2", "model.layers.6.block_sparse_moe.experts.121.w2", "model.layers.6.block_sparse_moe.experts.122.w2", "model.layers.6.block_sparse_moe.experts.123.w2", "model.layers.6.block_sparse_moe.experts.124.w2", "model.layers.6.block_sparse_moe.experts.125.w2", "model.layers.6.block_sparse_moe.experts.126.w2", "model.layers.6.block_sparse_moe.experts.127.w2", "model.layers.6.block_sparse_moe.experts.128.w2", "model.layers.6.block_sparse_moe.experts.129.w2", "model.layers.6.block_sparse_moe.experts.130.w2", "model.layers.6.block_sparse_moe.experts.131.w2", "model.layers.6.block_sparse_moe.experts.132.w2", "model.layers.6.block_sparse_moe.experts.133.w2", "model.layers.6.block_sparse_moe.experts.134.w2", "model.layers.6.block_sparse_moe.experts.135.w2", "model.layers.6.block_sparse_moe.experts.136.w2", "model.layers.6.block_sparse_moe.experts.137.w2", "model.layers.6.block_sparse_moe.experts.138.w2", "model.layers.6.block_sparse_moe.experts.139.w2", "model.layers.6.block_sparse_moe.experts.140.w2", "model.layers.6.block_sparse_moe.experts.141.w2", "model.layers.6.block_sparse_moe.experts.142.w2", "model.layers.6.block_sparse_moe.experts.143.w2", "model.layers.6.block_sparse_moe.experts.144.w2", "model.layers.6.block_sparse_moe.experts.145.w2", "model.layers.6.block_sparse_moe.experts.146.w2", "model.layers.6.block_sparse_moe.experts.147.w2", "model.layers.6.block_sparse_moe.experts.148.w2", "model.layers.6.block_sparse_moe.experts.149.w2", "model.layers.6.block_sparse_moe.experts.150.w2", "model.layers.6.block_sparse_moe.experts.151.w2", "model.layers.6.block_sparse_moe.experts.152.w2", "model.layers.6.block_sparse_moe.experts.153.w2", "model.layers.6.block_sparse_moe.experts.154.w2", "model.layers.6.block_sparse_moe.experts.155.w2", "model.layers.6.block_sparse_moe.experts.156.w2", "model.layers.6.block_sparse_moe.experts.157.w2", "model.layers.6.block_sparse_moe.experts.158.w2", "model.layers.6.block_sparse_moe.experts.159.w2", "model.layers.6.block_sparse_moe.experts.160.w2", "model.layers.6.block_sparse_moe.experts.161.w2", "model.layers.6.block_sparse_moe.experts.162.w2", "model.layers.6.block_sparse_moe.experts.163.w2", "model.layers.6.block_sparse_moe.experts.164.w2", "model.layers.6.block_sparse_moe.experts.165.w2", "model.layers.6.block_sparse_moe.experts.166.w2", "model.layers.6.block_sparse_moe.experts.167.w2", "model.layers.6.block_sparse_moe.experts.168.w2", "model.layers.6.block_sparse_moe.experts.169.w2", "model.layers.6.block_sparse_moe.experts.170.w2", "model.layers.6.block_sparse_moe.experts.171.w2", "model.layers.6.block_sparse_moe.experts.172.w2", "model.layers.6.block_sparse_moe.experts.173.w2", "model.layers.6.block_sparse_moe.experts.174.w2", "model.layers.6.block_sparse_moe.experts.175.w2", "model.layers.6.block_sparse_moe.experts.176.w2", "model.layers.6.block_sparse_moe.experts.177.w2", "model.layers.6.block_sparse_moe.experts.178.w2", "model.layers.6.block_sparse_moe.experts.179.w2", "model.layers.6.block_sparse_moe.experts.180.w2", "model.layers.6.block_sparse_moe.experts.181.w2", "model.layers.6.block_sparse_moe.experts.182.w2", "model.layers.6.block_sparse_moe.experts.183.w2", "model.layers.6.block_sparse_moe.experts.184.w2", "model.layers.6.block_sparse_moe.experts.185.w2", "model.layers.6.block_sparse_moe.experts.186.w2", "model.layers.6.block_sparse_moe.experts.187.w2", "model.layers.6.block_sparse_moe.experts.188.w2", "model.layers.6.block_sparse_moe.experts.189.w2", "model.layers.6.block_sparse_moe.experts.190.w2", "model.layers.6.block_sparse_moe.experts.191.w2", "model.layers.6.block_sparse_moe.experts.192.w2", "model.layers.6.block_sparse_moe.experts.193.w2", "model.layers.6.block_sparse_moe.experts.194.w2", "model.layers.6.block_sparse_moe.experts.195.w2", "model.layers.6.block_sparse_moe.experts.196.w2", "model.layers.6.block_sparse_moe.experts.197.w2", "model.layers.6.block_sparse_moe.experts.198.w2", "model.layers.6.block_sparse_moe.experts.199.w2", "model.layers.6.block_sparse_moe.experts.200.w2", "model.layers.6.block_sparse_moe.experts.201.w2", "model.layers.6.block_sparse_moe.experts.202.w2", "model.layers.6.block_sparse_moe.experts.203.w2", "model.layers.6.block_sparse_moe.experts.204.w2", "model.layers.6.block_sparse_moe.experts.205.w2", "model.layers.6.block_sparse_moe.experts.206.w2", "model.layers.6.block_sparse_moe.experts.207.w2", "model.layers.6.block_sparse_moe.experts.208.w2", "model.layers.6.block_sparse_moe.experts.209.w2", "model.layers.6.block_sparse_moe.experts.210.w2", "model.layers.6.block_sparse_moe.experts.211.w2", "model.layers.6.block_sparse_moe.experts.212.w2", "model.layers.6.block_sparse_moe.experts.213.w2", "model.layers.6.block_sparse_moe.experts.214.w2", "model.layers.6.block_sparse_moe.experts.215.w2", "model.layers.6.block_sparse_moe.experts.216.w2", "model.layers.6.block_sparse_moe.experts.217.w2", "model.layers.6.block_sparse_moe.experts.218.w2", "model.layers.6.block_sparse_moe.experts.219.w2", "model.layers.6.block_sparse_moe.experts.220.w2", "model.layers.6.block_sparse_moe.experts.221.w2", "model.layers.6.block_sparse_moe.experts.222.w2", "model.layers.6.block_sparse_moe.experts.223.w2", "model.layers.6.block_sparse_moe.experts.224.w2", "model.layers.6.block_sparse_moe.experts.225.w2", "model.layers.6.block_sparse_moe.experts.226.w2", "model.layers.6.block_sparse_moe.experts.227.w2", "model.layers.6.block_sparse_moe.experts.228.w2", "model.layers.6.block_sparse_moe.experts.229.w2", "model.layers.6.block_sparse_moe.experts.230.w2", "model.layers.6.block_sparse_moe.experts.231.w2", "model.layers.6.block_sparse_moe.experts.232.w2", "model.layers.6.block_sparse_moe.experts.233.w2", "model.layers.6.block_sparse_moe.experts.234.w2", "model.layers.6.block_sparse_moe.experts.235.w2", "model.layers.6.block_sparse_moe.experts.236.w2", "model.layers.6.block_sparse_moe.experts.237.w2", "model.layers.6.block_sparse_moe.experts.238.w2", "model.layers.6.block_sparse_moe.experts.239.w2", "model.layers.6.block_sparse_moe.experts.240.w2", "model.layers.6.block_sparse_moe.experts.241.w2", "model.layers.6.block_sparse_moe.experts.242.w2", "model.layers.6.block_sparse_moe.experts.243.w2", "model.layers.6.block_sparse_moe.experts.244.w2", "model.layers.6.block_sparse_moe.experts.245.w2", "model.layers.6.block_sparse_moe.experts.246.w2", "model.layers.6.block_sparse_moe.experts.247.w2", "model.layers.6.block_sparse_moe.experts.248.w2", "model.layers.6.block_sparse_moe.experts.249.w2", "model.layers.6.block_sparse_moe.experts.250.w2", "model.layers.6.block_sparse_moe.experts.251.w2", "model.layers.6.block_sparse_moe.experts.252.w2", "model.layers.6.block_sparse_moe.experts.253.w2", "model.layers.6.block_sparse_moe.experts.254.w2", "model.layers.6.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00013669002801179608, "dbits": 1207959552 } ] }, { "idx": 35, "layers": [ "model.layers.7.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0004553996026515905, "dbits": 18874368 } ] }, { "idx": 36, "layers": [ "model.layers.7.self_attn.k_proj", "model.layers.7.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0045372197404503906, "dbits": 6291456 } ] }, { "idx": 37, "layers": [ "model.layers.7.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0004515955224633217, "dbits": 18874368 } ] }, { "idx": 38, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w1", "model.layers.7.block_sparse_moe.experts.1.w1", "model.layers.7.block_sparse_moe.experts.2.w1", "model.layers.7.block_sparse_moe.experts.3.w1", "model.layers.7.block_sparse_moe.experts.4.w1", "model.layers.7.block_sparse_moe.experts.5.w1", "model.layers.7.block_sparse_moe.experts.6.w1", "model.layers.7.block_sparse_moe.experts.7.w1", "model.layers.7.block_sparse_moe.experts.8.w1", "model.layers.7.block_sparse_moe.experts.9.w1", "model.layers.7.block_sparse_moe.experts.10.w1", "model.layers.7.block_sparse_moe.experts.11.w1", "model.layers.7.block_sparse_moe.experts.12.w1", "model.layers.7.block_sparse_moe.experts.13.w1", "model.layers.7.block_sparse_moe.experts.14.w1", "model.layers.7.block_sparse_moe.experts.15.w1", "model.layers.7.block_sparse_moe.experts.16.w1", "model.layers.7.block_sparse_moe.experts.17.w1", "model.layers.7.block_sparse_moe.experts.18.w1", "model.layers.7.block_sparse_moe.experts.19.w1", "model.layers.7.block_sparse_moe.experts.20.w1", "model.layers.7.block_sparse_moe.experts.21.w1", "model.layers.7.block_sparse_moe.experts.22.w1", "model.layers.7.block_sparse_moe.experts.23.w1", "model.layers.7.block_sparse_moe.experts.24.w1", "model.layers.7.block_sparse_moe.experts.25.w1", "model.layers.7.block_sparse_moe.experts.26.w1", "model.layers.7.block_sparse_moe.experts.27.w1", "model.layers.7.block_sparse_moe.experts.28.w1", "model.layers.7.block_sparse_moe.experts.29.w1", "model.layers.7.block_sparse_moe.experts.30.w1", "model.layers.7.block_sparse_moe.experts.31.w1", "model.layers.7.block_sparse_moe.experts.32.w1", "model.layers.7.block_sparse_moe.experts.33.w1", "model.layers.7.block_sparse_moe.experts.34.w1", "model.layers.7.block_sparse_moe.experts.35.w1", "model.layers.7.block_sparse_moe.experts.36.w1", "model.layers.7.block_sparse_moe.experts.37.w1", "model.layers.7.block_sparse_moe.experts.38.w1", "model.layers.7.block_sparse_moe.experts.39.w1", "model.layers.7.block_sparse_moe.experts.40.w1", "model.layers.7.block_sparse_moe.experts.41.w1", "model.layers.7.block_sparse_moe.experts.42.w1", "model.layers.7.block_sparse_moe.experts.43.w1", "model.layers.7.block_sparse_moe.experts.44.w1", "model.layers.7.block_sparse_moe.experts.45.w1", "model.layers.7.block_sparse_moe.experts.46.w1", "model.layers.7.block_sparse_moe.experts.47.w1", "model.layers.7.block_sparse_moe.experts.48.w1", "model.layers.7.block_sparse_moe.experts.49.w1", "model.layers.7.block_sparse_moe.experts.50.w1", "model.layers.7.block_sparse_moe.experts.51.w1", "model.layers.7.block_sparse_moe.experts.52.w1", "model.layers.7.block_sparse_moe.experts.53.w1", "model.layers.7.block_sparse_moe.experts.54.w1", "model.layers.7.block_sparse_moe.experts.55.w1", "model.layers.7.block_sparse_moe.experts.56.w1", "model.layers.7.block_sparse_moe.experts.57.w1", "model.layers.7.block_sparse_moe.experts.58.w1", "model.layers.7.block_sparse_moe.experts.59.w1", "model.layers.7.block_sparse_moe.experts.60.w1", "model.layers.7.block_sparse_moe.experts.61.w1", "model.layers.7.block_sparse_moe.experts.62.w1", "model.layers.7.block_sparse_moe.experts.63.w1", "model.layers.7.block_sparse_moe.experts.64.w1", "model.layers.7.block_sparse_moe.experts.65.w1", "model.layers.7.block_sparse_moe.experts.66.w1", "model.layers.7.block_sparse_moe.experts.67.w1", "model.layers.7.block_sparse_moe.experts.68.w1", "model.layers.7.block_sparse_moe.experts.69.w1", "model.layers.7.block_sparse_moe.experts.70.w1", "model.layers.7.block_sparse_moe.experts.71.w1", "model.layers.7.block_sparse_moe.experts.72.w1", "model.layers.7.block_sparse_moe.experts.73.w1", "model.layers.7.block_sparse_moe.experts.74.w1", "model.layers.7.block_sparse_moe.experts.75.w1", "model.layers.7.block_sparse_moe.experts.76.w1", "model.layers.7.block_sparse_moe.experts.77.w1", "model.layers.7.block_sparse_moe.experts.78.w1", "model.layers.7.block_sparse_moe.experts.79.w1", "model.layers.7.block_sparse_moe.experts.80.w1", "model.layers.7.block_sparse_moe.experts.81.w1", "model.layers.7.block_sparse_moe.experts.82.w1", "model.layers.7.block_sparse_moe.experts.83.w1", "model.layers.7.block_sparse_moe.experts.84.w1", "model.layers.7.block_sparse_moe.experts.85.w1", "model.layers.7.block_sparse_moe.experts.86.w1", "model.layers.7.block_sparse_moe.experts.87.w1", "model.layers.7.block_sparse_moe.experts.88.w1", "model.layers.7.block_sparse_moe.experts.89.w1", "model.layers.7.block_sparse_moe.experts.90.w1", "model.layers.7.block_sparse_moe.experts.91.w1", "model.layers.7.block_sparse_moe.experts.92.w1", "model.layers.7.block_sparse_moe.experts.93.w1", "model.layers.7.block_sparse_moe.experts.94.w1", "model.layers.7.block_sparse_moe.experts.95.w1", "model.layers.7.block_sparse_moe.experts.96.w1", "model.layers.7.block_sparse_moe.experts.97.w1", "model.layers.7.block_sparse_moe.experts.98.w1", "model.layers.7.block_sparse_moe.experts.99.w1", "model.layers.7.block_sparse_moe.experts.100.w1", "model.layers.7.block_sparse_moe.experts.101.w1", "model.layers.7.block_sparse_moe.experts.102.w1", "model.layers.7.block_sparse_moe.experts.103.w1", "model.layers.7.block_sparse_moe.experts.104.w1", "model.layers.7.block_sparse_moe.experts.105.w1", "model.layers.7.block_sparse_moe.experts.106.w1", "model.layers.7.block_sparse_moe.experts.107.w1", "model.layers.7.block_sparse_moe.experts.108.w1", "model.layers.7.block_sparse_moe.experts.109.w1", "model.layers.7.block_sparse_moe.experts.110.w1", "model.layers.7.block_sparse_moe.experts.111.w1", "model.layers.7.block_sparse_moe.experts.112.w1", "model.layers.7.block_sparse_moe.experts.113.w1", "model.layers.7.block_sparse_moe.experts.114.w1", "model.layers.7.block_sparse_moe.experts.115.w1", "model.layers.7.block_sparse_moe.experts.116.w1", "model.layers.7.block_sparse_moe.experts.117.w1", "model.layers.7.block_sparse_moe.experts.118.w1", "model.layers.7.block_sparse_moe.experts.119.w1", "model.layers.7.block_sparse_moe.experts.120.w1", "model.layers.7.block_sparse_moe.experts.121.w1", "model.layers.7.block_sparse_moe.experts.122.w1", "model.layers.7.block_sparse_moe.experts.123.w1", "model.layers.7.block_sparse_moe.experts.124.w1", "model.layers.7.block_sparse_moe.experts.125.w1", "model.layers.7.block_sparse_moe.experts.126.w1", "model.layers.7.block_sparse_moe.experts.127.w1", "model.layers.7.block_sparse_moe.experts.128.w1", "model.layers.7.block_sparse_moe.experts.129.w1", "model.layers.7.block_sparse_moe.experts.130.w1", "model.layers.7.block_sparse_moe.experts.131.w1", "model.layers.7.block_sparse_moe.experts.132.w1", "model.layers.7.block_sparse_moe.experts.133.w1", "model.layers.7.block_sparse_moe.experts.134.w1", "model.layers.7.block_sparse_moe.experts.135.w1", "model.layers.7.block_sparse_moe.experts.136.w1", "model.layers.7.block_sparse_moe.experts.137.w1", "model.layers.7.block_sparse_moe.experts.138.w1", "model.layers.7.block_sparse_moe.experts.139.w1", "model.layers.7.block_sparse_moe.experts.140.w1", "model.layers.7.block_sparse_moe.experts.141.w1", "model.layers.7.block_sparse_moe.experts.142.w1", "model.layers.7.block_sparse_moe.experts.143.w1", "model.layers.7.block_sparse_moe.experts.144.w1", "model.layers.7.block_sparse_moe.experts.145.w1", "model.layers.7.block_sparse_moe.experts.146.w1", "model.layers.7.block_sparse_moe.experts.147.w1", "model.layers.7.block_sparse_moe.experts.148.w1", "model.layers.7.block_sparse_moe.experts.149.w1", "model.layers.7.block_sparse_moe.experts.150.w1", "model.layers.7.block_sparse_moe.experts.151.w1", "model.layers.7.block_sparse_moe.experts.152.w1", "model.layers.7.block_sparse_moe.experts.153.w1", "model.layers.7.block_sparse_moe.experts.154.w1", "model.layers.7.block_sparse_moe.experts.155.w1", "model.layers.7.block_sparse_moe.experts.156.w1", "model.layers.7.block_sparse_moe.experts.157.w1", "model.layers.7.block_sparse_moe.experts.158.w1", "model.layers.7.block_sparse_moe.experts.159.w1", "model.layers.7.block_sparse_moe.experts.160.w1", "model.layers.7.block_sparse_moe.experts.161.w1", "model.layers.7.block_sparse_moe.experts.162.w1", "model.layers.7.block_sparse_moe.experts.163.w1", "model.layers.7.block_sparse_moe.experts.164.w1", "model.layers.7.block_sparse_moe.experts.165.w1", "model.layers.7.block_sparse_moe.experts.166.w1", "model.layers.7.block_sparse_moe.experts.167.w1", "model.layers.7.block_sparse_moe.experts.168.w1", "model.layers.7.block_sparse_moe.experts.169.w1", "model.layers.7.block_sparse_moe.experts.170.w1", "model.layers.7.block_sparse_moe.experts.171.w1", "model.layers.7.block_sparse_moe.experts.172.w1", "model.layers.7.block_sparse_moe.experts.173.w1", "model.layers.7.block_sparse_moe.experts.174.w1", "model.layers.7.block_sparse_moe.experts.175.w1", "model.layers.7.block_sparse_moe.experts.176.w1", "model.layers.7.block_sparse_moe.experts.177.w1", "model.layers.7.block_sparse_moe.experts.178.w1", "model.layers.7.block_sparse_moe.experts.179.w1", "model.layers.7.block_sparse_moe.experts.180.w1", "model.layers.7.block_sparse_moe.experts.181.w1", "model.layers.7.block_sparse_moe.experts.182.w1", "model.layers.7.block_sparse_moe.experts.183.w1", "model.layers.7.block_sparse_moe.experts.184.w1", "model.layers.7.block_sparse_moe.experts.185.w1", "model.layers.7.block_sparse_moe.experts.186.w1", "model.layers.7.block_sparse_moe.experts.187.w1", "model.layers.7.block_sparse_moe.experts.188.w1", "model.layers.7.block_sparse_moe.experts.189.w1", "model.layers.7.block_sparse_moe.experts.190.w1", "model.layers.7.block_sparse_moe.experts.191.w1", "model.layers.7.block_sparse_moe.experts.192.w1", "model.layers.7.block_sparse_moe.experts.193.w1", "model.layers.7.block_sparse_moe.experts.194.w1", "model.layers.7.block_sparse_moe.experts.195.w1", "model.layers.7.block_sparse_moe.experts.196.w1", "model.layers.7.block_sparse_moe.experts.197.w1", "model.layers.7.block_sparse_moe.experts.198.w1", "model.layers.7.block_sparse_moe.experts.199.w1", "model.layers.7.block_sparse_moe.experts.200.w1", "model.layers.7.block_sparse_moe.experts.201.w1", "model.layers.7.block_sparse_moe.experts.202.w1", "model.layers.7.block_sparse_moe.experts.203.w1", "model.layers.7.block_sparse_moe.experts.204.w1", "model.layers.7.block_sparse_moe.experts.205.w1", "model.layers.7.block_sparse_moe.experts.206.w1", "model.layers.7.block_sparse_moe.experts.207.w1", "model.layers.7.block_sparse_moe.experts.208.w1", "model.layers.7.block_sparse_moe.experts.209.w1", "model.layers.7.block_sparse_moe.experts.210.w1", "model.layers.7.block_sparse_moe.experts.211.w1", "model.layers.7.block_sparse_moe.experts.212.w1", "model.layers.7.block_sparse_moe.experts.213.w1", "model.layers.7.block_sparse_moe.experts.214.w1", "model.layers.7.block_sparse_moe.experts.215.w1", "model.layers.7.block_sparse_moe.experts.216.w1", "model.layers.7.block_sparse_moe.experts.217.w1", "model.layers.7.block_sparse_moe.experts.218.w1", "model.layers.7.block_sparse_moe.experts.219.w1", "model.layers.7.block_sparse_moe.experts.220.w1", "model.layers.7.block_sparse_moe.experts.221.w1", "model.layers.7.block_sparse_moe.experts.222.w1", "model.layers.7.block_sparse_moe.experts.223.w1", "model.layers.7.block_sparse_moe.experts.224.w1", "model.layers.7.block_sparse_moe.experts.225.w1", "model.layers.7.block_sparse_moe.experts.226.w1", "model.layers.7.block_sparse_moe.experts.227.w1", "model.layers.7.block_sparse_moe.experts.228.w1", "model.layers.7.block_sparse_moe.experts.229.w1", "model.layers.7.block_sparse_moe.experts.230.w1", "model.layers.7.block_sparse_moe.experts.231.w1", "model.layers.7.block_sparse_moe.experts.232.w1", "model.layers.7.block_sparse_moe.experts.233.w1", "model.layers.7.block_sparse_moe.experts.234.w1", "model.layers.7.block_sparse_moe.experts.235.w1", "model.layers.7.block_sparse_moe.experts.236.w1", "model.layers.7.block_sparse_moe.experts.237.w1", "model.layers.7.block_sparse_moe.experts.238.w1", "model.layers.7.block_sparse_moe.experts.239.w1", "model.layers.7.block_sparse_moe.experts.240.w1", "model.layers.7.block_sparse_moe.experts.241.w1", "model.layers.7.block_sparse_moe.experts.242.w1", "model.layers.7.block_sparse_moe.experts.243.w1", "model.layers.7.block_sparse_moe.experts.244.w1", "model.layers.7.block_sparse_moe.experts.245.w1", "model.layers.7.block_sparse_moe.experts.246.w1", "model.layers.7.block_sparse_moe.experts.247.w1", "model.layers.7.block_sparse_moe.experts.248.w1", "model.layers.7.block_sparse_moe.experts.249.w1", "model.layers.7.block_sparse_moe.experts.250.w1", "model.layers.7.block_sparse_moe.experts.251.w1", "model.layers.7.block_sparse_moe.experts.252.w1", "model.layers.7.block_sparse_moe.experts.253.w1", "model.layers.7.block_sparse_moe.experts.254.w1", "model.layers.7.block_sparse_moe.experts.255.w1", "model.layers.7.block_sparse_moe.experts.0.w3", "model.layers.7.block_sparse_moe.experts.1.w3", "model.layers.7.block_sparse_moe.experts.2.w3", "model.layers.7.block_sparse_moe.experts.3.w3", "model.layers.7.block_sparse_moe.experts.4.w3", "model.layers.7.block_sparse_moe.experts.5.w3", "model.layers.7.block_sparse_moe.experts.6.w3", "model.layers.7.block_sparse_moe.experts.7.w3", "model.layers.7.block_sparse_moe.experts.8.w3", "model.layers.7.block_sparse_moe.experts.9.w3", "model.layers.7.block_sparse_moe.experts.10.w3", "model.layers.7.block_sparse_moe.experts.11.w3", "model.layers.7.block_sparse_moe.experts.12.w3", "model.layers.7.block_sparse_moe.experts.13.w3", "model.layers.7.block_sparse_moe.experts.14.w3", "model.layers.7.block_sparse_moe.experts.15.w3", "model.layers.7.block_sparse_moe.experts.16.w3", "model.layers.7.block_sparse_moe.experts.17.w3", "model.layers.7.block_sparse_moe.experts.18.w3", "model.layers.7.block_sparse_moe.experts.19.w3", "model.layers.7.block_sparse_moe.experts.20.w3", "model.layers.7.block_sparse_moe.experts.21.w3", "model.layers.7.block_sparse_moe.experts.22.w3", "model.layers.7.block_sparse_moe.experts.23.w3", "model.layers.7.block_sparse_moe.experts.24.w3", "model.layers.7.block_sparse_moe.experts.25.w3", "model.layers.7.block_sparse_moe.experts.26.w3", "model.layers.7.block_sparse_moe.experts.27.w3", "model.layers.7.block_sparse_moe.experts.28.w3", "model.layers.7.block_sparse_moe.experts.29.w3", "model.layers.7.block_sparse_moe.experts.30.w3", "model.layers.7.block_sparse_moe.experts.31.w3", "model.layers.7.block_sparse_moe.experts.32.w3", "model.layers.7.block_sparse_moe.experts.33.w3", "model.layers.7.block_sparse_moe.experts.34.w3", "model.layers.7.block_sparse_moe.experts.35.w3", "model.layers.7.block_sparse_moe.experts.36.w3", "model.layers.7.block_sparse_moe.experts.37.w3", "model.layers.7.block_sparse_moe.experts.38.w3", "model.layers.7.block_sparse_moe.experts.39.w3", "model.layers.7.block_sparse_moe.experts.40.w3", "model.layers.7.block_sparse_moe.experts.41.w3", "model.layers.7.block_sparse_moe.experts.42.w3", "model.layers.7.block_sparse_moe.experts.43.w3", "model.layers.7.block_sparse_moe.experts.44.w3", "model.layers.7.block_sparse_moe.experts.45.w3", "model.layers.7.block_sparse_moe.experts.46.w3", "model.layers.7.block_sparse_moe.experts.47.w3", "model.layers.7.block_sparse_moe.experts.48.w3", "model.layers.7.block_sparse_moe.experts.49.w3", "model.layers.7.block_sparse_moe.experts.50.w3", "model.layers.7.block_sparse_moe.experts.51.w3", "model.layers.7.block_sparse_moe.experts.52.w3", "model.layers.7.block_sparse_moe.experts.53.w3", "model.layers.7.block_sparse_moe.experts.54.w3", "model.layers.7.block_sparse_moe.experts.55.w3", "model.layers.7.block_sparse_moe.experts.56.w3", "model.layers.7.block_sparse_moe.experts.57.w3", "model.layers.7.block_sparse_moe.experts.58.w3", "model.layers.7.block_sparse_moe.experts.59.w3", "model.layers.7.block_sparse_moe.experts.60.w3", "model.layers.7.block_sparse_moe.experts.61.w3", "model.layers.7.block_sparse_moe.experts.62.w3", "model.layers.7.block_sparse_moe.experts.63.w3", "model.layers.7.block_sparse_moe.experts.64.w3", "model.layers.7.block_sparse_moe.experts.65.w3", "model.layers.7.block_sparse_moe.experts.66.w3", "model.layers.7.block_sparse_moe.experts.67.w3", "model.layers.7.block_sparse_moe.experts.68.w3", "model.layers.7.block_sparse_moe.experts.69.w3", "model.layers.7.block_sparse_moe.experts.70.w3", "model.layers.7.block_sparse_moe.experts.71.w3", "model.layers.7.block_sparse_moe.experts.72.w3", "model.layers.7.block_sparse_moe.experts.73.w3", "model.layers.7.block_sparse_moe.experts.74.w3", "model.layers.7.block_sparse_moe.experts.75.w3", "model.layers.7.block_sparse_moe.experts.76.w3", "model.layers.7.block_sparse_moe.experts.77.w3", "model.layers.7.block_sparse_moe.experts.78.w3", "model.layers.7.block_sparse_moe.experts.79.w3", "model.layers.7.block_sparse_moe.experts.80.w3", "model.layers.7.block_sparse_moe.experts.81.w3", "model.layers.7.block_sparse_moe.experts.82.w3", "model.layers.7.block_sparse_moe.experts.83.w3", "model.layers.7.block_sparse_moe.experts.84.w3", "model.layers.7.block_sparse_moe.experts.85.w3", "model.layers.7.block_sparse_moe.experts.86.w3", "model.layers.7.block_sparse_moe.experts.87.w3", "model.layers.7.block_sparse_moe.experts.88.w3", "model.layers.7.block_sparse_moe.experts.89.w3", "model.layers.7.block_sparse_moe.experts.90.w3", "model.layers.7.block_sparse_moe.experts.91.w3", "model.layers.7.block_sparse_moe.experts.92.w3", "model.layers.7.block_sparse_moe.experts.93.w3", "model.layers.7.block_sparse_moe.experts.94.w3", "model.layers.7.block_sparse_moe.experts.95.w3", "model.layers.7.block_sparse_moe.experts.96.w3", "model.layers.7.block_sparse_moe.experts.97.w3", "model.layers.7.block_sparse_moe.experts.98.w3", "model.layers.7.block_sparse_moe.experts.99.w3", "model.layers.7.block_sparse_moe.experts.100.w3", "model.layers.7.block_sparse_moe.experts.101.w3", "model.layers.7.block_sparse_moe.experts.102.w3", "model.layers.7.block_sparse_moe.experts.103.w3", "model.layers.7.block_sparse_moe.experts.104.w3", "model.layers.7.block_sparse_moe.experts.105.w3", "model.layers.7.block_sparse_moe.experts.106.w3", "model.layers.7.block_sparse_moe.experts.107.w3", "model.layers.7.block_sparse_moe.experts.108.w3", "model.layers.7.block_sparse_moe.experts.109.w3", "model.layers.7.block_sparse_moe.experts.110.w3", "model.layers.7.block_sparse_moe.experts.111.w3", "model.layers.7.block_sparse_moe.experts.112.w3", "model.layers.7.block_sparse_moe.experts.113.w3", "model.layers.7.block_sparse_moe.experts.114.w3", "model.layers.7.block_sparse_moe.experts.115.w3", "model.layers.7.block_sparse_moe.experts.116.w3", "model.layers.7.block_sparse_moe.experts.117.w3", "model.layers.7.block_sparse_moe.experts.118.w3", "model.layers.7.block_sparse_moe.experts.119.w3", "model.layers.7.block_sparse_moe.experts.120.w3", "model.layers.7.block_sparse_moe.experts.121.w3", "model.layers.7.block_sparse_moe.experts.122.w3", "model.layers.7.block_sparse_moe.experts.123.w3", "model.layers.7.block_sparse_moe.experts.124.w3", "model.layers.7.block_sparse_moe.experts.125.w3", "model.layers.7.block_sparse_moe.experts.126.w3", "model.layers.7.block_sparse_moe.experts.127.w3", "model.layers.7.block_sparse_moe.experts.128.w3", "model.layers.7.block_sparse_moe.experts.129.w3", "model.layers.7.block_sparse_moe.experts.130.w3", "model.layers.7.block_sparse_moe.experts.131.w3", "model.layers.7.block_sparse_moe.experts.132.w3", "model.layers.7.block_sparse_moe.experts.133.w3", "model.layers.7.block_sparse_moe.experts.134.w3", "model.layers.7.block_sparse_moe.experts.135.w3", "model.layers.7.block_sparse_moe.experts.136.w3", "model.layers.7.block_sparse_moe.experts.137.w3", "model.layers.7.block_sparse_moe.experts.138.w3", "model.layers.7.block_sparse_moe.experts.139.w3", "model.layers.7.block_sparse_moe.experts.140.w3", "model.layers.7.block_sparse_moe.experts.141.w3", "model.layers.7.block_sparse_moe.experts.142.w3", "model.layers.7.block_sparse_moe.experts.143.w3", "model.layers.7.block_sparse_moe.experts.144.w3", "model.layers.7.block_sparse_moe.experts.145.w3", "model.layers.7.block_sparse_moe.experts.146.w3", "model.layers.7.block_sparse_moe.experts.147.w3", "model.layers.7.block_sparse_moe.experts.148.w3", "model.layers.7.block_sparse_moe.experts.149.w3", "model.layers.7.block_sparse_moe.experts.150.w3", "model.layers.7.block_sparse_moe.experts.151.w3", "model.layers.7.block_sparse_moe.experts.152.w3", "model.layers.7.block_sparse_moe.experts.153.w3", "model.layers.7.block_sparse_moe.experts.154.w3", "model.layers.7.block_sparse_moe.experts.155.w3", "model.layers.7.block_sparse_moe.experts.156.w3", "model.layers.7.block_sparse_moe.experts.157.w3", "model.layers.7.block_sparse_moe.experts.158.w3", "model.layers.7.block_sparse_moe.experts.159.w3", "model.layers.7.block_sparse_moe.experts.160.w3", "model.layers.7.block_sparse_moe.experts.161.w3", "model.layers.7.block_sparse_moe.experts.162.w3", "model.layers.7.block_sparse_moe.experts.163.w3", "model.layers.7.block_sparse_moe.experts.164.w3", "model.layers.7.block_sparse_moe.experts.165.w3", "model.layers.7.block_sparse_moe.experts.166.w3", "model.layers.7.block_sparse_moe.experts.167.w3", "model.layers.7.block_sparse_moe.experts.168.w3", "model.layers.7.block_sparse_moe.experts.169.w3", "model.layers.7.block_sparse_moe.experts.170.w3", "model.layers.7.block_sparse_moe.experts.171.w3", "model.layers.7.block_sparse_moe.experts.172.w3", "model.layers.7.block_sparse_moe.experts.173.w3", "model.layers.7.block_sparse_moe.experts.174.w3", "model.layers.7.block_sparse_moe.experts.175.w3", "model.layers.7.block_sparse_moe.experts.176.w3", "model.layers.7.block_sparse_moe.experts.177.w3", "model.layers.7.block_sparse_moe.experts.178.w3", "model.layers.7.block_sparse_moe.experts.179.w3", "model.layers.7.block_sparse_moe.experts.180.w3", "model.layers.7.block_sparse_moe.experts.181.w3", "model.layers.7.block_sparse_moe.experts.182.w3", "model.layers.7.block_sparse_moe.experts.183.w3", "model.layers.7.block_sparse_moe.experts.184.w3", "model.layers.7.block_sparse_moe.experts.185.w3", "model.layers.7.block_sparse_moe.experts.186.w3", "model.layers.7.block_sparse_moe.experts.187.w3", "model.layers.7.block_sparse_moe.experts.188.w3", "model.layers.7.block_sparse_moe.experts.189.w3", "model.layers.7.block_sparse_moe.experts.190.w3", "model.layers.7.block_sparse_moe.experts.191.w3", "model.layers.7.block_sparse_moe.experts.192.w3", "model.layers.7.block_sparse_moe.experts.193.w3", "model.layers.7.block_sparse_moe.experts.194.w3", "model.layers.7.block_sparse_moe.experts.195.w3", "model.layers.7.block_sparse_moe.experts.196.w3", "model.layers.7.block_sparse_moe.experts.197.w3", "model.layers.7.block_sparse_moe.experts.198.w3", "model.layers.7.block_sparse_moe.experts.199.w3", "model.layers.7.block_sparse_moe.experts.200.w3", "model.layers.7.block_sparse_moe.experts.201.w3", "model.layers.7.block_sparse_moe.experts.202.w3", "model.layers.7.block_sparse_moe.experts.203.w3", "model.layers.7.block_sparse_moe.experts.204.w3", "model.layers.7.block_sparse_moe.experts.205.w3", "model.layers.7.block_sparse_moe.experts.206.w3", "model.layers.7.block_sparse_moe.experts.207.w3", "model.layers.7.block_sparse_moe.experts.208.w3", "model.layers.7.block_sparse_moe.experts.209.w3", "model.layers.7.block_sparse_moe.experts.210.w3", "model.layers.7.block_sparse_moe.experts.211.w3", "model.layers.7.block_sparse_moe.experts.212.w3", "model.layers.7.block_sparse_moe.experts.213.w3", "model.layers.7.block_sparse_moe.experts.214.w3", "model.layers.7.block_sparse_moe.experts.215.w3", "model.layers.7.block_sparse_moe.experts.216.w3", "model.layers.7.block_sparse_moe.experts.217.w3", "model.layers.7.block_sparse_moe.experts.218.w3", "model.layers.7.block_sparse_moe.experts.219.w3", "model.layers.7.block_sparse_moe.experts.220.w3", "model.layers.7.block_sparse_moe.experts.221.w3", "model.layers.7.block_sparse_moe.experts.222.w3", "model.layers.7.block_sparse_moe.experts.223.w3", "model.layers.7.block_sparse_moe.experts.224.w3", "model.layers.7.block_sparse_moe.experts.225.w3", "model.layers.7.block_sparse_moe.experts.226.w3", "model.layers.7.block_sparse_moe.experts.227.w3", "model.layers.7.block_sparse_moe.experts.228.w3", "model.layers.7.block_sparse_moe.experts.229.w3", "model.layers.7.block_sparse_moe.experts.230.w3", "model.layers.7.block_sparse_moe.experts.231.w3", "model.layers.7.block_sparse_moe.experts.232.w3", "model.layers.7.block_sparse_moe.experts.233.w3", "model.layers.7.block_sparse_moe.experts.234.w3", "model.layers.7.block_sparse_moe.experts.235.w3", "model.layers.7.block_sparse_moe.experts.236.w3", "model.layers.7.block_sparse_moe.experts.237.w3", "model.layers.7.block_sparse_moe.experts.238.w3", "model.layers.7.block_sparse_moe.experts.239.w3", "model.layers.7.block_sparse_moe.experts.240.w3", "model.layers.7.block_sparse_moe.experts.241.w3", "model.layers.7.block_sparse_moe.experts.242.w3", "model.layers.7.block_sparse_moe.experts.243.w3", "model.layers.7.block_sparse_moe.experts.244.w3", "model.layers.7.block_sparse_moe.experts.245.w3", "model.layers.7.block_sparse_moe.experts.246.w3", "model.layers.7.block_sparse_moe.experts.247.w3", "model.layers.7.block_sparse_moe.experts.248.w3", "model.layers.7.block_sparse_moe.experts.249.w3", "model.layers.7.block_sparse_moe.experts.250.w3", "model.layers.7.block_sparse_moe.experts.251.w3", "model.layers.7.block_sparse_moe.experts.252.w3", "model.layers.7.block_sparse_moe.experts.253.w3", "model.layers.7.block_sparse_moe.experts.254.w3", "model.layers.7.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 5.11612743139267e-05, "dbits": 2415919104 } ] }, { "idx": 39, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w2", "model.layers.7.block_sparse_moe.experts.1.w2", "model.layers.7.block_sparse_moe.experts.2.w2", "model.layers.7.block_sparse_moe.experts.3.w2", "model.layers.7.block_sparse_moe.experts.4.w2", "model.layers.7.block_sparse_moe.experts.5.w2", "model.layers.7.block_sparse_moe.experts.6.w2", "model.layers.7.block_sparse_moe.experts.7.w2", "model.layers.7.block_sparse_moe.experts.8.w2", "model.layers.7.block_sparse_moe.experts.9.w2", "model.layers.7.block_sparse_moe.experts.10.w2", "model.layers.7.block_sparse_moe.experts.11.w2", "model.layers.7.block_sparse_moe.experts.12.w2", "model.layers.7.block_sparse_moe.experts.13.w2", "model.layers.7.block_sparse_moe.experts.14.w2", "model.layers.7.block_sparse_moe.experts.15.w2", "model.layers.7.block_sparse_moe.experts.16.w2", "model.layers.7.block_sparse_moe.experts.17.w2", "model.layers.7.block_sparse_moe.experts.18.w2", "model.layers.7.block_sparse_moe.experts.19.w2", "model.layers.7.block_sparse_moe.experts.20.w2", "model.layers.7.block_sparse_moe.experts.21.w2", "model.layers.7.block_sparse_moe.experts.22.w2", "model.layers.7.block_sparse_moe.experts.23.w2", "model.layers.7.block_sparse_moe.experts.24.w2", "model.layers.7.block_sparse_moe.experts.25.w2", "model.layers.7.block_sparse_moe.experts.26.w2", "model.layers.7.block_sparse_moe.experts.27.w2", "model.layers.7.block_sparse_moe.experts.28.w2", "model.layers.7.block_sparse_moe.experts.29.w2", "model.layers.7.block_sparse_moe.experts.30.w2", "model.layers.7.block_sparse_moe.experts.31.w2", "model.layers.7.block_sparse_moe.experts.32.w2", "model.layers.7.block_sparse_moe.experts.33.w2", "model.layers.7.block_sparse_moe.experts.34.w2", "model.layers.7.block_sparse_moe.experts.35.w2", "model.layers.7.block_sparse_moe.experts.36.w2", "model.layers.7.block_sparse_moe.experts.37.w2", "model.layers.7.block_sparse_moe.experts.38.w2", "model.layers.7.block_sparse_moe.experts.39.w2", "model.layers.7.block_sparse_moe.experts.40.w2", "model.layers.7.block_sparse_moe.experts.41.w2", "model.layers.7.block_sparse_moe.experts.42.w2", "model.layers.7.block_sparse_moe.experts.43.w2", "model.layers.7.block_sparse_moe.experts.44.w2", "model.layers.7.block_sparse_moe.experts.45.w2", "model.layers.7.block_sparse_moe.experts.46.w2", "model.layers.7.block_sparse_moe.experts.47.w2", "model.layers.7.block_sparse_moe.experts.48.w2", "model.layers.7.block_sparse_moe.experts.49.w2", "model.layers.7.block_sparse_moe.experts.50.w2", "model.layers.7.block_sparse_moe.experts.51.w2", "model.layers.7.block_sparse_moe.experts.52.w2", "model.layers.7.block_sparse_moe.experts.53.w2", "model.layers.7.block_sparse_moe.experts.54.w2", "model.layers.7.block_sparse_moe.experts.55.w2", "model.layers.7.block_sparse_moe.experts.56.w2", "model.layers.7.block_sparse_moe.experts.57.w2", "model.layers.7.block_sparse_moe.experts.58.w2", "model.layers.7.block_sparse_moe.experts.59.w2", "model.layers.7.block_sparse_moe.experts.60.w2", "model.layers.7.block_sparse_moe.experts.61.w2", "model.layers.7.block_sparse_moe.experts.62.w2", "model.layers.7.block_sparse_moe.experts.63.w2", "model.layers.7.block_sparse_moe.experts.64.w2", "model.layers.7.block_sparse_moe.experts.65.w2", "model.layers.7.block_sparse_moe.experts.66.w2", "model.layers.7.block_sparse_moe.experts.67.w2", "model.layers.7.block_sparse_moe.experts.68.w2", "model.layers.7.block_sparse_moe.experts.69.w2", "model.layers.7.block_sparse_moe.experts.70.w2", "model.layers.7.block_sparse_moe.experts.71.w2", "model.layers.7.block_sparse_moe.experts.72.w2", "model.layers.7.block_sparse_moe.experts.73.w2", "model.layers.7.block_sparse_moe.experts.74.w2", "model.layers.7.block_sparse_moe.experts.75.w2", "model.layers.7.block_sparse_moe.experts.76.w2", "model.layers.7.block_sparse_moe.experts.77.w2", "model.layers.7.block_sparse_moe.experts.78.w2", "model.layers.7.block_sparse_moe.experts.79.w2", "model.layers.7.block_sparse_moe.experts.80.w2", "model.layers.7.block_sparse_moe.experts.81.w2", "model.layers.7.block_sparse_moe.experts.82.w2", "model.layers.7.block_sparse_moe.experts.83.w2", "model.layers.7.block_sparse_moe.experts.84.w2", "model.layers.7.block_sparse_moe.experts.85.w2", "model.layers.7.block_sparse_moe.experts.86.w2", "model.layers.7.block_sparse_moe.experts.87.w2", "model.layers.7.block_sparse_moe.experts.88.w2", "model.layers.7.block_sparse_moe.experts.89.w2", "model.layers.7.block_sparse_moe.experts.90.w2", "model.layers.7.block_sparse_moe.experts.91.w2", "model.layers.7.block_sparse_moe.experts.92.w2", "model.layers.7.block_sparse_moe.experts.93.w2", "model.layers.7.block_sparse_moe.experts.94.w2", "model.layers.7.block_sparse_moe.experts.95.w2", "model.layers.7.block_sparse_moe.experts.96.w2", "model.layers.7.block_sparse_moe.experts.97.w2", "model.layers.7.block_sparse_moe.experts.98.w2", "model.layers.7.block_sparse_moe.experts.99.w2", "model.layers.7.block_sparse_moe.experts.100.w2", "model.layers.7.block_sparse_moe.experts.101.w2", "model.layers.7.block_sparse_moe.experts.102.w2", "model.layers.7.block_sparse_moe.experts.103.w2", "model.layers.7.block_sparse_moe.experts.104.w2", "model.layers.7.block_sparse_moe.experts.105.w2", "model.layers.7.block_sparse_moe.experts.106.w2", "model.layers.7.block_sparse_moe.experts.107.w2", "model.layers.7.block_sparse_moe.experts.108.w2", "model.layers.7.block_sparse_moe.experts.109.w2", "model.layers.7.block_sparse_moe.experts.110.w2", "model.layers.7.block_sparse_moe.experts.111.w2", "model.layers.7.block_sparse_moe.experts.112.w2", "model.layers.7.block_sparse_moe.experts.113.w2", "model.layers.7.block_sparse_moe.experts.114.w2", "model.layers.7.block_sparse_moe.experts.115.w2", "model.layers.7.block_sparse_moe.experts.116.w2", "model.layers.7.block_sparse_moe.experts.117.w2", "model.layers.7.block_sparse_moe.experts.118.w2", "model.layers.7.block_sparse_moe.experts.119.w2", "model.layers.7.block_sparse_moe.experts.120.w2", "model.layers.7.block_sparse_moe.experts.121.w2", "model.layers.7.block_sparse_moe.experts.122.w2", "model.layers.7.block_sparse_moe.experts.123.w2", "model.layers.7.block_sparse_moe.experts.124.w2", "model.layers.7.block_sparse_moe.experts.125.w2", "model.layers.7.block_sparse_moe.experts.126.w2", "model.layers.7.block_sparse_moe.experts.127.w2", "model.layers.7.block_sparse_moe.experts.128.w2", "model.layers.7.block_sparse_moe.experts.129.w2", "model.layers.7.block_sparse_moe.experts.130.w2", "model.layers.7.block_sparse_moe.experts.131.w2", "model.layers.7.block_sparse_moe.experts.132.w2", "model.layers.7.block_sparse_moe.experts.133.w2", "model.layers.7.block_sparse_moe.experts.134.w2", "model.layers.7.block_sparse_moe.experts.135.w2", "model.layers.7.block_sparse_moe.experts.136.w2", "model.layers.7.block_sparse_moe.experts.137.w2", "model.layers.7.block_sparse_moe.experts.138.w2", "model.layers.7.block_sparse_moe.experts.139.w2", "model.layers.7.block_sparse_moe.experts.140.w2", "model.layers.7.block_sparse_moe.experts.141.w2", "model.layers.7.block_sparse_moe.experts.142.w2", "model.layers.7.block_sparse_moe.experts.143.w2", "model.layers.7.block_sparse_moe.experts.144.w2", "model.layers.7.block_sparse_moe.experts.145.w2", "model.layers.7.block_sparse_moe.experts.146.w2", "model.layers.7.block_sparse_moe.experts.147.w2", "model.layers.7.block_sparse_moe.experts.148.w2", "model.layers.7.block_sparse_moe.experts.149.w2", "model.layers.7.block_sparse_moe.experts.150.w2", "model.layers.7.block_sparse_moe.experts.151.w2", "model.layers.7.block_sparse_moe.experts.152.w2", "model.layers.7.block_sparse_moe.experts.153.w2", "model.layers.7.block_sparse_moe.experts.154.w2", "model.layers.7.block_sparse_moe.experts.155.w2", "model.layers.7.block_sparse_moe.experts.156.w2", "model.layers.7.block_sparse_moe.experts.157.w2", "model.layers.7.block_sparse_moe.experts.158.w2", "model.layers.7.block_sparse_moe.experts.159.w2", "model.layers.7.block_sparse_moe.experts.160.w2", "model.layers.7.block_sparse_moe.experts.161.w2", "model.layers.7.block_sparse_moe.experts.162.w2", "model.layers.7.block_sparse_moe.experts.163.w2", "model.layers.7.block_sparse_moe.experts.164.w2", "model.layers.7.block_sparse_moe.experts.165.w2", "model.layers.7.block_sparse_moe.experts.166.w2", "model.layers.7.block_sparse_moe.experts.167.w2", "model.layers.7.block_sparse_moe.experts.168.w2", "model.layers.7.block_sparse_moe.experts.169.w2", "model.layers.7.block_sparse_moe.experts.170.w2", "model.layers.7.block_sparse_moe.experts.171.w2", "model.layers.7.block_sparse_moe.experts.172.w2", "model.layers.7.block_sparse_moe.experts.173.w2", "model.layers.7.block_sparse_moe.experts.174.w2", "model.layers.7.block_sparse_moe.experts.175.w2", "model.layers.7.block_sparse_moe.experts.176.w2", "model.layers.7.block_sparse_moe.experts.177.w2", "model.layers.7.block_sparse_moe.experts.178.w2", "model.layers.7.block_sparse_moe.experts.179.w2", "model.layers.7.block_sparse_moe.experts.180.w2", "model.layers.7.block_sparse_moe.experts.181.w2", "model.layers.7.block_sparse_moe.experts.182.w2", "model.layers.7.block_sparse_moe.experts.183.w2", "model.layers.7.block_sparse_moe.experts.184.w2", "model.layers.7.block_sparse_moe.experts.185.w2", "model.layers.7.block_sparse_moe.experts.186.w2", "model.layers.7.block_sparse_moe.experts.187.w2", "model.layers.7.block_sparse_moe.experts.188.w2", "model.layers.7.block_sparse_moe.experts.189.w2", "model.layers.7.block_sparse_moe.experts.190.w2", "model.layers.7.block_sparse_moe.experts.191.w2", "model.layers.7.block_sparse_moe.experts.192.w2", "model.layers.7.block_sparse_moe.experts.193.w2", "model.layers.7.block_sparse_moe.experts.194.w2", "model.layers.7.block_sparse_moe.experts.195.w2", "model.layers.7.block_sparse_moe.experts.196.w2", "model.layers.7.block_sparse_moe.experts.197.w2", "model.layers.7.block_sparse_moe.experts.198.w2", "model.layers.7.block_sparse_moe.experts.199.w2", "model.layers.7.block_sparse_moe.experts.200.w2", "model.layers.7.block_sparse_moe.experts.201.w2", "model.layers.7.block_sparse_moe.experts.202.w2", "model.layers.7.block_sparse_moe.experts.203.w2", "model.layers.7.block_sparse_moe.experts.204.w2", "model.layers.7.block_sparse_moe.experts.205.w2", "model.layers.7.block_sparse_moe.experts.206.w2", "model.layers.7.block_sparse_moe.experts.207.w2", "model.layers.7.block_sparse_moe.experts.208.w2", "model.layers.7.block_sparse_moe.experts.209.w2", "model.layers.7.block_sparse_moe.experts.210.w2", "model.layers.7.block_sparse_moe.experts.211.w2", "model.layers.7.block_sparse_moe.experts.212.w2", "model.layers.7.block_sparse_moe.experts.213.w2", "model.layers.7.block_sparse_moe.experts.214.w2", "model.layers.7.block_sparse_moe.experts.215.w2", "model.layers.7.block_sparse_moe.experts.216.w2", "model.layers.7.block_sparse_moe.experts.217.w2", "model.layers.7.block_sparse_moe.experts.218.w2", "model.layers.7.block_sparse_moe.experts.219.w2", "model.layers.7.block_sparse_moe.experts.220.w2", "model.layers.7.block_sparse_moe.experts.221.w2", "model.layers.7.block_sparse_moe.experts.222.w2", "model.layers.7.block_sparse_moe.experts.223.w2", "model.layers.7.block_sparse_moe.experts.224.w2", "model.layers.7.block_sparse_moe.experts.225.w2", "model.layers.7.block_sparse_moe.experts.226.w2", "model.layers.7.block_sparse_moe.experts.227.w2", "model.layers.7.block_sparse_moe.experts.228.w2", "model.layers.7.block_sparse_moe.experts.229.w2", "model.layers.7.block_sparse_moe.experts.230.w2", "model.layers.7.block_sparse_moe.experts.231.w2", "model.layers.7.block_sparse_moe.experts.232.w2", "model.layers.7.block_sparse_moe.experts.233.w2", "model.layers.7.block_sparse_moe.experts.234.w2", "model.layers.7.block_sparse_moe.experts.235.w2", "model.layers.7.block_sparse_moe.experts.236.w2", "model.layers.7.block_sparse_moe.experts.237.w2", "model.layers.7.block_sparse_moe.experts.238.w2", "model.layers.7.block_sparse_moe.experts.239.w2", "model.layers.7.block_sparse_moe.experts.240.w2", "model.layers.7.block_sparse_moe.experts.241.w2", "model.layers.7.block_sparse_moe.experts.242.w2", "model.layers.7.block_sparse_moe.experts.243.w2", "model.layers.7.block_sparse_moe.experts.244.w2", "model.layers.7.block_sparse_moe.experts.245.w2", "model.layers.7.block_sparse_moe.experts.246.w2", "model.layers.7.block_sparse_moe.experts.247.w2", "model.layers.7.block_sparse_moe.experts.248.w2", "model.layers.7.block_sparse_moe.experts.249.w2", "model.layers.7.block_sparse_moe.experts.250.w2", "model.layers.7.block_sparse_moe.experts.251.w2", "model.layers.7.block_sparse_moe.experts.252.w2", "model.layers.7.block_sparse_moe.experts.253.w2", "model.layers.7.block_sparse_moe.experts.254.w2", "model.layers.7.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00018719993531703116, "dbits": 1207959552 } ] }, { "idx": 40, "layers": [ "model.layers.8.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0004889111965894755, "dbits": 18874368 } ] }, { "idx": 41, "layers": [ "model.layers.8.self_attn.k_proj", "model.layers.8.self_attn.v_proj" ], "candidates": [ { "dkld": -2.465918660164712e-05, "dbits": 6291456 } ] }, { "idx": 42, "layers": [ "model.layers.8.self_attn.o_proj" ], "candidates": [ { "dkld": -2.6213005185210525e-06, "dbits": 18874368 } ] }, { "idx": 43, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w1", "model.layers.8.block_sparse_moe.experts.1.w1", "model.layers.8.block_sparse_moe.experts.2.w1", "model.layers.8.block_sparse_moe.experts.3.w1", "model.layers.8.block_sparse_moe.experts.4.w1", "model.layers.8.block_sparse_moe.experts.5.w1", "model.layers.8.block_sparse_moe.experts.6.w1", "model.layers.8.block_sparse_moe.experts.7.w1", "model.layers.8.block_sparse_moe.experts.8.w1", "model.layers.8.block_sparse_moe.experts.9.w1", "model.layers.8.block_sparse_moe.experts.10.w1", "model.layers.8.block_sparse_moe.experts.11.w1", "model.layers.8.block_sparse_moe.experts.12.w1", "model.layers.8.block_sparse_moe.experts.13.w1", "model.layers.8.block_sparse_moe.experts.14.w1", "model.layers.8.block_sparse_moe.experts.15.w1", "model.layers.8.block_sparse_moe.experts.16.w1", "model.layers.8.block_sparse_moe.experts.17.w1", "model.layers.8.block_sparse_moe.experts.18.w1", "model.layers.8.block_sparse_moe.experts.19.w1", "model.layers.8.block_sparse_moe.experts.20.w1", "model.layers.8.block_sparse_moe.experts.21.w1", "model.layers.8.block_sparse_moe.experts.22.w1", "model.layers.8.block_sparse_moe.experts.23.w1", "model.layers.8.block_sparse_moe.experts.24.w1", "model.layers.8.block_sparse_moe.experts.25.w1", "model.layers.8.block_sparse_moe.experts.26.w1", "model.layers.8.block_sparse_moe.experts.27.w1", "model.layers.8.block_sparse_moe.experts.28.w1", "model.layers.8.block_sparse_moe.experts.29.w1", "model.layers.8.block_sparse_moe.experts.30.w1", "model.layers.8.block_sparse_moe.experts.31.w1", "model.layers.8.block_sparse_moe.experts.32.w1", "model.layers.8.block_sparse_moe.experts.33.w1", "model.layers.8.block_sparse_moe.experts.34.w1", "model.layers.8.block_sparse_moe.experts.35.w1", "model.layers.8.block_sparse_moe.experts.36.w1", "model.layers.8.block_sparse_moe.experts.37.w1", "model.layers.8.block_sparse_moe.experts.38.w1", "model.layers.8.block_sparse_moe.experts.39.w1", "model.layers.8.block_sparse_moe.experts.40.w1", "model.layers.8.block_sparse_moe.experts.41.w1", "model.layers.8.block_sparse_moe.experts.42.w1", "model.layers.8.block_sparse_moe.experts.43.w1", "model.layers.8.block_sparse_moe.experts.44.w1", "model.layers.8.block_sparse_moe.experts.45.w1", "model.layers.8.block_sparse_moe.experts.46.w1", "model.layers.8.block_sparse_moe.experts.47.w1", "model.layers.8.block_sparse_moe.experts.48.w1", "model.layers.8.block_sparse_moe.experts.49.w1", "model.layers.8.block_sparse_moe.experts.50.w1", "model.layers.8.block_sparse_moe.experts.51.w1", "model.layers.8.block_sparse_moe.experts.52.w1", "model.layers.8.block_sparse_moe.experts.53.w1", "model.layers.8.block_sparse_moe.experts.54.w1", "model.layers.8.block_sparse_moe.experts.55.w1", "model.layers.8.block_sparse_moe.experts.56.w1", "model.layers.8.block_sparse_moe.experts.57.w1", "model.layers.8.block_sparse_moe.experts.58.w1", "model.layers.8.block_sparse_moe.experts.59.w1", "model.layers.8.block_sparse_moe.experts.60.w1", "model.layers.8.block_sparse_moe.experts.61.w1", "model.layers.8.block_sparse_moe.experts.62.w1", "model.layers.8.block_sparse_moe.experts.63.w1", "model.layers.8.block_sparse_moe.experts.64.w1", "model.layers.8.block_sparse_moe.experts.65.w1", "model.layers.8.block_sparse_moe.experts.66.w1", "model.layers.8.block_sparse_moe.experts.67.w1", "model.layers.8.block_sparse_moe.experts.68.w1", "model.layers.8.block_sparse_moe.experts.69.w1", "model.layers.8.block_sparse_moe.experts.70.w1", "model.layers.8.block_sparse_moe.experts.71.w1", "model.layers.8.block_sparse_moe.experts.72.w1", "model.layers.8.block_sparse_moe.experts.73.w1", "model.layers.8.block_sparse_moe.experts.74.w1", "model.layers.8.block_sparse_moe.experts.75.w1", "model.layers.8.block_sparse_moe.experts.76.w1", "model.layers.8.block_sparse_moe.experts.77.w1", "model.layers.8.block_sparse_moe.experts.78.w1", "model.layers.8.block_sparse_moe.experts.79.w1", "model.layers.8.block_sparse_moe.experts.80.w1", "model.layers.8.block_sparse_moe.experts.81.w1", "model.layers.8.block_sparse_moe.experts.82.w1", "model.layers.8.block_sparse_moe.experts.83.w1", "model.layers.8.block_sparse_moe.experts.84.w1", "model.layers.8.block_sparse_moe.experts.85.w1", "model.layers.8.block_sparse_moe.experts.86.w1", "model.layers.8.block_sparse_moe.experts.87.w1", "model.layers.8.block_sparse_moe.experts.88.w1", "model.layers.8.block_sparse_moe.experts.89.w1", "model.layers.8.block_sparse_moe.experts.90.w1", "model.layers.8.block_sparse_moe.experts.91.w1", "model.layers.8.block_sparse_moe.experts.92.w1", "model.layers.8.block_sparse_moe.experts.93.w1", "model.layers.8.block_sparse_moe.experts.94.w1", "model.layers.8.block_sparse_moe.experts.95.w1", "model.layers.8.block_sparse_moe.experts.96.w1", "model.layers.8.block_sparse_moe.experts.97.w1", "model.layers.8.block_sparse_moe.experts.98.w1", "model.layers.8.block_sparse_moe.experts.99.w1", "model.layers.8.block_sparse_moe.experts.100.w1", "model.layers.8.block_sparse_moe.experts.101.w1", "model.layers.8.block_sparse_moe.experts.102.w1", "model.layers.8.block_sparse_moe.experts.103.w1", "model.layers.8.block_sparse_moe.experts.104.w1", "model.layers.8.block_sparse_moe.experts.105.w1", "model.layers.8.block_sparse_moe.experts.106.w1", "model.layers.8.block_sparse_moe.experts.107.w1", "model.layers.8.block_sparse_moe.experts.108.w1", "model.layers.8.block_sparse_moe.experts.109.w1", "model.layers.8.block_sparse_moe.experts.110.w1", "model.layers.8.block_sparse_moe.experts.111.w1", "model.layers.8.block_sparse_moe.experts.112.w1", "model.layers.8.block_sparse_moe.experts.113.w1", "model.layers.8.block_sparse_moe.experts.114.w1", "model.layers.8.block_sparse_moe.experts.115.w1", "model.layers.8.block_sparse_moe.experts.116.w1", "model.layers.8.block_sparse_moe.experts.117.w1", "model.layers.8.block_sparse_moe.experts.118.w1", "model.layers.8.block_sparse_moe.experts.119.w1", "model.layers.8.block_sparse_moe.experts.120.w1", "model.layers.8.block_sparse_moe.experts.121.w1", "model.layers.8.block_sparse_moe.experts.122.w1", "model.layers.8.block_sparse_moe.experts.123.w1", "model.layers.8.block_sparse_moe.experts.124.w1", "model.layers.8.block_sparse_moe.experts.125.w1", "model.layers.8.block_sparse_moe.experts.126.w1", "model.layers.8.block_sparse_moe.experts.127.w1", "model.layers.8.block_sparse_moe.experts.128.w1", "model.layers.8.block_sparse_moe.experts.129.w1", "model.layers.8.block_sparse_moe.experts.130.w1", "model.layers.8.block_sparse_moe.experts.131.w1", "model.layers.8.block_sparse_moe.experts.132.w1", "model.layers.8.block_sparse_moe.experts.133.w1", "model.layers.8.block_sparse_moe.experts.134.w1", "model.layers.8.block_sparse_moe.experts.135.w1", "model.layers.8.block_sparse_moe.experts.136.w1", "model.layers.8.block_sparse_moe.experts.137.w1", "model.layers.8.block_sparse_moe.experts.138.w1", "model.layers.8.block_sparse_moe.experts.139.w1", "model.layers.8.block_sparse_moe.experts.140.w1", "model.layers.8.block_sparse_moe.experts.141.w1", "model.layers.8.block_sparse_moe.experts.142.w1", "model.layers.8.block_sparse_moe.experts.143.w1", "model.layers.8.block_sparse_moe.experts.144.w1", "model.layers.8.block_sparse_moe.experts.145.w1", "model.layers.8.block_sparse_moe.experts.146.w1", "model.layers.8.block_sparse_moe.experts.147.w1", "model.layers.8.block_sparse_moe.experts.148.w1", "model.layers.8.block_sparse_moe.experts.149.w1", "model.layers.8.block_sparse_moe.experts.150.w1", "model.layers.8.block_sparse_moe.experts.151.w1", "model.layers.8.block_sparse_moe.experts.152.w1", "model.layers.8.block_sparse_moe.experts.153.w1", "model.layers.8.block_sparse_moe.experts.154.w1", "model.layers.8.block_sparse_moe.experts.155.w1", "model.layers.8.block_sparse_moe.experts.156.w1", "model.layers.8.block_sparse_moe.experts.157.w1", "model.layers.8.block_sparse_moe.experts.158.w1", "model.layers.8.block_sparse_moe.experts.159.w1", "model.layers.8.block_sparse_moe.experts.160.w1", "model.layers.8.block_sparse_moe.experts.161.w1", "model.layers.8.block_sparse_moe.experts.162.w1", "model.layers.8.block_sparse_moe.experts.163.w1", "model.layers.8.block_sparse_moe.experts.164.w1", "model.layers.8.block_sparse_moe.experts.165.w1", "model.layers.8.block_sparse_moe.experts.166.w1", "model.layers.8.block_sparse_moe.experts.167.w1", "model.layers.8.block_sparse_moe.experts.168.w1", "model.layers.8.block_sparse_moe.experts.169.w1", "model.layers.8.block_sparse_moe.experts.170.w1", "model.layers.8.block_sparse_moe.experts.171.w1", "model.layers.8.block_sparse_moe.experts.172.w1", "model.layers.8.block_sparse_moe.experts.173.w1", "model.layers.8.block_sparse_moe.experts.174.w1", "model.layers.8.block_sparse_moe.experts.175.w1", "model.layers.8.block_sparse_moe.experts.176.w1", "model.layers.8.block_sparse_moe.experts.177.w1", "model.layers.8.block_sparse_moe.experts.178.w1", "model.layers.8.block_sparse_moe.experts.179.w1", "model.layers.8.block_sparse_moe.experts.180.w1", "model.layers.8.block_sparse_moe.experts.181.w1", "model.layers.8.block_sparse_moe.experts.182.w1", "model.layers.8.block_sparse_moe.experts.183.w1", "model.layers.8.block_sparse_moe.experts.184.w1", "model.layers.8.block_sparse_moe.experts.185.w1", "model.layers.8.block_sparse_moe.experts.186.w1", "model.layers.8.block_sparse_moe.experts.187.w1", "model.layers.8.block_sparse_moe.experts.188.w1", "model.layers.8.block_sparse_moe.experts.189.w1", "model.layers.8.block_sparse_moe.experts.190.w1", "model.layers.8.block_sparse_moe.experts.191.w1", "model.layers.8.block_sparse_moe.experts.192.w1", "model.layers.8.block_sparse_moe.experts.193.w1", "model.layers.8.block_sparse_moe.experts.194.w1", "model.layers.8.block_sparse_moe.experts.195.w1", "model.layers.8.block_sparse_moe.experts.196.w1", "model.layers.8.block_sparse_moe.experts.197.w1", "model.layers.8.block_sparse_moe.experts.198.w1", "model.layers.8.block_sparse_moe.experts.199.w1", "model.layers.8.block_sparse_moe.experts.200.w1", "model.layers.8.block_sparse_moe.experts.201.w1", "model.layers.8.block_sparse_moe.experts.202.w1", "model.layers.8.block_sparse_moe.experts.203.w1", "model.layers.8.block_sparse_moe.experts.204.w1", "model.layers.8.block_sparse_moe.experts.205.w1", "model.layers.8.block_sparse_moe.experts.206.w1", "model.layers.8.block_sparse_moe.experts.207.w1", "model.layers.8.block_sparse_moe.experts.208.w1", "model.layers.8.block_sparse_moe.experts.209.w1", "model.layers.8.block_sparse_moe.experts.210.w1", "model.layers.8.block_sparse_moe.experts.211.w1", "model.layers.8.block_sparse_moe.experts.212.w1", "model.layers.8.block_sparse_moe.experts.213.w1", "model.layers.8.block_sparse_moe.experts.214.w1", "model.layers.8.block_sparse_moe.experts.215.w1", "model.layers.8.block_sparse_moe.experts.216.w1", "model.layers.8.block_sparse_moe.experts.217.w1", "model.layers.8.block_sparse_moe.experts.218.w1", "model.layers.8.block_sparse_moe.experts.219.w1", "model.layers.8.block_sparse_moe.experts.220.w1", "model.layers.8.block_sparse_moe.experts.221.w1", "model.layers.8.block_sparse_moe.experts.222.w1", "model.layers.8.block_sparse_moe.experts.223.w1", "model.layers.8.block_sparse_moe.experts.224.w1", "model.layers.8.block_sparse_moe.experts.225.w1", "model.layers.8.block_sparse_moe.experts.226.w1", "model.layers.8.block_sparse_moe.experts.227.w1", "model.layers.8.block_sparse_moe.experts.228.w1", "model.layers.8.block_sparse_moe.experts.229.w1", "model.layers.8.block_sparse_moe.experts.230.w1", "model.layers.8.block_sparse_moe.experts.231.w1", "model.layers.8.block_sparse_moe.experts.232.w1", "model.layers.8.block_sparse_moe.experts.233.w1", "model.layers.8.block_sparse_moe.experts.234.w1", "model.layers.8.block_sparse_moe.experts.235.w1", "model.layers.8.block_sparse_moe.experts.236.w1", "model.layers.8.block_sparse_moe.experts.237.w1", "model.layers.8.block_sparse_moe.experts.238.w1", "model.layers.8.block_sparse_moe.experts.239.w1", "model.layers.8.block_sparse_moe.experts.240.w1", "model.layers.8.block_sparse_moe.experts.241.w1", "model.layers.8.block_sparse_moe.experts.242.w1", "model.layers.8.block_sparse_moe.experts.243.w1", "model.layers.8.block_sparse_moe.experts.244.w1", "model.layers.8.block_sparse_moe.experts.245.w1", "model.layers.8.block_sparse_moe.experts.246.w1", "model.layers.8.block_sparse_moe.experts.247.w1", "model.layers.8.block_sparse_moe.experts.248.w1", "model.layers.8.block_sparse_moe.experts.249.w1", "model.layers.8.block_sparse_moe.experts.250.w1", "model.layers.8.block_sparse_moe.experts.251.w1", "model.layers.8.block_sparse_moe.experts.252.w1", "model.layers.8.block_sparse_moe.experts.253.w1", "model.layers.8.block_sparse_moe.experts.254.w1", "model.layers.8.block_sparse_moe.experts.255.w1", "model.layers.8.block_sparse_moe.experts.0.w3", "model.layers.8.block_sparse_moe.experts.1.w3", "model.layers.8.block_sparse_moe.experts.2.w3", "model.layers.8.block_sparse_moe.experts.3.w3", "model.layers.8.block_sparse_moe.experts.4.w3", "model.layers.8.block_sparse_moe.experts.5.w3", "model.layers.8.block_sparse_moe.experts.6.w3", "model.layers.8.block_sparse_moe.experts.7.w3", "model.layers.8.block_sparse_moe.experts.8.w3", "model.layers.8.block_sparse_moe.experts.9.w3", "model.layers.8.block_sparse_moe.experts.10.w3", "model.layers.8.block_sparse_moe.experts.11.w3", "model.layers.8.block_sparse_moe.experts.12.w3", "model.layers.8.block_sparse_moe.experts.13.w3", "model.layers.8.block_sparse_moe.experts.14.w3", "model.layers.8.block_sparse_moe.experts.15.w3", "model.layers.8.block_sparse_moe.experts.16.w3", "model.layers.8.block_sparse_moe.experts.17.w3", "model.layers.8.block_sparse_moe.experts.18.w3", "model.layers.8.block_sparse_moe.experts.19.w3", "model.layers.8.block_sparse_moe.experts.20.w3", "model.layers.8.block_sparse_moe.experts.21.w3", "model.layers.8.block_sparse_moe.experts.22.w3", "model.layers.8.block_sparse_moe.experts.23.w3", "model.layers.8.block_sparse_moe.experts.24.w3", "model.layers.8.block_sparse_moe.experts.25.w3", "model.layers.8.block_sparse_moe.experts.26.w3", "model.layers.8.block_sparse_moe.experts.27.w3", "model.layers.8.block_sparse_moe.experts.28.w3", "model.layers.8.block_sparse_moe.experts.29.w3", "model.layers.8.block_sparse_moe.experts.30.w3", "model.layers.8.block_sparse_moe.experts.31.w3", "model.layers.8.block_sparse_moe.experts.32.w3", "model.layers.8.block_sparse_moe.experts.33.w3", "model.layers.8.block_sparse_moe.experts.34.w3", "model.layers.8.block_sparse_moe.experts.35.w3", "model.layers.8.block_sparse_moe.experts.36.w3", "model.layers.8.block_sparse_moe.experts.37.w3", "model.layers.8.block_sparse_moe.experts.38.w3", "model.layers.8.block_sparse_moe.experts.39.w3", "model.layers.8.block_sparse_moe.experts.40.w3", "model.layers.8.block_sparse_moe.experts.41.w3", "model.layers.8.block_sparse_moe.experts.42.w3", "model.layers.8.block_sparse_moe.experts.43.w3", "model.layers.8.block_sparse_moe.experts.44.w3", "model.layers.8.block_sparse_moe.experts.45.w3", "model.layers.8.block_sparse_moe.experts.46.w3", "model.layers.8.block_sparse_moe.experts.47.w3", "model.layers.8.block_sparse_moe.experts.48.w3", "model.layers.8.block_sparse_moe.experts.49.w3", "model.layers.8.block_sparse_moe.experts.50.w3", "model.layers.8.block_sparse_moe.experts.51.w3", "model.layers.8.block_sparse_moe.experts.52.w3", "model.layers.8.block_sparse_moe.experts.53.w3", "model.layers.8.block_sparse_moe.experts.54.w3", "model.layers.8.block_sparse_moe.experts.55.w3", "model.layers.8.block_sparse_moe.experts.56.w3", "model.layers.8.block_sparse_moe.experts.57.w3", "model.layers.8.block_sparse_moe.experts.58.w3", "model.layers.8.block_sparse_moe.experts.59.w3", "model.layers.8.block_sparse_moe.experts.60.w3", "model.layers.8.block_sparse_moe.experts.61.w3", "model.layers.8.block_sparse_moe.experts.62.w3", "model.layers.8.block_sparse_moe.experts.63.w3", "model.layers.8.block_sparse_moe.experts.64.w3", "model.layers.8.block_sparse_moe.experts.65.w3", "model.layers.8.block_sparse_moe.experts.66.w3", "model.layers.8.block_sparse_moe.experts.67.w3", "model.layers.8.block_sparse_moe.experts.68.w3", "model.layers.8.block_sparse_moe.experts.69.w3", "model.layers.8.block_sparse_moe.experts.70.w3", "model.layers.8.block_sparse_moe.experts.71.w3", "model.layers.8.block_sparse_moe.experts.72.w3", "model.layers.8.block_sparse_moe.experts.73.w3", "model.layers.8.block_sparse_moe.experts.74.w3", "model.layers.8.block_sparse_moe.experts.75.w3", "model.layers.8.block_sparse_moe.experts.76.w3", "model.layers.8.block_sparse_moe.experts.77.w3", "model.layers.8.block_sparse_moe.experts.78.w3", "model.layers.8.block_sparse_moe.experts.79.w3", "model.layers.8.block_sparse_moe.experts.80.w3", "model.layers.8.block_sparse_moe.experts.81.w3", "model.layers.8.block_sparse_moe.experts.82.w3", "model.layers.8.block_sparse_moe.experts.83.w3", "model.layers.8.block_sparse_moe.experts.84.w3", "model.layers.8.block_sparse_moe.experts.85.w3", "model.layers.8.block_sparse_moe.experts.86.w3", "model.layers.8.block_sparse_moe.experts.87.w3", "model.layers.8.block_sparse_moe.experts.88.w3", "model.layers.8.block_sparse_moe.experts.89.w3", "model.layers.8.block_sparse_moe.experts.90.w3", "model.layers.8.block_sparse_moe.experts.91.w3", "model.layers.8.block_sparse_moe.experts.92.w3", "model.layers.8.block_sparse_moe.experts.93.w3", "model.layers.8.block_sparse_moe.experts.94.w3", "model.layers.8.block_sparse_moe.experts.95.w3", "model.layers.8.block_sparse_moe.experts.96.w3", "model.layers.8.block_sparse_moe.experts.97.w3", "model.layers.8.block_sparse_moe.experts.98.w3", "model.layers.8.block_sparse_moe.experts.99.w3", "model.layers.8.block_sparse_moe.experts.100.w3", "model.layers.8.block_sparse_moe.experts.101.w3", "model.layers.8.block_sparse_moe.experts.102.w3", "model.layers.8.block_sparse_moe.experts.103.w3", "model.layers.8.block_sparse_moe.experts.104.w3", "model.layers.8.block_sparse_moe.experts.105.w3", "model.layers.8.block_sparse_moe.experts.106.w3", "model.layers.8.block_sparse_moe.experts.107.w3", "model.layers.8.block_sparse_moe.experts.108.w3", "model.layers.8.block_sparse_moe.experts.109.w3", "model.layers.8.block_sparse_moe.experts.110.w3", "model.layers.8.block_sparse_moe.experts.111.w3", "model.layers.8.block_sparse_moe.experts.112.w3", "model.layers.8.block_sparse_moe.experts.113.w3", "model.layers.8.block_sparse_moe.experts.114.w3", "model.layers.8.block_sparse_moe.experts.115.w3", "model.layers.8.block_sparse_moe.experts.116.w3", "model.layers.8.block_sparse_moe.experts.117.w3", "model.layers.8.block_sparse_moe.experts.118.w3", "model.layers.8.block_sparse_moe.experts.119.w3", "model.layers.8.block_sparse_moe.experts.120.w3", "model.layers.8.block_sparse_moe.experts.121.w3", "model.layers.8.block_sparse_moe.experts.122.w3", "model.layers.8.block_sparse_moe.experts.123.w3", "model.layers.8.block_sparse_moe.experts.124.w3", "model.layers.8.block_sparse_moe.experts.125.w3", "model.layers.8.block_sparse_moe.experts.126.w3", "model.layers.8.block_sparse_moe.experts.127.w3", "model.layers.8.block_sparse_moe.experts.128.w3", "model.layers.8.block_sparse_moe.experts.129.w3", "model.layers.8.block_sparse_moe.experts.130.w3", "model.layers.8.block_sparse_moe.experts.131.w3", "model.layers.8.block_sparse_moe.experts.132.w3", "model.layers.8.block_sparse_moe.experts.133.w3", "model.layers.8.block_sparse_moe.experts.134.w3", "model.layers.8.block_sparse_moe.experts.135.w3", "model.layers.8.block_sparse_moe.experts.136.w3", "model.layers.8.block_sparse_moe.experts.137.w3", "model.layers.8.block_sparse_moe.experts.138.w3", "model.layers.8.block_sparse_moe.experts.139.w3", "model.layers.8.block_sparse_moe.experts.140.w3", "model.layers.8.block_sparse_moe.experts.141.w3", "model.layers.8.block_sparse_moe.experts.142.w3", "model.layers.8.block_sparse_moe.experts.143.w3", "model.layers.8.block_sparse_moe.experts.144.w3", "model.layers.8.block_sparse_moe.experts.145.w3", "model.layers.8.block_sparse_moe.experts.146.w3", "model.layers.8.block_sparse_moe.experts.147.w3", "model.layers.8.block_sparse_moe.experts.148.w3", "model.layers.8.block_sparse_moe.experts.149.w3", "model.layers.8.block_sparse_moe.experts.150.w3", "model.layers.8.block_sparse_moe.experts.151.w3", "model.layers.8.block_sparse_moe.experts.152.w3", "model.layers.8.block_sparse_moe.experts.153.w3", "model.layers.8.block_sparse_moe.experts.154.w3", "model.layers.8.block_sparse_moe.experts.155.w3", "model.layers.8.block_sparse_moe.experts.156.w3", "model.layers.8.block_sparse_moe.experts.157.w3", "model.layers.8.block_sparse_moe.experts.158.w3", "model.layers.8.block_sparse_moe.experts.159.w3", "model.layers.8.block_sparse_moe.experts.160.w3", "model.layers.8.block_sparse_moe.experts.161.w3", "model.layers.8.block_sparse_moe.experts.162.w3", "model.layers.8.block_sparse_moe.experts.163.w3", "model.layers.8.block_sparse_moe.experts.164.w3", "model.layers.8.block_sparse_moe.experts.165.w3", "model.layers.8.block_sparse_moe.experts.166.w3", "model.layers.8.block_sparse_moe.experts.167.w3", "model.layers.8.block_sparse_moe.experts.168.w3", "model.layers.8.block_sparse_moe.experts.169.w3", "model.layers.8.block_sparse_moe.experts.170.w3", "model.layers.8.block_sparse_moe.experts.171.w3", "model.layers.8.block_sparse_moe.experts.172.w3", "model.layers.8.block_sparse_moe.experts.173.w3", "model.layers.8.block_sparse_moe.experts.174.w3", "model.layers.8.block_sparse_moe.experts.175.w3", "model.layers.8.block_sparse_moe.experts.176.w3", "model.layers.8.block_sparse_moe.experts.177.w3", "model.layers.8.block_sparse_moe.experts.178.w3", "model.layers.8.block_sparse_moe.experts.179.w3", "model.layers.8.block_sparse_moe.experts.180.w3", "model.layers.8.block_sparse_moe.experts.181.w3", "model.layers.8.block_sparse_moe.experts.182.w3", "model.layers.8.block_sparse_moe.experts.183.w3", "model.layers.8.block_sparse_moe.experts.184.w3", "model.layers.8.block_sparse_moe.experts.185.w3", "model.layers.8.block_sparse_moe.experts.186.w3", "model.layers.8.block_sparse_moe.experts.187.w3", "model.layers.8.block_sparse_moe.experts.188.w3", "model.layers.8.block_sparse_moe.experts.189.w3", "model.layers.8.block_sparse_moe.experts.190.w3", "model.layers.8.block_sparse_moe.experts.191.w3", "model.layers.8.block_sparse_moe.experts.192.w3", "model.layers.8.block_sparse_moe.experts.193.w3", "model.layers.8.block_sparse_moe.experts.194.w3", "model.layers.8.block_sparse_moe.experts.195.w3", "model.layers.8.block_sparse_moe.experts.196.w3", "model.layers.8.block_sparse_moe.experts.197.w3", "model.layers.8.block_sparse_moe.experts.198.w3", "model.layers.8.block_sparse_moe.experts.199.w3", "model.layers.8.block_sparse_moe.experts.200.w3", "model.layers.8.block_sparse_moe.experts.201.w3", "model.layers.8.block_sparse_moe.experts.202.w3", "model.layers.8.block_sparse_moe.experts.203.w3", "model.layers.8.block_sparse_moe.experts.204.w3", "model.layers.8.block_sparse_moe.experts.205.w3", "model.layers.8.block_sparse_moe.experts.206.w3", "model.layers.8.block_sparse_moe.experts.207.w3", "model.layers.8.block_sparse_moe.experts.208.w3", "model.layers.8.block_sparse_moe.experts.209.w3", "model.layers.8.block_sparse_moe.experts.210.w3", "model.layers.8.block_sparse_moe.experts.211.w3", "model.layers.8.block_sparse_moe.experts.212.w3", "model.layers.8.block_sparse_moe.experts.213.w3", "model.layers.8.block_sparse_moe.experts.214.w3", "model.layers.8.block_sparse_moe.experts.215.w3", "model.layers.8.block_sparse_moe.experts.216.w3", "model.layers.8.block_sparse_moe.experts.217.w3", "model.layers.8.block_sparse_moe.experts.218.w3", "model.layers.8.block_sparse_moe.experts.219.w3", "model.layers.8.block_sparse_moe.experts.220.w3", "model.layers.8.block_sparse_moe.experts.221.w3", "model.layers.8.block_sparse_moe.experts.222.w3", "model.layers.8.block_sparse_moe.experts.223.w3", "model.layers.8.block_sparse_moe.experts.224.w3", "model.layers.8.block_sparse_moe.experts.225.w3", "model.layers.8.block_sparse_moe.experts.226.w3", "model.layers.8.block_sparse_moe.experts.227.w3", "model.layers.8.block_sparse_moe.experts.228.w3", "model.layers.8.block_sparse_moe.experts.229.w3", "model.layers.8.block_sparse_moe.experts.230.w3", "model.layers.8.block_sparse_moe.experts.231.w3", "model.layers.8.block_sparse_moe.experts.232.w3", "model.layers.8.block_sparse_moe.experts.233.w3", "model.layers.8.block_sparse_moe.experts.234.w3", "model.layers.8.block_sparse_moe.experts.235.w3", "model.layers.8.block_sparse_moe.experts.236.w3", "model.layers.8.block_sparse_moe.experts.237.w3", "model.layers.8.block_sparse_moe.experts.238.w3", "model.layers.8.block_sparse_moe.experts.239.w3", "model.layers.8.block_sparse_moe.experts.240.w3", "model.layers.8.block_sparse_moe.experts.241.w3", "model.layers.8.block_sparse_moe.experts.242.w3", "model.layers.8.block_sparse_moe.experts.243.w3", "model.layers.8.block_sparse_moe.experts.244.w3", "model.layers.8.block_sparse_moe.experts.245.w3", "model.layers.8.block_sparse_moe.experts.246.w3", "model.layers.8.block_sparse_moe.experts.247.w3", "model.layers.8.block_sparse_moe.experts.248.w3", "model.layers.8.block_sparse_moe.experts.249.w3", "model.layers.8.block_sparse_moe.experts.250.w3", "model.layers.8.block_sparse_moe.experts.251.w3", "model.layers.8.block_sparse_moe.experts.252.w3", "model.layers.8.block_sparse_moe.experts.253.w3", "model.layers.8.block_sparse_moe.experts.254.w3", "model.layers.8.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0004034785553812953, "dbits": 2415919104 } ] }, { "idx": 44, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w2", "model.layers.8.block_sparse_moe.experts.1.w2", "model.layers.8.block_sparse_moe.experts.2.w2", "model.layers.8.block_sparse_moe.experts.3.w2", "model.layers.8.block_sparse_moe.experts.4.w2", "model.layers.8.block_sparse_moe.experts.5.w2", "model.layers.8.block_sparse_moe.experts.6.w2", "model.layers.8.block_sparse_moe.experts.7.w2", "model.layers.8.block_sparse_moe.experts.8.w2", "model.layers.8.block_sparse_moe.experts.9.w2", "model.layers.8.block_sparse_moe.experts.10.w2", "model.layers.8.block_sparse_moe.experts.11.w2", "model.layers.8.block_sparse_moe.experts.12.w2", "model.layers.8.block_sparse_moe.experts.13.w2", "model.layers.8.block_sparse_moe.experts.14.w2", "model.layers.8.block_sparse_moe.experts.15.w2", "model.layers.8.block_sparse_moe.experts.16.w2", "model.layers.8.block_sparse_moe.experts.17.w2", "model.layers.8.block_sparse_moe.experts.18.w2", "model.layers.8.block_sparse_moe.experts.19.w2", "model.layers.8.block_sparse_moe.experts.20.w2", "model.layers.8.block_sparse_moe.experts.21.w2", "model.layers.8.block_sparse_moe.experts.22.w2", "model.layers.8.block_sparse_moe.experts.23.w2", "model.layers.8.block_sparse_moe.experts.24.w2", "model.layers.8.block_sparse_moe.experts.25.w2", "model.layers.8.block_sparse_moe.experts.26.w2", "model.layers.8.block_sparse_moe.experts.27.w2", "model.layers.8.block_sparse_moe.experts.28.w2", "model.layers.8.block_sparse_moe.experts.29.w2", "model.layers.8.block_sparse_moe.experts.30.w2", "model.layers.8.block_sparse_moe.experts.31.w2", "model.layers.8.block_sparse_moe.experts.32.w2", "model.layers.8.block_sparse_moe.experts.33.w2", "model.layers.8.block_sparse_moe.experts.34.w2", "model.layers.8.block_sparse_moe.experts.35.w2", "model.layers.8.block_sparse_moe.experts.36.w2", "model.layers.8.block_sparse_moe.experts.37.w2", "model.layers.8.block_sparse_moe.experts.38.w2", "model.layers.8.block_sparse_moe.experts.39.w2", "model.layers.8.block_sparse_moe.experts.40.w2", "model.layers.8.block_sparse_moe.experts.41.w2", "model.layers.8.block_sparse_moe.experts.42.w2", "model.layers.8.block_sparse_moe.experts.43.w2", "model.layers.8.block_sparse_moe.experts.44.w2", "model.layers.8.block_sparse_moe.experts.45.w2", "model.layers.8.block_sparse_moe.experts.46.w2", "model.layers.8.block_sparse_moe.experts.47.w2", "model.layers.8.block_sparse_moe.experts.48.w2", "model.layers.8.block_sparse_moe.experts.49.w2", "model.layers.8.block_sparse_moe.experts.50.w2", "model.layers.8.block_sparse_moe.experts.51.w2", "model.layers.8.block_sparse_moe.experts.52.w2", "model.layers.8.block_sparse_moe.experts.53.w2", "model.layers.8.block_sparse_moe.experts.54.w2", "model.layers.8.block_sparse_moe.experts.55.w2", "model.layers.8.block_sparse_moe.experts.56.w2", "model.layers.8.block_sparse_moe.experts.57.w2", "model.layers.8.block_sparse_moe.experts.58.w2", "model.layers.8.block_sparse_moe.experts.59.w2", "model.layers.8.block_sparse_moe.experts.60.w2", "model.layers.8.block_sparse_moe.experts.61.w2", "model.layers.8.block_sparse_moe.experts.62.w2", "model.layers.8.block_sparse_moe.experts.63.w2", "model.layers.8.block_sparse_moe.experts.64.w2", "model.layers.8.block_sparse_moe.experts.65.w2", "model.layers.8.block_sparse_moe.experts.66.w2", "model.layers.8.block_sparse_moe.experts.67.w2", "model.layers.8.block_sparse_moe.experts.68.w2", "model.layers.8.block_sparse_moe.experts.69.w2", "model.layers.8.block_sparse_moe.experts.70.w2", "model.layers.8.block_sparse_moe.experts.71.w2", "model.layers.8.block_sparse_moe.experts.72.w2", "model.layers.8.block_sparse_moe.experts.73.w2", "model.layers.8.block_sparse_moe.experts.74.w2", "model.layers.8.block_sparse_moe.experts.75.w2", "model.layers.8.block_sparse_moe.experts.76.w2", "model.layers.8.block_sparse_moe.experts.77.w2", "model.layers.8.block_sparse_moe.experts.78.w2", "model.layers.8.block_sparse_moe.experts.79.w2", "model.layers.8.block_sparse_moe.experts.80.w2", "model.layers.8.block_sparse_moe.experts.81.w2", "model.layers.8.block_sparse_moe.experts.82.w2", "model.layers.8.block_sparse_moe.experts.83.w2", "model.layers.8.block_sparse_moe.experts.84.w2", "model.layers.8.block_sparse_moe.experts.85.w2", "model.layers.8.block_sparse_moe.experts.86.w2", "model.layers.8.block_sparse_moe.experts.87.w2", "model.layers.8.block_sparse_moe.experts.88.w2", "model.layers.8.block_sparse_moe.experts.89.w2", "model.layers.8.block_sparse_moe.experts.90.w2", "model.layers.8.block_sparse_moe.experts.91.w2", "model.layers.8.block_sparse_moe.experts.92.w2", "model.layers.8.block_sparse_moe.experts.93.w2", "model.layers.8.block_sparse_moe.experts.94.w2", "model.layers.8.block_sparse_moe.experts.95.w2", "model.layers.8.block_sparse_moe.experts.96.w2", "model.layers.8.block_sparse_moe.experts.97.w2", "model.layers.8.block_sparse_moe.experts.98.w2", "model.layers.8.block_sparse_moe.experts.99.w2", "model.layers.8.block_sparse_moe.experts.100.w2", "model.layers.8.block_sparse_moe.experts.101.w2", "model.layers.8.block_sparse_moe.experts.102.w2", "model.layers.8.block_sparse_moe.experts.103.w2", "model.layers.8.block_sparse_moe.experts.104.w2", "model.layers.8.block_sparse_moe.experts.105.w2", "model.layers.8.block_sparse_moe.experts.106.w2", "model.layers.8.block_sparse_moe.experts.107.w2", "model.layers.8.block_sparse_moe.experts.108.w2", "model.layers.8.block_sparse_moe.experts.109.w2", "model.layers.8.block_sparse_moe.experts.110.w2", "model.layers.8.block_sparse_moe.experts.111.w2", "model.layers.8.block_sparse_moe.experts.112.w2", "model.layers.8.block_sparse_moe.experts.113.w2", "model.layers.8.block_sparse_moe.experts.114.w2", "model.layers.8.block_sparse_moe.experts.115.w2", "model.layers.8.block_sparse_moe.experts.116.w2", "model.layers.8.block_sparse_moe.experts.117.w2", "model.layers.8.block_sparse_moe.experts.118.w2", "model.layers.8.block_sparse_moe.experts.119.w2", "model.layers.8.block_sparse_moe.experts.120.w2", "model.layers.8.block_sparse_moe.experts.121.w2", "model.layers.8.block_sparse_moe.experts.122.w2", "model.layers.8.block_sparse_moe.experts.123.w2", "model.layers.8.block_sparse_moe.experts.124.w2", "model.layers.8.block_sparse_moe.experts.125.w2", "model.layers.8.block_sparse_moe.experts.126.w2", "model.layers.8.block_sparse_moe.experts.127.w2", "model.layers.8.block_sparse_moe.experts.128.w2", "model.layers.8.block_sparse_moe.experts.129.w2", "model.layers.8.block_sparse_moe.experts.130.w2", "model.layers.8.block_sparse_moe.experts.131.w2", "model.layers.8.block_sparse_moe.experts.132.w2", "model.layers.8.block_sparse_moe.experts.133.w2", "model.layers.8.block_sparse_moe.experts.134.w2", "model.layers.8.block_sparse_moe.experts.135.w2", "model.layers.8.block_sparse_moe.experts.136.w2", "model.layers.8.block_sparse_moe.experts.137.w2", "model.layers.8.block_sparse_moe.experts.138.w2", "model.layers.8.block_sparse_moe.experts.139.w2", "model.layers.8.block_sparse_moe.experts.140.w2", "model.layers.8.block_sparse_moe.experts.141.w2", "model.layers.8.block_sparse_moe.experts.142.w2", "model.layers.8.block_sparse_moe.experts.143.w2", "model.layers.8.block_sparse_moe.experts.144.w2", "model.layers.8.block_sparse_moe.experts.145.w2", "model.layers.8.block_sparse_moe.experts.146.w2", "model.layers.8.block_sparse_moe.experts.147.w2", "model.layers.8.block_sparse_moe.experts.148.w2", "model.layers.8.block_sparse_moe.experts.149.w2", "model.layers.8.block_sparse_moe.experts.150.w2", "model.layers.8.block_sparse_moe.experts.151.w2", "model.layers.8.block_sparse_moe.experts.152.w2", "model.layers.8.block_sparse_moe.experts.153.w2", "model.layers.8.block_sparse_moe.experts.154.w2", "model.layers.8.block_sparse_moe.experts.155.w2", "model.layers.8.block_sparse_moe.experts.156.w2", "model.layers.8.block_sparse_moe.experts.157.w2", "model.layers.8.block_sparse_moe.experts.158.w2", "model.layers.8.block_sparse_moe.experts.159.w2", "model.layers.8.block_sparse_moe.experts.160.w2", "model.layers.8.block_sparse_moe.experts.161.w2", "model.layers.8.block_sparse_moe.experts.162.w2", "model.layers.8.block_sparse_moe.experts.163.w2", "model.layers.8.block_sparse_moe.experts.164.w2", "model.layers.8.block_sparse_moe.experts.165.w2", "model.layers.8.block_sparse_moe.experts.166.w2", "model.layers.8.block_sparse_moe.experts.167.w2", "model.layers.8.block_sparse_moe.experts.168.w2", "model.layers.8.block_sparse_moe.experts.169.w2", "model.layers.8.block_sparse_moe.experts.170.w2", "model.layers.8.block_sparse_moe.experts.171.w2", "model.layers.8.block_sparse_moe.experts.172.w2", "model.layers.8.block_sparse_moe.experts.173.w2", "model.layers.8.block_sparse_moe.experts.174.w2", "model.layers.8.block_sparse_moe.experts.175.w2", "model.layers.8.block_sparse_moe.experts.176.w2", "model.layers.8.block_sparse_moe.experts.177.w2", "model.layers.8.block_sparse_moe.experts.178.w2", "model.layers.8.block_sparse_moe.experts.179.w2", "model.layers.8.block_sparse_moe.experts.180.w2", "model.layers.8.block_sparse_moe.experts.181.w2", "model.layers.8.block_sparse_moe.experts.182.w2", "model.layers.8.block_sparse_moe.experts.183.w2", "model.layers.8.block_sparse_moe.experts.184.w2", "model.layers.8.block_sparse_moe.experts.185.w2", "model.layers.8.block_sparse_moe.experts.186.w2", "model.layers.8.block_sparse_moe.experts.187.w2", "model.layers.8.block_sparse_moe.experts.188.w2", "model.layers.8.block_sparse_moe.experts.189.w2", "model.layers.8.block_sparse_moe.experts.190.w2", "model.layers.8.block_sparse_moe.experts.191.w2", "model.layers.8.block_sparse_moe.experts.192.w2", "model.layers.8.block_sparse_moe.experts.193.w2", "model.layers.8.block_sparse_moe.experts.194.w2", "model.layers.8.block_sparse_moe.experts.195.w2", "model.layers.8.block_sparse_moe.experts.196.w2", "model.layers.8.block_sparse_moe.experts.197.w2", "model.layers.8.block_sparse_moe.experts.198.w2", "model.layers.8.block_sparse_moe.experts.199.w2", "model.layers.8.block_sparse_moe.experts.200.w2", "model.layers.8.block_sparse_moe.experts.201.w2", "model.layers.8.block_sparse_moe.experts.202.w2", "model.layers.8.block_sparse_moe.experts.203.w2", "model.layers.8.block_sparse_moe.experts.204.w2", "model.layers.8.block_sparse_moe.experts.205.w2", "model.layers.8.block_sparse_moe.experts.206.w2", "model.layers.8.block_sparse_moe.experts.207.w2", "model.layers.8.block_sparse_moe.experts.208.w2", "model.layers.8.block_sparse_moe.experts.209.w2", "model.layers.8.block_sparse_moe.experts.210.w2", "model.layers.8.block_sparse_moe.experts.211.w2", "model.layers.8.block_sparse_moe.experts.212.w2", "model.layers.8.block_sparse_moe.experts.213.w2", "model.layers.8.block_sparse_moe.experts.214.w2", "model.layers.8.block_sparse_moe.experts.215.w2", "model.layers.8.block_sparse_moe.experts.216.w2", "model.layers.8.block_sparse_moe.experts.217.w2", "model.layers.8.block_sparse_moe.experts.218.w2", "model.layers.8.block_sparse_moe.experts.219.w2", "model.layers.8.block_sparse_moe.experts.220.w2", "model.layers.8.block_sparse_moe.experts.221.w2", "model.layers.8.block_sparse_moe.experts.222.w2", "model.layers.8.block_sparse_moe.experts.223.w2", "model.layers.8.block_sparse_moe.experts.224.w2", "model.layers.8.block_sparse_moe.experts.225.w2", "model.layers.8.block_sparse_moe.experts.226.w2", "model.layers.8.block_sparse_moe.experts.227.w2", "model.layers.8.block_sparse_moe.experts.228.w2", "model.layers.8.block_sparse_moe.experts.229.w2", "model.layers.8.block_sparse_moe.experts.230.w2", "model.layers.8.block_sparse_moe.experts.231.w2", "model.layers.8.block_sparse_moe.experts.232.w2", "model.layers.8.block_sparse_moe.experts.233.w2", "model.layers.8.block_sparse_moe.experts.234.w2", "model.layers.8.block_sparse_moe.experts.235.w2", "model.layers.8.block_sparse_moe.experts.236.w2", "model.layers.8.block_sparse_moe.experts.237.w2", "model.layers.8.block_sparse_moe.experts.238.w2", "model.layers.8.block_sparse_moe.experts.239.w2", "model.layers.8.block_sparse_moe.experts.240.w2", "model.layers.8.block_sparse_moe.experts.241.w2", "model.layers.8.block_sparse_moe.experts.242.w2", "model.layers.8.block_sparse_moe.experts.243.w2", "model.layers.8.block_sparse_moe.experts.244.w2", "model.layers.8.block_sparse_moe.experts.245.w2", "model.layers.8.block_sparse_moe.experts.246.w2", "model.layers.8.block_sparse_moe.experts.247.w2", "model.layers.8.block_sparse_moe.experts.248.w2", "model.layers.8.block_sparse_moe.experts.249.w2", "model.layers.8.block_sparse_moe.experts.250.w2", "model.layers.8.block_sparse_moe.experts.251.w2", "model.layers.8.block_sparse_moe.experts.252.w2", "model.layers.8.block_sparse_moe.experts.253.w2", "model.layers.8.block_sparse_moe.experts.254.w2", "model.layers.8.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00010738577693701623, "dbits": 1207959552 } ] }, { "idx": 45, "layers": [ "model.layers.9.self_attn.q_proj" ], "candidates": [ { "dkld": -0.000342673622071743, "dbits": 18874368 } ] }, { "idx": 46, "layers": [ "model.layers.9.self_attn.k_proj", "model.layers.9.self_attn.v_proj" ], "candidates": [ { "dkld": -0.001433586888015273, "dbits": 6291456 } ] }, { "idx": 47, "layers": [ "model.layers.9.self_attn.o_proj" ], "candidates": [ { "dkld": 0.000628168508410451, "dbits": 18874368 } ] }, { "idx": 48, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w1", "model.layers.9.block_sparse_moe.experts.1.w1", "model.layers.9.block_sparse_moe.experts.2.w1", "model.layers.9.block_sparse_moe.experts.3.w1", "model.layers.9.block_sparse_moe.experts.4.w1", "model.layers.9.block_sparse_moe.experts.5.w1", "model.layers.9.block_sparse_moe.experts.6.w1", "model.layers.9.block_sparse_moe.experts.7.w1", "model.layers.9.block_sparse_moe.experts.8.w1", "model.layers.9.block_sparse_moe.experts.9.w1", "model.layers.9.block_sparse_moe.experts.10.w1", "model.layers.9.block_sparse_moe.experts.11.w1", "model.layers.9.block_sparse_moe.experts.12.w1", "model.layers.9.block_sparse_moe.experts.13.w1", "model.layers.9.block_sparse_moe.experts.14.w1", "model.layers.9.block_sparse_moe.experts.15.w1", "model.layers.9.block_sparse_moe.experts.16.w1", "model.layers.9.block_sparse_moe.experts.17.w1", "model.layers.9.block_sparse_moe.experts.18.w1", "model.layers.9.block_sparse_moe.experts.19.w1", "model.layers.9.block_sparse_moe.experts.20.w1", "model.layers.9.block_sparse_moe.experts.21.w1", "model.layers.9.block_sparse_moe.experts.22.w1", "model.layers.9.block_sparse_moe.experts.23.w1", "model.layers.9.block_sparse_moe.experts.24.w1", "model.layers.9.block_sparse_moe.experts.25.w1", "model.layers.9.block_sparse_moe.experts.26.w1", "model.layers.9.block_sparse_moe.experts.27.w1", "model.layers.9.block_sparse_moe.experts.28.w1", "model.layers.9.block_sparse_moe.experts.29.w1", "model.layers.9.block_sparse_moe.experts.30.w1", "model.layers.9.block_sparse_moe.experts.31.w1", "model.layers.9.block_sparse_moe.experts.32.w1", "model.layers.9.block_sparse_moe.experts.33.w1", "model.layers.9.block_sparse_moe.experts.34.w1", "model.layers.9.block_sparse_moe.experts.35.w1", "model.layers.9.block_sparse_moe.experts.36.w1", "model.layers.9.block_sparse_moe.experts.37.w1", "model.layers.9.block_sparse_moe.experts.38.w1", "model.layers.9.block_sparse_moe.experts.39.w1", "model.layers.9.block_sparse_moe.experts.40.w1", "model.layers.9.block_sparse_moe.experts.41.w1", "model.layers.9.block_sparse_moe.experts.42.w1", "model.layers.9.block_sparse_moe.experts.43.w1", "model.layers.9.block_sparse_moe.experts.44.w1", "model.layers.9.block_sparse_moe.experts.45.w1", "model.layers.9.block_sparse_moe.experts.46.w1", "model.layers.9.block_sparse_moe.experts.47.w1", "model.layers.9.block_sparse_moe.experts.48.w1", "model.layers.9.block_sparse_moe.experts.49.w1", "model.layers.9.block_sparse_moe.experts.50.w1", "model.layers.9.block_sparse_moe.experts.51.w1", "model.layers.9.block_sparse_moe.experts.52.w1", "model.layers.9.block_sparse_moe.experts.53.w1", "model.layers.9.block_sparse_moe.experts.54.w1", "model.layers.9.block_sparse_moe.experts.55.w1", "model.layers.9.block_sparse_moe.experts.56.w1", "model.layers.9.block_sparse_moe.experts.57.w1", "model.layers.9.block_sparse_moe.experts.58.w1", "model.layers.9.block_sparse_moe.experts.59.w1", "model.layers.9.block_sparse_moe.experts.60.w1", "model.layers.9.block_sparse_moe.experts.61.w1", "model.layers.9.block_sparse_moe.experts.62.w1", "model.layers.9.block_sparse_moe.experts.63.w1", "model.layers.9.block_sparse_moe.experts.64.w1", "model.layers.9.block_sparse_moe.experts.65.w1", "model.layers.9.block_sparse_moe.experts.66.w1", "model.layers.9.block_sparse_moe.experts.67.w1", "model.layers.9.block_sparse_moe.experts.68.w1", "model.layers.9.block_sparse_moe.experts.69.w1", "model.layers.9.block_sparse_moe.experts.70.w1", "model.layers.9.block_sparse_moe.experts.71.w1", "model.layers.9.block_sparse_moe.experts.72.w1", "model.layers.9.block_sparse_moe.experts.73.w1", "model.layers.9.block_sparse_moe.experts.74.w1", "model.layers.9.block_sparse_moe.experts.75.w1", "model.layers.9.block_sparse_moe.experts.76.w1", "model.layers.9.block_sparse_moe.experts.77.w1", "model.layers.9.block_sparse_moe.experts.78.w1", "model.layers.9.block_sparse_moe.experts.79.w1", "model.layers.9.block_sparse_moe.experts.80.w1", "model.layers.9.block_sparse_moe.experts.81.w1", "model.layers.9.block_sparse_moe.experts.82.w1", "model.layers.9.block_sparse_moe.experts.83.w1", "model.layers.9.block_sparse_moe.experts.84.w1", "model.layers.9.block_sparse_moe.experts.85.w1", "model.layers.9.block_sparse_moe.experts.86.w1", "model.layers.9.block_sparse_moe.experts.87.w1", "model.layers.9.block_sparse_moe.experts.88.w1", "model.layers.9.block_sparse_moe.experts.89.w1", "model.layers.9.block_sparse_moe.experts.90.w1", "model.layers.9.block_sparse_moe.experts.91.w1", "model.layers.9.block_sparse_moe.experts.92.w1", "model.layers.9.block_sparse_moe.experts.93.w1", "model.layers.9.block_sparse_moe.experts.94.w1", "model.layers.9.block_sparse_moe.experts.95.w1", "model.layers.9.block_sparse_moe.experts.96.w1", "model.layers.9.block_sparse_moe.experts.97.w1", "model.layers.9.block_sparse_moe.experts.98.w1", "model.layers.9.block_sparse_moe.experts.99.w1", "model.layers.9.block_sparse_moe.experts.100.w1", "model.layers.9.block_sparse_moe.experts.101.w1", "model.layers.9.block_sparse_moe.experts.102.w1", "model.layers.9.block_sparse_moe.experts.103.w1", "model.layers.9.block_sparse_moe.experts.104.w1", "model.layers.9.block_sparse_moe.experts.105.w1", "model.layers.9.block_sparse_moe.experts.106.w1", "model.layers.9.block_sparse_moe.experts.107.w1", "model.layers.9.block_sparse_moe.experts.108.w1", "model.layers.9.block_sparse_moe.experts.109.w1", "model.layers.9.block_sparse_moe.experts.110.w1", "model.layers.9.block_sparse_moe.experts.111.w1", "model.layers.9.block_sparse_moe.experts.112.w1", "model.layers.9.block_sparse_moe.experts.113.w1", "model.layers.9.block_sparse_moe.experts.114.w1", "model.layers.9.block_sparse_moe.experts.115.w1", "model.layers.9.block_sparse_moe.experts.116.w1", "model.layers.9.block_sparse_moe.experts.117.w1", "model.layers.9.block_sparse_moe.experts.118.w1", "model.layers.9.block_sparse_moe.experts.119.w1", "model.layers.9.block_sparse_moe.experts.120.w1", "model.layers.9.block_sparse_moe.experts.121.w1", "model.layers.9.block_sparse_moe.experts.122.w1", "model.layers.9.block_sparse_moe.experts.123.w1", "model.layers.9.block_sparse_moe.experts.124.w1", "model.layers.9.block_sparse_moe.experts.125.w1", "model.layers.9.block_sparse_moe.experts.126.w1", "model.layers.9.block_sparse_moe.experts.127.w1", "model.layers.9.block_sparse_moe.experts.128.w1", "model.layers.9.block_sparse_moe.experts.129.w1", "model.layers.9.block_sparse_moe.experts.130.w1", "model.layers.9.block_sparse_moe.experts.131.w1", "model.layers.9.block_sparse_moe.experts.132.w1", "model.layers.9.block_sparse_moe.experts.133.w1", "model.layers.9.block_sparse_moe.experts.134.w1", "model.layers.9.block_sparse_moe.experts.135.w1", "model.layers.9.block_sparse_moe.experts.136.w1", "model.layers.9.block_sparse_moe.experts.137.w1", "model.layers.9.block_sparse_moe.experts.138.w1", "model.layers.9.block_sparse_moe.experts.139.w1", "model.layers.9.block_sparse_moe.experts.140.w1", "model.layers.9.block_sparse_moe.experts.141.w1", "model.layers.9.block_sparse_moe.experts.142.w1", "model.layers.9.block_sparse_moe.experts.143.w1", "model.layers.9.block_sparse_moe.experts.144.w1", "model.layers.9.block_sparse_moe.experts.145.w1", "model.layers.9.block_sparse_moe.experts.146.w1", "model.layers.9.block_sparse_moe.experts.147.w1", "model.layers.9.block_sparse_moe.experts.148.w1", "model.layers.9.block_sparse_moe.experts.149.w1", "model.layers.9.block_sparse_moe.experts.150.w1", "model.layers.9.block_sparse_moe.experts.151.w1", "model.layers.9.block_sparse_moe.experts.152.w1", "model.layers.9.block_sparse_moe.experts.153.w1", "model.layers.9.block_sparse_moe.experts.154.w1", "model.layers.9.block_sparse_moe.experts.155.w1", "model.layers.9.block_sparse_moe.experts.156.w1", "model.layers.9.block_sparse_moe.experts.157.w1", "model.layers.9.block_sparse_moe.experts.158.w1", "model.layers.9.block_sparse_moe.experts.159.w1", "model.layers.9.block_sparse_moe.experts.160.w1", "model.layers.9.block_sparse_moe.experts.161.w1", "model.layers.9.block_sparse_moe.experts.162.w1", "model.layers.9.block_sparse_moe.experts.163.w1", "model.layers.9.block_sparse_moe.experts.164.w1", "model.layers.9.block_sparse_moe.experts.165.w1", "model.layers.9.block_sparse_moe.experts.166.w1", "model.layers.9.block_sparse_moe.experts.167.w1", "model.layers.9.block_sparse_moe.experts.168.w1", "model.layers.9.block_sparse_moe.experts.169.w1", "model.layers.9.block_sparse_moe.experts.170.w1", "model.layers.9.block_sparse_moe.experts.171.w1", "model.layers.9.block_sparse_moe.experts.172.w1", "model.layers.9.block_sparse_moe.experts.173.w1", "model.layers.9.block_sparse_moe.experts.174.w1", "model.layers.9.block_sparse_moe.experts.175.w1", "model.layers.9.block_sparse_moe.experts.176.w1", "model.layers.9.block_sparse_moe.experts.177.w1", "model.layers.9.block_sparse_moe.experts.178.w1", "model.layers.9.block_sparse_moe.experts.179.w1", "model.layers.9.block_sparse_moe.experts.180.w1", "model.layers.9.block_sparse_moe.experts.181.w1", "model.layers.9.block_sparse_moe.experts.182.w1", "model.layers.9.block_sparse_moe.experts.183.w1", "model.layers.9.block_sparse_moe.experts.184.w1", "model.layers.9.block_sparse_moe.experts.185.w1", "model.layers.9.block_sparse_moe.experts.186.w1", "model.layers.9.block_sparse_moe.experts.187.w1", "model.layers.9.block_sparse_moe.experts.188.w1", "model.layers.9.block_sparse_moe.experts.189.w1", "model.layers.9.block_sparse_moe.experts.190.w1", "model.layers.9.block_sparse_moe.experts.191.w1", "model.layers.9.block_sparse_moe.experts.192.w1", "model.layers.9.block_sparse_moe.experts.193.w1", "model.layers.9.block_sparse_moe.experts.194.w1", "model.layers.9.block_sparse_moe.experts.195.w1", "model.layers.9.block_sparse_moe.experts.196.w1", "model.layers.9.block_sparse_moe.experts.197.w1", "model.layers.9.block_sparse_moe.experts.198.w1", "model.layers.9.block_sparse_moe.experts.199.w1", "model.layers.9.block_sparse_moe.experts.200.w1", "model.layers.9.block_sparse_moe.experts.201.w1", "model.layers.9.block_sparse_moe.experts.202.w1", "model.layers.9.block_sparse_moe.experts.203.w1", "model.layers.9.block_sparse_moe.experts.204.w1", "model.layers.9.block_sparse_moe.experts.205.w1", "model.layers.9.block_sparse_moe.experts.206.w1", "model.layers.9.block_sparse_moe.experts.207.w1", "model.layers.9.block_sparse_moe.experts.208.w1", "model.layers.9.block_sparse_moe.experts.209.w1", "model.layers.9.block_sparse_moe.experts.210.w1", "model.layers.9.block_sparse_moe.experts.211.w1", "model.layers.9.block_sparse_moe.experts.212.w1", "model.layers.9.block_sparse_moe.experts.213.w1", "model.layers.9.block_sparse_moe.experts.214.w1", "model.layers.9.block_sparse_moe.experts.215.w1", "model.layers.9.block_sparse_moe.experts.216.w1", "model.layers.9.block_sparse_moe.experts.217.w1", "model.layers.9.block_sparse_moe.experts.218.w1", "model.layers.9.block_sparse_moe.experts.219.w1", "model.layers.9.block_sparse_moe.experts.220.w1", "model.layers.9.block_sparse_moe.experts.221.w1", "model.layers.9.block_sparse_moe.experts.222.w1", "model.layers.9.block_sparse_moe.experts.223.w1", "model.layers.9.block_sparse_moe.experts.224.w1", "model.layers.9.block_sparse_moe.experts.225.w1", "model.layers.9.block_sparse_moe.experts.226.w1", "model.layers.9.block_sparse_moe.experts.227.w1", "model.layers.9.block_sparse_moe.experts.228.w1", "model.layers.9.block_sparse_moe.experts.229.w1", "model.layers.9.block_sparse_moe.experts.230.w1", "model.layers.9.block_sparse_moe.experts.231.w1", "model.layers.9.block_sparse_moe.experts.232.w1", "model.layers.9.block_sparse_moe.experts.233.w1", "model.layers.9.block_sparse_moe.experts.234.w1", "model.layers.9.block_sparse_moe.experts.235.w1", "model.layers.9.block_sparse_moe.experts.236.w1", "model.layers.9.block_sparse_moe.experts.237.w1", "model.layers.9.block_sparse_moe.experts.238.w1", "model.layers.9.block_sparse_moe.experts.239.w1", "model.layers.9.block_sparse_moe.experts.240.w1", "model.layers.9.block_sparse_moe.experts.241.w1", "model.layers.9.block_sparse_moe.experts.242.w1", "model.layers.9.block_sparse_moe.experts.243.w1", "model.layers.9.block_sparse_moe.experts.244.w1", "model.layers.9.block_sparse_moe.experts.245.w1", "model.layers.9.block_sparse_moe.experts.246.w1", "model.layers.9.block_sparse_moe.experts.247.w1", "model.layers.9.block_sparse_moe.experts.248.w1", "model.layers.9.block_sparse_moe.experts.249.w1", "model.layers.9.block_sparse_moe.experts.250.w1", "model.layers.9.block_sparse_moe.experts.251.w1", "model.layers.9.block_sparse_moe.experts.252.w1", "model.layers.9.block_sparse_moe.experts.253.w1", "model.layers.9.block_sparse_moe.experts.254.w1", "model.layers.9.block_sparse_moe.experts.255.w1", "model.layers.9.block_sparse_moe.experts.0.w3", "model.layers.9.block_sparse_moe.experts.1.w3", "model.layers.9.block_sparse_moe.experts.2.w3", "model.layers.9.block_sparse_moe.experts.3.w3", "model.layers.9.block_sparse_moe.experts.4.w3", "model.layers.9.block_sparse_moe.experts.5.w3", "model.layers.9.block_sparse_moe.experts.6.w3", "model.layers.9.block_sparse_moe.experts.7.w3", "model.layers.9.block_sparse_moe.experts.8.w3", "model.layers.9.block_sparse_moe.experts.9.w3", "model.layers.9.block_sparse_moe.experts.10.w3", "model.layers.9.block_sparse_moe.experts.11.w3", "model.layers.9.block_sparse_moe.experts.12.w3", "model.layers.9.block_sparse_moe.experts.13.w3", "model.layers.9.block_sparse_moe.experts.14.w3", "model.layers.9.block_sparse_moe.experts.15.w3", "model.layers.9.block_sparse_moe.experts.16.w3", "model.layers.9.block_sparse_moe.experts.17.w3", "model.layers.9.block_sparse_moe.experts.18.w3", "model.layers.9.block_sparse_moe.experts.19.w3", "model.layers.9.block_sparse_moe.experts.20.w3", "model.layers.9.block_sparse_moe.experts.21.w3", "model.layers.9.block_sparse_moe.experts.22.w3", "model.layers.9.block_sparse_moe.experts.23.w3", "model.layers.9.block_sparse_moe.experts.24.w3", "model.layers.9.block_sparse_moe.experts.25.w3", "model.layers.9.block_sparse_moe.experts.26.w3", "model.layers.9.block_sparse_moe.experts.27.w3", "model.layers.9.block_sparse_moe.experts.28.w3", "model.layers.9.block_sparse_moe.experts.29.w3", "model.layers.9.block_sparse_moe.experts.30.w3", "model.layers.9.block_sparse_moe.experts.31.w3", "model.layers.9.block_sparse_moe.experts.32.w3", "model.layers.9.block_sparse_moe.experts.33.w3", "model.layers.9.block_sparse_moe.experts.34.w3", "model.layers.9.block_sparse_moe.experts.35.w3", "model.layers.9.block_sparse_moe.experts.36.w3", "model.layers.9.block_sparse_moe.experts.37.w3", "model.layers.9.block_sparse_moe.experts.38.w3", "model.layers.9.block_sparse_moe.experts.39.w3", "model.layers.9.block_sparse_moe.experts.40.w3", "model.layers.9.block_sparse_moe.experts.41.w3", "model.layers.9.block_sparse_moe.experts.42.w3", "model.layers.9.block_sparse_moe.experts.43.w3", "model.layers.9.block_sparse_moe.experts.44.w3", "model.layers.9.block_sparse_moe.experts.45.w3", "model.layers.9.block_sparse_moe.experts.46.w3", "model.layers.9.block_sparse_moe.experts.47.w3", "model.layers.9.block_sparse_moe.experts.48.w3", "model.layers.9.block_sparse_moe.experts.49.w3", "model.layers.9.block_sparse_moe.experts.50.w3", "model.layers.9.block_sparse_moe.experts.51.w3", "model.layers.9.block_sparse_moe.experts.52.w3", "model.layers.9.block_sparse_moe.experts.53.w3", "model.layers.9.block_sparse_moe.experts.54.w3", "model.layers.9.block_sparse_moe.experts.55.w3", "model.layers.9.block_sparse_moe.experts.56.w3", "model.layers.9.block_sparse_moe.experts.57.w3", "model.layers.9.block_sparse_moe.experts.58.w3", "model.layers.9.block_sparse_moe.experts.59.w3", "model.layers.9.block_sparse_moe.experts.60.w3", "model.layers.9.block_sparse_moe.experts.61.w3", "model.layers.9.block_sparse_moe.experts.62.w3", "model.layers.9.block_sparse_moe.experts.63.w3", "model.layers.9.block_sparse_moe.experts.64.w3", "model.layers.9.block_sparse_moe.experts.65.w3", "model.layers.9.block_sparse_moe.experts.66.w3", "model.layers.9.block_sparse_moe.experts.67.w3", "model.layers.9.block_sparse_moe.experts.68.w3", "model.layers.9.block_sparse_moe.experts.69.w3", "model.layers.9.block_sparse_moe.experts.70.w3", "model.layers.9.block_sparse_moe.experts.71.w3", "model.layers.9.block_sparse_moe.experts.72.w3", "model.layers.9.block_sparse_moe.experts.73.w3", "model.layers.9.block_sparse_moe.experts.74.w3", "model.layers.9.block_sparse_moe.experts.75.w3", "model.layers.9.block_sparse_moe.experts.76.w3", "model.layers.9.block_sparse_moe.experts.77.w3", "model.layers.9.block_sparse_moe.experts.78.w3", "model.layers.9.block_sparse_moe.experts.79.w3", "model.layers.9.block_sparse_moe.experts.80.w3", "model.layers.9.block_sparse_moe.experts.81.w3", "model.layers.9.block_sparse_moe.experts.82.w3", "model.layers.9.block_sparse_moe.experts.83.w3", "model.layers.9.block_sparse_moe.experts.84.w3", "model.layers.9.block_sparse_moe.experts.85.w3", "model.layers.9.block_sparse_moe.experts.86.w3", "model.layers.9.block_sparse_moe.experts.87.w3", "model.layers.9.block_sparse_moe.experts.88.w3", "model.layers.9.block_sparse_moe.experts.89.w3", "model.layers.9.block_sparse_moe.experts.90.w3", "model.layers.9.block_sparse_moe.experts.91.w3", "model.layers.9.block_sparse_moe.experts.92.w3", "model.layers.9.block_sparse_moe.experts.93.w3", "model.layers.9.block_sparse_moe.experts.94.w3", "model.layers.9.block_sparse_moe.experts.95.w3", "model.layers.9.block_sparse_moe.experts.96.w3", "model.layers.9.block_sparse_moe.experts.97.w3", "model.layers.9.block_sparse_moe.experts.98.w3", "model.layers.9.block_sparse_moe.experts.99.w3", "model.layers.9.block_sparse_moe.experts.100.w3", "model.layers.9.block_sparse_moe.experts.101.w3", "model.layers.9.block_sparse_moe.experts.102.w3", "model.layers.9.block_sparse_moe.experts.103.w3", "model.layers.9.block_sparse_moe.experts.104.w3", "model.layers.9.block_sparse_moe.experts.105.w3", "model.layers.9.block_sparse_moe.experts.106.w3", "model.layers.9.block_sparse_moe.experts.107.w3", "model.layers.9.block_sparse_moe.experts.108.w3", "model.layers.9.block_sparse_moe.experts.109.w3", "model.layers.9.block_sparse_moe.experts.110.w3", "model.layers.9.block_sparse_moe.experts.111.w3", "model.layers.9.block_sparse_moe.experts.112.w3", "model.layers.9.block_sparse_moe.experts.113.w3", "model.layers.9.block_sparse_moe.experts.114.w3", "model.layers.9.block_sparse_moe.experts.115.w3", "model.layers.9.block_sparse_moe.experts.116.w3", "model.layers.9.block_sparse_moe.experts.117.w3", "model.layers.9.block_sparse_moe.experts.118.w3", "model.layers.9.block_sparse_moe.experts.119.w3", "model.layers.9.block_sparse_moe.experts.120.w3", "model.layers.9.block_sparse_moe.experts.121.w3", "model.layers.9.block_sparse_moe.experts.122.w3", "model.layers.9.block_sparse_moe.experts.123.w3", "model.layers.9.block_sparse_moe.experts.124.w3", "model.layers.9.block_sparse_moe.experts.125.w3", "model.layers.9.block_sparse_moe.experts.126.w3", "model.layers.9.block_sparse_moe.experts.127.w3", "model.layers.9.block_sparse_moe.experts.128.w3", "model.layers.9.block_sparse_moe.experts.129.w3", "model.layers.9.block_sparse_moe.experts.130.w3", "model.layers.9.block_sparse_moe.experts.131.w3", "model.layers.9.block_sparse_moe.experts.132.w3", "model.layers.9.block_sparse_moe.experts.133.w3", "model.layers.9.block_sparse_moe.experts.134.w3", "model.layers.9.block_sparse_moe.experts.135.w3", "model.layers.9.block_sparse_moe.experts.136.w3", "model.layers.9.block_sparse_moe.experts.137.w3", "model.layers.9.block_sparse_moe.experts.138.w3", "model.layers.9.block_sparse_moe.experts.139.w3", "model.layers.9.block_sparse_moe.experts.140.w3", "model.layers.9.block_sparse_moe.experts.141.w3", "model.layers.9.block_sparse_moe.experts.142.w3", "model.layers.9.block_sparse_moe.experts.143.w3", "model.layers.9.block_sparse_moe.experts.144.w3", "model.layers.9.block_sparse_moe.experts.145.w3", "model.layers.9.block_sparse_moe.experts.146.w3", "model.layers.9.block_sparse_moe.experts.147.w3", "model.layers.9.block_sparse_moe.experts.148.w3", "model.layers.9.block_sparse_moe.experts.149.w3", "model.layers.9.block_sparse_moe.experts.150.w3", "model.layers.9.block_sparse_moe.experts.151.w3", "model.layers.9.block_sparse_moe.experts.152.w3", "model.layers.9.block_sparse_moe.experts.153.w3", "model.layers.9.block_sparse_moe.experts.154.w3", "model.layers.9.block_sparse_moe.experts.155.w3", "model.layers.9.block_sparse_moe.experts.156.w3", "model.layers.9.block_sparse_moe.experts.157.w3", "model.layers.9.block_sparse_moe.experts.158.w3", "model.layers.9.block_sparse_moe.experts.159.w3", "model.layers.9.block_sparse_moe.experts.160.w3", "model.layers.9.block_sparse_moe.experts.161.w3", "model.layers.9.block_sparse_moe.experts.162.w3", "model.layers.9.block_sparse_moe.experts.163.w3", "model.layers.9.block_sparse_moe.experts.164.w3", "model.layers.9.block_sparse_moe.experts.165.w3", "model.layers.9.block_sparse_moe.experts.166.w3", "model.layers.9.block_sparse_moe.experts.167.w3", "model.layers.9.block_sparse_moe.experts.168.w3", "model.layers.9.block_sparse_moe.experts.169.w3", "model.layers.9.block_sparse_moe.experts.170.w3", "model.layers.9.block_sparse_moe.experts.171.w3", "model.layers.9.block_sparse_moe.experts.172.w3", "model.layers.9.block_sparse_moe.experts.173.w3", "model.layers.9.block_sparse_moe.experts.174.w3", "model.layers.9.block_sparse_moe.experts.175.w3", "model.layers.9.block_sparse_moe.experts.176.w3", "model.layers.9.block_sparse_moe.experts.177.w3", "model.layers.9.block_sparse_moe.experts.178.w3", "model.layers.9.block_sparse_moe.experts.179.w3", "model.layers.9.block_sparse_moe.experts.180.w3", "model.layers.9.block_sparse_moe.experts.181.w3", "model.layers.9.block_sparse_moe.experts.182.w3", "model.layers.9.block_sparse_moe.experts.183.w3", "model.layers.9.block_sparse_moe.experts.184.w3", "model.layers.9.block_sparse_moe.experts.185.w3", "model.layers.9.block_sparse_moe.experts.186.w3", "model.layers.9.block_sparse_moe.experts.187.w3", "model.layers.9.block_sparse_moe.experts.188.w3", "model.layers.9.block_sparse_moe.experts.189.w3", "model.layers.9.block_sparse_moe.experts.190.w3", "model.layers.9.block_sparse_moe.experts.191.w3", "model.layers.9.block_sparse_moe.experts.192.w3", "model.layers.9.block_sparse_moe.experts.193.w3", "model.layers.9.block_sparse_moe.experts.194.w3", "model.layers.9.block_sparse_moe.experts.195.w3", "model.layers.9.block_sparse_moe.experts.196.w3", "model.layers.9.block_sparse_moe.experts.197.w3", "model.layers.9.block_sparse_moe.experts.198.w3", "model.layers.9.block_sparse_moe.experts.199.w3", "model.layers.9.block_sparse_moe.experts.200.w3", "model.layers.9.block_sparse_moe.experts.201.w3", "model.layers.9.block_sparse_moe.experts.202.w3", "model.layers.9.block_sparse_moe.experts.203.w3", "model.layers.9.block_sparse_moe.experts.204.w3", "model.layers.9.block_sparse_moe.experts.205.w3", "model.layers.9.block_sparse_moe.experts.206.w3", "model.layers.9.block_sparse_moe.experts.207.w3", "model.layers.9.block_sparse_moe.experts.208.w3", "model.layers.9.block_sparse_moe.experts.209.w3", "model.layers.9.block_sparse_moe.experts.210.w3", "model.layers.9.block_sparse_moe.experts.211.w3", "model.layers.9.block_sparse_moe.experts.212.w3", "model.layers.9.block_sparse_moe.experts.213.w3", "model.layers.9.block_sparse_moe.experts.214.w3", "model.layers.9.block_sparse_moe.experts.215.w3", "model.layers.9.block_sparse_moe.experts.216.w3", "model.layers.9.block_sparse_moe.experts.217.w3", "model.layers.9.block_sparse_moe.experts.218.w3", "model.layers.9.block_sparse_moe.experts.219.w3", "model.layers.9.block_sparse_moe.experts.220.w3", "model.layers.9.block_sparse_moe.experts.221.w3", "model.layers.9.block_sparse_moe.experts.222.w3", "model.layers.9.block_sparse_moe.experts.223.w3", "model.layers.9.block_sparse_moe.experts.224.w3", "model.layers.9.block_sparse_moe.experts.225.w3", "model.layers.9.block_sparse_moe.experts.226.w3", "model.layers.9.block_sparse_moe.experts.227.w3", "model.layers.9.block_sparse_moe.experts.228.w3", "model.layers.9.block_sparse_moe.experts.229.w3", "model.layers.9.block_sparse_moe.experts.230.w3", "model.layers.9.block_sparse_moe.experts.231.w3", "model.layers.9.block_sparse_moe.experts.232.w3", "model.layers.9.block_sparse_moe.experts.233.w3", "model.layers.9.block_sparse_moe.experts.234.w3", "model.layers.9.block_sparse_moe.experts.235.w3", "model.layers.9.block_sparse_moe.experts.236.w3", "model.layers.9.block_sparse_moe.experts.237.w3", "model.layers.9.block_sparse_moe.experts.238.w3", "model.layers.9.block_sparse_moe.experts.239.w3", "model.layers.9.block_sparse_moe.experts.240.w3", "model.layers.9.block_sparse_moe.experts.241.w3", "model.layers.9.block_sparse_moe.experts.242.w3", "model.layers.9.block_sparse_moe.experts.243.w3", "model.layers.9.block_sparse_moe.experts.244.w3", "model.layers.9.block_sparse_moe.experts.245.w3", "model.layers.9.block_sparse_moe.experts.246.w3", "model.layers.9.block_sparse_moe.experts.247.w3", "model.layers.9.block_sparse_moe.experts.248.w3", "model.layers.9.block_sparse_moe.experts.249.w3", "model.layers.9.block_sparse_moe.experts.250.w3", "model.layers.9.block_sparse_moe.experts.251.w3", "model.layers.9.block_sparse_moe.experts.252.w3", "model.layers.9.block_sparse_moe.experts.253.w3", "model.layers.9.block_sparse_moe.experts.254.w3", "model.layers.9.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0003153141587972641, "dbits": 2415919104 } ] }, { "idx": 49, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w2", "model.layers.9.block_sparse_moe.experts.1.w2", "model.layers.9.block_sparse_moe.experts.2.w2", "model.layers.9.block_sparse_moe.experts.3.w2", "model.layers.9.block_sparse_moe.experts.4.w2", "model.layers.9.block_sparse_moe.experts.5.w2", "model.layers.9.block_sparse_moe.experts.6.w2", "model.layers.9.block_sparse_moe.experts.7.w2", "model.layers.9.block_sparse_moe.experts.8.w2", "model.layers.9.block_sparse_moe.experts.9.w2", "model.layers.9.block_sparse_moe.experts.10.w2", "model.layers.9.block_sparse_moe.experts.11.w2", "model.layers.9.block_sparse_moe.experts.12.w2", "model.layers.9.block_sparse_moe.experts.13.w2", "model.layers.9.block_sparse_moe.experts.14.w2", "model.layers.9.block_sparse_moe.experts.15.w2", "model.layers.9.block_sparse_moe.experts.16.w2", "model.layers.9.block_sparse_moe.experts.17.w2", "model.layers.9.block_sparse_moe.experts.18.w2", "model.layers.9.block_sparse_moe.experts.19.w2", "model.layers.9.block_sparse_moe.experts.20.w2", "model.layers.9.block_sparse_moe.experts.21.w2", "model.layers.9.block_sparse_moe.experts.22.w2", "model.layers.9.block_sparse_moe.experts.23.w2", "model.layers.9.block_sparse_moe.experts.24.w2", "model.layers.9.block_sparse_moe.experts.25.w2", "model.layers.9.block_sparse_moe.experts.26.w2", "model.layers.9.block_sparse_moe.experts.27.w2", "model.layers.9.block_sparse_moe.experts.28.w2", "model.layers.9.block_sparse_moe.experts.29.w2", "model.layers.9.block_sparse_moe.experts.30.w2", "model.layers.9.block_sparse_moe.experts.31.w2", "model.layers.9.block_sparse_moe.experts.32.w2", "model.layers.9.block_sparse_moe.experts.33.w2", "model.layers.9.block_sparse_moe.experts.34.w2", "model.layers.9.block_sparse_moe.experts.35.w2", "model.layers.9.block_sparse_moe.experts.36.w2", "model.layers.9.block_sparse_moe.experts.37.w2", "model.layers.9.block_sparse_moe.experts.38.w2", "model.layers.9.block_sparse_moe.experts.39.w2", "model.layers.9.block_sparse_moe.experts.40.w2", "model.layers.9.block_sparse_moe.experts.41.w2", "model.layers.9.block_sparse_moe.experts.42.w2", "model.layers.9.block_sparse_moe.experts.43.w2", "model.layers.9.block_sparse_moe.experts.44.w2", "model.layers.9.block_sparse_moe.experts.45.w2", "model.layers.9.block_sparse_moe.experts.46.w2", "model.layers.9.block_sparse_moe.experts.47.w2", "model.layers.9.block_sparse_moe.experts.48.w2", "model.layers.9.block_sparse_moe.experts.49.w2", "model.layers.9.block_sparse_moe.experts.50.w2", "model.layers.9.block_sparse_moe.experts.51.w2", "model.layers.9.block_sparse_moe.experts.52.w2", "model.layers.9.block_sparse_moe.experts.53.w2", "model.layers.9.block_sparse_moe.experts.54.w2", "model.layers.9.block_sparse_moe.experts.55.w2", "model.layers.9.block_sparse_moe.experts.56.w2", "model.layers.9.block_sparse_moe.experts.57.w2", "model.layers.9.block_sparse_moe.experts.58.w2", "model.layers.9.block_sparse_moe.experts.59.w2", "model.layers.9.block_sparse_moe.experts.60.w2", "model.layers.9.block_sparse_moe.experts.61.w2", "model.layers.9.block_sparse_moe.experts.62.w2", "model.layers.9.block_sparse_moe.experts.63.w2", "model.layers.9.block_sparse_moe.experts.64.w2", "model.layers.9.block_sparse_moe.experts.65.w2", "model.layers.9.block_sparse_moe.experts.66.w2", "model.layers.9.block_sparse_moe.experts.67.w2", "model.layers.9.block_sparse_moe.experts.68.w2", "model.layers.9.block_sparse_moe.experts.69.w2", "model.layers.9.block_sparse_moe.experts.70.w2", "model.layers.9.block_sparse_moe.experts.71.w2", "model.layers.9.block_sparse_moe.experts.72.w2", "model.layers.9.block_sparse_moe.experts.73.w2", "model.layers.9.block_sparse_moe.experts.74.w2", "model.layers.9.block_sparse_moe.experts.75.w2", "model.layers.9.block_sparse_moe.experts.76.w2", "model.layers.9.block_sparse_moe.experts.77.w2", "model.layers.9.block_sparse_moe.experts.78.w2", "model.layers.9.block_sparse_moe.experts.79.w2", "model.layers.9.block_sparse_moe.experts.80.w2", "model.layers.9.block_sparse_moe.experts.81.w2", "model.layers.9.block_sparse_moe.experts.82.w2", "model.layers.9.block_sparse_moe.experts.83.w2", "model.layers.9.block_sparse_moe.experts.84.w2", "model.layers.9.block_sparse_moe.experts.85.w2", "model.layers.9.block_sparse_moe.experts.86.w2", "model.layers.9.block_sparse_moe.experts.87.w2", "model.layers.9.block_sparse_moe.experts.88.w2", "model.layers.9.block_sparse_moe.experts.89.w2", "model.layers.9.block_sparse_moe.experts.90.w2", "model.layers.9.block_sparse_moe.experts.91.w2", "model.layers.9.block_sparse_moe.experts.92.w2", "model.layers.9.block_sparse_moe.experts.93.w2", "model.layers.9.block_sparse_moe.experts.94.w2", "model.layers.9.block_sparse_moe.experts.95.w2", "model.layers.9.block_sparse_moe.experts.96.w2", "model.layers.9.block_sparse_moe.experts.97.w2", "model.layers.9.block_sparse_moe.experts.98.w2", "model.layers.9.block_sparse_moe.experts.99.w2", "model.layers.9.block_sparse_moe.experts.100.w2", "model.layers.9.block_sparse_moe.experts.101.w2", "model.layers.9.block_sparse_moe.experts.102.w2", "model.layers.9.block_sparse_moe.experts.103.w2", "model.layers.9.block_sparse_moe.experts.104.w2", "model.layers.9.block_sparse_moe.experts.105.w2", "model.layers.9.block_sparse_moe.experts.106.w2", "model.layers.9.block_sparse_moe.experts.107.w2", "model.layers.9.block_sparse_moe.experts.108.w2", "model.layers.9.block_sparse_moe.experts.109.w2", "model.layers.9.block_sparse_moe.experts.110.w2", "model.layers.9.block_sparse_moe.experts.111.w2", "model.layers.9.block_sparse_moe.experts.112.w2", "model.layers.9.block_sparse_moe.experts.113.w2", "model.layers.9.block_sparse_moe.experts.114.w2", "model.layers.9.block_sparse_moe.experts.115.w2", "model.layers.9.block_sparse_moe.experts.116.w2", "model.layers.9.block_sparse_moe.experts.117.w2", "model.layers.9.block_sparse_moe.experts.118.w2", "model.layers.9.block_sparse_moe.experts.119.w2", "model.layers.9.block_sparse_moe.experts.120.w2", "model.layers.9.block_sparse_moe.experts.121.w2", "model.layers.9.block_sparse_moe.experts.122.w2", "model.layers.9.block_sparse_moe.experts.123.w2", "model.layers.9.block_sparse_moe.experts.124.w2", "model.layers.9.block_sparse_moe.experts.125.w2", "model.layers.9.block_sparse_moe.experts.126.w2", "model.layers.9.block_sparse_moe.experts.127.w2", "model.layers.9.block_sparse_moe.experts.128.w2", "model.layers.9.block_sparse_moe.experts.129.w2", "model.layers.9.block_sparse_moe.experts.130.w2", "model.layers.9.block_sparse_moe.experts.131.w2", "model.layers.9.block_sparse_moe.experts.132.w2", "model.layers.9.block_sparse_moe.experts.133.w2", "model.layers.9.block_sparse_moe.experts.134.w2", "model.layers.9.block_sparse_moe.experts.135.w2", "model.layers.9.block_sparse_moe.experts.136.w2", "model.layers.9.block_sparse_moe.experts.137.w2", "model.layers.9.block_sparse_moe.experts.138.w2", "model.layers.9.block_sparse_moe.experts.139.w2", "model.layers.9.block_sparse_moe.experts.140.w2", "model.layers.9.block_sparse_moe.experts.141.w2", "model.layers.9.block_sparse_moe.experts.142.w2", "model.layers.9.block_sparse_moe.experts.143.w2", "model.layers.9.block_sparse_moe.experts.144.w2", "model.layers.9.block_sparse_moe.experts.145.w2", "model.layers.9.block_sparse_moe.experts.146.w2", "model.layers.9.block_sparse_moe.experts.147.w2", "model.layers.9.block_sparse_moe.experts.148.w2", "model.layers.9.block_sparse_moe.experts.149.w2", "model.layers.9.block_sparse_moe.experts.150.w2", "model.layers.9.block_sparse_moe.experts.151.w2", "model.layers.9.block_sparse_moe.experts.152.w2", "model.layers.9.block_sparse_moe.experts.153.w2", "model.layers.9.block_sparse_moe.experts.154.w2", "model.layers.9.block_sparse_moe.experts.155.w2", "model.layers.9.block_sparse_moe.experts.156.w2", "model.layers.9.block_sparse_moe.experts.157.w2", "model.layers.9.block_sparse_moe.experts.158.w2", "model.layers.9.block_sparse_moe.experts.159.w2", "model.layers.9.block_sparse_moe.experts.160.w2", "model.layers.9.block_sparse_moe.experts.161.w2", "model.layers.9.block_sparse_moe.experts.162.w2", "model.layers.9.block_sparse_moe.experts.163.w2", "model.layers.9.block_sparse_moe.experts.164.w2", "model.layers.9.block_sparse_moe.experts.165.w2", "model.layers.9.block_sparse_moe.experts.166.w2", "model.layers.9.block_sparse_moe.experts.167.w2", "model.layers.9.block_sparse_moe.experts.168.w2", "model.layers.9.block_sparse_moe.experts.169.w2", "model.layers.9.block_sparse_moe.experts.170.w2", "model.layers.9.block_sparse_moe.experts.171.w2", "model.layers.9.block_sparse_moe.experts.172.w2", "model.layers.9.block_sparse_moe.experts.173.w2", "model.layers.9.block_sparse_moe.experts.174.w2", "model.layers.9.block_sparse_moe.experts.175.w2", "model.layers.9.block_sparse_moe.experts.176.w2", "model.layers.9.block_sparse_moe.experts.177.w2", "model.layers.9.block_sparse_moe.experts.178.w2", "model.layers.9.block_sparse_moe.experts.179.w2", "model.layers.9.block_sparse_moe.experts.180.w2", "model.layers.9.block_sparse_moe.experts.181.w2", "model.layers.9.block_sparse_moe.experts.182.w2", "model.layers.9.block_sparse_moe.experts.183.w2", "model.layers.9.block_sparse_moe.experts.184.w2", "model.layers.9.block_sparse_moe.experts.185.w2", "model.layers.9.block_sparse_moe.experts.186.w2", "model.layers.9.block_sparse_moe.experts.187.w2", "model.layers.9.block_sparse_moe.experts.188.w2", "model.layers.9.block_sparse_moe.experts.189.w2", "model.layers.9.block_sparse_moe.experts.190.w2", "model.layers.9.block_sparse_moe.experts.191.w2", "model.layers.9.block_sparse_moe.experts.192.w2", "model.layers.9.block_sparse_moe.experts.193.w2", "model.layers.9.block_sparse_moe.experts.194.w2", "model.layers.9.block_sparse_moe.experts.195.w2", "model.layers.9.block_sparse_moe.experts.196.w2", "model.layers.9.block_sparse_moe.experts.197.w2", "model.layers.9.block_sparse_moe.experts.198.w2", "model.layers.9.block_sparse_moe.experts.199.w2", "model.layers.9.block_sparse_moe.experts.200.w2", "model.layers.9.block_sparse_moe.experts.201.w2", "model.layers.9.block_sparse_moe.experts.202.w2", "model.layers.9.block_sparse_moe.experts.203.w2", "model.layers.9.block_sparse_moe.experts.204.w2", "model.layers.9.block_sparse_moe.experts.205.w2", "model.layers.9.block_sparse_moe.experts.206.w2", "model.layers.9.block_sparse_moe.experts.207.w2", "model.layers.9.block_sparse_moe.experts.208.w2", "model.layers.9.block_sparse_moe.experts.209.w2", "model.layers.9.block_sparse_moe.experts.210.w2", "model.layers.9.block_sparse_moe.experts.211.w2", "model.layers.9.block_sparse_moe.experts.212.w2", "model.layers.9.block_sparse_moe.experts.213.w2", "model.layers.9.block_sparse_moe.experts.214.w2", "model.layers.9.block_sparse_moe.experts.215.w2", "model.layers.9.block_sparse_moe.experts.216.w2", "model.layers.9.block_sparse_moe.experts.217.w2", "model.layers.9.block_sparse_moe.experts.218.w2", "model.layers.9.block_sparse_moe.experts.219.w2", "model.layers.9.block_sparse_moe.experts.220.w2", "model.layers.9.block_sparse_moe.experts.221.w2", "model.layers.9.block_sparse_moe.experts.222.w2", "model.layers.9.block_sparse_moe.experts.223.w2", "model.layers.9.block_sparse_moe.experts.224.w2", "model.layers.9.block_sparse_moe.experts.225.w2", "model.layers.9.block_sparse_moe.experts.226.w2", "model.layers.9.block_sparse_moe.experts.227.w2", "model.layers.9.block_sparse_moe.experts.228.w2", "model.layers.9.block_sparse_moe.experts.229.w2", "model.layers.9.block_sparse_moe.experts.230.w2", "model.layers.9.block_sparse_moe.experts.231.w2", "model.layers.9.block_sparse_moe.experts.232.w2", "model.layers.9.block_sparse_moe.experts.233.w2", "model.layers.9.block_sparse_moe.experts.234.w2", "model.layers.9.block_sparse_moe.experts.235.w2", "model.layers.9.block_sparse_moe.experts.236.w2", "model.layers.9.block_sparse_moe.experts.237.w2", "model.layers.9.block_sparse_moe.experts.238.w2", "model.layers.9.block_sparse_moe.experts.239.w2", "model.layers.9.block_sparse_moe.experts.240.w2", "model.layers.9.block_sparse_moe.experts.241.w2", "model.layers.9.block_sparse_moe.experts.242.w2", "model.layers.9.block_sparse_moe.experts.243.w2", "model.layers.9.block_sparse_moe.experts.244.w2", "model.layers.9.block_sparse_moe.experts.245.w2", "model.layers.9.block_sparse_moe.experts.246.w2", "model.layers.9.block_sparse_moe.experts.247.w2", "model.layers.9.block_sparse_moe.experts.248.w2", "model.layers.9.block_sparse_moe.experts.249.w2", "model.layers.9.block_sparse_moe.experts.250.w2", "model.layers.9.block_sparse_moe.experts.251.w2", "model.layers.9.block_sparse_moe.experts.252.w2", "model.layers.9.block_sparse_moe.experts.253.w2", "model.layers.9.block_sparse_moe.experts.254.w2", "model.layers.9.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.001081021688878539, "dbits": 1207959552 } ] }, { "idx": 50, "layers": [ "model.layers.10.self_attn.q_proj" ], "candidates": [ { "dkld": -0.001166343502700326, "dbits": 18874368 } ] }, { "idx": 51, "layers": [ "model.layers.10.self_attn.k_proj", "model.layers.10.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0016832513734698212, "dbits": 6291456 } ] }, { "idx": 52, "layers": [ "model.layers.10.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0008759893476963099, "dbits": 18874368 } ] }, { "idx": 53, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w1", "model.layers.10.block_sparse_moe.experts.1.w1", "model.layers.10.block_sparse_moe.experts.2.w1", "model.layers.10.block_sparse_moe.experts.3.w1", "model.layers.10.block_sparse_moe.experts.4.w1", "model.layers.10.block_sparse_moe.experts.5.w1", "model.layers.10.block_sparse_moe.experts.6.w1", "model.layers.10.block_sparse_moe.experts.7.w1", "model.layers.10.block_sparse_moe.experts.8.w1", "model.layers.10.block_sparse_moe.experts.9.w1", "model.layers.10.block_sparse_moe.experts.10.w1", "model.layers.10.block_sparse_moe.experts.11.w1", "model.layers.10.block_sparse_moe.experts.12.w1", "model.layers.10.block_sparse_moe.experts.13.w1", "model.layers.10.block_sparse_moe.experts.14.w1", "model.layers.10.block_sparse_moe.experts.15.w1", "model.layers.10.block_sparse_moe.experts.16.w1", "model.layers.10.block_sparse_moe.experts.17.w1", "model.layers.10.block_sparse_moe.experts.18.w1", "model.layers.10.block_sparse_moe.experts.19.w1", "model.layers.10.block_sparse_moe.experts.20.w1", "model.layers.10.block_sparse_moe.experts.21.w1", "model.layers.10.block_sparse_moe.experts.22.w1", "model.layers.10.block_sparse_moe.experts.23.w1", "model.layers.10.block_sparse_moe.experts.24.w1", "model.layers.10.block_sparse_moe.experts.25.w1", "model.layers.10.block_sparse_moe.experts.26.w1", "model.layers.10.block_sparse_moe.experts.27.w1", "model.layers.10.block_sparse_moe.experts.28.w1", "model.layers.10.block_sparse_moe.experts.29.w1", "model.layers.10.block_sparse_moe.experts.30.w1", "model.layers.10.block_sparse_moe.experts.31.w1", "model.layers.10.block_sparse_moe.experts.32.w1", "model.layers.10.block_sparse_moe.experts.33.w1", "model.layers.10.block_sparse_moe.experts.34.w1", "model.layers.10.block_sparse_moe.experts.35.w1", "model.layers.10.block_sparse_moe.experts.36.w1", "model.layers.10.block_sparse_moe.experts.37.w1", "model.layers.10.block_sparse_moe.experts.38.w1", "model.layers.10.block_sparse_moe.experts.39.w1", "model.layers.10.block_sparse_moe.experts.40.w1", "model.layers.10.block_sparse_moe.experts.41.w1", "model.layers.10.block_sparse_moe.experts.42.w1", "model.layers.10.block_sparse_moe.experts.43.w1", "model.layers.10.block_sparse_moe.experts.44.w1", "model.layers.10.block_sparse_moe.experts.45.w1", "model.layers.10.block_sparse_moe.experts.46.w1", "model.layers.10.block_sparse_moe.experts.47.w1", "model.layers.10.block_sparse_moe.experts.48.w1", "model.layers.10.block_sparse_moe.experts.49.w1", "model.layers.10.block_sparse_moe.experts.50.w1", "model.layers.10.block_sparse_moe.experts.51.w1", "model.layers.10.block_sparse_moe.experts.52.w1", "model.layers.10.block_sparse_moe.experts.53.w1", "model.layers.10.block_sparse_moe.experts.54.w1", "model.layers.10.block_sparse_moe.experts.55.w1", "model.layers.10.block_sparse_moe.experts.56.w1", "model.layers.10.block_sparse_moe.experts.57.w1", "model.layers.10.block_sparse_moe.experts.58.w1", "model.layers.10.block_sparse_moe.experts.59.w1", "model.layers.10.block_sparse_moe.experts.60.w1", "model.layers.10.block_sparse_moe.experts.61.w1", "model.layers.10.block_sparse_moe.experts.62.w1", "model.layers.10.block_sparse_moe.experts.63.w1", "model.layers.10.block_sparse_moe.experts.64.w1", "model.layers.10.block_sparse_moe.experts.65.w1", "model.layers.10.block_sparse_moe.experts.66.w1", "model.layers.10.block_sparse_moe.experts.67.w1", "model.layers.10.block_sparse_moe.experts.68.w1", "model.layers.10.block_sparse_moe.experts.69.w1", "model.layers.10.block_sparse_moe.experts.70.w1", "model.layers.10.block_sparse_moe.experts.71.w1", "model.layers.10.block_sparse_moe.experts.72.w1", "model.layers.10.block_sparse_moe.experts.73.w1", "model.layers.10.block_sparse_moe.experts.74.w1", "model.layers.10.block_sparse_moe.experts.75.w1", "model.layers.10.block_sparse_moe.experts.76.w1", "model.layers.10.block_sparse_moe.experts.77.w1", "model.layers.10.block_sparse_moe.experts.78.w1", "model.layers.10.block_sparse_moe.experts.79.w1", "model.layers.10.block_sparse_moe.experts.80.w1", "model.layers.10.block_sparse_moe.experts.81.w1", "model.layers.10.block_sparse_moe.experts.82.w1", "model.layers.10.block_sparse_moe.experts.83.w1", "model.layers.10.block_sparse_moe.experts.84.w1", "model.layers.10.block_sparse_moe.experts.85.w1", "model.layers.10.block_sparse_moe.experts.86.w1", "model.layers.10.block_sparse_moe.experts.87.w1", "model.layers.10.block_sparse_moe.experts.88.w1", "model.layers.10.block_sparse_moe.experts.89.w1", "model.layers.10.block_sparse_moe.experts.90.w1", "model.layers.10.block_sparse_moe.experts.91.w1", "model.layers.10.block_sparse_moe.experts.92.w1", "model.layers.10.block_sparse_moe.experts.93.w1", "model.layers.10.block_sparse_moe.experts.94.w1", "model.layers.10.block_sparse_moe.experts.95.w1", "model.layers.10.block_sparse_moe.experts.96.w1", "model.layers.10.block_sparse_moe.experts.97.w1", "model.layers.10.block_sparse_moe.experts.98.w1", "model.layers.10.block_sparse_moe.experts.99.w1", "model.layers.10.block_sparse_moe.experts.100.w1", "model.layers.10.block_sparse_moe.experts.101.w1", "model.layers.10.block_sparse_moe.experts.102.w1", "model.layers.10.block_sparse_moe.experts.103.w1", "model.layers.10.block_sparse_moe.experts.104.w1", "model.layers.10.block_sparse_moe.experts.105.w1", "model.layers.10.block_sparse_moe.experts.106.w1", "model.layers.10.block_sparse_moe.experts.107.w1", "model.layers.10.block_sparse_moe.experts.108.w1", "model.layers.10.block_sparse_moe.experts.109.w1", "model.layers.10.block_sparse_moe.experts.110.w1", "model.layers.10.block_sparse_moe.experts.111.w1", "model.layers.10.block_sparse_moe.experts.112.w1", "model.layers.10.block_sparse_moe.experts.113.w1", "model.layers.10.block_sparse_moe.experts.114.w1", "model.layers.10.block_sparse_moe.experts.115.w1", "model.layers.10.block_sparse_moe.experts.116.w1", "model.layers.10.block_sparse_moe.experts.117.w1", "model.layers.10.block_sparse_moe.experts.118.w1", "model.layers.10.block_sparse_moe.experts.119.w1", "model.layers.10.block_sparse_moe.experts.120.w1", "model.layers.10.block_sparse_moe.experts.121.w1", "model.layers.10.block_sparse_moe.experts.122.w1", "model.layers.10.block_sparse_moe.experts.123.w1", "model.layers.10.block_sparse_moe.experts.124.w1", "model.layers.10.block_sparse_moe.experts.125.w1", "model.layers.10.block_sparse_moe.experts.126.w1", "model.layers.10.block_sparse_moe.experts.127.w1", "model.layers.10.block_sparse_moe.experts.128.w1", "model.layers.10.block_sparse_moe.experts.129.w1", "model.layers.10.block_sparse_moe.experts.130.w1", "model.layers.10.block_sparse_moe.experts.131.w1", "model.layers.10.block_sparse_moe.experts.132.w1", "model.layers.10.block_sparse_moe.experts.133.w1", "model.layers.10.block_sparse_moe.experts.134.w1", "model.layers.10.block_sparse_moe.experts.135.w1", "model.layers.10.block_sparse_moe.experts.136.w1", "model.layers.10.block_sparse_moe.experts.137.w1", "model.layers.10.block_sparse_moe.experts.138.w1", "model.layers.10.block_sparse_moe.experts.139.w1", "model.layers.10.block_sparse_moe.experts.140.w1", "model.layers.10.block_sparse_moe.experts.141.w1", "model.layers.10.block_sparse_moe.experts.142.w1", "model.layers.10.block_sparse_moe.experts.143.w1", "model.layers.10.block_sparse_moe.experts.144.w1", "model.layers.10.block_sparse_moe.experts.145.w1", "model.layers.10.block_sparse_moe.experts.146.w1", "model.layers.10.block_sparse_moe.experts.147.w1", "model.layers.10.block_sparse_moe.experts.148.w1", "model.layers.10.block_sparse_moe.experts.149.w1", "model.layers.10.block_sparse_moe.experts.150.w1", "model.layers.10.block_sparse_moe.experts.151.w1", "model.layers.10.block_sparse_moe.experts.152.w1", "model.layers.10.block_sparse_moe.experts.153.w1", "model.layers.10.block_sparse_moe.experts.154.w1", "model.layers.10.block_sparse_moe.experts.155.w1", "model.layers.10.block_sparse_moe.experts.156.w1", "model.layers.10.block_sparse_moe.experts.157.w1", "model.layers.10.block_sparse_moe.experts.158.w1", "model.layers.10.block_sparse_moe.experts.159.w1", "model.layers.10.block_sparse_moe.experts.160.w1", "model.layers.10.block_sparse_moe.experts.161.w1", "model.layers.10.block_sparse_moe.experts.162.w1", "model.layers.10.block_sparse_moe.experts.163.w1", "model.layers.10.block_sparse_moe.experts.164.w1", "model.layers.10.block_sparse_moe.experts.165.w1", "model.layers.10.block_sparse_moe.experts.166.w1", "model.layers.10.block_sparse_moe.experts.167.w1", "model.layers.10.block_sparse_moe.experts.168.w1", "model.layers.10.block_sparse_moe.experts.169.w1", "model.layers.10.block_sparse_moe.experts.170.w1", "model.layers.10.block_sparse_moe.experts.171.w1", "model.layers.10.block_sparse_moe.experts.172.w1", "model.layers.10.block_sparse_moe.experts.173.w1", "model.layers.10.block_sparse_moe.experts.174.w1", "model.layers.10.block_sparse_moe.experts.175.w1", "model.layers.10.block_sparse_moe.experts.176.w1", "model.layers.10.block_sparse_moe.experts.177.w1", "model.layers.10.block_sparse_moe.experts.178.w1", "model.layers.10.block_sparse_moe.experts.179.w1", "model.layers.10.block_sparse_moe.experts.180.w1", "model.layers.10.block_sparse_moe.experts.181.w1", "model.layers.10.block_sparse_moe.experts.182.w1", "model.layers.10.block_sparse_moe.experts.183.w1", "model.layers.10.block_sparse_moe.experts.184.w1", "model.layers.10.block_sparse_moe.experts.185.w1", "model.layers.10.block_sparse_moe.experts.186.w1", "model.layers.10.block_sparse_moe.experts.187.w1", "model.layers.10.block_sparse_moe.experts.188.w1", "model.layers.10.block_sparse_moe.experts.189.w1", "model.layers.10.block_sparse_moe.experts.190.w1", "model.layers.10.block_sparse_moe.experts.191.w1", "model.layers.10.block_sparse_moe.experts.192.w1", "model.layers.10.block_sparse_moe.experts.193.w1", "model.layers.10.block_sparse_moe.experts.194.w1", "model.layers.10.block_sparse_moe.experts.195.w1", "model.layers.10.block_sparse_moe.experts.196.w1", "model.layers.10.block_sparse_moe.experts.197.w1", "model.layers.10.block_sparse_moe.experts.198.w1", "model.layers.10.block_sparse_moe.experts.199.w1", "model.layers.10.block_sparse_moe.experts.200.w1", "model.layers.10.block_sparse_moe.experts.201.w1", "model.layers.10.block_sparse_moe.experts.202.w1", "model.layers.10.block_sparse_moe.experts.203.w1", "model.layers.10.block_sparse_moe.experts.204.w1", "model.layers.10.block_sparse_moe.experts.205.w1", "model.layers.10.block_sparse_moe.experts.206.w1", "model.layers.10.block_sparse_moe.experts.207.w1", "model.layers.10.block_sparse_moe.experts.208.w1", "model.layers.10.block_sparse_moe.experts.209.w1", "model.layers.10.block_sparse_moe.experts.210.w1", "model.layers.10.block_sparse_moe.experts.211.w1", "model.layers.10.block_sparse_moe.experts.212.w1", "model.layers.10.block_sparse_moe.experts.213.w1", "model.layers.10.block_sparse_moe.experts.214.w1", "model.layers.10.block_sparse_moe.experts.215.w1", "model.layers.10.block_sparse_moe.experts.216.w1", "model.layers.10.block_sparse_moe.experts.217.w1", "model.layers.10.block_sparse_moe.experts.218.w1", "model.layers.10.block_sparse_moe.experts.219.w1", "model.layers.10.block_sparse_moe.experts.220.w1", "model.layers.10.block_sparse_moe.experts.221.w1", "model.layers.10.block_sparse_moe.experts.222.w1", "model.layers.10.block_sparse_moe.experts.223.w1", "model.layers.10.block_sparse_moe.experts.224.w1", "model.layers.10.block_sparse_moe.experts.225.w1", "model.layers.10.block_sparse_moe.experts.226.w1", "model.layers.10.block_sparse_moe.experts.227.w1", "model.layers.10.block_sparse_moe.experts.228.w1", "model.layers.10.block_sparse_moe.experts.229.w1", "model.layers.10.block_sparse_moe.experts.230.w1", "model.layers.10.block_sparse_moe.experts.231.w1", "model.layers.10.block_sparse_moe.experts.232.w1", "model.layers.10.block_sparse_moe.experts.233.w1", "model.layers.10.block_sparse_moe.experts.234.w1", "model.layers.10.block_sparse_moe.experts.235.w1", "model.layers.10.block_sparse_moe.experts.236.w1", "model.layers.10.block_sparse_moe.experts.237.w1", "model.layers.10.block_sparse_moe.experts.238.w1", "model.layers.10.block_sparse_moe.experts.239.w1", "model.layers.10.block_sparse_moe.experts.240.w1", "model.layers.10.block_sparse_moe.experts.241.w1", "model.layers.10.block_sparse_moe.experts.242.w1", "model.layers.10.block_sparse_moe.experts.243.w1", "model.layers.10.block_sparse_moe.experts.244.w1", "model.layers.10.block_sparse_moe.experts.245.w1", "model.layers.10.block_sparse_moe.experts.246.w1", "model.layers.10.block_sparse_moe.experts.247.w1", "model.layers.10.block_sparse_moe.experts.248.w1", "model.layers.10.block_sparse_moe.experts.249.w1", "model.layers.10.block_sparse_moe.experts.250.w1", "model.layers.10.block_sparse_moe.experts.251.w1", "model.layers.10.block_sparse_moe.experts.252.w1", "model.layers.10.block_sparse_moe.experts.253.w1", "model.layers.10.block_sparse_moe.experts.254.w1", "model.layers.10.block_sparse_moe.experts.255.w1", "model.layers.10.block_sparse_moe.experts.0.w3", "model.layers.10.block_sparse_moe.experts.1.w3", "model.layers.10.block_sparse_moe.experts.2.w3", "model.layers.10.block_sparse_moe.experts.3.w3", "model.layers.10.block_sparse_moe.experts.4.w3", "model.layers.10.block_sparse_moe.experts.5.w3", "model.layers.10.block_sparse_moe.experts.6.w3", "model.layers.10.block_sparse_moe.experts.7.w3", "model.layers.10.block_sparse_moe.experts.8.w3", "model.layers.10.block_sparse_moe.experts.9.w3", "model.layers.10.block_sparse_moe.experts.10.w3", "model.layers.10.block_sparse_moe.experts.11.w3", "model.layers.10.block_sparse_moe.experts.12.w3", "model.layers.10.block_sparse_moe.experts.13.w3", "model.layers.10.block_sparse_moe.experts.14.w3", "model.layers.10.block_sparse_moe.experts.15.w3", "model.layers.10.block_sparse_moe.experts.16.w3", "model.layers.10.block_sparse_moe.experts.17.w3", "model.layers.10.block_sparse_moe.experts.18.w3", "model.layers.10.block_sparse_moe.experts.19.w3", "model.layers.10.block_sparse_moe.experts.20.w3", "model.layers.10.block_sparse_moe.experts.21.w3", "model.layers.10.block_sparse_moe.experts.22.w3", "model.layers.10.block_sparse_moe.experts.23.w3", "model.layers.10.block_sparse_moe.experts.24.w3", "model.layers.10.block_sparse_moe.experts.25.w3", "model.layers.10.block_sparse_moe.experts.26.w3", "model.layers.10.block_sparse_moe.experts.27.w3", "model.layers.10.block_sparse_moe.experts.28.w3", "model.layers.10.block_sparse_moe.experts.29.w3", "model.layers.10.block_sparse_moe.experts.30.w3", "model.layers.10.block_sparse_moe.experts.31.w3", "model.layers.10.block_sparse_moe.experts.32.w3", "model.layers.10.block_sparse_moe.experts.33.w3", "model.layers.10.block_sparse_moe.experts.34.w3", "model.layers.10.block_sparse_moe.experts.35.w3", "model.layers.10.block_sparse_moe.experts.36.w3", "model.layers.10.block_sparse_moe.experts.37.w3", "model.layers.10.block_sparse_moe.experts.38.w3", "model.layers.10.block_sparse_moe.experts.39.w3", "model.layers.10.block_sparse_moe.experts.40.w3", "model.layers.10.block_sparse_moe.experts.41.w3", "model.layers.10.block_sparse_moe.experts.42.w3", "model.layers.10.block_sparse_moe.experts.43.w3", "model.layers.10.block_sparse_moe.experts.44.w3", "model.layers.10.block_sparse_moe.experts.45.w3", "model.layers.10.block_sparse_moe.experts.46.w3", "model.layers.10.block_sparse_moe.experts.47.w3", "model.layers.10.block_sparse_moe.experts.48.w3", "model.layers.10.block_sparse_moe.experts.49.w3", "model.layers.10.block_sparse_moe.experts.50.w3", "model.layers.10.block_sparse_moe.experts.51.w3", "model.layers.10.block_sparse_moe.experts.52.w3", "model.layers.10.block_sparse_moe.experts.53.w3", "model.layers.10.block_sparse_moe.experts.54.w3", "model.layers.10.block_sparse_moe.experts.55.w3", "model.layers.10.block_sparse_moe.experts.56.w3", "model.layers.10.block_sparse_moe.experts.57.w3", "model.layers.10.block_sparse_moe.experts.58.w3", "model.layers.10.block_sparse_moe.experts.59.w3", "model.layers.10.block_sparse_moe.experts.60.w3", "model.layers.10.block_sparse_moe.experts.61.w3", "model.layers.10.block_sparse_moe.experts.62.w3", "model.layers.10.block_sparse_moe.experts.63.w3", "model.layers.10.block_sparse_moe.experts.64.w3", "model.layers.10.block_sparse_moe.experts.65.w3", "model.layers.10.block_sparse_moe.experts.66.w3", "model.layers.10.block_sparse_moe.experts.67.w3", "model.layers.10.block_sparse_moe.experts.68.w3", "model.layers.10.block_sparse_moe.experts.69.w3", "model.layers.10.block_sparse_moe.experts.70.w3", "model.layers.10.block_sparse_moe.experts.71.w3", "model.layers.10.block_sparse_moe.experts.72.w3", "model.layers.10.block_sparse_moe.experts.73.w3", "model.layers.10.block_sparse_moe.experts.74.w3", "model.layers.10.block_sparse_moe.experts.75.w3", "model.layers.10.block_sparse_moe.experts.76.w3", "model.layers.10.block_sparse_moe.experts.77.w3", "model.layers.10.block_sparse_moe.experts.78.w3", "model.layers.10.block_sparse_moe.experts.79.w3", "model.layers.10.block_sparse_moe.experts.80.w3", "model.layers.10.block_sparse_moe.experts.81.w3", "model.layers.10.block_sparse_moe.experts.82.w3", "model.layers.10.block_sparse_moe.experts.83.w3", "model.layers.10.block_sparse_moe.experts.84.w3", "model.layers.10.block_sparse_moe.experts.85.w3", "model.layers.10.block_sparse_moe.experts.86.w3", "model.layers.10.block_sparse_moe.experts.87.w3", "model.layers.10.block_sparse_moe.experts.88.w3", "model.layers.10.block_sparse_moe.experts.89.w3", "model.layers.10.block_sparse_moe.experts.90.w3", "model.layers.10.block_sparse_moe.experts.91.w3", "model.layers.10.block_sparse_moe.experts.92.w3", "model.layers.10.block_sparse_moe.experts.93.w3", "model.layers.10.block_sparse_moe.experts.94.w3", "model.layers.10.block_sparse_moe.experts.95.w3", "model.layers.10.block_sparse_moe.experts.96.w3", "model.layers.10.block_sparse_moe.experts.97.w3", "model.layers.10.block_sparse_moe.experts.98.w3", "model.layers.10.block_sparse_moe.experts.99.w3", "model.layers.10.block_sparse_moe.experts.100.w3", "model.layers.10.block_sparse_moe.experts.101.w3", "model.layers.10.block_sparse_moe.experts.102.w3", "model.layers.10.block_sparse_moe.experts.103.w3", "model.layers.10.block_sparse_moe.experts.104.w3", "model.layers.10.block_sparse_moe.experts.105.w3", "model.layers.10.block_sparse_moe.experts.106.w3", "model.layers.10.block_sparse_moe.experts.107.w3", "model.layers.10.block_sparse_moe.experts.108.w3", "model.layers.10.block_sparse_moe.experts.109.w3", "model.layers.10.block_sparse_moe.experts.110.w3", "model.layers.10.block_sparse_moe.experts.111.w3", "model.layers.10.block_sparse_moe.experts.112.w3", "model.layers.10.block_sparse_moe.experts.113.w3", "model.layers.10.block_sparse_moe.experts.114.w3", "model.layers.10.block_sparse_moe.experts.115.w3", "model.layers.10.block_sparse_moe.experts.116.w3", "model.layers.10.block_sparse_moe.experts.117.w3", "model.layers.10.block_sparse_moe.experts.118.w3", "model.layers.10.block_sparse_moe.experts.119.w3", "model.layers.10.block_sparse_moe.experts.120.w3", "model.layers.10.block_sparse_moe.experts.121.w3", "model.layers.10.block_sparse_moe.experts.122.w3", "model.layers.10.block_sparse_moe.experts.123.w3", "model.layers.10.block_sparse_moe.experts.124.w3", "model.layers.10.block_sparse_moe.experts.125.w3", "model.layers.10.block_sparse_moe.experts.126.w3", "model.layers.10.block_sparse_moe.experts.127.w3", "model.layers.10.block_sparse_moe.experts.128.w3", "model.layers.10.block_sparse_moe.experts.129.w3", "model.layers.10.block_sparse_moe.experts.130.w3", "model.layers.10.block_sparse_moe.experts.131.w3", "model.layers.10.block_sparse_moe.experts.132.w3", "model.layers.10.block_sparse_moe.experts.133.w3", "model.layers.10.block_sparse_moe.experts.134.w3", "model.layers.10.block_sparse_moe.experts.135.w3", "model.layers.10.block_sparse_moe.experts.136.w3", "model.layers.10.block_sparse_moe.experts.137.w3", "model.layers.10.block_sparse_moe.experts.138.w3", "model.layers.10.block_sparse_moe.experts.139.w3", "model.layers.10.block_sparse_moe.experts.140.w3", "model.layers.10.block_sparse_moe.experts.141.w3", "model.layers.10.block_sparse_moe.experts.142.w3", "model.layers.10.block_sparse_moe.experts.143.w3", "model.layers.10.block_sparse_moe.experts.144.w3", "model.layers.10.block_sparse_moe.experts.145.w3", "model.layers.10.block_sparse_moe.experts.146.w3", "model.layers.10.block_sparse_moe.experts.147.w3", "model.layers.10.block_sparse_moe.experts.148.w3", "model.layers.10.block_sparse_moe.experts.149.w3", "model.layers.10.block_sparse_moe.experts.150.w3", "model.layers.10.block_sparse_moe.experts.151.w3", "model.layers.10.block_sparse_moe.experts.152.w3", "model.layers.10.block_sparse_moe.experts.153.w3", "model.layers.10.block_sparse_moe.experts.154.w3", "model.layers.10.block_sparse_moe.experts.155.w3", "model.layers.10.block_sparse_moe.experts.156.w3", "model.layers.10.block_sparse_moe.experts.157.w3", "model.layers.10.block_sparse_moe.experts.158.w3", "model.layers.10.block_sparse_moe.experts.159.w3", "model.layers.10.block_sparse_moe.experts.160.w3", "model.layers.10.block_sparse_moe.experts.161.w3", "model.layers.10.block_sparse_moe.experts.162.w3", "model.layers.10.block_sparse_moe.experts.163.w3", "model.layers.10.block_sparse_moe.experts.164.w3", "model.layers.10.block_sparse_moe.experts.165.w3", "model.layers.10.block_sparse_moe.experts.166.w3", "model.layers.10.block_sparse_moe.experts.167.w3", "model.layers.10.block_sparse_moe.experts.168.w3", "model.layers.10.block_sparse_moe.experts.169.w3", "model.layers.10.block_sparse_moe.experts.170.w3", "model.layers.10.block_sparse_moe.experts.171.w3", "model.layers.10.block_sparse_moe.experts.172.w3", "model.layers.10.block_sparse_moe.experts.173.w3", "model.layers.10.block_sparse_moe.experts.174.w3", "model.layers.10.block_sparse_moe.experts.175.w3", "model.layers.10.block_sparse_moe.experts.176.w3", "model.layers.10.block_sparse_moe.experts.177.w3", "model.layers.10.block_sparse_moe.experts.178.w3", "model.layers.10.block_sparse_moe.experts.179.w3", "model.layers.10.block_sparse_moe.experts.180.w3", "model.layers.10.block_sparse_moe.experts.181.w3", "model.layers.10.block_sparse_moe.experts.182.w3", "model.layers.10.block_sparse_moe.experts.183.w3", "model.layers.10.block_sparse_moe.experts.184.w3", "model.layers.10.block_sparse_moe.experts.185.w3", "model.layers.10.block_sparse_moe.experts.186.w3", "model.layers.10.block_sparse_moe.experts.187.w3", "model.layers.10.block_sparse_moe.experts.188.w3", "model.layers.10.block_sparse_moe.experts.189.w3", "model.layers.10.block_sparse_moe.experts.190.w3", "model.layers.10.block_sparse_moe.experts.191.w3", "model.layers.10.block_sparse_moe.experts.192.w3", "model.layers.10.block_sparse_moe.experts.193.w3", "model.layers.10.block_sparse_moe.experts.194.w3", "model.layers.10.block_sparse_moe.experts.195.w3", "model.layers.10.block_sparse_moe.experts.196.w3", "model.layers.10.block_sparse_moe.experts.197.w3", "model.layers.10.block_sparse_moe.experts.198.w3", "model.layers.10.block_sparse_moe.experts.199.w3", "model.layers.10.block_sparse_moe.experts.200.w3", "model.layers.10.block_sparse_moe.experts.201.w3", "model.layers.10.block_sparse_moe.experts.202.w3", "model.layers.10.block_sparse_moe.experts.203.w3", "model.layers.10.block_sparse_moe.experts.204.w3", "model.layers.10.block_sparse_moe.experts.205.w3", "model.layers.10.block_sparse_moe.experts.206.w3", "model.layers.10.block_sparse_moe.experts.207.w3", "model.layers.10.block_sparse_moe.experts.208.w3", "model.layers.10.block_sparse_moe.experts.209.w3", "model.layers.10.block_sparse_moe.experts.210.w3", "model.layers.10.block_sparse_moe.experts.211.w3", "model.layers.10.block_sparse_moe.experts.212.w3", "model.layers.10.block_sparse_moe.experts.213.w3", "model.layers.10.block_sparse_moe.experts.214.w3", "model.layers.10.block_sparse_moe.experts.215.w3", "model.layers.10.block_sparse_moe.experts.216.w3", "model.layers.10.block_sparse_moe.experts.217.w3", "model.layers.10.block_sparse_moe.experts.218.w3", "model.layers.10.block_sparse_moe.experts.219.w3", "model.layers.10.block_sparse_moe.experts.220.w3", "model.layers.10.block_sparse_moe.experts.221.w3", "model.layers.10.block_sparse_moe.experts.222.w3", "model.layers.10.block_sparse_moe.experts.223.w3", "model.layers.10.block_sparse_moe.experts.224.w3", "model.layers.10.block_sparse_moe.experts.225.w3", "model.layers.10.block_sparse_moe.experts.226.w3", "model.layers.10.block_sparse_moe.experts.227.w3", "model.layers.10.block_sparse_moe.experts.228.w3", "model.layers.10.block_sparse_moe.experts.229.w3", "model.layers.10.block_sparse_moe.experts.230.w3", "model.layers.10.block_sparse_moe.experts.231.w3", "model.layers.10.block_sparse_moe.experts.232.w3", "model.layers.10.block_sparse_moe.experts.233.w3", "model.layers.10.block_sparse_moe.experts.234.w3", "model.layers.10.block_sparse_moe.experts.235.w3", "model.layers.10.block_sparse_moe.experts.236.w3", "model.layers.10.block_sparse_moe.experts.237.w3", "model.layers.10.block_sparse_moe.experts.238.w3", "model.layers.10.block_sparse_moe.experts.239.w3", "model.layers.10.block_sparse_moe.experts.240.w3", "model.layers.10.block_sparse_moe.experts.241.w3", "model.layers.10.block_sparse_moe.experts.242.w3", "model.layers.10.block_sparse_moe.experts.243.w3", "model.layers.10.block_sparse_moe.experts.244.w3", "model.layers.10.block_sparse_moe.experts.245.w3", "model.layers.10.block_sparse_moe.experts.246.w3", "model.layers.10.block_sparse_moe.experts.247.w3", "model.layers.10.block_sparse_moe.experts.248.w3", "model.layers.10.block_sparse_moe.experts.249.w3", "model.layers.10.block_sparse_moe.experts.250.w3", "model.layers.10.block_sparse_moe.experts.251.w3", "model.layers.10.block_sparse_moe.experts.252.w3", "model.layers.10.block_sparse_moe.experts.253.w3", "model.layers.10.block_sparse_moe.experts.254.w3", "model.layers.10.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0005146669223904665, "dbits": 2415919104 } ] }, { "idx": 54, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w2", "model.layers.10.block_sparse_moe.experts.1.w2", "model.layers.10.block_sparse_moe.experts.2.w2", "model.layers.10.block_sparse_moe.experts.3.w2", "model.layers.10.block_sparse_moe.experts.4.w2", "model.layers.10.block_sparse_moe.experts.5.w2", "model.layers.10.block_sparse_moe.experts.6.w2", "model.layers.10.block_sparse_moe.experts.7.w2", "model.layers.10.block_sparse_moe.experts.8.w2", "model.layers.10.block_sparse_moe.experts.9.w2", "model.layers.10.block_sparse_moe.experts.10.w2", "model.layers.10.block_sparse_moe.experts.11.w2", "model.layers.10.block_sparse_moe.experts.12.w2", "model.layers.10.block_sparse_moe.experts.13.w2", "model.layers.10.block_sparse_moe.experts.14.w2", "model.layers.10.block_sparse_moe.experts.15.w2", "model.layers.10.block_sparse_moe.experts.16.w2", "model.layers.10.block_sparse_moe.experts.17.w2", "model.layers.10.block_sparse_moe.experts.18.w2", "model.layers.10.block_sparse_moe.experts.19.w2", "model.layers.10.block_sparse_moe.experts.20.w2", "model.layers.10.block_sparse_moe.experts.21.w2", "model.layers.10.block_sparse_moe.experts.22.w2", "model.layers.10.block_sparse_moe.experts.23.w2", "model.layers.10.block_sparse_moe.experts.24.w2", "model.layers.10.block_sparse_moe.experts.25.w2", "model.layers.10.block_sparse_moe.experts.26.w2", "model.layers.10.block_sparse_moe.experts.27.w2", "model.layers.10.block_sparse_moe.experts.28.w2", "model.layers.10.block_sparse_moe.experts.29.w2", "model.layers.10.block_sparse_moe.experts.30.w2", "model.layers.10.block_sparse_moe.experts.31.w2", "model.layers.10.block_sparse_moe.experts.32.w2", "model.layers.10.block_sparse_moe.experts.33.w2", "model.layers.10.block_sparse_moe.experts.34.w2", "model.layers.10.block_sparse_moe.experts.35.w2", "model.layers.10.block_sparse_moe.experts.36.w2", "model.layers.10.block_sparse_moe.experts.37.w2", "model.layers.10.block_sparse_moe.experts.38.w2", "model.layers.10.block_sparse_moe.experts.39.w2", "model.layers.10.block_sparse_moe.experts.40.w2", "model.layers.10.block_sparse_moe.experts.41.w2", "model.layers.10.block_sparse_moe.experts.42.w2", "model.layers.10.block_sparse_moe.experts.43.w2", "model.layers.10.block_sparse_moe.experts.44.w2", "model.layers.10.block_sparse_moe.experts.45.w2", "model.layers.10.block_sparse_moe.experts.46.w2", "model.layers.10.block_sparse_moe.experts.47.w2", "model.layers.10.block_sparse_moe.experts.48.w2", "model.layers.10.block_sparse_moe.experts.49.w2", "model.layers.10.block_sparse_moe.experts.50.w2", "model.layers.10.block_sparse_moe.experts.51.w2", "model.layers.10.block_sparse_moe.experts.52.w2", "model.layers.10.block_sparse_moe.experts.53.w2", "model.layers.10.block_sparse_moe.experts.54.w2", "model.layers.10.block_sparse_moe.experts.55.w2", "model.layers.10.block_sparse_moe.experts.56.w2", "model.layers.10.block_sparse_moe.experts.57.w2", "model.layers.10.block_sparse_moe.experts.58.w2", "model.layers.10.block_sparse_moe.experts.59.w2", "model.layers.10.block_sparse_moe.experts.60.w2", "model.layers.10.block_sparse_moe.experts.61.w2", "model.layers.10.block_sparse_moe.experts.62.w2", "model.layers.10.block_sparse_moe.experts.63.w2", "model.layers.10.block_sparse_moe.experts.64.w2", "model.layers.10.block_sparse_moe.experts.65.w2", "model.layers.10.block_sparse_moe.experts.66.w2", "model.layers.10.block_sparse_moe.experts.67.w2", "model.layers.10.block_sparse_moe.experts.68.w2", "model.layers.10.block_sparse_moe.experts.69.w2", "model.layers.10.block_sparse_moe.experts.70.w2", "model.layers.10.block_sparse_moe.experts.71.w2", "model.layers.10.block_sparse_moe.experts.72.w2", "model.layers.10.block_sparse_moe.experts.73.w2", "model.layers.10.block_sparse_moe.experts.74.w2", "model.layers.10.block_sparse_moe.experts.75.w2", "model.layers.10.block_sparse_moe.experts.76.w2", "model.layers.10.block_sparse_moe.experts.77.w2", "model.layers.10.block_sparse_moe.experts.78.w2", "model.layers.10.block_sparse_moe.experts.79.w2", "model.layers.10.block_sparse_moe.experts.80.w2", "model.layers.10.block_sparse_moe.experts.81.w2", "model.layers.10.block_sparse_moe.experts.82.w2", "model.layers.10.block_sparse_moe.experts.83.w2", "model.layers.10.block_sparse_moe.experts.84.w2", "model.layers.10.block_sparse_moe.experts.85.w2", "model.layers.10.block_sparse_moe.experts.86.w2", "model.layers.10.block_sparse_moe.experts.87.w2", "model.layers.10.block_sparse_moe.experts.88.w2", "model.layers.10.block_sparse_moe.experts.89.w2", "model.layers.10.block_sparse_moe.experts.90.w2", "model.layers.10.block_sparse_moe.experts.91.w2", "model.layers.10.block_sparse_moe.experts.92.w2", "model.layers.10.block_sparse_moe.experts.93.w2", "model.layers.10.block_sparse_moe.experts.94.w2", "model.layers.10.block_sparse_moe.experts.95.w2", "model.layers.10.block_sparse_moe.experts.96.w2", "model.layers.10.block_sparse_moe.experts.97.w2", "model.layers.10.block_sparse_moe.experts.98.w2", "model.layers.10.block_sparse_moe.experts.99.w2", "model.layers.10.block_sparse_moe.experts.100.w2", "model.layers.10.block_sparse_moe.experts.101.w2", "model.layers.10.block_sparse_moe.experts.102.w2", "model.layers.10.block_sparse_moe.experts.103.w2", "model.layers.10.block_sparse_moe.experts.104.w2", "model.layers.10.block_sparse_moe.experts.105.w2", "model.layers.10.block_sparse_moe.experts.106.w2", "model.layers.10.block_sparse_moe.experts.107.w2", "model.layers.10.block_sparse_moe.experts.108.w2", "model.layers.10.block_sparse_moe.experts.109.w2", "model.layers.10.block_sparse_moe.experts.110.w2", "model.layers.10.block_sparse_moe.experts.111.w2", "model.layers.10.block_sparse_moe.experts.112.w2", "model.layers.10.block_sparse_moe.experts.113.w2", "model.layers.10.block_sparse_moe.experts.114.w2", "model.layers.10.block_sparse_moe.experts.115.w2", "model.layers.10.block_sparse_moe.experts.116.w2", "model.layers.10.block_sparse_moe.experts.117.w2", "model.layers.10.block_sparse_moe.experts.118.w2", "model.layers.10.block_sparse_moe.experts.119.w2", "model.layers.10.block_sparse_moe.experts.120.w2", "model.layers.10.block_sparse_moe.experts.121.w2", "model.layers.10.block_sparse_moe.experts.122.w2", "model.layers.10.block_sparse_moe.experts.123.w2", "model.layers.10.block_sparse_moe.experts.124.w2", "model.layers.10.block_sparse_moe.experts.125.w2", "model.layers.10.block_sparse_moe.experts.126.w2", "model.layers.10.block_sparse_moe.experts.127.w2", "model.layers.10.block_sparse_moe.experts.128.w2", "model.layers.10.block_sparse_moe.experts.129.w2", "model.layers.10.block_sparse_moe.experts.130.w2", "model.layers.10.block_sparse_moe.experts.131.w2", "model.layers.10.block_sparse_moe.experts.132.w2", "model.layers.10.block_sparse_moe.experts.133.w2", "model.layers.10.block_sparse_moe.experts.134.w2", "model.layers.10.block_sparse_moe.experts.135.w2", "model.layers.10.block_sparse_moe.experts.136.w2", "model.layers.10.block_sparse_moe.experts.137.w2", "model.layers.10.block_sparse_moe.experts.138.w2", "model.layers.10.block_sparse_moe.experts.139.w2", "model.layers.10.block_sparse_moe.experts.140.w2", "model.layers.10.block_sparse_moe.experts.141.w2", "model.layers.10.block_sparse_moe.experts.142.w2", "model.layers.10.block_sparse_moe.experts.143.w2", "model.layers.10.block_sparse_moe.experts.144.w2", "model.layers.10.block_sparse_moe.experts.145.w2", "model.layers.10.block_sparse_moe.experts.146.w2", "model.layers.10.block_sparse_moe.experts.147.w2", "model.layers.10.block_sparse_moe.experts.148.w2", "model.layers.10.block_sparse_moe.experts.149.w2", "model.layers.10.block_sparse_moe.experts.150.w2", "model.layers.10.block_sparse_moe.experts.151.w2", "model.layers.10.block_sparse_moe.experts.152.w2", "model.layers.10.block_sparse_moe.experts.153.w2", "model.layers.10.block_sparse_moe.experts.154.w2", "model.layers.10.block_sparse_moe.experts.155.w2", "model.layers.10.block_sparse_moe.experts.156.w2", "model.layers.10.block_sparse_moe.experts.157.w2", "model.layers.10.block_sparse_moe.experts.158.w2", "model.layers.10.block_sparse_moe.experts.159.w2", "model.layers.10.block_sparse_moe.experts.160.w2", "model.layers.10.block_sparse_moe.experts.161.w2", "model.layers.10.block_sparse_moe.experts.162.w2", "model.layers.10.block_sparse_moe.experts.163.w2", "model.layers.10.block_sparse_moe.experts.164.w2", "model.layers.10.block_sparse_moe.experts.165.w2", "model.layers.10.block_sparse_moe.experts.166.w2", "model.layers.10.block_sparse_moe.experts.167.w2", "model.layers.10.block_sparse_moe.experts.168.w2", "model.layers.10.block_sparse_moe.experts.169.w2", "model.layers.10.block_sparse_moe.experts.170.w2", "model.layers.10.block_sparse_moe.experts.171.w2", "model.layers.10.block_sparse_moe.experts.172.w2", "model.layers.10.block_sparse_moe.experts.173.w2", "model.layers.10.block_sparse_moe.experts.174.w2", "model.layers.10.block_sparse_moe.experts.175.w2", "model.layers.10.block_sparse_moe.experts.176.w2", "model.layers.10.block_sparse_moe.experts.177.w2", "model.layers.10.block_sparse_moe.experts.178.w2", "model.layers.10.block_sparse_moe.experts.179.w2", "model.layers.10.block_sparse_moe.experts.180.w2", "model.layers.10.block_sparse_moe.experts.181.w2", "model.layers.10.block_sparse_moe.experts.182.w2", "model.layers.10.block_sparse_moe.experts.183.w2", "model.layers.10.block_sparse_moe.experts.184.w2", "model.layers.10.block_sparse_moe.experts.185.w2", "model.layers.10.block_sparse_moe.experts.186.w2", "model.layers.10.block_sparse_moe.experts.187.w2", "model.layers.10.block_sparse_moe.experts.188.w2", "model.layers.10.block_sparse_moe.experts.189.w2", "model.layers.10.block_sparse_moe.experts.190.w2", "model.layers.10.block_sparse_moe.experts.191.w2", "model.layers.10.block_sparse_moe.experts.192.w2", "model.layers.10.block_sparse_moe.experts.193.w2", "model.layers.10.block_sparse_moe.experts.194.w2", "model.layers.10.block_sparse_moe.experts.195.w2", "model.layers.10.block_sparse_moe.experts.196.w2", "model.layers.10.block_sparse_moe.experts.197.w2", "model.layers.10.block_sparse_moe.experts.198.w2", "model.layers.10.block_sparse_moe.experts.199.w2", "model.layers.10.block_sparse_moe.experts.200.w2", "model.layers.10.block_sparse_moe.experts.201.w2", "model.layers.10.block_sparse_moe.experts.202.w2", "model.layers.10.block_sparse_moe.experts.203.w2", "model.layers.10.block_sparse_moe.experts.204.w2", "model.layers.10.block_sparse_moe.experts.205.w2", "model.layers.10.block_sparse_moe.experts.206.w2", "model.layers.10.block_sparse_moe.experts.207.w2", "model.layers.10.block_sparse_moe.experts.208.w2", "model.layers.10.block_sparse_moe.experts.209.w2", "model.layers.10.block_sparse_moe.experts.210.w2", "model.layers.10.block_sparse_moe.experts.211.w2", "model.layers.10.block_sparse_moe.experts.212.w2", "model.layers.10.block_sparse_moe.experts.213.w2", "model.layers.10.block_sparse_moe.experts.214.w2", "model.layers.10.block_sparse_moe.experts.215.w2", "model.layers.10.block_sparse_moe.experts.216.w2", "model.layers.10.block_sparse_moe.experts.217.w2", "model.layers.10.block_sparse_moe.experts.218.w2", "model.layers.10.block_sparse_moe.experts.219.w2", "model.layers.10.block_sparse_moe.experts.220.w2", "model.layers.10.block_sparse_moe.experts.221.w2", "model.layers.10.block_sparse_moe.experts.222.w2", "model.layers.10.block_sparse_moe.experts.223.w2", "model.layers.10.block_sparse_moe.experts.224.w2", "model.layers.10.block_sparse_moe.experts.225.w2", "model.layers.10.block_sparse_moe.experts.226.w2", "model.layers.10.block_sparse_moe.experts.227.w2", "model.layers.10.block_sparse_moe.experts.228.w2", "model.layers.10.block_sparse_moe.experts.229.w2", "model.layers.10.block_sparse_moe.experts.230.w2", "model.layers.10.block_sparse_moe.experts.231.w2", "model.layers.10.block_sparse_moe.experts.232.w2", "model.layers.10.block_sparse_moe.experts.233.w2", "model.layers.10.block_sparse_moe.experts.234.w2", "model.layers.10.block_sparse_moe.experts.235.w2", "model.layers.10.block_sparse_moe.experts.236.w2", "model.layers.10.block_sparse_moe.experts.237.w2", "model.layers.10.block_sparse_moe.experts.238.w2", "model.layers.10.block_sparse_moe.experts.239.w2", "model.layers.10.block_sparse_moe.experts.240.w2", "model.layers.10.block_sparse_moe.experts.241.w2", "model.layers.10.block_sparse_moe.experts.242.w2", "model.layers.10.block_sparse_moe.experts.243.w2", "model.layers.10.block_sparse_moe.experts.244.w2", "model.layers.10.block_sparse_moe.experts.245.w2", "model.layers.10.block_sparse_moe.experts.246.w2", "model.layers.10.block_sparse_moe.experts.247.w2", "model.layers.10.block_sparse_moe.experts.248.w2", "model.layers.10.block_sparse_moe.experts.249.w2", "model.layers.10.block_sparse_moe.experts.250.w2", "model.layers.10.block_sparse_moe.experts.251.w2", "model.layers.10.block_sparse_moe.experts.252.w2", "model.layers.10.block_sparse_moe.experts.253.w2", "model.layers.10.block_sparse_moe.experts.254.w2", "model.layers.10.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -6.989818066359399e-05, "dbits": 1207959552 } ] }, { "idx": 55, "layers": [ "model.layers.11.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0003912301734089879, "dbits": 18874368 } ] }, { "idx": 56, "layers": [ "model.layers.11.self_attn.k_proj", "model.layers.11.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0062972327694296865, "dbits": 6291456 } ] }, { "idx": 57, "layers": [ "model.layers.11.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0025137163698673304, "dbits": 18874368 } ] }, { "idx": 58, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w1", "model.layers.11.block_sparse_moe.experts.1.w1", "model.layers.11.block_sparse_moe.experts.2.w1", "model.layers.11.block_sparse_moe.experts.3.w1", "model.layers.11.block_sparse_moe.experts.4.w1", "model.layers.11.block_sparse_moe.experts.5.w1", "model.layers.11.block_sparse_moe.experts.6.w1", "model.layers.11.block_sparse_moe.experts.7.w1", "model.layers.11.block_sparse_moe.experts.8.w1", "model.layers.11.block_sparse_moe.experts.9.w1", "model.layers.11.block_sparse_moe.experts.10.w1", "model.layers.11.block_sparse_moe.experts.11.w1", "model.layers.11.block_sparse_moe.experts.12.w1", "model.layers.11.block_sparse_moe.experts.13.w1", "model.layers.11.block_sparse_moe.experts.14.w1", "model.layers.11.block_sparse_moe.experts.15.w1", "model.layers.11.block_sparse_moe.experts.16.w1", "model.layers.11.block_sparse_moe.experts.17.w1", "model.layers.11.block_sparse_moe.experts.18.w1", "model.layers.11.block_sparse_moe.experts.19.w1", "model.layers.11.block_sparse_moe.experts.20.w1", "model.layers.11.block_sparse_moe.experts.21.w1", "model.layers.11.block_sparse_moe.experts.22.w1", "model.layers.11.block_sparse_moe.experts.23.w1", "model.layers.11.block_sparse_moe.experts.24.w1", "model.layers.11.block_sparse_moe.experts.25.w1", "model.layers.11.block_sparse_moe.experts.26.w1", "model.layers.11.block_sparse_moe.experts.27.w1", "model.layers.11.block_sparse_moe.experts.28.w1", "model.layers.11.block_sparse_moe.experts.29.w1", "model.layers.11.block_sparse_moe.experts.30.w1", "model.layers.11.block_sparse_moe.experts.31.w1", "model.layers.11.block_sparse_moe.experts.32.w1", "model.layers.11.block_sparse_moe.experts.33.w1", "model.layers.11.block_sparse_moe.experts.34.w1", "model.layers.11.block_sparse_moe.experts.35.w1", "model.layers.11.block_sparse_moe.experts.36.w1", "model.layers.11.block_sparse_moe.experts.37.w1", "model.layers.11.block_sparse_moe.experts.38.w1", "model.layers.11.block_sparse_moe.experts.39.w1", "model.layers.11.block_sparse_moe.experts.40.w1", "model.layers.11.block_sparse_moe.experts.41.w1", "model.layers.11.block_sparse_moe.experts.42.w1", "model.layers.11.block_sparse_moe.experts.43.w1", "model.layers.11.block_sparse_moe.experts.44.w1", "model.layers.11.block_sparse_moe.experts.45.w1", "model.layers.11.block_sparse_moe.experts.46.w1", "model.layers.11.block_sparse_moe.experts.47.w1", "model.layers.11.block_sparse_moe.experts.48.w1", "model.layers.11.block_sparse_moe.experts.49.w1", "model.layers.11.block_sparse_moe.experts.50.w1", "model.layers.11.block_sparse_moe.experts.51.w1", "model.layers.11.block_sparse_moe.experts.52.w1", "model.layers.11.block_sparse_moe.experts.53.w1", "model.layers.11.block_sparse_moe.experts.54.w1", "model.layers.11.block_sparse_moe.experts.55.w1", "model.layers.11.block_sparse_moe.experts.56.w1", "model.layers.11.block_sparse_moe.experts.57.w1", "model.layers.11.block_sparse_moe.experts.58.w1", "model.layers.11.block_sparse_moe.experts.59.w1", "model.layers.11.block_sparse_moe.experts.60.w1", "model.layers.11.block_sparse_moe.experts.61.w1", "model.layers.11.block_sparse_moe.experts.62.w1", "model.layers.11.block_sparse_moe.experts.63.w1", "model.layers.11.block_sparse_moe.experts.64.w1", "model.layers.11.block_sparse_moe.experts.65.w1", "model.layers.11.block_sparse_moe.experts.66.w1", "model.layers.11.block_sparse_moe.experts.67.w1", "model.layers.11.block_sparse_moe.experts.68.w1", "model.layers.11.block_sparse_moe.experts.69.w1", "model.layers.11.block_sparse_moe.experts.70.w1", "model.layers.11.block_sparse_moe.experts.71.w1", "model.layers.11.block_sparse_moe.experts.72.w1", "model.layers.11.block_sparse_moe.experts.73.w1", "model.layers.11.block_sparse_moe.experts.74.w1", "model.layers.11.block_sparse_moe.experts.75.w1", "model.layers.11.block_sparse_moe.experts.76.w1", "model.layers.11.block_sparse_moe.experts.77.w1", "model.layers.11.block_sparse_moe.experts.78.w1", "model.layers.11.block_sparse_moe.experts.79.w1", "model.layers.11.block_sparse_moe.experts.80.w1", "model.layers.11.block_sparse_moe.experts.81.w1", "model.layers.11.block_sparse_moe.experts.82.w1", "model.layers.11.block_sparse_moe.experts.83.w1", "model.layers.11.block_sparse_moe.experts.84.w1", "model.layers.11.block_sparse_moe.experts.85.w1", "model.layers.11.block_sparse_moe.experts.86.w1", "model.layers.11.block_sparse_moe.experts.87.w1", "model.layers.11.block_sparse_moe.experts.88.w1", "model.layers.11.block_sparse_moe.experts.89.w1", "model.layers.11.block_sparse_moe.experts.90.w1", "model.layers.11.block_sparse_moe.experts.91.w1", "model.layers.11.block_sparse_moe.experts.92.w1", "model.layers.11.block_sparse_moe.experts.93.w1", "model.layers.11.block_sparse_moe.experts.94.w1", "model.layers.11.block_sparse_moe.experts.95.w1", "model.layers.11.block_sparse_moe.experts.96.w1", "model.layers.11.block_sparse_moe.experts.97.w1", "model.layers.11.block_sparse_moe.experts.98.w1", "model.layers.11.block_sparse_moe.experts.99.w1", "model.layers.11.block_sparse_moe.experts.100.w1", "model.layers.11.block_sparse_moe.experts.101.w1", "model.layers.11.block_sparse_moe.experts.102.w1", "model.layers.11.block_sparse_moe.experts.103.w1", "model.layers.11.block_sparse_moe.experts.104.w1", "model.layers.11.block_sparse_moe.experts.105.w1", "model.layers.11.block_sparse_moe.experts.106.w1", "model.layers.11.block_sparse_moe.experts.107.w1", "model.layers.11.block_sparse_moe.experts.108.w1", "model.layers.11.block_sparse_moe.experts.109.w1", "model.layers.11.block_sparse_moe.experts.110.w1", "model.layers.11.block_sparse_moe.experts.111.w1", "model.layers.11.block_sparse_moe.experts.112.w1", "model.layers.11.block_sparse_moe.experts.113.w1", "model.layers.11.block_sparse_moe.experts.114.w1", "model.layers.11.block_sparse_moe.experts.115.w1", "model.layers.11.block_sparse_moe.experts.116.w1", "model.layers.11.block_sparse_moe.experts.117.w1", "model.layers.11.block_sparse_moe.experts.118.w1", "model.layers.11.block_sparse_moe.experts.119.w1", "model.layers.11.block_sparse_moe.experts.120.w1", "model.layers.11.block_sparse_moe.experts.121.w1", "model.layers.11.block_sparse_moe.experts.122.w1", "model.layers.11.block_sparse_moe.experts.123.w1", "model.layers.11.block_sparse_moe.experts.124.w1", "model.layers.11.block_sparse_moe.experts.125.w1", "model.layers.11.block_sparse_moe.experts.126.w1", "model.layers.11.block_sparse_moe.experts.127.w1", "model.layers.11.block_sparse_moe.experts.128.w1", "model.layers.11.block_sparse_moe.experts.129.w1", "model.layers.11.block_sparse_moe.experts.130.w1", "model.layers.11.block_sparse_moe.experts.131.w1", "model.layers.11.block_sparse_moe.experts.132.w1", "model.layers.11.block_sparse_moe.experts.133.w1", "model.layers.11.block_sparse_moe.experts.134.w1", "model.layers.11.block_sparse_moe.experts.135.w1", "model.layers.11.block_sparse_moe.experts.136.w1", "model.layers.11.block_sparse_moe.experts.137.w1", "model.layers.11.block_sparse_moe.experts.138.w1", "model.layers.11.block_sparse_moe.experts.139.w1", "model.layers.11.block_sparse_moe.experts.140.w1", "model.layers.11.block_sparse_moe.experts.141.w1", "model.layers.11.block_sparse_moe.experts.142.w1", "model.layers.11.block_sparse_moe.experts.143.w1", "model.layers.11.block_sparse_moe.experts.144.w1", "model.layers.11.block_sparse_moe.experts.145.w1", "model.layers.11.block_sparse_moe.experts.146.w1", "model.layers.11.block_sparse_moe.experts.147.w1", "model.layers.11.block_sparse_moe.experts.148.w1", "model.layers.11.block_sparse_moe.experts.149.w1", "model.layers.11.block_sparse_moe.experts.150.w1", "model.layers.11.block_sparse_moe.experts.151.w1", "model.layers.11.block_sparse_moe.experts.152.w1", "model.layers.11.block_sparse_moe.experts.153.w1", "model.layers.11.block_sparse_moe.experts.154.w1", "model.layers.11.block_sparse_moe.experts.155.w1", "model.layers.11.block_sparse_moe.experts.156.w1", "model.layers.11.block_sparse_moe.experts.157.w1", "model.layers.11.block_sparse_moe.experts.158.w1", "model.layers.11.block_sparse_moe.experts.159.w1", "model.layers.11.block_sparse_moe.experts.160.w1", "model.layers.11.block_sparse_moe.experts.161.w1", "model.layers.11.block_sparse_moe.experts.162.w1", "model.layers.11.block_sparse_moe.experts.163.w1", "model.layers.11.block_sparse_moe.experts.164.w1", "model.layers.11.block_sparse_moe.experts.165.w1", "model.layers.11.block_sparse_moe.experts.166.w1", "model.layers.11.block_sparse_moe.experts.167.w1", "model.layers.11.block_sparse_moe.experts.168.w1", "model.layers.11.block_sparse_moe.experts.169.w1", "model.layers.11.block_sparse_moe.experts.170.w1", "model.layers.11.block_sparse_moe.experts.171.w1", "model.layers.11.block_sparse_moe.experts.172.w1", "model.layers.11.block_sparse_moe.experts.173.w1", "model.layers.11.block_sparse_moe.experts.174.w1", "model.layers.11.block_sparse_moe.experts.175.w1", "model.layers.11.block_sparse_moe.experts.176.w1", "model.layers.11.block_sparse_moe.experts.177.w1", "model.layers.11.block_sparse_moe.experts.178.w1", "model.layers.11.block_sparse_moe.experts.179.w1", "model.layers.11.block_sparse_moe.experts.180.w1", "model.layers.11.block_sparse_moe.experts.181.w1", "model.layers.11.block_sparse_moe.experts.182.w1", "model.layers.11.block_sparse_moe.experts.183.w1", "model.layers.11.block_sparse_moe.experts.184.w1", "model.layers.11.block_sparse_moe.experts.185.w1", "model.layers.11.block_sparse_moe.experts.186.w1", "model.layers.11.block_sparse_moe.experts.187.w1", "model.layers.11.block_sparse_moe.experts.188.w1", "model.layers.11.block_sparse_moe.experts.189.w1", "model.layers.11.block_sparse_moe.experts.190.w1", "model.layers.11.block_sparse_moe.experts.191.w1", "model.layers.11.block_sparse_moe.experts.192.w1", "model.layers.11.block_sparse_moe.experts.193.w1", "model.layers.11.block_sparse_moe.experts.194.w1", "model.layers.11.block_sparse_moe.experts.195.w1", "model.layers.11.block_sparse_moe.experts.196.w1", "model.layers.11.block_sparse_moe.experts.197.w1", "model.layers.11.block_sparse_moe.experts.198.w1", "model.layers.11.block_sparse_moe.experts.199.w1", "model.layers.11.block_sparse_moe.experts.200.w1", "model.layers.11.block_sparse_moe.experts.201.w1", "model.layers.11.block_sparse_moe.experts.202.w1", "model.layers.11.block_sparse_moe.experts.203.w1", "model.layers.11.block_sparse_moe.experts.204.w1", "model.layers.11.block_sparse_moe.experts.205.w1", "model.layers.11.block_sparse_moe.experts.206.w1", "model.layers.11.block_sparse_moe.experts.207.w1", "model.layers.11.block_sparse_moe.experts.208.w1", "model.layers.11.block_sparse_moe.experts.209.w1", "model.layers.11.block_sparse_moe.experts.210.w1", "model.layers.11.block_sparse_moe.experts.211.w1", "model.layers.11.block_sparse_moe.experts.212.w1", "model.layers.11.block_sparse_moe.experts.213.w1", "model.layers.11.block_sparse_moe.experts.214.w1", "model.layers.11.block_sparse_moe.experts.215.w1", "model.layers.11.block_sparse_moe.experts.216.w1", "model.layers.11.block_sparse_moe.experts.217.w1", "model.layers.11.block_sparse_moe.experts.218.w1", "model.layers.11.block_sparse_moe.experts.219.w1", "model.layers.11.block_sparse_moe.experts.220.w1", "model.layers.11.block_sparse_moe.experts.221.w1", "model.layers.11.block_sparse_moe.experts.222.w1", "model.layers.11.block_sparse_moe.experts.223.w1", "model.layers.11.block_sparse_moe.experts.224.w1", "model.layers.11.block_sparse_moe.experts.225.w1", "model.layers.11.block_sparse_moe.experts.226.w1", "model.layers.11.block_sparse_moe.experts.227.w1", "model.layers.11.block_sparse_moe.experts.228.w1", "model.layers.11.block_sparse_moe.experts.229.w1", "model.layers.11.block_sparse_moe.experts.230.w1", "model.layers.11.block_sparse_moe.experts.231.w1", "model.layers.11.block_sparse_moe.experts.232.w1", "model.layers.11.block_sparse_moe.experts.233.w1", "model.layers.11.block_sparse_moe.experts.234.w1", "model.layers.11.block_sparse_moe.experts.235.w1", "model.layers.11.block_sparse_moe.experts.236.w1", "model.layers.11.block_sparse_moe.experts.237.w1", "model.layers.11.block_sparse_moe.experts.238.w1", "model.layers.11.block_sparse_moe.experts.239.w1", "model.layers.11.block_sparse_moe.experts.240.w1", "model.layers.11.block_sparse_moe.experts.241.w1", "model.layers.11.block_sparse_moe.experts.242.w1", "model.layers.11.block_sparse_moe.experts.243.w1", "model.layers.11.block_sparse_moe.experts.244.w1", "model.layers.11.block_sparse_moe.experts.245.w1", "model.layers.11.block_sparse_moe.experts.246.w1", "model.layers.11.block_sparse_moe.experts.247.w1", "model.layers.11.block_sparse_moe.experts.248.w1", "model.layers.11.block_sparse_moe.experts.249.w1", "model.layers.11.block_sparse_moe.experts.250.w1", "model.layers.11.block_sparse_moe.experts.251.w1", "model.layers.11.block_sparse_moe.experts.252.w1", "model.layers.11.block_sparse_moe.experts.253.w1", "model.layers.11.block_sparse_moe.experts.254.w1", "model.layers.11.block_sparse_moe.experts.255.w1", "model.layers.11.block_sparse_moe.experts.0.w3", "model.layers.11.block_sparse_moe.experts.1.w3", "model.layers.11.block_sparse_moe.experts.2.w3", "model.layers.11.block_sparse_moe.experts.3.w3", "model.layers.11.block_sparse_moe.experts.4.w3", "model.layers.11.block_sparse_moe.experts.5.w3", "model.layers.11.block_sparse_moe.experts.6.w3", "model.layers.11.block_sparse_moe.experts.7.w3", "model.layers.11.block_sparse_moe.experts.8.w3", "model.layers.11.block_sparse_moe.experts.9.w3", "model.layers.11.block_sparse_moe.experts.10.w3", "model.layers.11.block_sparse_moe.experts.11.w3", "model.layers.11.block_sparse_moe.experts.12.w3", "model.layers.11.block_sparse_moe.experts.13.w3", "model.layers.11.block_sparse_moe.experts.14.w3", "model.layers.11.block_sparse_moe.experts.15.w3", "model.layers.11.block_sparse_moe.experts.16.w3", "model.layers.11.block_sparse_moe.experts.17.w3", "model.layers.11.block_sparse_moe.experts.18.w3", "model.layers.11.block_sparse_moe.experts.19.w3", "model.layers.11.block_sparse_moe.experts.20.w3", "model.layers.11.block_sparse_moe.experts.21.w3", "model.layers.11.block_sparse_moe.experts.22.w3", "model.layers.11.block_sparse_moe.experts.23.w3", "model.layers.11.block_sparse_moe.experts.24.w3", "model.layers.11.block_sparse_moe.experts.25.w3", "model.layers.11.block_sparse_moe.experts.26.w3", "model.layers.11.block_sparse_moe.experts.27.w3", "model.layers.11.block_sparse_moe.experts.28.w3", "model.layers.11.block_sparse_moe.experts.29.w3", "model.layers.11.block_sparse_moe.experts.30.w3", "model.layers.11.block_sparse_moe.experts.31.w3", "model.layers.11.block_sparse_moe.experts.32.w3", "model.layers.11.block_sparse_moe.experts.33.w3", "model.layers.11.block_sparse_moe.experts.34.w3", "model.layers.11.block_sparse_moe.experts.35.w3", "model.layers.11.block_sparse_moe.experts.36.w3", "model.layers.11.block_sparse_moe.experts.37.w3", "model.layers.11.block_sparse_moe.experts.38.w3", "model.layers.11.block_sparse_moe.experts.39.w3", "model.layers.11.block_sparse_moe.experts.40.w3", "model.layers.11.block_sparse_moe.experts.41.w3", "model.layers.11.block_sparse_moe.experts.42.w3", "model.layers.11.block_sparse_moe.experts.43.w3", "model.layers.11.block_sparse_moe.experts.44.w3", "model.layers.11.block_sparse_moe.experts.45.w3", "model.layers.11.block_sparse_moe.experts.46.w3", "model.layers.11.block_sparse_moe.experts.47.w3", "model.layers.11.block_sparse_moe.experts.48.w3", "model.layers.11.block_sparse_moe.experts.49.w3", "model.layers.11.block_sparse_moe.experts.50.w3", "model.layers.11.block_sparse_moe.experts.51.w3", "model.layers.11.block_sparse_moe.experts.52.w3", "model.layers.11.block_sparse_moe.experts.53.w3", "model.layers.11.block_sparse_moe.experts.54.w3", "model.layers.11.block_sparse_moe.experts.55.w3", "model.layers.11.block_sparse_moe.experts.56.w3", "model.layers.11.block_sparse_moe.experts.57.w3", "model.layers.11.block_sparse_moe.experts.58.w3", "model.layers.11.block_sparse_moe.experts.59.w3", "model.layers.11.block_sparse_moe.experts.60.w3", "model.layers.11.block_sparse_moe.experts.61.w3", "model.layers.11.block_sparse_moe.experts.62.w3", "model.layers.11.block_sparse_moe.experts.63.w3", "model.layers.11.block_sparse_moe.experts.64.w3", "model.layers.11.block_sparse_moe.experts.65.w3", "model.layers.11.block_sparse_moe.experts.66.w3", "model.layers.11.block_sparse_moe.experts.67.w3", "model.layers.11.block_sparse_moe.experts.68.w3", "model.layers.11.block_sparse_moe.experts.69.w3", "model.layers.11.block_sparse_moe.experts.70.w3", "model.layers.11.block_sparse_moe.experts.71.w3", "model.layers.11.block_sparse_moe.experts.72.w3", "model.layers.11.block_sparse_moe.experts.73.w3", "model.layers.11.block_sparse_moe.experts.74.w3", "model.layers.11.block_sparse_moe.experts.75.w3", "model.layers.11.block_sparse_moe.experts.76.w3", "model.layers.11.block_sparse_moe.experts.77.w3", "model.layers.11.block_sparse_moe.experts.78.w3", "model.layers.11.block_sparse_moe.experts.79.w3", "model.layers.11.block_sparse_moe.experts.80.w3", "model.layers.11.block_sparse_moe.experts.81.w3", "model.layers.11.block_sparse_moe.experts.82.w3", "model.layers.11.block_sparse_moe.experts.83.w3", "model.layers.11.block_sparse_moe.experts.84.w3", "model.layers.11.block_sparse_moe.experts.85.w3", "model.layers.11.block_sparse_moe.experts.86.w3", "model.layers.11.block_sparse_moe.experts.87.w3", "model.layers.11.block_sparse_moe.experts.88.w3", "model.layers.11.block_sparse_moe.experts.89.w3", "model.layers.11.block_sparse_moe.experts.90.w3", "model.layers.11.block_sparse_moe.experts.91.w3", "model.layers.11.block_sparse_moe.experts.92.w3", "model.layers.11.block_sparse_moe.experts.93.w3", "model.layers.11.block_sparse_moe.experts.94.w3", "model.layers.11.block_sparse_moe.experts.95.w3", "model.layers.11.block_sparse_moe.experts.96.w3", "model.layers.11.block_sparse_moe.experts.97.w3", "model.layers.11.block_sparse_moe.experts.98.w3", "model.layers.11.block_sparse_moe.experts.99.w3", "model.layers.11.block_sparse_moe.experts.100.w3", "model.layers.11.block_sparse_moe.experts.101.w3", "model.layers.11.block_sparse_moe.experts.102.w3", "model.layers.11.block_sparse_moe.experts.103.w3", "model.layers.11.block_sparse_moe.experts.104.w3", "model.layers.11.block_sparse_moe.experts.105.w3", "model.layers.11.block_sparse_moe.experts.106.w3", "model.layers.11.block_sparse_moe.experts.107.w3", "model.layers.11.block_sparse_moe.experts.108.w3", "model.layers.11.block_sparse_moe.experts.109.w3", "model.layers.11.block_sparse_moe.experts.110.w3", "model.layers.11.block_sparse_moe.experts.111.w3", "model.layers.11.block_sparse_moe.experts.112.w3", "model.layers.11.block_sparse_moe.experts.113.w3", "model.layers.11.block_sparse_moe.experts.114.w3", "model.layers.11.block_sparse_moe.experts.115.w3", "model.layers.11.block_sparse_moe.experts.116.w3", "model.layers.11.block_sparse_moe.experts.117.w3", "model.layers.11.block_sparse_moe.experts.118.w3", "model.layers.11.block_sparse_moe.experts.119.w3", "model.layers.11.block_sparse_moe.experts.120.w3", "model.layers.11.block_sparse_moe.experts.121.w3", "model.layers.11.block_sparse_moe.experts.122.w3", "model.layers.11.block_sparse_moe.experts.123.w3", "model.layers.11.block_sparse_moe.experts.124.w3", "model.layers.11.block_sparse_moe.experts.125.w3", "model.layers.11.block_sparse_moe.experts.126.w3", "model.layers.11.block_sparse_moe.experts.127.w3", "model.layers.11.block_sparse_moe.experts.128.w3", "model.layers.11.block_sparse_moe.experts.129.w3", "model.layers.11.block_sparse_moe.experts.130.w3", "model.layers.11.block_sparse_moe.experts.131.w3", "model.layers.11.block_sparse_moe.experts.132.w3", "model.layers.11.block_sparse_moe.experts.133.w3", "model.layers.11.block_sparse_moe.experts.134.w3", "model.layers.11.block_sparse_moe.experts.135.w3", "model.layers.11.block_sparse_moe.experts.136.w3", "model.layers.11.block_sparse_moe.experts.137.w3", "model.layers.11.block_sparse_moe.experts.138.w3", "model.layers.11.block_sparse_moe.experts.139.w3", "model.layers.11.block_sparse_moe.experts.140.w3", "model.layers.11.block_sparse_moe.experts.141.w3", "model.layers.11.block_sparse_moe.experts.142.w3", "model.layers.11.block_sparse_moe.experts.143.w3", "model.layers.11.block_sparse_moe.experts.144.w3", "model.layers.11.block_sparse_moe.experts.145.w3", "model.layers.11.block_sparse_moe.experts.146.w3", "model.layers.11.block_sparse_moe.experts.147.w3", "model.layers.11.block_sparse_moe.experts.148.w3", "model.layers.11.block_sparse_moe.experts.149.w3", "model.layers.11.block_sparse_moe.experts.150.w3", "model.layers.11.block_sparse_moe.experts.151.w3", "model.layers.11.block_sparse_moe.experts.152.w3", "model.layers.11.block_sparse_moe.experts.153.w3", "model.layers.11.block_sparse_moe.experts.154.w3", "model.layers.11.block_sparse_moe.experts.155.w3", "model.layers.11.block_sparse_moe.experts.156.w3", "model.layers.11.block_sparse_moe.experts.157.w3", "model.layers.11.block_sparse_moe.experts.158.w3", "model.layers.11.block_sparse_moe.experts.159.w3", "model.layers.11.block_sparse_moe.experts.160.w3", "model.layers.11.block_sparse_moe.experts.161.w3", "model.layers.11.block_sparse_moe.experts.162.w3", "model.layers.11.block_sparse_moe.experts.163.w3", "model.layers.11.block_sparse_moe.experts.164.w3", "model.layers.11.block_sparse_moe.experts.165.w3", "model.layers.11.block_sparse_moe.experts.166.w3", "model.layers.11.block_sparse_moe.experts.167.w3", "model.layers.11.block_sparse_moe.experts.168.w3", "model.layers.11.block_sparse_moe.experts.169.w3", "model.layers.11.block_sparse_moe.experts.170.w3", "model.layers.11.block_sparse_moe.experts.171.w3", "model.layers.11.block_sparse_moe.experts.172.w3", "model.layers.11.block_sparse_moe.experts.173.w3", "model.layers.11.block_sparse_moe.experts.174.w3", "model.layers.11.block_sparse_moe.experts.175.w3", "model.layers.11.block_sparse_moe.experts.176.w3", "model.layers.11.block_sparse_moe.experts.177.w3", "model.layers.11.block_sparse_moe.experts.178.w3", "model.layers.11.block_sparse_moe.experts.179.w3", "model.layers.11.block_sparse_moe.experts.180.w3", "model.layers.11.block_sparse_moe.experts.181.w3", "model.layers.11.block_sparse_moe.experts.182.w3", "model.layers.11.block_sparse_moe.experts.183.w3", "model.layers.11.block_sparse_moe.experts.184.w3", "model.layers.11.block_sparse_moe.experts.185.w3", "model.layers.11.block_sparse_moe.experts.186.w3", "model.layers.11.block_sparse_moe.experts.187.w3", "model.layers.11.block_sparse_moe.experts.188.w3", "model.layers.11.block_sparse_moe.experts.189.w3", "model.layers.11.block_sparse_moe.experts.190.w3", "model.layers.11.block_sparse_moe.experts.191.w3", "model.layers.11.block_sparse_moe.experts.192.w3", "model.layers.11.block_sparse_moe.experts.193.w3", "model.layers.11.block_sparse_moe.experts.194.w3", "model.layers.11.block_sparse_moe.experts.195.w3", "model.layers.11.block_sparse_moe.experts.196.w3", "model.layers.11.block_sparse_moe.experts.197.w3", "model.layers.11.block_sparse_moe.experts.198.w3", "model.layers.11.block_sparse_moe.experts.199.w3", "model.layers.11.block_sparse_moe.experts.200.w3", "model.layers.11.block_sparse_moe.experts.201.w3", "model.layers.11.block_sparse_moe.experts.202.w3", "model.layers.11.block_sparse_moe.experts.203.w3", "model.layers.11.block_sparse_moe.experts.204.w3", "model.layers.11.block_sparse_moe.experts.205.w3", "model.layers.11.block_sparse_moe.experts.206.w3", "model.layers.11.block_sparse_moe.experts.207.w3", "model.layers.11.block_sparse_moe.experts.208.w3", "model.layers.11.block_sparse_moe.experts.209.w3", "model.layers.11.block_sparse_moe.experts.210.w3", "model.layers.11.block_sparse_moe.experts.211.w3", "model.layers.11.block_sparse_moe.experts.212.w3", "model.layers.11.block_sparse_moe.experts.213.w3", "model.layers.11.block_sparse_moe.experts.214.w3", "model.layers.11.block_sparse_moe.experts.215.w3", "model.layers.11.block_sparse_moe.experts.216.w3", "model.layers.11.block_sparse_moe.experts.217.w3", "model.layers.11.block_sparse_moe.experts.218.w3", "model.layers.11.block_sparse_moe.experts.219.w3", "model.layers.11.block_sparse_moe.experts.220.w3", "model.layers.11.block_sparse_moe.experts.221.w3", "model.layers.11.block_sparse_moe.experts.222.w3", "model.layers.11.block_sparse_moe.experts.223.w3", "model.layers.11.block_sparse_moe.experts.224.w3", "model.layers.11.block_sparse_moe.experts.225.w3", "model.layers.11.block_sparse_moe.experts.226.w3", "model.layers.11.block_sparse_moe.experts.227.w3", "model.layers.11.block_sparse_moe.experts.228.w3", "model.layers.11.block_sparse_moe.experts.229.w3", "model.layers.11.block_sparse_moe.experts.230.w3", "model.layers.11.block_sparse_moe.experts.231.w3", "model.layers.11.block_sparse_moe.experts.232.w3", "model.layers.11.block_sparse_moe.experts.233.w3", "model.layers.11.block_sparse_moe.experts.234.w3", "model.layers.11.block_sparse_moe.experts.235.w3", "model.layers.11.block_sparse_moe.experts.236.w3", "model.layers.11.block_sparse_moe.experts.237.w3", "model.layers.11.block_sparse_moe.experts.238.w3", "model.layers.11.block_sparse_moe.experts.239.w3", "model.layers.11.block_sparse_moe.experts.240.w3", "model.layers.11.block_sparse_moe.experts.241.w3", "model.layers.11.block_sparse_moe.experts.242.w3", "model.layers.11.block_sparse_moe.experts.243.w3", "model.layers.11.block_sparse_moe.experts.244.w3", "model.layers.11.block_sparse_moe.experts.245.w3", "model.layers.11.block_sparse_moe.experts.246.w3", "model.layers.11.block_sparse_moe.experts.247.w3", "model.layers.11.block_sparse_moe.experts.248.w3", "model.layers.11.block_sparse_moe.experts.249.w3", "model.layers.11.block_sparse_moe.experts.250.w3", "model.layers.11.block_sparse_moe.experts.251.w3", "model.layers.11.block_sparse_moe.experts.252.w3", "model.layers.11.block_sparse_moe.experts.253.w3", "model.layers.11.block_sparse_moe.experts.254.w3", "model.layers.11.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0005765348672866766, "dbits": 2415919104 } ] }, { "idx": 59, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w2", "model.layers.11.block_sparse_moe.experts.1.w2", "model.layers.11.block_sparse_moe.experts.2.w2", "model.layers.11.block_sparse_moe.experts.3.w2", "model.layers.11.block_sparse_moe.experts.4.w2", "model.layers.11.block_sparse_moe.experts.5.w2", "model.layers.11.block_sparse_moe.experts.6.w2", "model.layers.11.block_sparse_moe.experts.7.w2", "model.layers.11.block_sparse_moe.experts.8.w2", "model.layers.11.block_sparse_moe.experts.9.w2", "model.layers.11.block_sparse_moe.experts.10.w2", "model.layers.11.block_sparse_moe.experts.11.w2", "model.layers.11.block_sparse_moe.experts.12.w2", "model.layers.11.block_sparse_moe.experts.13.w2", "model.layers.11.block_sparse_moe.experts.14.w2", "model.layers.11.block_sparse_moe.experts.15.w2", "model.layers.11.block_sparse_moe.experts.16.w2", "model.layers.11.block_sparse_moe.experts.17.w2", "model.layers.11.block_sparse_moe.experts.18.w2", "model.layers.11.block_sparse_moe.experts.19.w2", "model.layers.11.block_sparse_moe.experts.20.w2", "model.layers.11.block_sparse_moe.experts.21.w2", "model.layers.11.block_sparse_moe.experts.22.w2", "model.layers.11.block_sparse_moe.experts.23.w2", "model.layers.11.block_sparse_moe.experts.24.w2", "model.layers.11.block_sparse_moe.experts.25.w2", "model.layers.11.block_sparse_moe.experts.26.w2", "model.layers.11.block_sparse_moe.experts.27.w2", "model.layers.11.block_sparse_moe.experts.28.w2", "model.layers.11.block_sparse_moe.experts.29.w2", "model.layers.11.block_sparse_moe.experts.30.w2", "model.layers.11.block_sparse_moe.experts.31.w2", "model.layers.11.block_sparse_moe.experts.32.w2", "model.layers.11.block_sparse_moe.experts.33.w2", "model.layers.11.block_sparse_moe.experts.34.w2", "model.layers.11.block_sparse_moe.experts.35.w2", "model.layers.11.block_sparse_moe.experts.36.w2", "model.layers.11.block_sparse_moe.experts.37.w2", "model.layers.11.block_sparse_moe.experts.38.w2", "model.layers.11.block_sparse_moe.experts.39.w2", "model.layers.11.block_sparse_moe.experts.40.w2", "model.layers.11.block_sparse_moe.experts.41.w2", "model.layers.11.block_sparse_moe.experts.42.w2", "model.layers.11.block_sparse_moe.experts.43.w2", "model.layers.11.block_sparse_moe.experts.44.w2", "model.layers.11.block_sparse_moe.experts.45.w2", "model.layers.11.block_sparse_moe.experts.46.w2", "model.layers.11.block_sparse_moe.experts.47.w2", "model.layers.11.block_sparse_moe.experts.48.w2", "model.layers.11.block_sparse_moe.experts.49.w2", "model.layers.11.block_sparse_moe.experts.50.w2", "model.layers.11.block_sparse_moe.experts.51.w2", "model.layers.11.block_sparse_moe.experts.52.w2", "model.layers.11.block_sparse_moe.experts.53.w2", "model.layers.11.block_sparse_moe.experts.54.w2", "model.layers.11.block_sparse_moe.experts.55.w2", "model.layers.11.block_sparse_moe.experts.56.w2", "model.layers.11.block_sparse_moe.experts.57.w2", "model.layers.11.block_sparse_moe.experts.58.w2", "model.layers.11.block_sparse_moe.experts.59.w2", "model.layers.11.block_sparse_moe.experts.60.w2", "model.layers.11.block_sparse_moe.experts.61.w2", "model.layers.11.block_sparse_moe.experts.62.w2", "model.layers.11.block_sparse_moe.experts.63.w2", "model.layers.11.block_sparse_moe.experts.64.w2", "model.layers.11.block_sparse_moe.experts.65.w2", "model.layers.11.block_sparse_moe.experts.66.w2", "model.layers.11.block_sparse_moe.experts.67.w2", "model.layers.11.block_sparse_moe.experts.68.w2", "model.layers.11.block_sparse_moe.experts.69.w2", "model.layers.11.block_sparse_moe.experts.70.w2", "model.layers.11.block_sparse_moe.experts.71.w2", "model.layers.11.block_sparse_moe.experts.72.w2", "model.layers.11.block_sparse_moe.experts.73.w2", "model.layers.11.block_sparse_moe.experts.74.w2", "model.layers.11.block_sparse_moe.experts.75.w2", "model.layers.11.block_sparse_moe.experts.76.w2", "model.layers.11.block_sparse_moe.experts.77.w2", "model.layers.11.block_sparse_moe.experts.78.w2", "model.layers.11.block_sparse_moe.experts.79.w2", "model.layers.11.block_sparse_moe.experts.80.w2", "model.layers.11.block_sparse_moe.experts.81.w2", "model.layers.11.block_sparse_moe.experts.82.w2", "model.layers.11.block_sparse_moe.experts.83.w2", "model.layers.11.block_sparse_moe.experts.84.w2", "model.layers.11.block_sparse_moe.experts.85.w2", "model.layers.11.block_sparse_moe.experts.86.w2", "model.layers.11.block_sparse_moe.experts.87.w2", "model.layers.11.block_sparse_moe.experts.88.w2", "model.layers.11.block_sparse_moe.experts.89.w2", "model.layers.11.block_sparse_moe.experts.90.w2", "model.layers.11.block_sparse_moe.experts.91.w2", "model.layers.11.block_sparse_moe.experts.92.w2", "model.layers.11.block_sparse_moe.experts.93.w2", "model.layers.11.block_sparse_moe.experts.94.w2", "model.layers.11.block_sparse_moe.experts.95.w2", "model.layers.11.block_sparse_moe.experts.96.w2", "model.layers.11.block_sparse_moe.experts.97.w2", "model.layers.11.block_sparse_moe.experts.98.w2", "model.layers.11.block_sparse_moe.experts.99.w2", "model.layers.11.block_sparse_moe.experts.100.w2", "model.layers.11.block_sparse_moe.experts.101.w2", "model.layers.11.block_sparse_moe.experts.102.w2", "model.layers.11.block_sparse_moe.experts.103.w2", "model.layers.11.block_sparse_moe.experts.104.w2", "model.layers.11.block_sparse_moe.experts.105.w2", "model.layers.11.block_sparse_moe.experts.106.w2", "model.layers.11.block_sparse_moe.experts.107.w2", "model.layers.11.block_sparse_moe.experts.108.w2", "model.layers.11.block_sparse_moe.experts.109.w2", "model.layers.11.block_sparse_moe.experts.110.w2", "model.layers.11.block_sparse_moe.experts.111.w2", "model.layers.11.block_sparse_moe.experts.112.w2", "model.layers.11.block_sparse_moe.experts.113.w2", "model.layers.11.block_sparse_moe.experts.114.w2", "model.layers.11.block_sparse_moe.experts.115.w2", "model.layers.11.block_sparse_moe.experts.116.w2", "model.layers.11.block_sparse_moe.experts.117.w2", "model.layers.11.block_sparse_moe.experts.118.w2", "model.layers.11.block_sparse_moe.experts.119.w2", "model.layers.11.block_sparse_moe.experts.120.w2", "model.layers.11.block_sparse_moe.experts.121.w2", "model.layers.11.block_sparse_moe.experts.122.w2", "model.layers.11.block_sparse_moe.experts.123.w2", "model.layers.11.block_sparse_moe.experts.124.w2", "model.layers.11.block_sparse_moe.experts.125.w2", "model.layers.11.block_sparse_moe.experts.126.w2", "model.layers.11.block_sparse_moe.experts.127.w2", "model.layers.11.block_sparse_moe.experts.128.w2", "model.layers.11.block_sparse_moe.experts.129.w2", "model.layers.11.block_sparse_moe.experts.130.w2", "model.layers.11.block_sparse_moe.experts.131.w2", "model.layers.11.block_sparse_moe.experts.132.w2", "model.layers.11.block_sparse_moe.experts.133.w2", "model.layers.11.block_sparse_moe.experts.134.w2", "model.layers.11.block_sparse_moe.experts.135.w2", "model.layers.11.block_sparse_moe.experts.136.w2", "model.layers.11.block_sparse_moe.experts.137.w2", "model.layers.11.block_sparse_moe.experts.138.w2", "model.layers.11.block_sparse_moe.experts.139.w2", "model.layers.11.block_sparse_moe.experts.140.w2", "model.layers.11.block_sparse_moe.experts.141.w2", "model.layers.11.block_sparse_moe.experts.142.w2", "model.layers.11.block_sparse_moe.experts.143.w2", "model.layers.11.block_sparse_moe.experts.144.w2", "model.layers.11.block_sparse_moe.experts.145.w2", "model.layers.11.block_sparse_moe.experts.146.w2", "model.layers.11.block_sparse_moe.experts.147.w2", "model.layers.11.block_sparse_moe.experts.148.w2", "model.layers.11.block_sparse_moe.experts.149.w2", "model.layers.11.block_sparse_moe.experts.150.w2", "model.layers.11.block_sparse_moe.experts.151.w2", "model.layers.11.block_sparse_moe.experts.152.w2", "model.layers.11.block_sparse_moe.experts.153.w2", "model.layers.11.block_sparse_moe.experts.154.w2", "model.layers.11.block_sparse_moe.experts.155.w2", "model.layers.11.block_sparse_moe.experts.156.w2", "model.layers.11.block_sparse_moe.experts.157.w2", "model.layers.11.block_sparse_moe.experts.158.w2", "model.layers.11.block_sparse_moe.experts.159.w2", "model.layers.11.block_sparse_moe.experts.160.w2", "model.layers.11.block_sparse_moe.experts.161.w2", "model.layers.11.block_sparse_moe.experts.162.w2", "model.layers.11.block_sparse_moe.experts.163.w2", "model.layers.11.block_sparse_moe.experts.164.w2", "model.layers.11.block_sparse_moe.experts.165.w2", "model.layers.11.block_sparse_moe.experts.166.w2", "model.layers.11.block_sparse_moe.experts.167.w2", "model.layers.11.block_sparse_moe.experts.168.w2", "model.layers.11.block_sparse_moe.experts.169.w2", "model.layers.11.block_sparse_moe.experts.170.w2", "model.layers.11.block_sparse_moe.experts.171.w2", "model.layers.11.block_sparse_moe.experts.172.w2", "model.layers.11.block_sparse_moe.experts.173.w2", "model.layers.11.block_sparse_moe.experts.174.w2", "model.layers.11.block_sparse_moe.experts.175.w2", "model.layers.11.block_sparse_moe.experts.176.w2", "model.layers.11.block_sparse_moe.experts.177.w2", "model.layers.11.block_sparse_moe.experts.178.w2", "model.layers.11.block_sparse_moe.experts.179.w2", "model.layers.11.block_sparse_moe.experts.180.w2", "model.layers.11.block_sparse_moe.experts.181.w2", "model.layers.11.block_sparse_moe.experts.182.w2", "model.layers.11.block_sparse_moe.experts.183.w2", "model.layers.11.block_sparse_moe.experts.184.w2", "model.layers.11.block_sparse_moe.experts.185.w2", "model.layers.11.block_sparse_moe.experts.186.w2", "model.layers.11.block_sparse_moe.experts.187.w2", "model.layers.11.block_sparse_moe.experts.188.w2", "model.layers.11.block_sparse_moe.experts.189.w2", "model.layers.11.block_sparse_moe.experts.190.w2", "model.layers.11.block_sparse_moe.experts.191.w2", "model.layers.11.block_sparse_moe.experts.192.w2", "model.layers.11.block_sparse_moe.experts.193.w2", "model.layers.11.block_sparse_moe.experts.194.w2", "model.layers.11.block_sparse_moe.experts.195.w2", "model.layers.11.block_sparse_moe.experts.196.w2", "model.layers.11.block_sparse_moe.experts.197.w2", "model.layers.11.block_sparse_moe.experts.198.w2", "model.layers.11.block_sparse_moe.experts.199.w2", "model.layers.11.block_sparse_moe.experts.200.w2", "model.layers.11.block_sparse_moe.experts.201.w2", "model.layers.11.block_sparse_moe.experts.202.w2", "model.layers.11.block_sparse_moe.experts.203.w2", "model.layers.11.block_sparse_moe.experts.204.w2", "model.layers.11.block_sparse_moe.experts.205.w2", "model.layers.11.block_sparse_moe.experts.206.w2", "model.layers.11.block_sparse_moe.experts.207.w2", "model.layers.11.block_sparse_moe.experts.208.w2", "model.layers.11.block_sparse_moe.experts.209.w2", "model.layers.11.block_sparse_moe.experts.210.w2", "model.layers.11.block_sparse_moe.experts.211.w2", "model.layers.11.block_sparse_moe.experts.212.w2", "model.layers.11.block_sparse_moe.experts.213.w2", "model.layers.11.block_sparse_moe.experts.214.w2", "model.layers.11.block_sparse_moe.experts.215.w2", "model.layers.11.block_sparse_moe.experts.216.w2", "model.layers.11.block_sparse_moe.experts.217.w2", "model.layers.11.block_sparse_moe.experts.218.w2", "model.layers.11.block_sparse_moe.experts.219.w2", "model.layers.11.block_sparse_moe.experts.220.w2", "model.layers.11.block_sparse_moe.experts.221.w2", "model.layers.11.block_sparse_moe.experts.222.w2", "model.layers.11.block_sparse_moe.experts.223.w2", "model.layers.11.block_sparse_moe.experts.224.w2", "model.layers.11.block_sparse_moe.experts.225.w2", "model.layers.11.block_sparse_moe.experts.226.w2", "model.layers.11.block_sparse_moe.experts.227.w2", "model.layers.11.block_sparse_moe.experts.228.w2", "model.layers.11.block_sparse_moe.experts.229.w2", "model.layers.11.block_sparse_moe.experts.230.w2", "model.layers.11.block_sparse_moe.experts.231.w2", "model.layers.11.block_sparse_moe.experts.232.w2", "model.layers.11.block_sparse_moe.experts.233.w2", "model.layers.11.block_sparse_moe.experts.234.w2", "model.layers.11.block_sparse_moe.experts.235.w2", "model.layers.11.block_sparse_moe.experts.236.w2", "model.layers.11.block_sparse_moe.experts.237.w2", "model.layers.11.block_sparse_moe.experts.238.w2", "model.layers.11.block_sparse_moe.experts.239.w2", "model.layers.11.block_sparse_moe.experts.240.w2", "model.layers.11.block_sparse_moe.experts.241.w2", "model.layers.11.block_sparse_moe.experts.242.w2", "model.layers.11.block_sparse_moe.experts.243.w2", "model.layers.11.block_sparse_moe.experts.244.w2", "model.layers.11.block_sparse_moe.experts.245.w2", "model.layers.11.block_sparse_moe.experts.246.w2", "model.layers.11.block_sparse_moe.experts.247.w2", "model.layers.11.block_sparse_moe.experts.248.w2", "model.layers.11.block_sparse_moe.experts.249.w2", "model.layers.11.block_sparse_moe.experts.250.w2", "model.layers.11.block_sparse_moe.experts.251.w2", "model.layers.11.block_sparse_moe.experts.252.w2", "model.layers.11.block_sparse_moe.experts.253.w2", "model.layers.11.block_sparse_moe.experts.254.w2", "model.layers.11.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0007105279713869178, "dbits": 1207959552 } ] }, { "idx": 60, "layers": [ "model.layers.12.self_attn.q_proj" ], "candidates": [ { "dkld": 5.526598542927863e-05, "dbits": 18874368 } ] }, { "idx": 61, "layers": [ "model.layers.12.self_attn.k_proj", "model.layers.12.self_attn.v_proj" ], "candidates": [ { "dkld": 0.004773474857211107, "dbits": 6291456 } ] }, { "idx": 62, "layers": [ "model.layers.12.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00013028439134359082, "dbits": 18874368 } ] }, { "idx": 63, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w1", "model.layers.12.block_sparse_moe.experts.1.w1", "model.layers.12.block_sparse_moe.experts.2.w1", "model.layers.12.block_sparse_moe.experts.3.w1", "model.layers.12.block_sparse_moe.experts.4.w1", "model.layers.12.block_sparse_moe.experts.5.w1", "model.layers.12.block_sparse_moe.experts.6.w1", "model.layers.12.block_sparse_moe.experts.7.w1", "model.layers.12.block_sparse_moe.experts.8.w1", "model.layers.12.block_sparse_moe.experts.9.w1", "model.layers.12.block_sparse_moe.experts.10.w1", "model.layers.12.block_sparse_moe.experts.11.w1", "model.layers.12.block_sparse_moe.experts.12.w1", "model.layers.12.block_sparse_moe.experts.13.w1", "model.layers.12.block_sparse_moe.experts.14.w1", "model.layers.12.block_sparse_moe.experts.15.w1", "model.layers.12.block_sparse_moe.experts.16.w1", "model.layers.12.block_sparse_moe.experts.17.w1", "model.layers.12.block_sparse_moe.experts.18.w1", "model.layers.12.block_sparse_moe.experts.19.w1", "model.layers.12.block_sparse_moe.experts.20.w1", "model.layers.12.block_sparse_moe.experts.21.w1", "model.layers.12.block_sparse_moe.experts.22.w1", "model.layers.12.block_sparse_moe.experts.23.w1", "model.layers.12.block_sparse_moe.experts.24.w1", "model.layers.12.block_sparse_moe.experts.25.w1", "model.layers.12.block_sparse_moe.experts.26.w1", "model.layers.12.block_sparse_moe.experts.27.w1", "model.layers.12.block_sparse_moe.experts.28.w1", "model.layers.12.block_sparse_moe.experts.29.w1", "model.layers.12.block_sparse_moe.experts.30.w1", "model.layers.12.block_sparse_moe.experts.31.w1", "model.layers.12.block_sparse_moe.experts.32.w1", "model.layers.12.block_sparse_moe.experts.33.w1", "model.layers.12.block_sparse_moe.experts.34.w1", "model.layers.12.block_sparse_moe.experts.35.w1", "model.layers.12.block_sparse_moe.experts.36.w1", "model.layers.12.block_sparse_moe.experts.37.w1", "model.layers.12.block_sparse_moe.experts.38.w1", "model.layers.12.block_sparse_moe.experts.39.w1", "model.layers.12.block_sparse_moe.experts.40.w1", "model.layers.12.block_sparse_moe.experts.41.w1", "model.layers.12.block_sparse_moe.experts.42.w1", "model.layers.12.block_sparse_moe.experts.43.w1", "model.layers.12.block_sparse_moe.experts.44.w1", "model.layers.12.block_sparse_moe.experts.45.w1", "model.layers.12.block_sparse_moe.experts.46.w1", "model.layers.12.block_sparse_moe.experts.47.w1", "model.layers.12.block_sparse_moe.experts.48.w1", "model.layers.12.block_sparse_moe.experts.49.w1", "model.layers.12.block_sparse_moe.experts.50.w1", "model.layers.12.block_sparse_moe.experts.51.w1", "model.layers.12.block_sparse_moe.experts.52.w1", "model.layers.12.block_sparse_moe.experts.53.w1", "model.layers.12.block_sparse_moe.experts.54.w1", "model.layers.12.block_sparse_moe.experts.55.w1", "model.layers.12.block_sparse_moe.experts.56.w1", "model.layers.12.block_sparse_moe.experts.57.w1", "model.layers.12.block_sparse_moe.experts.58.w1", "model.layers.12.block_sparse_moe.experts.59.w1", "model.layers.12.block_sparse_moe.experts.60.w1", "model.layers.12.block_sparse_moe.experts.61.w1", "model.layers.12.block_sparse_moe.experts.62.w1", "model.layers.12.block_sparse_moe.experts.63.w1", "model.layers.12.block_sparse_moe.experts.64.w1", "model.layers.12.block_sparse_moe.experts.65.w1", "model.layers.12.block_sparse_moe.experts.66.w1", "model.layers.12.block_sparse_moe.experts.67.w1", "model.layers.12.block_sparse_moe.experts.68.w1", "model.layers.12.block_sparse_moe.experts.69.w1", "model.layers.12.block_sparse_moe.experts.70.w1", "model.layers.12.block_sparse_moe.experts.71.w1", "model.layers.12.block_sparse_moe.experts.72.w1", "model.layers.12.block_sparse_moe.experts.73.w1", "model.layers.12.block_sparse_moe.experts.74.w1", "model.layers.12.block_sparse_moe.experts.75.w1", "model.layers.12.block_sparse_moe.experts.76.w1", "model.layers.12.block_sparse_moe.experts.77.w1", "model.layers.12.block_sparse_moe.experts.78.w1", "model.layers.12.block_sparse_moe.experts.79.w1", "model.layers.12.block_sparse_moe.experts.80.w1", "model.layers.12.block_sparse_moe.experts.81.w1", "model.layers.12.block_sparse_moe.experts.82.w1", "model.layers.12.block_sparse_moe.experts.83.w1", "model.layers.12.block_sparse_moe.experts.84.w1", "model.layers.12.block_sparse_moe.experts.85.w1", "model.layers.12.block_sparse_moe.experts.86.w1", "model.layers.12.block_sparse_moe.experts.87.w1", "model.layers.12.block_sparse_moe.experts.88.w1", "model.layers.12.block_sparse_moe.experts.89.w1", "model.layers.12.block_sparse_moe.experts.90.w1", "model.layers.12.block_sparse_moe.experts.91.w1", "model.layers.12.block_sparse_moe.experts.92.w1", "model.layers.12.block_sparse_moe.experts.93.w1", "model.layers.12.block_sparse_moe.experts.94.w1", "model.layers.12.block_sparse_moe.experts.95.w1", "model.layers.12.block_sparse_moe.experts.96.w1", "model.layers.12.block_sparse_moe.experts.97.w1", "model.layers.12.block_sparse_moe.experts.98.w1", "model.layers.12.block_sparse_moe.experts.99.w1", "model.layers.12.block_sparse_moe.experts.100.w1", "model.layers.12.block_sparse_moe.experts.101.w1", "model.layers.12.block_sparse_moe.experts.102.w1", "model.layers.12.block_sparse_moe.experts.103.w1", "model.layers.12.block_sparse_moe.experts.104.w1", "model.layers.12.block_sparse_moe.experts.105.w1", "model.layers.12.block_sparse_moe.experts.106.w1", "model.layers.12.block_sparse_moe.experts.107.w1", "model.layers.12.block_sparse_moe.experts.108.w1", "model.layers.12.block_sparse_moe.experts.109.w1", "model.layers.12.block_sparse_moe.experts.110.w1", "model.layers.12.block_sparse_moe.experts.111.w1", "model.layers.12.block_sparse_moe.experts.112.w1", "model.layers.12.block_sparse_moe.experts.113.w1", "model.layers.12.block_sparse_moe.experts.114.w1", "model.layers.12.block_sparse_moe.experts.115.w1", "model.layers.12.block_sparse_moe.experts.116.w1", "model.layers.12.block_sparse_moe.experts.117.w1", "model.layers.12.block_sparse_moe.experts.118.w1", "model.layers.12.block_sparse_moe.experts.119.w1", "model.layers.12.block_sparse_moe.experts.120.w1", "model.layers.12.block_sparse_moe.experts.121.w1", "model.layers.12.block_sparse_moe.experts.122.w1", "model.layers.12.block_sparse_moe.experts.123.w1", "model.layers.12.block_sparse_moe.experts.124.w1", "model.layers.12.block_sparse_moe.experts.125.w1", "model.layers.12.block_sparse_moe.experts.126.w1", "model.layers.12.block_sparse_moe.experts.127.w1", "model.layers.12.block_sparse_moe.experts.128.w1", "model.layers.12.block_sparse_moe.experts.129.w1", "model.layers.12.block_sparse_moe.experts.130.w1", "model.layers.12.block_sparse_moe.experts.131.w1", "model.layers.12.block_sparse_moe.experts.132.w1", "model.layers.12.block_sparse_moe.experts.133.w1", "model.layers.12.block_sparse_moe.experts.134.w1", "model.layers.12.block_sparse_moe.experts.135.w1", "model.layers.12.block_sparse_moe.experts.136.w1", "model.layers.12.block_sparse_moe.experts.137.w1", "model.layers.12.block_sparse_moe.experts.138.w1", "model.layers.12.block_sparse_moe.experts.139.w1", "model.layers.12.block_sparse_moe.experts.140.w1", "model.layers.12.block_sparse_moe.experts.141.w1", "model.layers.12.block_sparse_moe.experts.142.w1", "model.layers.12.block_sparse_moe.experts.143.w1", "model.layers.12.block_sparse_moe.experts.144.w1", "model.layers.12.block_sparse_moe.experts.145.w1", "model.layers.12.block_sparse_moe.experts.146.w1", "model.layers.12.block_sparse_moe.experts.147.w1", "model.layers.12.block_sparse_moe.experts.148.w1", "model.layers.12.block_sparse_moe.experts.149.w1", "model.layers.12.block_sparse_moe.experts.150.w1", "model.layers.12.block_sparse_moe.experts.151.w1", "model.layers.12.block_sparse_moe.experts.152.w1", "model.layers.12.block_sparse_moe.experts.153.w1", "model.layers.12.block_sparse_moe.experts.154.w1", "model.layers.12.block_sparse_moe.experts.155.w1", "model.layers.12.block_sparse_moe.experts.156.w1", "model.layers.12.block_sparse_moe.experts.157.w1", "model.layers.12.block_sparse_moe.experts.158.w1", "model.layers.12.block_sparse_moe.experts.159.w1", "model.layers.12.block_sparse_moe.experts.160.w1", "model.layers.12.block_sparse_moe.experts.161.w1", "model.layers.12.block_sparse_moe.experts.162.w1", "model.layers.12.block_sparse_moe.experts.163.w1", "model.layers.12.block_sparse_moe.experts.164.w1", "model.layers.12.block_sparse_moe.experts.165.w1", "model.layers.12.block_sparse_moe.experts.166.w1", "model.layers.12.block_sparse_moe.experts.167.w1", "model.layers.12.block_sparse_moe.experts.168.w1", "model.layers.12.block_sparse_moe.experts.169.w1", "model.layers.12.block_sparse_moe.experts.170.w1", "model.layers.12.block_sparse_moe.experts.171.w1", "model.layers.12.block_sparse_moe.experts.172.w1", "model.layers.12.block_sparse_moe.experts.173.w1", "model.layers.12.block_sparse_moe.experts.174.w1", "model.layers.12.block_sparse_moe.experts.175.w1", "model.layers.12.block_sparse_moe.experts.176.w1", "model.layers.12.block_sparse_moe.experts.177.w1", "model.layers.12.block_sparse_moe.experts.178.w1", "model.layers.12.block_sparse_moe.experts.179.w1", "model.layers.12.block_sparse_moe.experts.180.w1", "model.layers.12.block_sparse_moe.experts.181.w1", "model.layers.12.block_sparse_moe.experts.182.w1", "model.layers.12.block_sparse_moe.experts.183.w1", "model.layers.12.block_sparse_moe.experts.184.w1", "model.layers.12.block_sparse_moe.experts.185.w1", "model.layers.12.block_sparse_moe.experts.186.w1", "model.layers.12.block_sparse_moe.experts.187.w1", "model.layers.12.block_sparse_moe.experts.188.w1", "model.layers.12.block_sparse_moe.experts.189.w1", "model.layers.12.block_sparse_moe.experts.190.w1", "model.layers.12.block_sparse_moe.experts.191.w1", "model.layers.12.block_sparse_moe.experts.192.w1", "model.layers.12.block_sparse_moe.experts.193.w1", "model.layers.12.block_sparse_moe.experts.194.w1", "model.layers.12.block_sparse_moe.experts.195.w1", "model.layers.12.block_sparse_moe.experts.196.w1", "model.layers.12.block_sparse_moe.experts.197.w1", "model.layers.12.block_sparse_moe.experts.198.w1", "model.layers.12.block_sparse_moe.experts.199.w1", "model.layers.12.block_sparse_moe.experts.200.w1", "model.layers.12.block_sparse_moe.experts.201.w1", "model.layers.12.block_sparse_moe.experts.202.w1", "model.layers.12.block_sparse_moe.experts.203.w1", "model.layers.12.block_sparse_moe.experts.204.w1", "model.layers.12.block_sparse_moe.experts.205.w1", "model.layers.12.block_sparse_moe.experts.206.w1", "model.layers.12.block_sparse_moe.experts.207.w1", "model.layers.12.block_sparse_moe.experts.208.w1", "model.layers.12.block_sparse_moe.experts.209.w1", "model.layers.12.block_sparse_moe.experts.210.w1", "model.layers.12.block_sparse_moe.experts.211.w1", "model.layers.12.block_sparse_moe.experts.212.w1", "model.layers.12.block_sparse_moe.experts.213.w1", "model.layers.12.block_sparse_moe.experts.214.w1", "model.layers.12.block_sparse_moe.experts.215.w1", "model.layers.12.block_sparse_moe.experts.216.w1", "model.layers.12.block_sparse_moe.experts.217.w1", "model.layers.12.block_sparse_moe.experts.218.w1", "model.layers.12.block_sparse_moe.experts.219.w1", "model.layers.12.block_sparse_moe.experts.220.w1", "model.layers.12.block_sparse_moe.experts.221.w1", "model.layers.12.block_sparse_moe.experts.222.w1", "model.layers.12.block_sparse_moe.experts.223.w1", "model.layers.12.block_sparse_moe.experts.224.w1", "model.layers.12.block_sparse_moe.experts.225.w1", "model.layers.12.block_sparse_moe.experts.226.w1", "model.layers.12.block_sparse_moe.experts.227.w1", "model.layers.12.block_sparse_moe.experts.228.w1", "model.layers.12.block_sparse_moe.experts.229.w1", "model.layers.12.block_sparse_moe.experts.230.w1", "model.layers.12.block_sparse_moe.experts.231.w1", "model.layers.12.block_sparse_moe.experts.232.w1", "model.layers.12.block_sparse_moe.experts.233.w1", "model.layers.12.block_sparse_moe.experts.234.w1", "model.layers.12.block_sparse_moe.experts.235.w1", "model.layers.12.block_sparse_moe.experts.236.w1", "model.layers.12.block_sparse_moe.experts.237.w1", "model.layers.12.block_sparse_moe.experts.238.w1", "model.layers.12.block_sparse_moe.experts.239.w1", "model.layers.12.block_sparse_moe.experts.240.w1", "model.layers.12.block_sparse_moe.experts.241.w1", "model.layers.12.block_sparse_moe.experts.242.w1", "model.layers.12.block_sparse_moe.experts.243.w1", "model.layers.12.block_sparse_moe.experts.244.w1", "model.layers.12.block_sparse_moe.experts.245.w1", "model.layers.12.block_sparse_moe.experts.246.w1", "model.layers.12.block_sparse_moe.experts.247.w1", "model.layers.12.block_sparse_moe.experts.248.w1", "model.layers.12.block_sparse_moe.experts.249.w1", "model.layers.12.block_sparse_moe.experts.250.w1", "model.layers.12.block_sparse_moe.experts.251.w1", "model.layers.12.block_sparse_moe.experts.252.w1", "model.layers.12.block_sparse_moe.experts.253.w1", "model.layers.12.block_sparse_moe.experts.254.w1", "model.layers.12.block_sparse_moe.experts.255.w1", "model.layers.12.block_sparse_moe.experts.0.w3", "model.layers.12.block_sparse_moe.experts.1.w3", "model.layers.12.block_sparse_moe.experts.2.w3", "model.layers.12.block_sparse_moe.experts.3.w3", "model.layers.12.block_sparse_moe.experts.4.w3", "model.layers.12.block_sparse_moe.experts.5.w3", "model.layers.12.block_sparse_moe.experts.6.w3", "model.layers.12.block_sparse_moe.experts.7.w3", "model.layers.12.block_sparse_moe.experts.8.w3", "model.layers.12.block_sparse_moe.experts.9.w3", "model.layers.12.block_sparse_moe.experts.10.w3", "model.layers.12.block_sparse_moe.experts.11.w3", "model.layers.12.block_sparse_moe.experts.12.w3", "model.layers.12.block_sparse_moe.experts.13.w3", "model.layers.12.block_sparse_moe.experts.14.w3", "model.layers.12.block_sparse_moe.experts.15.w3", "model.layers.12.block_sparse_moe.experts.16.w3", "model.layers.12.block_sparse_moe.experts.17.w3", "model.layers.12.block_sparse_moe.experts.18.w3", "model.layers.12.block_sparse_moe.experts.19.w3", "model.layers.12.block_sparse_moe.experts.20.w3", "model.layers.12.block_sparse_moe.experts.21.w3", "model.layers.12.block_sparse_moe.experts.22.w3", "model.layers.12.block_sparse_moe.experts.23.w3", "model.layers.12.block_sparse_moe.experts.24.w3", "model.layers.12.block_sparse_moe.experts.25.w3", "model.layers.12.block_sparse_moe.experts.26.w3", "model.layers.12.block_sparse_moe.experts.27.w3", "model.layers.12.block_sparse_moe.experts.28.w3", "model.layers.12.block_sparse_moe.experts.29.w3", "model.layers.12.block_sparse_moe.experts.30.w3", "model.layers.12.block_sparse_moe.experts.31.w3", "model.layers.12.block_sparse_moe.experts.32.w3", "model.layers.12.block_sparse_moe.experts.33.w3", "model.layers.12.block_sparse_moe.experts.34.w3", "model.layers.12.block_sparse_moe.experts.35.w3", "model.layers.12.block_sparse_moe.experts.36.w3", "model.layers.12.block_sparse_moe.experts.37.w3", "model.layers.12.block_sparse_moe.experts.38.w3", "model.layers.12.block_sparse_moe.experts.39.w3", "model.layers.12.block_sparse_moe.experts.40.w3", "model.layers.12.block_sparse_moe.experts.41.w3", "model.layers.12.block_sparse_moe.experts.42.w3", "model.layers.12.block_sparse_moe.experts.43.w3", "model.layers.12.block_sparse_moe.experts.44.w3", "model.layers.12.block_sparse_moe.experts.45.w3", "model.layers.12.block_sparse_moe.experts.46.w3", "model.layers.12.block_sparse_moe.experts.47.w3", "model.layers.12.block_sparse_moe.experts.48.w3", "model.layers.12.block_sparse_moe.experts.49.w3", "model.layers.12.block_sparse_moe.experts.50.w3", "model.layers.12.block_sparse_moe.experts.51.w3", "model.layers.12.block_sparse_moe.experts.52.w3", "model.layers.12.block_sparse_moe.experts.53.w3", "model.layers.12.block_sparse_moe.experts.54.w3", "model.layers.12.block_sparse_moe.experts.55.w3", "model.layers.12.block_sparse_moe.experts.56.w3", "model.layers.12.block_sparse_moe.experts.57.w3", "model.layers.12.block_sparse_moe.experts.58.w3", "model.layers.12.block_sparse_moe.experts.59.w3", "model.layers.12.block_sparse_moe.experts.60.w3", "model.layers.12.block_sparse_moe.experts.61.w3", "model.layers.12.block_sparse_moe.experts.62.w3", "model.layers.12.block_sparse_moe.experts.63.w3", "model.layers.12.block_sparse_moe.experts.64.w3", "model.layers.12.block_sparse_moe.experts.65.w3", "model.layers.12.block_sparse_moe.experts.66.w3", "model.layers.12.block_sparse_moe.experts.67.w3", "model.layers.12.block_sparse_moe.experts.68.w3", "model.layers.12.block_sparse_moe.experts.69.w3", "model.layers.12.block_sparse_moe.experts.70.w3", "model.layers.12.block_sparse_moe.experts.71.w3", "model.layers.12.block_sparse_moe.experts.72.w3", "model.layers.12.block_sparse_moe.experts.73.w3", "model.layers.12.block_sparse_moe.experts.74.w3", "model.layers.12.block_sparse_moe.experts.75.w3", "model.layers.12.block_sparse_moe.experts.76.w3", "model.layers.12.block_sparse_moe.experts.77.w3", "model.layers.12.block_sparse_moe.experts.78.w3", "model.layers.12.block_sparse_moe.experts.79.w3", "model.layers.12.block_sparse_moe.experts.80.w3", "model.layers.12.block_sparse_moe.experts.81.w3", "model.layers.12.block_sparse_moe.experts.82.w3", "model.layers.12.block_sparse_moe.experts.83.w3", "model.layers.12.block_sparse_moe.experts.84.w3", "model.layers.12.block_sparse_moe.experts.85.w3", "model.layers.12.block_sparse_moe.experts.86.w3", "model.layers.12.block_sparse_moe.experts.87.w3", "model.layers.12.block_sparse_moe.experts.88.w3", "model.layers.12.block_sparse_moe.experts.89.w3", "model.layers.12.block_sparse_moe.experts.90.w3", "model.layers.12.block_sparse_moe.experts.91.w3", "model.layers.12.block_sparse_moe.experts.92.w3", "model.layers.12.block_sparse_moe.experts.93.w3", "model.layers.12.block_sparse_moe.experts.94.w3", "model.layers.12.block_sparse_moe.experts.95.w3", "model.layers.12.block_sparse_moe.experts.96.w3", "model.layers.12.block_sparse_moe.experts.97.w3", "model.layers.12.block_sparse_moe.experts.98.w3", "model.layers.12.block_sparse_moe.experts.99.w3", "model.layers.12.block_sparse_moe.experts.100.w3", "model.layers.12.block_sparse_moe.experts.101.w3", "model.layers.12.block_sparse_moe.experts.102.w3", "model.layers.12.block_sparse_moe.experts.103.w3", "model.layers.12.block_sparse_moe.experts.104.w3", "model.layers.12.block_sparse_moe.experts.105.w3", "model.layers.12.block_sparse_moe.experts.106.w3", "model.layers.12.block_sparse_moe.experts.107.w3", "model.layers.12.block_sparse_moe.experts.108.w3", "model.layers.12.block_sparse_moe.experts.109.w3", "model.layers.12.block_sparse_moe.experts.110.w3", "model.layers.12.block_sparse_moe.experts.111.w3", "model.layers.12.block_sparse_moe.experts.112.w3", "model.layers.12.block_sparse_moe.experts.113.w3", "model.layers.12.block_sparse_moe.experts.114.w3", "model.layers.12.block_sparse_moe.experts.115.w3", "model.layers.12.block_sparse_moe.experts.116.w3", "model.layers.12.block_sparse_moe.experts.117.w3", "model.layers.12.block_sparse_moe.experts.118.w3", "model.layers.12.block_sparse_moe.experts.119.w3", "model.layers.12.block_sparse_moe.experts.120.w3", "model.layers.12.block_sparse_moe.experts.121.w3", "model.layers.12.block_sparse_moe.experts.122.w3", "model.layers.12.block_sparse_moe.experts.123.w3", "model.layers.12.block_sparse_moe.experts.124.w3", "model.layers.12.block_sparse_moe.experts.125.w3", "model.layers.12.block_sparse_moe.experts.126.w3", "model.layers.12.block_sparse_moe.experts.127.w3", "model.layers.12.block_sparse_moe.experts.128.w3", "model.layers.12.block_sparse_moe.experts.129.w3", "model.layers.12.block_sparse_moe.experts.130.w3", "model.layers.12.block_sparse_moe.experts.131.w3", "model.layers.12.block_sparse_moe.experts.132.w3", "model.layers.12.block_sparse_moe.experts.133.w3", "model.layers.12.block_sparse_moe.experts.134.w3", "model.layers.12.block_sparse_moe.experts.135.w3", "model.layers.12.block_sparse_moe.experts.136.w3", "model.layers.12.block_sparse_moe.experts.137.w3", "model.layers.12.block_sparse_moe.experts.138.w3", "model.layers.12.block_sparse_moe.experts.139.w3", "model.layers.12.block_sparse_moe.experts.140.w3", "model.layers.12.block_sparse_moe.experts.141.w3", "model.layers.12.block_sparse_moe.experts.142.w3", "model.layers.12.block_sparse_moe.experts.143.w3", "model.layers.12.block_sparse_moe.experts.144.w3", "model.layers.12.block_sparse_moe.experts.145.w3", "model.layers.12.block_sparse_moe.experts.146.w3", "model.layers.12.block_sparse_moe.experts.147.w3", "model.layers.12.block_sparse_moe.experts.148.w3", "model.layers.12.block_sparse_moe.experts.149.w3", "model.layers.12.block_sparse_moe.experts.150.w3", "model.layers.12.block_sparse_moe.experts.151.w3", "model.layers.12.block_sparse_moe.experts.152.w3", "model.layers.12.block_sparse_moe.experts.153.w3", "model.layers.12.block_sparse_moe.experts.154.w3", "model.layers.12.block_sparse_moe.experts.155.w3", "model.layers.12.block_sparse_moe.experts.156.w3", "model.layers.12.block_sparse_moe.experts.157.w3", "model.layers.12.block_sparse_moe.experts.158.w3", "model.layers.12.block_sparse_moe.experts.159.w3", "model.layers.12.block_sparse_moe.experts.160.w3", "model.layers.12.block_sparse_moe.experts.161.w3", "model.layers.12.block_sparse_moe.experts.162.w3", "model.layers.12.block_sparse_moe.experts.163.w3", "model.layers.12.block_sparse_moe.experts.164.w3", "model.layers.12.block_sparse_moe.experts.165.w3", "model.layers.12.block_sparse_moe.experts.166.w3", "model.layers.12.block_sparse_moe.experts.167.w3", "model.layers.12.block_sparse_moe.experts.168.w3", "model.layers.12.block_sparse_moe.experts.169.w3", "model.layers.12.block_sparse_moe.experts.170.w3", "model.layers.12.block_sparse_moe.experts.171.w3", "model.layers.12.block_sparse_moe.experts.172.w3", "model.layers.12.block_sparse_moe.experts.173.w3", "model.layers.12.block_sparse_moe.experts.174.w3", "model.layers.12.block_sparse_moe.experts.175.w3", "model.layers.12.block_sparse_moe.experts.176.w3", "model.layers.12.block_sparse_moe.experts.177.w3", "model.layers.12.block_sparse_moe.experts.178.w3", "model.layers.12.block_sparse_moe.experts.179.w3", "model.layers.12.block_sparse_moe.experts.180.w3", "model.layers.12.block_sparse_moe.experts.181.w3", "model.layers.12.block_sparse_moe.experts.182.w3", "model.layers.12.block_sparse_moe.experts.183.w3", "model.layers.12.block_sparse_moe.experts.184.w3", "model.layers.12.block_sparse_moe.experts.185.w3", "model.layers.12.block_sparse_moe.experts.186.w3", "model.layers.12.block_sparse_moe.experts.187.w3", "model.layers.12.block_sparse_moe.experts.188.w3", "model.layers.12.block_sparse_moe.experts.189.w3", "model.layers.12.block_sparse_moe.experts.190.w3", "model.layers.12.block_sparse_moe.experts.191.w3", "model.layers.12.block_sparse_moe.experts.192.w3", "model.layers.12.block_sparse_moe.experts.193.w3", "model.layers.12.block_sparse_moe.experts.194.w3", "model.layers.12.block_sparse_moe.experts.195.w3", "model.layers.12.block_sparse_moe.experts.196.w3", "model.layers.12.block_sparse_moe.experts.197.w3", "model.layers.12.block_sparse_moe.experts.198.w3", "model.layers.12.block_sparse_moe.experts.199.w3", "model.layers.12.block_sparse_moe.experts.200.w3", "model.layers.12.block_sparse_moe.experts.201.w3", "model.layers.12.block_sparse_moe.experts.202.w3", "model.layers.12.block_sparse_moe.experts.203.w3", "model.layers.12.block_sparse_moe.experts.204.w3", "model.layers.12.block_sparse_moe.experts.205.w3", "model.layers.12.block_sparse_moe.experts.206.w3", "model.layers.12.block_sparse_moe.experts.207.w3", "model.layers.12.block_sparse_moe.experts.208.w3", "model.layers.12.block_sparse_moe.experts.209.w3", "model.layers.12.block_sparse_moe.experts.210.w3", "model.layers.12.block_sparse_moe.experts.211.w3", "model.layers.12.block_sparse_moe.experts.212.w3", "model.layers.12.block_sparse_moe.experts.213.w3", "model.layers.12.block_sparse_moe.experts.214.w3", "model.layers.12.block_sparse_moe.experts.215.w3", "model.layers.12.block_sparse_moe.experts.216.w3", "model.layers.12.block_sparse_moe.experts.217.w3", "model.layers.12.block_sparse_moe.experts.218.w3", "model.layers.12.block_sparse_moe.experts.219.w3", "model.layers.12.block_sparse_moe.experts.220.w3", "model.layers.12.block_sparse_moe.experts.221.w3", "model.layers.12.block_sparse_moe.experts.222.w3", "model.layers.12.block_sparse_moe.experts.223.w3", "model.layers.12.block_sparse_moe.experts.224.w3", "model.layers.12.block_sparse_moe.experts.225.w3", "model.layers.12.block_sparse_moe.experts.226.w3", "model.layers.12.block_sparse_moe.experts.227.w3", "model.layers.12.block_sparse_moe.experts.228.w3", "model.layers.12.block_sparse_moe.experts.229.w3", "model.layers.12.block_sparse_moe.experts.230.w3", "model.layers.12.block_sparse_moe.experts.231.w3", "model.layers.12.block_sparse_moe.experts.232.w3", "model.layers.12.block_sparse_moe.experts.233.w3", "model.layers.12.block_sparse_moe.experts.234.w3", "model.layers.12.block_sparse_moe.experts.235.w3", "model.layers.12.block_sparse_moe.experts.236.w3", "model.layers.12.block_sparse_moe.experts.237.w3", "model.layers.12.block_sparse_moe.experts.238.w3", "model.layers.12.block_sparse_moe.experts.239.w3", "model.layers.12.block_sparse_moe.experts.240.w3", "model.layers.12.block_sparse_moe.experts.241.w3", "model.layers.12.block_sparse_moe.experts.242.w3", "model.layers.12.block_sparse_moe.experts.243.w3", "model.layers.12.block_sparse_moe.experts.244.w3", "model.layers.12.block_sparse_moe.experts.245.w3", "model.layers.12.block_sparse_moe.experts.246.w3", "model.layers.12.block_sparse_moe.experts.247.w3", "model.layers.12.block_sparse_moe.experts.248.w3", "model.layers.12.block_sparse_moe.experts.249.w3", "model.layers.12.block_sparse_moe.experts.250.w3", "model.layers.12.block_sparse_moe.experts.251.w3", "model.layers.12.block_sparse_moe.experts.252.w3", "model.layers.12.block_sparse_moe.experts.253.w3", "model.layers.12.block_sparse_moe.experts.254.w3", "model.layers.12.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0007515551522374181, "dbits": 2415919104 } ] }, { "idx": 64, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w2", "model.layers.12.block_sparse_moe.experts.1.w2", "model.layers.12.block_sparse_moe.experts.2.w2", "model.layers.12.block_sparse_moe.experts.3.w2", "model.layers.12.block_sparse_moe.experts.4.w2", "model.layers.12.block_sparse_moe.experts.5.w2", "model.layers.12.block_sparse_moe.experts.6.w2", "model.layers.12.block_sparse_moe.experts.7.w2", "model.layers.12.block_sparse_moe.experts.8.w2", "model.layers.12.block_sparse_moe.experts.9.w2", "model.layers.12.block_sparse_moe.experts.10.w2", "model.layers.12.block_sparse_moe.experts.11.w2", "model.layers.12.block_sparse_moe.experts.12.w2", "model.layers.12.block_sparse_moe.experts.13.w2", "model.layers.12.block_sparse_moe.experts.14.w2", "model.layers.12.block_sparse_moe.experts.15.w2", "model.layers.12.block_sparse_moe.experts.16.w2", "model.layers.12.block_sparse_moe.experts.17.w2", "model.layers.12.block_sparse_moe.experts.18.w2", "model.layers.12.block_sparse_moe.experts.19.w2", "model.layers.12.block_sparse_moe.experts.20.w2", "model.layers.12.block_sparse_moe.experts.21.w2", "model.layers.12.block_sparse_moe.experts.22.w2", "model.layers.12.block_sparse_moe.experts.23.w2", "model.layers.12.block_sparse_moe.experts.24.w2", "model.layers.12.block_sparse_moe.experts.25.w2", "model.layers.12.block_sparse_moe.experts.26.w2", "model.layers.12.block_sparse_moe.experts.27.w2", "model.layers.12.block_sparse_moe.experts.28.w2", "model.layers.12.block_sparse_moe.experts.29.w2", "model.layers.12.block_sparse_moe.experts.30.w2", "model.layers.12.block_sparse_moe.experts.31.w2", "model.layers.12.block_sparse_moe.experts.32.w2", "model.layers.12.block_sparse_moe.experts.33.w2", "model.layers.12.block_sparse_moe.experts.34.w2", "model.layers.12.block_sparse_moe.experts.35.w2", "model.layers.12.block_sparse_moe.experts.36.w2", "model.layers.12.block_sparse_moe.experts.37.w2", "model.layers.12.block_sparse_moe.experts.38.w2", "model.layers.12.block_sparse_moe.experts.39.w2", "model.layers.12.block_sparse_moe.experts.40.w2", "model.layers.12.block_sparse_moe.experts.41.w2", "model.layers.12.block_sparse_moe.experts.42.w2", "model.layers.12.block_sparse_moe.experts.43.w2", "model.layers.12.block_sparse_moe.experts.44.w2", "model.layers.12.block_sparse_moe.experts.45.w2", "model.layers.12.block_sparse_moe.experts.46.w2", "model.layers.12.block_sparse_moe.experts.47.w2", "model.layers.12.block_sparse_moe.experts.48.w2", "model.layers.12.block_sparse_moe.experts.49.w2", "model.layers.12.block_sparse_moe.experts.50.w2", "model.layers.12.block_sparse_moe.experts.51.w2", "model.layers.12.block_sparse_moe.experts.52.w2", "model.layers.12.block_sparse_moe.experts.53.w2", "model.layers.12.block_sparse_moe.experts.54.w2", "model.layers.12.block_sparse_moe.experts.55.w2", "model.layers.12.block_sparse_moe.experts.56.w2", "model.layers.12.block_sparse_moe.experts.57.w2", "model.layers.12.block_sparse_moe.experts.58.w2", "model.layers.12.block_sparse_moe.experts.59.w2", "model.layers.12.block_sparse_moe.experts.60.w2", "model.layers.12.block_sparse_moe.experts.61.w2", "model.layers.12.block_sparse_moe.experts.62.w2", "model.layers.12.block_sparse_moe.experts.63.w2", "model.layers.12.block_sparse_moe.experts.64.w2", "model.layers.12.block_sparse_moe.experts.65.w2", "model.layers.12.block_sparse_moe.experts.66.w2", "model.layers.12.block_sparse_moe.experts.67.w2", "model.layers.12.block_sparse_moe.experts.68.w2", "model.layers.12.block_sparse_moe.experts.69.w2", "model.layers.12.block_sparse_moe.experts.70.w2", "model.layers.12.block_sparse_moe.experts.71.w2", "model.layers.12.block_sparse_moe.experts.72.w2", "model.layers.12.block_sparse_moe.experts.73.w2", "model.layers.12.block_sparse_moe.experts.74.w2", "model.layers.12.block_sparse_moe.experts.75.w2", "model.layers.12.block_sparse_moe.experts.76.w2", "model.layers.12.block_sparse_moe.experts.77.w2", "model.layers.12.block_sparse_moe.experts.78.w2", "model.layers.12.block_sparse_moe.experts.79.w2", "model.layers.12.block_sparse_moe.experts.80.w2", "model.layers.12.block_sparse_moe.experts.81.w2", "model.layers.12.block_sparse_moe.experts.82.w2", "model.layers.12.block_sparse_moe.experts.83.w2", "model.layers.12.block_sparse_moe.experts.84.w2", "model.layers.12.block_sparse_moe.experts.85.w2", "model.layers.12.block_sparse_moe.experts.86.w2", "model.layers.12.block_sparse_moe.experts.87.w2", "model.layers.12.block_sparse_moe.experts.88.w2", "model.layers.12.block_sparse_moe.experts.89.w2", "model.layers.12.block_sparse_moe.experts.90.w2", "model.layers.12.block_sparse_moe.experts.91.w2", "model.layers.12.block_sparse_moe.experts.92.w2", "model.layers.12.block_sparse_moe.experts.93.w2", "model.layers.12.block_sparse_moe.experts.94.w2", "model.layers.12.block_sparse_moe.experts.95.w2", "model.layers.12.block_sparse_moe.experts.96.w2", "model.layers.12.block_sparse_moe.experts.97.w2", "model.layers.12.block_sparse_moe.experts.98.w2", "model.layers.12.block_sparse_moe.experts.99.w2", "model.layers.12.block_sparse_moe.experts.100.w2", "model.layers.12.block_sparse_moe.experts.101.w2", "model.layers.12.block_sparse_moe.experts.102.w2", "model.layers.12.block_sparse_moe.experts.103.w2", "model.layers.12.block_sparse_moe.experts.104.w2", "model.layers.12.block_sparse_moe.experts.105.w2", "model.layers.12.block_sparse_moe.experts.106.w2", "model.layers.12.block_sparse_moe.experts.107.w2", "model.layers.12.block_sparse_moe.experts.108.w2", "model.layers.12.block_sparse_moe.experts.109.w2", "model.layers.12.block_sparse_moe.experts.110.w2", "model.layers.12.block_sparse_moe.experts.111.w2", "model.layers.12.block_sparse_moe.experts.112.w2", "model.layers.12.block_sparse_moe.experts.113.w2", "model.layers.12.block_sparse_moe.experts.114.w2", "model.layers.12.block_sparse_moe.experts.115.w2", "model.layers.12.block_sparse_moe.experts.116.w2", "model.layers.12.block_sparse_moe.experts.117.w2", "model.layers.12.block_sparse_moe.experts.118.w2", "model.layers.12.block_sparse_moe.experts.119.w2", "model.layers.12.block_sparse_moe.experts.120.w2", "model.layers.12.block_sparse_moe.experts.121.w2", "model.layers.12.block_sparse_moe.experts.122.w2", "model.layers.12.block_sparse_moe.experts.123.w2", "model.layers.12.block_sparse_moe.experts.124.w2", "model.layers.12.block_sparse_moe.experts.125.w2", "model.layers.12.block_sparse_moe.experts.126.w2", "model.layers.12.block_sparse_moe.experts.127.w2", "model.layers.12.block_sparse_moe.experts.128.w2", "model.layers.12.block_sparse_moe.experts.129.w2", "model.layers.12.block_sparse_moe.experts.130.w2", "model.layers.12.block_sparse_moe.experts.131.w2", "model.layers.12.block_sparse_moe.experts.132.w2", "model.layers.12.block_sparse_moe.experts.133.w2", "model.layers.12.block_sparse_moe.experts.134.w2", "model.layers.12.block_sparse_moe.experts.135.w2", "model.layers.12.block_sparse_moe.experts.136.w2", "model.layers.12.block_sparse_moe.experts.137.w2", "model.layers.12.block_sparse_moe.experts.138.w2", "model.layers.12.block_sparse_moe.experts.139.w2", "model.layers.12.block_sparse_moe.experts.140.w2", "model.layers.12.block_sparse_moe.experts.141.w2", "model.layers.12.block_sparse_moe.experts.142.w2", "model.layers.12.block_sparse_moe.experts.143.w2", "model.layers.12.block_sparse_moe.experts.144.w2", "model.layers.12.block_sparse_moe.experts.145.w2", "model.layers.12.block_sparse_moe.experts.146.w2", "model.layers.12.block_sparse_moe.experts.147.w2", "model.layers.12.block_sparse_moe.experts.148.w2", "model.layers.12.block_sparse_moe.experts.149.w2", "model.layers.12.block_sparse_moe.experts.150.w2", "model.layers.12.block_sparse_moe.experts.151.w2", "model.layers.12.block_sparse_moe.experts.152.w2", "model.layers.12.block_sparse_moe.experts.153.w2", "model.layers.12.block_sparse_moe.experts.154.w2", "model.layers.12.block_sparse_moe.experts.155.w2", "model.layers.12.block_sparse_moe.experts.156.w2", "model.layers.12.block_sparse_moe.experts.157.w2", "model.layers.12.block_sparse_moe.experts.158.w2", "model.layers.12.block_sparse_moe.experts.159.w2", "model.layers.12.block_sparse_moe.experts.160.w2", "model.layers.12.block_sparse_moe.experts.161.w2", "model.layers.12.block_sparse_moe.experts.162.w2", "model.layers.12.block_sparse_moe.experts.163.w2", "model.layers.12.block_sparse_moe.experts.164.w2", "model.layers.12.block_sparse_moe.experts.165.w2", "model.layers.12.block_sparse_moe.experts.166.w2", "model.layers.12.block_sparse_moe.experts.167.w2", "model.layers.12.block_sparse_moe.experts.168.w2", "model.layers.12.block_sparse_moe.experts.169.w2", "model.layers.12.block_sparse_moe.experts.170.w2", "model.layers.12.block_sparse_moe.experts.171.w2", "model.layers.12.block_sparse_moe.experts.172.w2", "model.layers.12.block_sparse_moe.experts.173.w2", "model.layers.12.block_sparse_moe.experts.174.w2", "model.layers.12.block_sparse_moe.experts.175.w2", "model.layers.12.block_sparse_moe.experts.176.w2", "model.layers.12.block_sparse_moe.experts.177.w2", "model.layers.12.block_sparse_moe.experts.178.w2", "model.layers.12.block_sparse_moe.experts.179.w2", "model.layers.12.block_sparse_moe.experts.180.w2", "model.layers.12.block_sparse_moe.experts.181.w2", "model.layers.12.block_sparse_moe.experts.182.w2", "model.layers.12.block_sparse_moe.experts.183.w2", "model.layers.12.block_sparse_moe.experts.184.w2", "model.layers.12.block_sparse_moe.experts.185.w2", "model.layers.12.block_sparse_moe.experts.186.w2", "model.layers.12.block_sparse_moe.experts.187.w2", "model.layers.12.block_sparse_moe.experts.188.w2", "model.layers.12.block_sparse_moe.experts.189.w2", "model.layers.12.block_sparse_moe.experts.190.w2", "model.layers.12.block_sparse_moe.experts.191.w2", "model.layers.12.block_sparse_moe.experts.192.w2", "model.layers.12.block_sparse_moe.experts.193.w2", "model.layers.12.block_sparse_moe.experts.194.w2", "model.layers.12.block_sparse_moe.experts.195.w2", "model.layers.12.block_sparse_moe.experts.196.w2", "model.layers.12.block_sparse_moe.experts.197.w2", "model.layers.12.block_sparse_moe.experts.198.w2", "model.layers.12.block_sparse_moe.experts.199.w2", "model.layers.12.block_sparse_moe.experts.200.w2", "model.layers.12.block_sparse_moe.experts.201.w2", "model.layers.12.block_sparse_moe.experts.202.w2", "model.layers.12.block_sparse_moe.experts.203.w2", "model.layers.12.block_sparse_moe.experts.204.w2", "model.layers.12.block_sparse_moe.experts.205.w2", "model.layers.12.block_sparse_moe.experts.206.w2", "model.layers.12.block_sparse_moe.experts.207.w2", "model.layers.12.block_sparse_moe.experts.208.w2", "model.layers.12.block_sparse_moe.experts.209.w2", "model.layers.12.block_sparse_moe.experts.210.w2", "model.layers.12.block_sparse_moe.experts.211.w2", "model.layers.12.block_sparse_moe.experts.212.w2", "model.layers.12.block_sparse_moe.experts.213.w2", "model.layers.12.block_sparse_moe.experts.214.w2", "model.layers.12.block_sparse_moe.experts.215.w2", "model.layers.12.block_sparse_moe.experts.216.w2", "model.layers.12.block_sparse_moe.experts.217.w2", "model.layers.12.block_sparse_moe.experts.218.w2", "model.layers.12.block_sparse_moe.experts.219.w2", "model.layers.12.block_sparse_moe.experts.220.w2", "model.layers.12.block_sparse_moe.experts.221.w2", "model.layers.12.block_sparse_moe.experts.222.w2", "model.layers.12.block_sparse_moe.experts.223.w2", "model.layers.12.block_sparse_moe.experts.224.w2", "model.layers.12.block_sparse_moe.experts.225.w2", "model.layers.12.block_sparse_moe.experts.226.w2", "model.layers.12.block_sparse_moe.experts.227.w2", "model.layers.12.block_sparse_moe.experts.228.w2", "model.layers.12.block_sparse_moe.experts.229.w2", "model.layers.12.block_sparse_moe.experts.230.w2", "model.layers.12.block_sparse_moe.experts.231.w2", "model.layers.12.block_sparse_moe.experts.232.w2", "model.layers.12.block_sparse_moe.experts.233.w2", "model.layers.12.block_sparse_moe.experts.234.w2", "model.layers.12.block_sparse_moe.experts.235.w2", "model.layers.12.block_sparse_moe.experts.236.w2", "model.layers.12.block_sparse_moe.experts.237.w2", "model.layers.12.block_sparse_moe.experts.238.w2", "model.layers.12.block_sparse_moe.experts.239.w2", "model.layers.12.block_sparse_moe.experts.240.w2", "model.layers.12.block_sparse_moe.experts.241.w2", "model.layers.12.block_sparse_moe.experts.242.w2", "model.layers.12.block_sparse_moe.experts.243.w2", "model.layers.12.block_sparse_moe.experts.244.w2", "model.layers.12.block_sparse_moe.experts.245.w2", "model.layers.12.block_sparse_moe.experts.246.w2", "model.layers.12.block_sparse_moe.experts.247.w2", "model.layers.12.block_sparse_moe.experts.248.w2", "model.layers.12.block_sparse_moe.experts.249.w2", "model.layers.12.block_sparse_moe.experts.250.w2", "model.layers.12.block_sparse_moe.experts.251.w2", "model.layers.12.block_sparse_moe.experts.252.w2", "model.layers.12.block_sparse_moe.experts.253.w2", "model.layers.12.block_sparse_moe.experts.254.w2", "model.layers.12.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00045679099857806604, "dbits": 1207959552 } ] }, { "idx": 65, "layers": [ "model.layers.13.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00014283731579779746, "dbits": 18874368 } ] }, { "idx": 66, "layers": [ "model.layers.13.self_attn.k_proj", "model.layers.13.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0016716597601771355, "dbits": 6291456 } ] }, { "idx": 67, "layers": [ "model.layers.13.self_attn.o_proj" ], "candidates": [ { "dkld": 0.004124370403587818, "dbits": 18874368 } ] }, { "idx": 68, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w1", "model.layers.13.block_sparse_moe.experts.1.w1", "model.layers.13.block_sparse_moe.experts.2.w1", "model.layers.13.block_sparse_moe.experts.3.w1", "model.layers.13.block_sparse_moe.experts.4.w1", "model.layers.13.block_sparse_moe.experts.5.w1", "model.layers.13.block_sparse_moe.experts.6.w1", "model.layers.13.block_sparse_moe.experts.7.w1", "model.layers.13.block_sparse_moe.experts.8.w1", "model.layers.13.block_sparse_moe.experts.9.w1", "model.layers.13.block_sparse_moe.experts.10.w1", "model.layers.13.block_sparse_moe.experts.11.w1", "model.layers.13.block_sparse_moe.experts.12.w1", "model.layers.13.block_sparse_moe.experts.13.w1", "model.layers.13.block_sparse_moe.experts.14.w1", "model.layers.13.block_sparse_moe.experts.15.w1", "model.layers.13.block_sparse_moe.experts.16.w1", "model.layers.13.block_sparse_moe.experts.17.w1", "model.layers.13.block_sparse_moe.experts.18.w1", "model.layers.13.block_sparse_moe.experts.19.w1", "model.layers.13.block_sparse_moe.experts.20.w1", "model.layers.13.block_sparse_moe.experts.21.w1", "model.layers.13.block_sparse_moe.experts.22.w1", "model.layers.13.block_sparse_moe.experts.23.w1", "model.layers.13.block_sparse_moe.experts.24.w1", "model.layers.13.block_sparse_moe.experts.25.w1", "model.layers.13.block_sparse_moe.experts.26.w1", "model.layers.13.block_sparse_moe.experts.27.w1", "model.layers.13.block_sparse_moe.experts.28.w1", "model.layers.13.block_sparse_moe.experts.29.w1", "model.layers.13.block_sparse_moe.experts.30.w1", "model.layers.13.block_sparse_moe.experts.31.w1", "model.layers.13.block_sparse_moe.experts.32.w1", "model.layers.13.block_sparse_moe.experts.33.w1", "model.layers.13.block_sparse_moe.experts.34.w1", "model.layers.13.block_sparse_moe.experts.35.w1", "model.layers.13.block_sparse_moe.experts.36.w1", "model.layers.13.block_sparse_moe.experts.37.w1", "model.layers.13.block_sparse_moe.experts.38.w1", "model.layers.13.block_sparse_moe.experts.39.w1", "model.layers.13.block_sparse_moe.experts.40.w1", "model.layers.13.block_sparse_moe.experts.41.w1", "model.layers.13.block_sparse_moe.experts.42.w1", "model.layers.13.block_sparse_moe.experts.43.w1", "model.layers.13.block_sparse_moe.experts.44.w1", "model.layers.13.block_sparse_moe.experts.45.w1", "model.layers.13.block_sparse_moe.experts.46.w1", "model.layers.13.block_sparse_moe.experts.47.w1", "model.layers.13.block_sparse_moe.experts.48.w1", "model.layers.13.block_sparse_moe.experts.49.w1", "model.layers.13.block_sparse_moe.experts.50.w1", "model.layers.13.block_sparse_moe.experts.51.w1", "model.layers.13.block_sparse_moe.experts.52.w1", "model.layers.13.block_sparse_moe.experts.53.w1", "model.layers.13.block_sparse_moe.experts.54.w1", "model.layers.13.block_sparse_moe.experts.55.w1", "model.layers.13.block_sparse_moe.experts.56.w1", "model.layers.13.block_sparse_moe.experts.57.w1", "model.layers.13.block_sparse_moe.experts.58.w1", "model.layers.13.block_sparse_moe.experts.59.w1", "model.layers.13.block_sparse_moe.experts.60.w1", "model.layers.13.block_sparse_moe.experts.61.w1", "model.layers.13.block_sparse_moe.experts.62.w1", "model.layers.13.block_sparse_moe.experts.63.w1", "model.layers.13.block_sparse_moe.experts.64.w1", "model.layers.13.block_sparse_moe.experts.65.w1", "model.layers.13.block_sparse_moe.experts.66.w1", "model.layers.13.block_sparse_moe.experts.67.w1", "model.layers.13.block_sparse_moe.experts.68.w1", "model.layers.13.block_sparse_moe.experts.69.w1", "model.layers.13.block_sparse_moe.experts.70.w1", "model.layers.13.block_sparse_moe.experts.71.w1", "model.layers.13.block_sparse_moe.experts.72.w1", "model.layers.13.block_sparse_moe.experts.73.w1", "model.layers.13.block_sparse_moe.experts.74.w1", "model.layers.13.block_sparse_moe.experts.75.w1", "model.layers.13.block_sparse_moe.experts.76.w1", "model.layers.13.block_sparse_moe.experts.77.w1", "model.layers.13.block_sparse_moe.experts.78.w1", "model.layers.13.block_sparse_moe.experts.79.w1", "model.layers.13.block_sparse_moe.experts.80.w1", "model.layers.13.block_sparse_moe.experts.81.w1", "model.layers.13.block_sparse_moe.experts.82.w1", "model.layers.13.block_sparse_moe.experts.83.w1", "model.layers.13.block_sparse_moe.experts.84.w1", "model.layers.13.block_sparse_moe.experts.85.w1", "model.layers.13.block_sparse_moe.experts.86.w1", "model.layers.13.block_sparse_moe.experts.87.w1", "model.layers.13.block_sparse_moe.experts.88.w1", "model.layers.13.block_sparse_moe.experts.89.w1", "model.layers.13.block_sparse_moe.experts.90.w1", "model.layers.13.block_sparse_moe.experts.91.w1", "model.layers.13.block_sparse_moe.experts.92.w1", "model.layers.13.block_sparse_moe.experts.93.w1", "model.layers.13.block_sparse_moe.experts.94.w1", "model.layers.13.block_sparse_moe.experts.95.w1", "model.layers.13.block_sparse_moe.experts.96.w1", "model.layers.13.block_sparse_moe.experts.97.w1", "model.layers.13.block_sparse_moe.experts.98.w1", "model.layers.13.block_sparse_moe.experts.99.w1", "model.layers.13.block_sparse_moe.experts.100.w1", "model.layers.13.block_sparse_moe.experts.101.w1", "model.layers.13.block_sparse_moe.experts.102.w1", "model.layers.13.block_sparse_moe.experts.103.w1", "model.layers.13.block_sparse_moe.experts.104.w1", "model.layers.13.block_sparse_moe.experts.105.w1", "model.layers.13.block_sparse_moe.experts.106.w1", "model.layers.13.block_sparse_moe.experts.107.w1", "model.layers.13.block_sparse_moe.experts.108.w1", "model.layers.13.block_sparse_moe.experts.109.w1", "model.layers.13.block_sparse_moe.experts.110.w1", "model.layers.13.block_sparse_moe.experts.111.w1", "model.layers.13.block_sparse_moe.experts.112.w1", "model.layers.13.block_sparse_moe.experts.113.w1", "model.layers.13.block_sparse_moe.experts.114.w1", "model.layers.13.block_sparse_moe.experts.115.w1", "model.layers.13.block_sparse_moe.experts.116.w1", "model.layers.13.block_sparse_moe.experts.117.w1", "model.layers.13.block_sparse_moe.experts.118.w1", "model.layers.13.block_sparse_moe.experts.119.w1", "model.layers.13.block_sparse_moe.experts.120.w1", "model.layers.13.block_sparse_moe.experts.121.w1", "model.layers.13.block_sparse_moe.experts.122.w1", "model.layers.13.block_sparse_moe.experts.123.w1", "model.layers.13.block_sparse_moe.experts.124.w1", "model.layers.13.block_sparse_moe.experts.125.w1", "model.layers.13.block_sparse_moe.experts.126.w1", "model.layers.13.block_sparse_moe.experts.127.w1", "model.layers.13.block_sparse_moe.experts.128.w1", "model.layers.13.block_sparse_moe.experts.129.w1", "model.layers.13.block_sparse_moe.experts.130.w1", "model.layers.13.block_sparse_moe.experts.131.w1", "model.layers.13.block_sparse_moe.experts.132.w1", "model.layers.13.block_sparse_moe.experts.133.w1", "model.layers.13.block_sparse_moe.experts.134.w1", "model.layers.13.block_sparse_moe.experts.135.w1", "model.layers.13.block_sparse_moe.experts.136.w1", "model.layers.13.block_sparse_moe.experts.137.w1", "model.layers.13.block_sparse_moe.experts.138.w1", "model.layers.13.block_sparse_moe.experts.139.w1", "model.layers.13.block_sparse_moe.experts.140.w1", "model.layers.13.block_sparse_moe.experts.141.w1", "model.layers.13.block_sparse_moe.experts.142.w1", "model.layers.13.block_sparse_moe.experts.143.w1", "model.layers.13.block_sparse_moe.experts.144.w1", "model.layers.13.block_sparse_moe.experts.145.w1", "model.layers.13.block_sparse_moe.experts.146.w1", "model.layers.13.block_sparse_moe.experts.147.w1", "model.layers.13.block_sparse_moe.experts.148.w1", "model.layers.13.block_sparse_moe.experts.149.w1", "model.layers.13.block_sparse_moe.experts.150.w1", "model.layers.13.block_sparse_moe.experts.151.w1", "model.layers.13.block_sparse_moe.experts.152.w1", "model.layers.13.block_sparse_moe.experts.153.w1", "model.layers.13.block_sparse_moe.experts.154.w1", "model.layers.13.block_sparse_moe.experts.155.w1", "model.layers.13.block_sparse_moe.experts.156.w1", "model.layers.13.block_sparse_moe.experts.157.w1", "model.layers.13.block_sparse_moe.experts.158.w1", "model.layers.13.block_sparse_moe.experts.159.w1", "model.layers.13.block_sparse_moe.experts.160.w1", "model.layers.13.block_sparse_moe.experts.161.w1", "model.layers.13.block_sparse_moe.experts.162.w1", "model.layers.13.block_sparse_moe.experts.163.w1", "model.layers.13.block_sparse_moe.experts.164.w1", "model.layers.13.block_sparse_moe.experts.165.w1", "model.layers.13.block_sparse_moe.experts.166.w1", "model.layers.13.block_sparse_moe.experts.167.w1", "model.layers.13.block_sparse_moe.experts.168.w1", "model.layers.13.block_sparse_moe.experts.169.w1", "model.layers.13.block_sparse_moe.experts.170.w1", "model.layers.13.block_sparse_moe.experts.171.w1", "model.layers.13.block_sparse_moe.experts.172.w1", "model.layers.13.block_sparse_moe.experts.173.w1", "model.layers.13.block_sparse_moe.experts.174.w1", "model.layers.13.block_sparse_moe.experts.175.w1", "model.layers.13.block_sparse_moe.experts.176.w1", "model.layers.13.block_sparse_moe.experts.177.w1", "model.layers.13.block_sparse_moe.experts.178.w1", "model.layers.13.block_sparse_moe.experts.179.w1", "model.layers.13.block_sparse_moe.experts.180.w1", "model.layers.13.block_sparse_moe.experts.181.w1", "model.layers.13.block_sparse_moe.experts.182.w1", "model.layers.13.block_sparse_moe.experts.183.w1", "model.layers.13.block_sparse_moe.experts.184.w1", "model.layers.13.block_sparse_moe.experts.185.w1", "model.layers.13.block_sparse_moe.experts.186.w1", "model.layers.13.block_sparse_moe.experts.187.w1", "model.layers.13.block_sparse_moe.experts.188.w1", "model.layers.13.block_sparse_moe.experts.189.w1", "model.layers.13.block_sparse_moe.experts.190.w1", "model.layers.13.block_sparse_moe.experts.191.w1", "model.layers.13.block_sparse_moe.experts.192.w1", "model.layers.13.block_sparse_moe.experts.193.w1", "model.layers.13.block_sparse_moe.experts.194.w1", "model.layers.13.block_sparse_moe.experts.195.w1", "model.layers.13.block_sparse_moe.experts.196.w1", "model.layers.13.block_sparse_moe.experts.197.w1", "model.layers.13.block_sparse_moe.experts.198.w1", "model.layers.13.block_sparse_moe.experts.199.w1", "model.layers.13.block_sparse_moe.experts.200.w1", "model.layers.13.block_sparse_moe.experts.201.w1", "model.layers.13.block_sparse_moe.experts.202.w1", "model.layers.13.block_sparse_moe.experts.203.w1", "model.layers.13.block_sparse_moe.experts.204.w1", "model.layers.13.block_sparse_moe.experts.205.w1", "model.layers.13.block_sparse_moe.experts.206.w1", "model.layers.13.block_sparse_moe.experts.207.w1", "model.layers.13.block_sparse_moe.experts.208.w1", "model.layers.13.block_sparse_moe.experts.209.w1", "model.layers.13.block_sparse_moe.experts.210.w1", "model.layers.13.block_sparse_moe.experts.211.w1", "model.layers.13.block_sparse_moe.experts.212.w1", "model.layers.13.block_sparse_moe.experts.213.w1", "model.layers.13.block_sparse_moe.experts.214.w1", "model.layers.13.block_sparse_moe.experts.215.w1", "model.layers.13.block_sparse_moe.experts.216.w1", "model.layers.13.block_sparse_moe.experts.217.w1", "model.layers.13.block_sparse_moe.experts.218.w1", "model.layers.13.block_sparse_moe.experts.219.w1", "model.layers.13.block_sparse_moe.experts.220.w1", "model.layers.13.block_sparse_moe.experts.221.w1", "model.layers.13.block_sparse_moe.experts.222.w1", "model.layers.13.block_sparse_moe.experts.223.w1", "model.layers.13.block_sparse_moe.experts.224.w1", "model.layers.13.block_sparse_moe.experts.225.w1", "model.layers.13.block_sparse_moe.experts.226.w1", "model.layers.13.block_sparse_moe.experts.227.w1", "model.layers.13.block_sparse_moe.experts.228.w1", "model.layers.13.block_sparse_moe.experts.229.w1", "model.layers.13.block_sparse_moe.experts.230.w1", "model.layers.13.block_sparse_moe.experts.231.w1", "model.layers.13.block_sparse_moe.experts.232.w1", "model.layers.13.block_sparse_moe.experts.233.w1", "model.layers.13.block_sparse_moe.experts.234.w1", "model.layers.13.block_sparse_moe.experts.235.w1", "model.layers.13.block_sparse_moe.experts.236.w1", "model.layers.13.block_sparse_moe.experts.237.w1", "model.layers.13.block_sparse_moe.experts.238.w1", "model.layers.13.block_sparse_moe.experts.239.w1", "model.layers.13.block_sparse_moe.experts.240.w1", "model.layers.13.block_sparse_moe.experts.241.w1", "model.layers.13.block_sparse_moe.experts.242.w1", "model.layers.13.block_sparse_moe.experts.243.w1", "model.layers.13.block_sparse_moe.experts.244.w1", "model.layers.13.block_sparse_moe.experts.245.w1", "model.layers.13.block_sparse_moe.experts.246.w1", "model.layers.13.block_sparse_moe.experts.247.w1", "model.layers.13.block_sparse_moe.experts.248.w1", "model.layers.13.block_sparse_moe.experts.249.w1", "model.layers.13.block_sparse_moe.experts.250.w1", "model.layers.13.block_sparse_moe.experts.251.w1", "model.layers.13.block_sparse_moe.experts.252.w1", "model.layers.13.block_sparse_moe.experts.253.w1", "model.layers.13.block_sparse_moe.experts.254.w1", "model.layers.13.block_sparse_moe.experts.255.w1", "model.layers.13.block_sparse_moe.experts.0.w3", "model.layers.13.block_sparse_moe.experts.1.w3", "model.layers.13.block_sparse_moe.experts.2.w3", "model.layers.13.block_sparse_moe.experts.3.w3", "model.layers.13.block_sparse_moe.experts.4.w3", "model.layers.13.block_sparse_moe.experts.5.w3", "model.layers.13.block_sparse_moe.experts.6.w3", "model.layers.13.block_sparse_moe.experts.7.w3", "model.layers.13.block_sparse_moe.experts.8.w3", "model.layers.13.block_sparse_moe.experts.9.w3", "model.layers.13.block_sparse_moe.experts.10.w3", "model.layers.13.block_sparse_moe.experts.11.w3", "model.layers.13.block_sparse_moe.experts.12.w3", "model.layers.13.block_sparse_moe.experts.13.w3", "model.layers.13.block_sparse_moe.experts.14.w3", "model.layers.13.block_sparse_moe.experts.15.w3", "model.layers.13.block_sparse_moe.experts.16.w3", "model.layers.13.block_sparse_moe.experts.17.w3", "model.layers.13.block_sparse_moe.experts.18.w3", "model.layers.13.block_sparse_moe.experts.19.w3", "model.layers.13.block_sparse_moe.experts.20.w3", "model.layers.13.block_sparse_moe.experts.21.w3", "model.layers.13.block_sparse_moe.experts.22.w3", "model.layers.13.block_sparse_moe.experts.23.w3", "model.layers.13.block_sparse_moe.experts.24.w3", "model.layers.13.block_sparse_moe.experts.25.w3", "model.layers.13.block_sparse_moe.experts.26.w3", "model.layers.13.block_sparse_moe.experts.27.w3", "model.layers.13.block_sparse_moe.experts.28.w3", "model.layers.13.block_sparse_moe.experts.29.w3", "model.layers.13.block_sparse_moe.experts.30.w3", "model.layers.13.block_sparse_moe.experts.31.w3", "model.layers.13.block_sparse_moe.experts.32.w3", "model.layers.13.block_sparse_moe.experts.33.w3", "model.layers.13.block_sparse_moe.experts.34.w3", "model.layers.13.block_sparse_moe.experts.35.w3", "model.layers.13.block_sparse_moe.experts.36.w3", "model.layers.13.block_sparse_moe.experts.37.w3", "model.layers.13.block_sparse_moe.experts.38.w3", "model.layers.13.block_sparse_moe.experts.39.w3", "model.layers.13.block_sparse_moe.experts.40.w3", "model.layers.13.block_sparse_moe.experts.41.w3", "model.layers.13.block_sparse_moe.experts.42.w3", "model.layers.13.block_sparse_moe.experts.43.w3", "model.layers.13.block_sparse_moe.experts.44.w3", "model.layers.13.block_sparse_moe.experts.45.w3", "model.layers.13.block_sparse_moe.experts.46.w3", "model.layers.13.block_sparse_moe.experts.47.w3", "model.layers.13.block_sparse_moe.experts.48.w3", "model.layers.13.block_sparse_moe.experts.49.w3", "model.layers.13.block_sparse_moe.experts.50.w3", "model.layers.13.block_sparse_moe.experts.51.w3", "model.layers.13.block_sparse_moe.experts.52.w3", "model.layers.13.block_sparse_moe.experts.53.w3", "model.layers.13.block_sparse_moe.experts.54.w3", "model.layers.13.block_sparse_moe.experts.55.w3", "model.layers.13.block_sparse_moe.experts.56.w3", "model.layers.13.block_sparse_moe.experts.57.w3", "model.layers.13.block_sparse_moe.experts.58.w3", "model.layers.13.block_sparse_moe.experts.59.w3", "model.layers.13.block_sparse_moe.experts.60.w3", "model.layers.13.block_sparse_moe.experts.61.w3", "model.layers.13.block_sparse_moe.experts.62.w3", "model.layers.13.block_sparse_moe.experts.63.w3", "model.layers.13.block_sparse_moe.experts.64.w3", "model.layers.13.block_sparse_moe.experts.65.w3", "model.layers.13.block_sparse_moe.experts.66.w3", "model.layers.13.block_sparse_moe.experts.67.w3", "model.layers.13.block_sparse_moe.experts.68.w3", "model.layers.13.block_sparse_moe.experts.69.w3", "model.layers.13.block_sparse_moe.experts.70.w3", "model.layers.13.block_sparse_moe.experts.71.w3", "model.layers.13.block_sparse_moe.experts.72.w3", "model.layers.13.block_sparse_moe.experts.73.w3", "model.layers.13.block_sparse_moe.experts.74.w3", "model.layers.13.block_sparse_moe.experts.75.w3", "model.layers.13.block_sparse_moe.experts.76.w3", "model.layers.13.block_sparse_moe.experts.77.w3", "model.layers.13.block_sparse_moe.experts.78.w3", "model.layers.13.block_sparse_moe.experts.79.w3", "model.layers.13.block_sparse_moe.experts.80.w3", "model.layers.13.block_sparse_moe.experts.81.w3", "model.layers.13.block_sparse_moe.experts.82.w3", "model.layers.13.block_sparse_moe.experts.83.w3", "model.layers.13.block_sparse_moe.experts.84.w3", "model.layers.13.block_sparse_moe.experts.85.w3", "model.layers.13.block_sparse_moe.experts.86.w3", "model.layers.13.block_sparse_moe.experts.87.w3", "model.layers.13.block_sparse_moe.experts.88.w3", "model.layers.13.block_sparse_moe.experts.89.w3", "model.layers.13.block_sparse_moe.experts.90.w3", "model.layers.13.block_sparse_moe.experts.91.w3", "model.layers.13.block_sparse_moe.experts.92.w3", "model.layers.13.block_sparse_moe.experts.93.w3", "model.layers.13.block_sparse_moe.experts.94.w3", "model.layers.13.block_sparse_moe.experts.95.w3", "model.layers.13.block_sparse_moe.experts.96.w3", "model.layers.13.block_sparse_moe.experts.97.w3", "model.layers.13.block_sparse_moe.experts.98.w3", "model.layers.13.block_sparse_moe.experts.99.w3", "model.layers.13.block_sparse_moe.experts.100.w3", "model.layers.13.block_sparse_moe.experts.101.w3", "model.layers.13.block_sparse_moe.experts.102.w3", "model.layers.13.block_sparse_moe.experts.103.w3", "model.layers.13.block_sparse_moe.experts.104.w3", "model.layers.13.block_sparse_moe.experts.105.w3", "model.layers.13.block_sparse_moe.experts.106.w3", "model.layers.13.block_sparse_moe.experts.107.w3", "model.layers.13.block_sparse_moe.experts.108.w3", "model.layers.13.block_sparse_moe.experts.109.w3", "model.layers.13.block_sparse_moe.experts.110.w3", "model.layers.13.block_sparse_moe.experts.111.w3", "model.layers.13.block_sparse_moe.experts.112.w3", "model.layers.13.block_sparse_moe.experts.113.w3", "model.layers.13.block_sparse_moe.experts.114.w3", "model.layers.13.block_sparse_moe.experts.115.w3", "model.layers.13.block_sparse_moe.experts.116.w3", "model.layers.13.block_sparse_moe.experts.117.w3", "model.layers.13.block_sparse_moe.experts.118.w3", "model.layers.13.block_sparse_moe.experts.119.w3", "model.layers.13.block_sparse_moe.experts.120.w3", "model.layers.13.block_sparse_moe.experts.121.w3", "model.layers.13.block_sparse_moe.experts.122.w3", "model.layers.13.block_sparse_moe.experts.123.w3", "model.layers.13.block_sparse_moe.experts.124.w3", "model.layers.13.block_sparse_moe.experts.125.w3", "model.layers.13.block_sparse_moe.experts.126.w3", "model.layers.13.block_sparse_moe.experts.127.w3", "model.layers.13.block_sparse_moe.experts.128.w3", "model.layers.13.block_sparse_moe.experts.129.w3", "model.layers.13.block_sparse_moe.experts.130.w3", "model.layers.13.block_sparse_moe.experts.131.w3", "model.layers.13.block_sparse_moe.experts.132.w3", "model.layers.13.block_sparse_moe.experts.133.w3", "model.layers.13.block_sparse_moe.experts.134.w3", "model.layers.13.block_sparse_moe.experts.135.w3", "model.layers.13.block_sparse_moe.experts.136.w3", "model.layers.13.block_sparse_moe.experts.137.w3", "model.layers.13.block_sparse_moe.experts.138.w3", "model.layers.13.block_sparse_moe.experts.139.w3", "model.layers.13.block_sparse_moe.experts.140.w3", "model.layers.13.block_sparse_moe.experts.141.w3", "model.layers.13.block_sparse_moe.experts.142.w3", "model.layers.13.block_sparse_moe.experts.143.w3", "model.layers.13.block_sparse_moe.experts.144.w3", "model.layers.13.block_sparse_moe.experts.145.w3", "model.layers.13.block_sparse_moe.experts.146.w3", "model.layers.13.block_sparse_moe.experts.147.w3", "model.layers.13.block_sparse_moe.experts.148.w3", "model.layers.13.block_sparse_moe.experts.149.w3", "model.layers.13.block_sparse_moe.experts.150.w3", "model.layers.13.block_sparse_moe.experts.151.w3", "model.layers.13.block_sparse_moe.experts.152.w3", "model.layers.13.block_sparse_moe.experts.153.w3", "model.layers.13.block_sparse_moe.experts.154.w3", "model.layers.13.block_sparse_moe.experts.155.w3", "model.layers.13.block_sparse_moe.experts.156.w3", "model.layers.13.block_sparse_moe.experts.157.w3", "model.layers.13.block_sparse_moe.experts.158.w3", "model.layers.13.block_sparse_moe.experts.159.w3", "model.layers.13.block_sparse_moe.experts.160.w3", "model.layers.13.block_sparse_moe.experts.161.w3", "model.layers.13.block_sparse_moe.experts.162.w3", "model.layers.13.block_sparse_moe.experts.163.w3", "model.layers.13.block_sparse_moe.experts.164.w3", "model.layers.13.block_sparse_moe.experts.165.w3", "model.layers.13.block_sparse_moe.experts.166.w3", "model.layers.13.block_sparse_moe.experts.167.w3", "model.layers.13.block_sparse_moe.experts.168.w3", "model.layers.13.block_sparse_moe.experts.169.w3", "model.layers.13.block_sparse_moe.experts.170.w3", "model.layers.13.block_sparse_moe.experts.171.w3", "model.layers.13.block_sparse_moe.experts.172.w3", "model.layers.13.block_sparse_moe.experts.173.w3", "model.layers.13.block_sparse_moe.experts.174.w3", "model.layers.13.block_sparse_moe.experts.175.w3", "model.layers.13.block_sparse_moe.experts.176.w3", "model.layers.13.block_sparse_moe.experts.177.w3", "model.layers.13.block_sparse_moe.experts.178.w3", "model.layers.13.block_sparse_moe.experts.179.w3", "model.layers.13.block_sparse_moe.experts.180.w3", "model.layers.13.block_sparse_moe.experts.181.w3", "model.layers.13.block_sparse_moe.experts.182.w3", "model.layers.13.block_sparse_moe.experts.183.w3", "model.layers.13.block_sparse_moe.experts.184.w3", "model.layers.13.block_sparse_moe.experts.185.w3", "model.layers.13.block_sparse_moe.experts.186.w3", "model.layers.13.block_sparse_moe.experts.187.w3", "model.layers.13.block_sparse_moe.experts.188.w3", "model.layers.13.block_sparse_moe.experts.189.w3", "model.layers.13.block_sparse_moe.experts.190.w3", "model.layers.13.block_sparse_moe.experts.191.w3", "model.layers.13.block_sparse_moe.experts.192.w3", "model.layers.13.block_sparse_moe.experts.193.w3", "model.layers.13.block_sparse_moe.experts.194.w3", "model.layers.13.block_sparse_moe.experts.195.w3", "model.layers.13.block_sparse_moe.experts.196.w3", "model.layers.13.block_sparse_moe.experts.197.w3", "model.layers.13.block_sparse_moe.experts.198.w3", "model.layers.13.block_sparse_moe.experts.199.w3", "model.layers.13.block_sparse_moe.experts.200.w3", "model.layers.13.block_sparse_moe.experts.201.w3", "model.layers.13.block_sparse_moe.experts.202.w3", "model.layers.13.block_sparse_moe.experts.203.w3", "model.layers.13.block_sparse_moe.experts.204.w3", "model.layers.13.block_sparse_moe.experts.205.w3", "model.layers.13.block_sparse_moe.experts.206.w3", "model.layers.13.block_sparse_moe.experts.207.w3", "model.layers.13.block_sparse_moe.experts.208.w3", "model.layers.13.block_sparse_moe.experts.209.w3", "model.layers.13.block_sparse_moe.experts.210.w3", "model.layers.13.block_sparse_moe.experts.211.w3", "model.layers.13.block_sparse_moe.experts.212.w3", "model.layers.13.block_sparse_moe.experts.213.w3", "model.layers.13.block_sparse_moe.experts.214.w3", "model.layers.13.block_sparse_moe.experts.215.w3", "model.layers.13.block_sparse_moe.experts.216.w3", "model.layers.13.block_sparse_moe.experts.217.w3", "model.layers.13.block_sparse_moe.experts.218.w3", "model.layers.13.block_sparse_moe.experts.219.w3", "model.layers.13.block_sparse_moe.experts.220.w3", "model.layers.13.block_sparse_moe.experts.221.w3", "model.layers.13.block_sparse_moe.experts.222.w3", "model.layers.13.block_sparse_moe.experts.223.w3", "model.layers.13.block_sparse_moe.experts.224.w3", "model.layers.13.block_sparse_moe.experts.225.w3", "model.layers.13.block_sparse_moe.experts.226.w3", "model.layers.13.block_sparse_moe.experts.227.w3", "model.layers.13.block_sparse_moe.experts.228.w3", "model.layers.13.block_sparse_moe.experts.229.w3", "model.layers.13.block_sparse_moe.experts.230.w3", "model.layers.13.block_sparse_moe.experts.231.w3", "model.layers.13.block_sparse_moe.experts.232.w3", "model.layers.13.block_sparse_moe.experts.233.w3", "model.layers.13.block_sparse_moe.experts.234.w3", "model.layers.13.block_sparse_moe.experts.235.w3", "model.layers.13.block_sparse_moe.experts.236.w3", "model.layers.13.block_sparse_moe.experts.237.w3", "model.layers.13.block_sparse_moe.experts.238.w3", "model.layers.13.block_sparse_moe.experts.239.w3", "model.layers.13.block_sparse_moe.experts.240.w3", "model.layers.13.block_sparse_moe.experts.241.w3", "model.layers.13.block_sparse_moe.experts.242.w3", "model.layers.13.block_sparse_moe.experts.243.w3", "model.layers.13.block_sparse_moe.experts.244.w3", "model.layers.13.block_sparse_moe.experts.245.w3", "model.layers.13.block_sparse_moe.experts.246.w3", "model.layers.13.block_sparse_moe.experts.247.w3", "model.layers.13.block_sparse_moe.experts.248.w3", "model.layers.13.block_sparse_moe.experts.249.w3", "model.layers.13.block_sparse_moe.experts.250.w3", "model.layers.13.block_sparse_moe.experts.251.w3", "model.layers.13.block_sparse_moe.experts.252.w3", "model.layers.13.block_sparse_moe.experts.253.w3", "model.layers.13.block_sparse_moe.experts.254.w3", "model.layers.13.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0006610749289393397, "dbits": 2415919104 } ] }, { "idx": 69, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w2", "model.layers.13.block_sparse_moe.experts.1.w2", "model.layers.13.block_sparse_moe.experts.2.w2", "model.layers.13.block_sparse_moe.experts.3.w2", "model.layers.13.block_sparse_moe.experts.4.w2", "model.layers.13.block_sparse_moe.experts.5.w2", "model.layers.13.block_sparse_moe.experts.6.w2", "model.layers.13.block_sparse_moe.experts.7.w2", "model.layers.13.block_sparse_moe.experts.8.w2", "model.layers.13.block_sparse_moe.experts.9.w2", "model.layers.13.block_sparse_moe.experts.10.w2", "model.layers.13.block_sparse_moe.experts.11.w2", "model.layers.13.block_sparse_moe.experts.12.w2", "model.layers.13.block_sparse_moe.experts.13.w2", "model.layers.13.block_sparse_moe.experts.14.w2", "model.layers.13.block_sparse_moe.experts.15.w2", "model.layers.13.block_sparse_moe.experts.16.w2", "model.layers.13.block_sparse_moe.experts.17.w2", "model.layers.13.block_sparse_moe.experts.18.w2", "model.layers.13.block_sparse_moe.experts.19.w2", "model.layers.13.block_sparse_moe.experts.20.w2", "model.layers.13.block_sparse_moe.experts.21.w2", "model.layers.13.block_sparse_moe.experts.22.w2", "model.layers.13.block_sparse_moe.experts.23.w2", "model.layers.13.block_sparse_moe.experts.24.w2", "model.layers.13.block_sparse_moe.experts.25.w2", "model.layers.13.block_sparse_moe.experts.26.w2", "model.layers.13.block_sparse_moe.experts.27.w2", "model.layers.13.block_sparse_moe.experts.28.w2", "model.layers.13.block_sparse_moe.experts.29.w2", "model.layers.13.block_sparse_moe.experts.30.w2", "model.layers.13.block_sparse_moe.experts.31.w2", "model.layers.13.block_sparse_moe.experts.32.w2", "model.layers.13.block_sparse_moe.experts.33.w2", "model.layers.13.block_sparse_moe.experts.34.w2", "model.layers.13.block_sparse_moe.experts.35.w2", "model.layers.13.block_sparse_moe.experts.36.w2", "model.layers.13.block_sparse_moe.experts.37.w2", "model.layers.13.block_sparse_moe.experts.38.w2", "model.layers.13.block_sparse_moe.experts.39.w2", "model.layers.13.block_sparse_moe.experts.40.w2", "model.layers.13.block_sparse_moe.experts.41.w2", "model.layers.13.block_sparse_moe.experts.42.w2", "model.layers.13.block_sparse_moe.experts.43.w2", "model.layers.13.block_sparse_moe.experts.44.w2", "model.layers.13.block_sparse_moe.experts.45.w2", "model.layers.13.block_sparse_moe.experts.46.w2", "model.layers.13.block_sparse_moe.experts.47.w2", "model.layers.13.block_sparse_moe.experts.48.w2", "model.layers.13.block_sparse_moe.experts.49.w2", "model.layers.13.block_sparse_moe.experts.50.w2", "model.layers.13.block_sparse_moe.experts.51.w2", "model.layers.13.block_sparse_moe.experts.52.w2", "model.layers.13.block_sparse_moe.experts.53.w2", "model.layers.13.block_sparse_moe.experts.54.w2", "model.layers.13.block_sparse_moe.experts.55.w2", "model.layers.13.block_sparse_moe.experts.56.w2", "model.layers.13.block_sparse_moe.experts.57.w2", "model.layers.13.block_sparse_moe.experts.58.w2", "model.layers.13.block_sparse_moe.experts.59.w2", "model.layers.13.block_sparse_moe.experts.60.w2", "model.layers.13.block_sparse_moe.experts.61.w2", "model.layers.13.block_sparse_moe.experts.62.w2", "model.layers.13.block_sparse_moe.experts.63.w2", "model.layers.13.block_sparse_moe.experts.64.w2", "model.layers.13.block_sparse_moe.experts.65.w2", "model.layers.13.block_sparse_moe.experts.66.w2", "model.layers.13.block_sparse_moe.experts.67.w2", "model.layers.13.block_sparse_moe.experts.68.w2", "model.layers.13.block_sparse_moe.experts.69.w2", "model.layers.13.block_sparse_moe.experts.70.w2", "model.layers.13.block_sparse_moe.experts.71.w2", "model.layers.13.block_sparse_moe.experts.72.w2", "model.layers.13.block_sparse_moe.experts.73.w2", "model.layers.13.block_sparse_moe.experts.74.w2", "model.layers.13.block_sparse_moe.experts.75.w2", "model.layers.13.block_sparse_moe.experts.76.w2", "model.layers.13.block_sparse_moe.experts.77.w2", "model.layers.13.block_sparse_moe.experts.78.w2", "model.layers.13.block_sparse_moe.experts.79.w2", "model.layers.13.block_sparse_moe.experts.80.w2", "model.layers.13.block_sparse_moe.experts.81.w2", "model.layers.13.block_sparse_moe.experts.82.w2", "model.layers.13.block_sparse_moe.experts.83.w2", "model.layers.13.block_sparse_moe.experts.84.w2", "model.layers.13.block_sparse_moe.experts.85.w2", "model.layers.13.block_sparse_moe.experts.86.w2", "model.layers.13.block_sparse_moe.experts.87.w2", "model.layers.13.block_sparse_moe.experts.88.w2", "model.layers.13.block_sparse_moe.experts.89.w2", "model.layers.13.block_sparse_moe.experts.90.w2", "model.layers.13.block_sparse_moe.experts.91.w2", "model.layers.13.block_sparse_moe.experts.92.w2", "model.layers.13.block_sparse_moe.experts.93.w2", "model.layers.13.block_sparse_moe.experts.94.w2", "model.layers.13.block_sparse_moe.experts.95.w2", "model.layers.13.block_sparse_moe.experts.96.w2", "model.layers.13.block_sparse_moe.experts.97.w2", "model.layers.13.block_sparse_moe.experts.98.w2", "model.layers.13.block_sparse_moe.experts.99.w2", "model.layers.13.block_sparse_moe.experts.100.w2", "model.layers.13.block_sparse_moe.experts.101.w2", "model.layers.13.block_sparse_moe.experts.102.w2", "model.layers.13.block_sparse_moe.experts.103.w2", "model.layers.13.block_sparse_moe.experts.104.w2", "model.layers.13.block_sparse_moe.experts.105.w2", "model.layers.13.block_sparse_moe.experts.106.w2", "model.layers.13.block_sparse_moe.experts.107.w2", "model.layers.13.block_sparse_moe.experts.108.w2", "model.layers.13.block_sparse_moe.experts.109.w2", "model.layers.13.block_sparse_moe.experts.110.w2", "model.layers.13.block_sparse_moe.experts.111.w2", "model.layers.13.block_sparse_moe.experts.112.w2", "model.layers.13.block_sparse_moe.experts.113.w2", "model.layers.13.block_sparse_moe.experts.114.w2", "model.layers.13.block_sparse_moe.experts.115.w2", "model.layers.13.block_sparse_moe.experts.116.w2", "model.layers.13.block_sparse_moe.experts.117.w2", "model.layers.13.block_sparse_moe.experts.118.w2", "model.layers.13.block_sparse_moe.experts.119.w2", "model.layers.13.block_sparse_moe.experts.120.w2", "model.layers.13.block_sparse_moe.experts.121.w2", "model.layers.13.block_sparse_moe.experts.122.w2", "model.layers.13.block_sparse_moe.experts.123.w2", "model.layers.13.block_sparse_moe.experts.124.w2", "model.layers.13.block_sparse_moe.experts.125.w2", "model.layers.13.block_sparse_moe.experts.126.w2", "model.layers.13.block_sparse_moe.experts.127.w2", "model.layers.13.block_sparse_moe.experts.128.w2", "model.layers.13.block_sparse_moe.experts.129.w2", "model.layers.13.block_sparse_moe.experts.130.w2", "model.layers.13.block_sparse_moe.experts.131.w2", "model.layers.13.block_sparse_moe.experts.132.w2", "model.layers.13.block_sparse_moe.experts.133.w2", "model.layers.13.block_sparse_moe.experts.134.w2", "model.layers.13.block_sparse_moe.experts.135.w2", "model.layers.13.block_sparse_moe.experts.136.w2", "model.layers.13.block_sparse_moe.experts.137.w2", "model.layers.13.block_sparse_moe.experts.138.w2", "model.layers.13.block_sparse_moe.experts.139.w2", "model.layers.13.block_sparse_moe.experts.140.w2", "model.layers.13.block_sparse_moe.experts.141.w2", "model.layers.13.block_sparse_moe.experts.142.w2", "model.layers.13.block_sparse_moe.experts.143.w2", "model.layers.13.block_sparse_moe.experts.144.w2", "model.layers.13.block_sparse_moe.experts.145.w2", "model.layers.13.block_sparse_moe.experts.146.w2", "model.layers.13.block_sparse_moe.experts.147.w2", "model.layers.13.block_sparse_moe.experts.148.w2", "model.layers.13.block_sparse_moe.experts.149.w2", "model.layers.13.block_sparse_moe.experts.150.w2", "model.layers.13.block_sparse_moe.experts.151.w2", "model.layers.13.block_sparse_moe.experts.152.w2", "model.layers.13.block_sparse_moe.experts.153.w2", "model.layers.13.block_sparse_moe.experts.154.w2", "model.layers.13.block_sparse_moe.experts.155.w2", "model.layers.13.block_sparse_moe.experts.156.w2", "model.layers.13.block_sparse_moe.experts.157.w2", "model.layers.13.block_sparse_moe.experts.158.w2", "model.layers.13.block_sparse_moe.experts.159.w2", "model.layers.13.block_sparse_moe.experts.160.w2", "model.layers.13.block_sparse_moe.experts.161.w2", "model.layers.13.block_sparse_moe.experts.162.w2", "model.layers.13.block_sparse_moe.experts.163.w2", "model.layers.13.block_sparse_moe.experts.164.w2", "model.layers.13.block_sparse_moe.experts.165.w2", "model.layers.13.block_sparse_moe.experts.166.w2", "model.layers.13.block_sparse_moe.experts.167.w2", "model.layers.13.block_sparse_moe.experts.168.w2", "model.layers.13.block_sparse_moe.experts.169.w2", "model.layers.13.block_sparse_moe.experts.170.w2", "model.layers.13.block_sparse_moe.experts.171.w2", "model.layers.13.block_sparse_moe.experts.172.w2", "model.layers.13.block_sparse_moe.experts.173.w2", "model.layers.13.block_sparse_moe.experts.174.w2", "model.layers.13.block_sparse_moe.experts.175.w2", "model.layers.13.block_sparse_moe.experts.176.w2", "model.layers.13.block_sparse_moe.experts.177.w2", "model.layers.13.block_sparse_moe.experts.178.w2", "model.layers.13.block_sparse_moe.experts.179.w2", "model.layers.13.block_sparse_moe.experts.180.w2", "model.layers.13.block_sparse_moe.experts.181.w2", "model.layers.13.block_sparse_moe.experts.182.w2", "model.layers.13.block_sparse_moe.experts.183.w2", "model.layers.13.block_sparse_moe.experts.184.w2", "model.layers.13.block_sparse_moe.experts.185.w2", "model.layers.13.block_sparse_moe.experts.186.w2", "model.layers.13.block_sparse_moe.experts.187.w2", "model.layers.13.block_sparse_moe.experts.188.w2", "model.layers.13.block_sparse_moe.experts.189.w2", "model.layers.13.block_sparse_moe.experts.190.w2", "model.layers.13.block_sparse_moe.experts.191.w2", "model.layers.13.block_sparse_moe.experts.192.w2", "model.layers.13.block_sparse_moe.experts.193.w2", "model.layers.13.block_sparse_moe.experts.194.w2", "model.layers.13.block_sparse_moe.experts.195.w2", "model.layers.13.block_sparse_moe.experts.196.w2", "model.layers.13.block_sparse_moe.experts.197.w2", "model.layers.13.block_sparse_moe.experts.198.w2", "model.layers.13.block_sparse_moe.experts.199.w2", "model.layers.13.block_sparse_moe.experts.200.w2", "model.layers.13.block_sparse_moe.experts.201.w2", "model.layers.13.block_sparse_moe.experts.202.w2", "model.layers.13.block_sparse_moe.experts.203.w2", "model.layers.13.block_sparse_moe.experts.204.w2", "model.layers.13.block_sparse_moe.experts.205.w2", "model.layers.13.block_sparse_moe.experts.206.w2", "model.layers.13.block_sparse_moe.experts.207.w2", "model.layers.13.block_sparse_moe.experts.208.w2", "model.layers.13.block_sparse_moe.experts.209.w2", "model.layers.13.block_sparse_moe.experts.210.w2", "model.layers.13.block_sparse_moe.experts.211.w2", "model.layers.13.block_sparse_moe.experts.212.w2", "model.layers.13.block_sparse_moe.experts.213.w2", "model.layers.13.block_sparse_moe.experts.214.w2", "model.layers.13.block_sparse_moe.experts.215.w2", "model.layers.13.block_sparse_moe.experts.216.w2", "model.layers.13.block_sparse_moe.experts.217.w2", "model.layers.13.block_sparse_moe.experts.218.w2", "model.layers.13.block_sparse_moe.experts.219.w2", "model.layers.13.block_sparse_moe.experts.220.w2", "model.layers.13.block_sparse_moe.experts.221.w2", "model.layers.13.block_sparse_moe.experts.222.w2", "model.layers.13.block_sparse_moe.experts.223.w2", "model.layers.13.block_sparse_moe.experts.224.w2", "model.layers.13.block_sparse_moe.experts.225.w2", "model.layers.13.block_sparse_moe.experts.226.w2", "model.layers.13.block_sparse_moe.experts.227.w2", "model.layers.13.block_sparse_moe.experts.228.w2", "model.layers.13.block_sparse_moe.experts.229.w2", "model.layers.13.block_sparse_moe.experts.230.w2", "model.layers.13.block_sparse_moe.experts.231.w2", "model.layers.13.block_sparse_moe.experts.232.w2", "model.layers.13.block_sparse_moe.experts.233.w2", "model.layers.13.block_sparse_moe.experts.234.w2", "model.layers.13.block_sparse_moe.experts.235.w2", "model.layers.13.block_sparse_moe.experts.236.w2", "model.layers.13.block_sparse_moe.experts.237.w2", "model.layers.13.block_sparse_moe.experts.238.w2", "model.layers.13.block_sparse_moe.experts.239.w2", "model.layers.13.block_sparse_moe.experts.240.w2", "model.layers.13.block_sparse_moe.experts.241.w2", "model.layers.13.block_sparse_moe.experts.242.w2", "model.layers.13.block_sparse_moe.experts.243.w2", "model.layers.13.block_sparse_moe.experts.244.w2", "model.layers.13.block_sparse_moe.experts.245.w2", "model.layers.13.block_sparse_moe.experts.246.w2", "model.layers.13.block_sparse_moe.experts.247.w2", "model.layers.13.block_sparse_moe.experts.248.w2", "model.layers.13.block_sparse_moe.experts.249.w2", "model.layers.13.block_sparse_moe.experts.250.w2", "model.layers.13.block_sparse_moe.experts.251.w2", "model.layers.13.block_sparse_moe.experts.252.w2", "model.layers.13.block_sparse_moe.experts.253.w2", "model.layers.13.block_sparse_moe.experts.254.w2", "model.layers.13.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0001582128927111598, "dbits": 1207959552 } ] }, { "idx": 70, "layers": [ "model.layers.14.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0010717680677771596, "dbits": 18874368 } ] }, { "idx": 71, "layers": [ "model.layers.14.self_attn.k_proj", "model.layers.14.self_attn.v_proj" ], "candidates": [ { "dkld": -0.002458680421113968, "dbits": 6291456 } ] }, { "idx": 72, "layers": [ "model.layers.14.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00212283078581095, "dbits": 18874368 } ] }, { "idx": 73, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w1", "model.layers.14.block_sparse_moe.experts.1.w1", "model.layers.14.block_sparse_moe.experts.2.w1", "model.layers.14.block_sparse_moe.experts.3.w1", "model.layers.14.block_sparse_moe.experts.4.w1", "model.layers.14.block_sparse_moe.experts.5.w1", "model.layers.14.block_sparse_moe.experts.6.w1", "model.layers.14.block_sparse_moe.experts.7.w1", "model.layers.14.block_sparse_moe.experts.8.w1", "model.layers.14.block_sparse_moe.experts.9.w1", "model.layers.14.block_sparse_moe.experts.10.w1", "model.layers.14.block_sparse_moe.experts.11.w1", "model.layers.14.block_sparse_moe.experts.12.w1", "model.layers.14.block_sparse_moe.experts.13.w1", "model.layers.14.block_sparse_moe.experts.14.w1", "model.layers.14.block_sparse_moe.experts.15.w1", "model.layers.14.block_sparse_moe.experts.16.w1", "model.layers.14.block_sparse_moe.experts.17.w1", "model.layers.14.block_sparse_moe.experts.18.w1", "model.layers.14.block_sparse_moe.experts.19.w1", "model.layers.14.block_sparse_moe.experts.20.w1", "model.layers.14.block_sparse_moe.experts.21.w1", "model.layers.14.block_sparse_moe.experts.22.w1", "model.layers.14.block_sparse_moe.experts.23.w1", "model.layers.14.block_sparse_moe.experts.24.w1", "model.layers.14.block_sparse_moe.experts.25.w1", "model.layers.14.block_sparse_moe.experts.26.w1", "model.layers.14.block_sparse_moe.experts.27.w1", "model.layers.14.block_sparse_moe.experts.28.w1", "model.layers.14.block_sparse_moe.experts.29.w1", "model.layers.14.block_sparse_moe.experts.30.w1", "model.layers.14.block_sparse_moe.experts.31.w1", "model.layers.14.block_sparse_moe.experts.32.w1", "model.layers.14.block_sparse_moe.experts.33.w1", "model.layers.14.block_sparse_moe.experts.34.w1", "model.layers.14.block_sparse_moe.experts.35.w1", "model.layers.14.block_sparse_moe.experts.36.w1", "model.layers.14.block_sparse_moe.experts.37.w1", "model.layers.14.block_sparse_moe.experts.38.w1", "model.layers.14.block_sparse_moe.experts.39.w1", "model.layers.14.block_sparse_moe.experts.40.w1", "model.layers.14.block_sparse_moe.experts.41.w1", "model.layers.14.block_sparse_moe.experts.42.w1", "model.layers.14.block_sparse_moe.experts.43.w1", "model.layers.14.block_sparse_moe.experts.44.w1", "model.layers.14.block_sparse_moe.experts.45.w1", "model.layers.14.block_sparse_moe.experts.46.w1", "model.layers.14.block_sparse_moe.experts.47.w1", "model.layers.14.block_sparse_moe.experts.48.w1", "model.layers.14.block_sparse_moe.experts.49.w1", "model.layers.14.block_sparse_moe.experts.50.w1", "model.layers.14.block_sparse_moe.experts.51.w1", "model.layers.14.block_sparse_moe.experts.52.w1", "model.layers.14.block_sparse_moe.experts.53.w1", "model.layers.14.block_sparse_moe.experts.54.w1", "model.layers.14.block_sparse_moe.experts.55.w1", "model.layers.14.block_sparse_moe.experts.56.w1", "model.layers.14.block_sparse_moe.experts.57.w1", "model.layers.14.block_sparse_moe.experts.58.w1", "model.layers.14.block_sparse_moe.experts.59.w1", "model.layers.14.block_sparse_moe.experts.60.w1", "model.layers.14.block_sparse_moe.experts.61.w1", "model.layers.14.block_sparse_moe.experts.62.w1", "model.layers.14.block_sparse_moe.experts.63.w1", "model.layers.14.block_sparse_moe.experts.64.w1", "model.layers.14.block_sparse_moe.experts.65.w1", "model.layers.14.block_sparse_moe.experts.66.w1", "model.layers.14.block_sparse_moe.experts.67.w1", "model.layers.14.block_sparse_moe.experts.68.w1", "model.layers.14.block_sparse_moe.experts.69.w1", "model.layers.14.block_sparse_moe.experts.70.w1", "model.layers.14.block_sparse_moe.experts.71.w1", "model.layers.14.block_sparse_moe.experts.72.w1", "model.layers.14.block_sparse_moe.experts.73.w1", "model.layers.14.block_sparse_moe.experts.74.w1", "model.layers.14.block_sparse_moe.experts.75.w1", "model.layers.14.block_sparse_moe.experts.76.w1", "model.layers.14.block_sparse_moe.experts.77.w1", "model.layers.14.block_sparse_moe.experts.78.w1", "model.layers.14.block_sparse_moe.experts.79.w1", "model.layers.14.block_sparse_moe.experts.80.w1", "model.layers.14.block_sparse_moe.experts.81.w1", "model.layers.14.block_sparse_moe.experts.82.w1", "model.layers.14.block_sparse_moe.experts.83.w1", "model.layers.14.block_sparse_moe.experts.84.w1", "model.layers.14.block_sparse_moe.experts.85.w1", "model.layers.14.block_sparse_moe.experts.86.w1", "model.layers.14.block_sparse_moe.experts.87.w1", "model.layers.14.block_sparse_moe.experts.88.w1", "model.layers.14.block_sparse_moe.experts.89.w1", "model.layers.14.block_sparse_moe.experts.90.w1", "model.layers.14.block_sparse_moe.experts.91.w1", "model.layers.14.block_sparse_moe.experts.92.w1", "model.layers.14.block_sparse_moe.experts.93.w1", "model.layers.14.block_sparse_moe.experts.94.w1", "model.layers.14.block_sparse_moe.experts.95.w1", "model.layers.14.block_sparse_moe.experts.96.w1", "model.layers.14.block_sparse_moe.experts.97.w1", "model.layers.14.block_sparse_moe.experts.98.w1", "model.layers.14.block_sparse_moe.experts.99.w1", "model.layers.14.block_sparse_moe.experts.100.w1", "model.layers.14.block_sparse_moe.experts.101.w1", "model.layers.14.block_sparse_moe.experts.102.w1", "model.layers.14.block_sparse_moe.experts.103.w1", "model.layers.14.block_sparse_moe.experts.104.w1", "model.layers.14.block_sparse_moe.experts.105.w1", "model.layers.14.block_sparse_moe.experts.106.w1", "model.layers.14.block_sparse_moe.experts.107.w1", "model.layers.14.block_sparse_moe.experts.108.w1", "model.layers.14.block_sparse_moe.experts.109.w1", "model.layers.14.block_sparse_moe.experts.110.w1", "model.layers.14.block_sparse_moe.experts.111.w1", "model.layers.14.block_sparse_moe.experts.112.w1", "model.layers.14.block_sparse_moe.experts.113.w1", "model.layers.14.block_sparse_moe.experts.114.w1", "model.layers.14.block_sparse_moe.experts.115.w1", "model.layers.14.block_sparse_moe.experts.116.w1", "model.layers.14.block_sparse_moe.experts.117.w1", "model.layers.14.block_sparse_moe.experts.118.w1", "model.layers.14.block_sparse_moe.experts.119.w1", "model.layers.14.block_sparse_moe.experts.120.w1", "model.layers.14.block_sparse_moe.experts.121.w1", "model.layers.14.block_sparse_moe.experts.122.w1", "model.layers.14.block_sparse_moe.experts.123.w1", "model.layers.14.block_sparse_moe.experts.124.w1", "model.layers.14.block_sparse_moe.experts.125.w1", "model.layers.14.block_sparse_moe.experts.126.w1", "model.layers.14.block_sparse_moe.experts.127.w1", "model.layers.14.block_sparse_moe.experts.128.w1", "model.layers.14.block_sparse_moe.experts.129.w1", "model.layers.14.block_sparse_moe.experts.130.w1", "model.layers.14.block_sparse_moe.experts.131.w1", "model.layers.14.block_sparse_moe.experts.132.w1", "model.layers.14.block_sparse_moe.experts.133.w1", "model.layers.14.block_sparse_moe.experts.134.w1", "model.layers.14.block_sparse_moe.experts.135.w1", "model.layers.14.block_sparse_moe.experts.136.w1", "model.layers.14.block_sparse_moe.experts.137.w1", "model.layers.14.block_sparse_moe.experts.138.w1", "model.layers.14.block_sparse_moe.experts.139.w1", "model.layers.14.block_sparse_moe.experts.140.w1", "model.layers.14.block_sparse_moe.experts.141.w1", "model.layers.14.block_sparse_moe.experts.142.w1", "model.layers.14.block_sparse_moe.experts.143.w1", "model.layers.14.block_sparse_moe.experts.144.w1", "model.layers.14.block_sparse_moe.experts.145.w1", "model.layers.14.block_sparse_moe.experts.146.w1", "model.layers.14.block_sparse_moe.experts.147.w1", "model.layers.14.block_sparse_moe.experts.148.w1", "model.layers.14.block_sparse_moe.experts.149.w1", "model.layers.14.block_sparse_moe.experts.150.w1", "model.layers.14.block_sparse_moe.experts.151.w1", "model.layers.14.block_sparse_moe.experts.152.w1", "model.layers.14.block_sparse_moe.experts.153.w1", "model.layers.14.block_sparse_moe.experts.154.w1", "model.layers.14.block_sparse_moe.experts.155.w1", "model.layers.14.block_sparse_moe.experts.156.w1", "model.layers.14.block_sparse_moe.experts.157.w1", "model.layers.14.block_sparse_moe.experts.158.w1", "model.layers.14.block_sparse_moe.experts.159.w1", "model.layers.14.block_sparse_moe.experts.160.w1", "model.layers.14.block_sparse_moe.experts.161.w1", "model.layers.14.block_sparse_moe.experts.162.w1", "model.layers.14.block_sparse_moe.experts.163.w1", "model.layers.14.block_sparse_moe.experts.164.w1", "model.layers.14.block_sparse_moe.experts.165.w1", "model.layers.14.block_sparse_moe.experts.166.w1", "model.layers.14.block_sparse_moe.experts.167.w1", "model.layers.14.block_sparse_moe.experts.168.w1", "model.layers.14.block_sparse_moe.experts.169.w1", "model.layers.14.block_sparse_moe.experts.170.w1", "model.layers.14.block_sparse_moe.experts.171.w1", "model.layers.14.block_sparse_moe.experts.172.w1", "model.layers.14.block_sparse_moe.experts.173.w1", "model.layers.14.block_sparse_moe.experts.174.w1", "model.layers.14.block_sparse_moe.experts.175.w1", "model.layers.14.block_sparse_moe.experts.176.w1", "model.layers.14.block_sparse_moe.experts.177.w1", "model.layers.14.block_sparse_moe.experts.178.w1", "model.layers.14.block_sparse_moe.experts.179.w1", "model.layers.14.block_sparse_moe.experts.180.w1", "model.layers.14.block_sparse_moe.experts.181.w1", "model.layers.14.block_sparse_moe.experts.182.w1", "model.layers.14.block_sparse_moe.experts.183.w1", "model.layers.14.block_sparse_moe.experts.184.w1", "model.layers.14.block_sparse_moe.experts.185.w1", "model.layers.14.block_sparse_moe.experts.186.w1", "model.layers.14.block_sparse_moe.experts.187.w1", "model.layers.14.block_sparse_moe.experts.188.w1", "model.layers.14.block_sparse_moe.experts.189.w1", "model.layers.14.block_sparse_moe.experts.190.w1", "model.layers.14.block_sparse_moe.experts.191.w1", "model.layers.14.block_sparse_moe.experts.192.w1", "model.layers.14.block_sparse_moe.experts.193.w1", "model.layers.14.block_sparse_moe.experts.194.w1", "model.layers.14.block_sparse_moe.experts.195.w1", "model.layers.14.block_sparse_moe.experts.196.w1", "model.layers.14.block_sparse_moe.experts.197.w1", "model.layers.14.block_sparse_moe.experts.198.w1", "model.layers.14.block_sparse_moe.experts.199.w1", "model.layers.14.block_sparse_moe.experts.200.w1", "model.layers.14.block_sparse_moe.experts.201.w1", "model.layers.14.block_sparse_moe.experts.202.w1", "model.layers.14.block_sparse_moe.experts.203.w1", "model.layers.14.block_sparse_moe.experts.204.w1", "model.layers.14.block_sparse_moe.experts.205.w1", "model.layers.14.block_sparse_moe.experts.206.w1", "model.layers.14.block_sparse_moe.experts.207.w1", "model.layers.14.block_sparse_moe.experts.208.w1", "model.layers.14.block_sparse_moe.experts.209.w1", "model.layers.14.block_sparse_moe.experts.210.w1", "model.layers.14.block_sparse_moe.experts.211.w1", "model.layers.14.block_sparse_moe.experts.212.w1", "model.layers.14.block_sparse_moe.experts.213.w1", "model.layers.14.block_sparse_moe.experts.214.w1", "model.layers.14.block_sparse_moe.experts.215.w1", "model.layers.14.block_sparse_moe.experts.216.w1", "model.layers.14.block_sparse_moe.experts.217.w1", "model.layers.14.block_sparse_moe.experts.218.w1", "model.layers.14.block_sparse_moe.experts.219.w1", "model.layers.14.block_sparse_moe.experts.220.w1", "model.layers.14.block_sparse_moe.experts.221.w1", "model.layers.14.block_sparse_moe.experts.222.w1", "model.layers.14.block_sparse_moe.experts.223.w1", "model.layers.14.block_sparse_moe.experts.224.w1", "model.layers.14.block_sparse_moe.experts.225.w1", "model.layers.14.block_sparse_moe.experts.226.w1", "model.layers.14.block_sparse_moe.experts.227.w1", "model.layers.14.block_sparse_moe.experts.228.w1", "model.layers.14.block_sparse_moe.experts.229.w1", "model.layers.14.block_sparse_moe.experts.230.w1", "model.layers.14.block_sparse_moe.experts.231.w1", "model.layers.14.block_sparse_moe.experts.232.w1", "model.layers.14.block_sparse_moe.experts.233.w1", "model.layers.14.block_sparse_moe.experts.234.w1", "model.layers.14.block_sparse_moe.experts.235.w1", "model.layers.14.block_sparse_moe.experts.236.w1", "model.layers.14.block_sparse_moe.experts.237.w1", "model.layers.14.block_sparse_moe.experts.238.w1", "model.layers.14.block_sparse_moe.experts.239.w1", "model.layers.14.block_sparse_moe.experts.240.w1", "model.layers.14.block_sparse_moe.experts.241.w1", "model.layers.14.block_sparse_moe.experts.242.w1", "model.layers.14.block_sparse_moe.experts.243.w1", "model.layers.14.block_sparse_moe.experts.244.w1", "model.layers.14.block_sparse_moe.experts.245.w1", "model.layers.14.block_sparse_moe.experts.246.w1", "model.layers.14.block_sparse_moe.experts.247.w1", "model.layers.14.block_sparse_moe.experts.248.w1", "model.layers.14.block_sparse_moe.experts.249.w1", "model.layers.14.block_sparse_moe.experts.250.w1", "model.layers.14.block_sparse_moe.experts.251.w1", "model.layers.14.block_sparse_moe.experts.252.w1", "model.layers.14.block_sparse_moe.experts.253.w1", "model.layers.14.block_sparse_moe.experts.254.w1", "model.layers.14.block_sparse_moe.experts.255.w1", "model.layers.14.block_sparse_moe.experts.0.w3", "model.layers.14.block_sparse_moe.experts.1.w3", "model.layers.14.block_sparse_moe.experts.2.w3", "model.layers.14.block_sparse_moe.experts.3.w3", "model.layers.14.block_sparse_moe.experts.4.w3", "model.layers.14.block_sparse_moe.experts.5.w3", "model.layers.14.block_sparse_moe.experts.6.w3", "model.layers.14.block_sparse_moe.experts.7.w3", "model.layers.14.block_sparse_moe.experts.8.w3", "model.layers.14.block_sparse_moe.experts.9.w3", "model.layers.14.block_sparse_moe.experts.10.w3", "model.layers.14.block_sparse_moe.experts.11.w3", "model.layers.14.block_sparse_moe.experts.12.w3", "model.layers.14.block_sparse_moe.experts.13.w3", "model.layers.14.block_sparse_moe.experts.14.w3", "model.layers.14.block_sparse_moe.experts.15.w3", "model.layers.14.block_sparse_moe.experts.16.w3", "model.layers.14.block_sparse_moe.experts.17.w3", "model.layers.14.block_sparse_moe.experts.18.w3", "model.layers.14.block_sparse_moe.experts.19.w3", "model.layers.14.block_sparse_moe.experts.20.w3", "model.layers.14.block_sparse_moe.experts.21.w3", "model.layers.14.block_sparse_moe.experts.22.w3", "model.layers.14.block_sparse_moe.experts.23.w3", "model.layers.14.block_sparse_moe.experts.24.w3", "model.layers.14.block_sparse_moe.experts.25.w3", "model.layers.14.block_sparse_moe.experts.26.w3", "model.layers.14.block_sparse_moe.experts.27.w3", "model.layers.14.block_sparse_moe.experts.28.w3", "model.layers.14.block_sparse_moe.experts.29.w3", "model.layers.14.block_sparse_moe.experts.30.w3", "model.layers.14.block_sparse_moe.experts.31.w3", "model.layers.14.block_sparse_moe.experts.32.w3", "model.layers.14.block_sparse_moe.experts.33.w3", "model.layers.14.block_sparse_moe.experts.34.w3", "model.layers.14.block_sparse_moe.experts.35.w3", "model.layers.14.block_sparse_moe.experts.36.w3", "model.layers.14.block_sparse_moe.experts.37.w3", "model.layers.14.block_sparse_moe.experts.38.w3", "model.layers.14.block_sparse_moe.experts.39.w3", "model.layers.14.block_sparse_moe.experts.40.w3", "model.layers.14.block_sparse_moe.experts.41.w3", "model.layers.14.block_sparse_moe.experts.42.w3", "model.layers.14.block_sparse_moe.experts.43.w3", "model.layers.14.block_sparse_moe.experts.44.w3", "model.layers.14.block_sparse_moe.experts.45.w3", "model.layers.14.block_sparse_moe.experts.46.w3", "model.layers.14.block_sparse_moe.experts.47.w3", "model.layers.14.block_sparse_moe.experts.48.w3", "model.layers.14.block_sparse_moe.experts.49.w3", "model.layers.14.block_sparse_moe.experts.50.w3", "model.layers.14.block_sparse_moe.experts.51.w3", "model.layers.14.block_sparse_moe.experts.52.w3", "model.layers.14.block_sparse_moe.experts.53.w3", "model.layers.14.block_sparse_moe.experts.54.w3", "model.layers.14.block_sparse_moe.experts.55.w3", "model.layers.14.block_sparse_moe.experts.56.w3", "model.layers.14.block_sparse_moe.experts.57.w3", "model.layers.14.block_sparse_moe.experts.58.w3", "model.layers.14.block_sparse_moe.experts.59.w3", "model.layers.14.block_sparse_moe.experts.60.w3", "model.layers.14.block_sparse_moe.experts.61.w3", "model.layers.14.block_sparse_moe.experts.62.w3", "model.layers.14.block_sparse_moe.experts.63.w3", "model.layers.14.block_sparse_moe.experts.64.w3", "model.layers.14.block_sparse_moe.experts.65.w3", "model.layers.14.block_sparse_moe.experts.66.w3", "model.layers.14.block_sparse_moe.experts.67.w3", "model.layers.14.block_sparse_moe.experts.68.w3", "model.layers.14.block_sparse_moe.experts.69.w3", "model.layers.14.block_sparse_moe.experts.70.w3", "model.layers.14.block_sparse_moe.experts.71.w3", "model.layers.14.block_sparse_moe.experts.72.w3", "model.layers.14.block_sparse_moe.experts.73.w3", "model.layers.14.block_sparse_moe.experts.74.w3", "model.layers.14.block_sparse_moe.experts.75.w3", "model.layers.14.block_sparse_moe.experts.76.w3", "model.layers.14.block_sparse_moe.experts.77.w3", "model.layers.14.block_sparse_moe.experts.78.w3", "model.layers.14.block_sparse_moe.experts.79.w3", "model.layers.14.block_sparse_moe.experts.80.w3", "model.layers.14.block_sparse_moe.experts.81.w3", "model.layers.14.block_sparse_moe.experts.82.w3", "model.layers.14.block_sparse_moe.experts.83.w3", "model.layers.14.block_sparse_moe.experts.84.w3", "model.layers.14.block_sparse_moe.experts.85.w3", "model.layers.14.block_sparse_moe.experts.86.w3", "model.layers.14.block_sparse_moe.experts.87.w3", "model.layers.14.block_sparse_moe.experts.88.w3", "model.layers.14.block_sparse_moe.experts.89.w3", "model.layers.14.block_sparse_moe.experts.90.w3", "model.layers.14.block_sparse_moe.experts.91.w3", "model.layers.14.block_sparse_moe.experts.92.w3", "model.layers.14.block_sparse_moe.experts.93.w3", "model.layers.14.block_sparse_moe.experts.94.w3", "model.layers.14.block_sparse_moe.experts.95.w3", "model.layers.14.block_sparse_moe.experts.96.w3", "model.layers.14.block_sparse_moe.experts.97.w3", "model.layers.14.block_sparse_moe.experts.98.w3", "model.layers.14.block_sparse_moe.experts.99.w3", "model.layers.14.block_sparse_moe.experts.100.w3", "model.layers.14.block_sparse_moe.experts.101.w3", "model.layers.14.block_sparse_moe.experts.102.w3", "model.layers.14.block_sparse_moe.experts.103.w3", "model.layers.14.block_sparse_moe.experts.104.w3", "model.layers.14.block_sparse_moe.experts.105.w3", "model.layers.14.block_sparse_moe.experts.106.w3", "model.layers.14.block_sparse_moe.experts.107.w3", "model.layers.14.block_sparse_moe.experts.108.w3", "model.layers.14.block_sparse_moe.experts.109.w3", "model.layers.14.block_sparse_moe.experts.110.w3", "model.layers.14.block_sparse_moe.experts.111.w3", "model.layers.14.block_sparse_moe.experts.112.w3", "model.layers.14.block_sparse_moe.experts.113.w3", "model.layers.14.block_sparse_moe.experts.114.w3", "model.layers.14.block_sparse_moe.experts.115.w3", "model.layers.14.block_sparse_moe.experts.116.w3", "model.layers.14.block_sparse_moe.experts.117.w3", "model.layers.14.block_sparse_moe.experts.118.w3", "model.layers.14.block_sparse_moe.experts.119.w3", "model.layers.14.block_sparse_moe.experts.120.w3", "model.layers.14.block_sparse_moe.experts.121.w3", "model.layers.14.block_sparse_moe.experts.122.w3", "model.layers.14.block_sparse_moe.experts.123.w3", "model.layers.14.block_sparse_moe.experts.124.w3", "model.layers.14.block_sparse_moe.experts.125.w3", "model.layers.14.block_sparse_moe.experts.126.w3", "model.layers.14.block_sparse_moe.experts.127.w3", "model.layers.14.block_sparse_moe.experts.128.w3", "model.layers.14.block_sparse_moe.experts.129.w3", "model.layers.14.block_sparse_moe.experts.130.w3", "model.layers.14.block_sparse_moe.experts.131.w3", "model.layers.14.block_sparse_moe.experts.132.w3", "model.layers.14.block_sparse_moe.experts.133.w3", "model.layers.14.block_sparse_moe.experts.134.w3", "model.layers.14.block_sparse_moe.experts.135.w3", "model.layers.14.block_sparse_moe.experts.136.w3", "model.layers.14.block_sparse_moe.experts.137.w3", "model.layers.14.block_sparse_moe.experts.138.w3", "model.layers.14.block_sparse_moe.experts.139.w3", "model.layers.14.block_sparse_moe.experts.140.w3", "model.layers.14.block_sparse_moe.experts.141.w3", "model.layers.14.block_sparse_moe.experts.142.w3", "model.layers.14.block_sparse_moe.experts.143.w3", "model.layers.14.block_sparse_moe.experts.144.w3", "model.layers.14.block_sparse_moe.experts.145.w3", "model.layers.14.block_sparse_moe.experts.146.w3", "model.layers.14.block_sparse_moe.experts.147.w3", "model.layers.14.block_sparse_moe.experts.148.w3", "model.layers.14.block_sparse_moe.experts.149.w3", "model.layers.14.block_sparse_moe.experts.150.w3", "model.layers.14.block_sparse_moe.experts.151.w3", "model.layers.14.block_sparse_moe.experts.152.w3", "model.layers.14.block_sparse_moe.experts.153.w3", "model.layers.14.block_sparse_moe.experts.154.w3", "model.layers.14.block_sparse_moe.experts.155.w3", "model.layers.14.block_sparse_moe.experts.156.w3", "model.layers.14.block_sparse_moe.experts.157.w3", "model.layers.14.block_sparse_moe.experts.158.w3", "model.layers.14.block_sparse_moe.experts.159.w3", "model.layers.14.block_sparse_moe.experts.160.w3", "model.layers.14.block_sparse_moe.experts.161.w3", "model.layers.14.block_sparse_moe.experts.162.w3", "model.layers.14.block_sparse_moe.experts.163.w3", "model.layers.14.block_sparse_moe.experts.164.w3", "model.layers.14.block_sparse_moe.experts.165.w3", "model.layers.14.block_sparse_moe.experts.166.w3", "model.layers.14.block_sparse_moe.experts.167.w3", "model.layers.14.block_sparse_moe.experts.168.w3", "model.layers.14.block_sparse_moe.experts.169.w3", "model.layers.14.block_sparse_moe.experts.170.w3", "model.layers.14.block_sparse_moe.experts.171.w3", "model.layers.14.block_sparse_moe.experts.172.w3", "model.layers.14.block_sparse_moe.experts.173.w3", "model.layers.14.block_sparse_moe.experts.174.w3", "model.layers.14.block_sparse_moe.experts.175.w3", "model.layers.14.block_sparse_moe.experts.176.w3", "model.layers.14.block_sparse_moe.experts.177.w3", "model.layers.14.block_sparse_moe.experts.178.w3", "model.layers.14.block_sparse_moe.experts.179.w3", "model.layers.14.block_sparse_moe.experts.180.w3", "model.layers.14.block_sparse_moe.experts.181.w3", "model.layers.14.block_sparse_moe.experts.182.w3", "model.layers.14.block_sparse_moe.experts.183.w3", "model.layers.14.block_sparse_moe.experts.184.w3", "model.layers.14.block_sparse_moe.experts.185.w3", "model.layers.14.block_sparse_moe.experts.186.w3", "model.layers.14.block_sparse_moe.experts.187.w3", "model.layers.14.block_sparse_moe.experts.188.w3", "model.layers.14.block_sparse_moe.experts.189.w3", "model.layers.14.block_sparse_moe.experts.190.w3", "model.layers.14.block_sparse_moe.experts.191.w3", "model.layers.14.block_sparse_moe.experts.192.w3", "model.layers.14.block_sparse_moe.experts.193.w3", "model.layers.14.block_sparse_moe.experts.194.w3", "model.layers.14.block_sparse_moe.experts.195.w3", "model.layers.14.block_sparse_moe.experts.196.w3", "model.layers.14.block_sparse_moe.experts.197.w3", "model.layers.14.block_sparse_moe.experts.198.w3", "model.layers.14.block_sparse_moe.experts.199.w3", "model.layers.14.block_sparse_moe.experts.200.w3", "model.layers.14.block_sparse_moe.experts.201.w3", "model.layers.14.block_sparse_moe.experts.202.w3", "model.layers.14.block_sparse_moe.experts.203.w3", "model.layers.14.block_sparse_moe.experts.204.w3", "model.layers.14.block_sparse_moe.experts.205.w3", "model.layers.14.block_sparse_moe.experts.206.w3", "model.layers.14.block_sparse_moe.experts.207.w3", "model.layers.14.block_sparse_moe.experts.208.w3", "model.layers.14.block_sparse_moe.experts.209.w3", "model.layers.14.block_sparse_moe.experts.210.w3", "model.layers.14.block_sparse_moe.experts.211.w3", "model.layers.14.block_sparse_moe.experts.212.w3", "model.layers.14.block_sparse_moe.experts.213.w3", "model.layers.14.block_sparse_moe.experts.214.w3", "model.layers.14.block_sparse_moe.experts.215.w3", "model.layers.14.block_sparse_moe.experts.216.w3", "model.layers.14.block_sparse_moe.experts.217.w3", "model.layers.14.block_sparse_moe.experts.218.w3", "model.layers.14.block_sparse_moe.experts.219.w3", "model.layers.14.block_sparse_moe.experts.220.w3", "model.layers.14.block_sparse_moe.experts.221.w3", "model.layers.14.block_sparse_moe.experts.222.w3", "model.layers.14.block_sparse_moe.experts.223.w3", "model.layers.14.block_sparse_moe.experts.224.w3", "model.layers.14.block_sparse_moe.experts.225.w3", "model.layers.14.block_sparse_moe.experts.226.w3", "model.layers.14.block_sparse_moe.experts.227.w3", "model.layers.14.block_sparse_moe.experts.228.w3", "model.layers.14.block_sparse_moe.experts.229.w3", "model.layers.14.block_sparse_moe.experts.230.w3", "model.layers.14.block_sparse_moe.experts.231.w3", "model.layers.14.block_sparse_moe.experts.232.w3", "model.layers.14.block_sparse_moe.experts.233.w3", "model.layers.14.block_sparse_moe.experts.234.w3", "model.layers.14.block_sparse_moe.experts.235.w3", "model.layers.14.block_sparse_moe.experts.236.w3", "model.layers.14.block_sparse_moe.experts.237.w3", "model.layers.14.block_sparse_moe.experts.238.w3", "model.layers.14.block_sparse_moe.experts.239.w3", "model.layers.14.block_sparse_moe.experts.240.w3", "model.layers.14.block_sparse_moe.experts.241.w3", "model.layers.14.block_sparse_moe.experts.242.w3", "model.layers.14.block_sparse_moe.experts.243.w3", "model.layers.14.block_sparse_moe.experts.244.w3", "model.layers.14.block_sparse_moe.experts.245.w3", "model.layers.14.block_sparse_moe.experts.246.w3", "model.layers.14.block_sparse_moe.experts.247.w3", "model.layers.14.block_sparse_moe.experts.248.w3", "model.layers.14.block_sparse_moe.experts.249.w3", "model.layers.14.block_sparse_moe.experts.250.w3", "model.layers.14.block_sparse_moe.experts.251.w3", "model.layers.14.block_sparse_moe.experts.252.w3", "model.layers.14.block_sparse_moe.experts.253.w3", "model.layers.14.block_sparse_moe.experts.254.w3", "model.layers.14.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0003320595249533681, "dbits": 2415919104 } ] }, { "idx": 74, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w2", "model.layers.14.block_sparse_moe.experts.1.w2", "model.layers.14.block_sparse_moe.experts.2.w2", "model.layers.14.block_sparse_moe.experts.3.w2", "model.layers.14.block_sparse_moe.experts.4.w2", "model.layers.14.block_sparse_moe.experts.5.w2", "model.layers.14.block_sparse_moe.experts.6.w2", "model.layers.14.block_sparse_moe.experts.7.w2", "model.layers.14.block_sparse_moe.experts.8.w2", "model.layers.14.block_sparse_moe.experts.9.w2", "model.layers.14.block_sparse_moe.experts.10.w2", "model.layers.14.block_sparse_moe.experts.11.w2", "model.layers.14.block_sparse_moe.experts.12.w2", "model.layers.14.block_sparse_moe.experts.13.w2", "model.layers.14.block_sparse_moe.experts.14.w2", "model.layers.14.block_sparse_moe.experts.15.w2", "model.layers.14.block_sparse_moe.experts.16.w2", "model.layers.14.block_sparse_moe.experts.17.w2", "model.layers.14.block_sparse_moe.experts.18.w2", "model.layers.14.block_sparse_moe.experts.19.w2", "model.layers.14.block_sparse_moe.experts.20.w2", "model.layers.14.block_sparse_moe.experts.21.w2", "model.layers.14.block_sparse_moe.experts.22.w2", "model.layers.14.block_sparse_moe.experts.23.w2", "model.layers.14.block_sparse_moe.experts.24.w2", "model.layers.14.block_sparse_moe.experts.25.w2", "model.layers.14.block_sparse_moe.experts.26.w2", "model.layers.14.block_sparse_moe.experts.27.w2", "model.layers.14.block_sparse_moe.experts.28.w2", "model.layers.14.block_sparse_moe.experts.29.w2", "model.layers.14.block_sparse_moe.experts.30.w2", "model.layers.14.block_sparse_moe.experts.31.w2", "model.layers.14.block_sparse_moe.experts.32.w2", "model.layers.14.block_sparse_moe.experts.33.w2", "model.layers.14.block_sparse_moe.experts.34.w2", "model.layers.14.block_sparse_moe.experts.35.w2", "model.layers.14.block_sparse_moe.experts.36.w2", "model.layers.14.block_sparse_moe.experts.37.w2", "model.layers.14.block_sparse_moe.experts.38.w2", "model.layers.14.block_sparse_moe.experts.39.w2", "model.layers.14.block_sparse_moe.experts.40.w2", "model.layers.14.block_sparse_moe.experts.41.w2", "model.layers.14.block_sparse_moe.experts.42.w2", "model.layers.14.block_sparse_moe.experts.43.w2", "model.layers.14.block_sparse_moe.experts.44.w2", "model.layers.14.block_sparse_moe.experts.45.w2", "model.layers.14.block_sparse_moe.experts.46.w2", "model.layers.14.block_sparse_moe.experts.47.w2", "model.layers.14.block_sparse_moe.experts.48.w2", "model.layers.14.block_sparse_moe.experts.49.w2", "model.layers.14.block_sparse_moe.experts.50.w2", "model.layers.14.block_sparse_moe.experts.51.w2", "model.layers.14.block_sparse_moe.experts.52.w2", "model.layers.14.block_sparse_moe.experts.53.w2", "model.layers.14.block_sparse_moe.experts.54.w2", "model.layers.14.block_sparse_moe.experts.55.w2", "model.layers.14.block_sparse_moe.experts.56.w2", "model.layers.14.block_sparse_moe.experts.57.w2", "model.layers.14.block_sparse_moe.experts.58.w2", "model.layers.14.block_sparse_moe.experts.59.w2", "model.layers.14.block_sparse_moe.experts.60.w2", "model.layers.14.block_sparse_moe.experts.61.w2", "model.layers.14.block_sparse_moe.experts.62.w2", "model.layers.14.block_sparse_moe.experts.63.w2", "model.layers.14.block_sparse_moe.experts.64.w2", "model.layers.14.block_sparse_moe.experts.65.w2", "model.layers.14.block_sparse_moe.experts.66.w2", "model.layers.14.block_sparse_moe.experts.67.w2", "model.layers.14.block_sparse_moe.experts.68.w2", "model.layers.14.block_sparse_moe.experts.69.w2", "model.layers.14.block_sparse_moe.experts.70.w2", "model.layers.14.block_sparse_moe.experts.71.w2", "model.layers.14.block_sparse_moe.experts.72.w2", "model.layers.14.block_sparse_moe.experts.73.w2", "model.layers.14.block_sparse_moe.experts.74.w2", "model.layers.14.block_sparse_moe.experts.75.w2", "model.layers.14.block_sparse_moe.experts.76.w2", "model.layers.14.block_sparse_moe.experts.77.w2", "model.layers.14.block_sparse_moe.experts.78.w2", "model.layers.14.block_sparse_moe.experts.79.w2", "model.layers.14.block_sparse_moe.experts.80.w2", "model.layers.14.block_sparse_moe.experts.81.w2", "model.layers.14.block_sparse_moe.experts.82.w2", "model.layers.14.block_sparse_moe.experts.83.w2", "model.layers.14.block_sparse_moe.experts.84.w2", "model.layers.14.block_sparse_moe.experts.85.w2", "model.layers.14.block_sparse_moe.experts.86.w2", "model.layers.14.block_sparse_moe.experts.87.w2", "model.layers.14.block_sparse_moe.experts.88.w2", "model.layers.14.block_sparse_moe.experts.89.w2", "model.layers.14.block_sparse_moe.experts.90.w2", "model.layers.14.block_sparse_moe.experts.91.w2", "model.layers.14.block_sparse_moe.experts.92.w2", "model.layers.14.block_sparse_moe.experts.93.w2", "model.layers.14.block_sparse_moe.experts.94.w2", "model.layers.14.block_sparse_moe.experts.95.w2", "model.layers.14.block_sparse_moe.experts.96.w2", "model.layers.14.block_sparse_moe.experts.97.w2", "model.layers.14.block_sparse_moe.experts.98.w2", "model.layers.14.block_sparse_moe.experts.99.w2", "model.layers.14.block_sparse_moe.experts.100.w2", "model.layers.14.block_sparse_moe.experts.101.w2", "model.layers.14.block_sparse_moe.experts.102.w2", "model.layers.14.block_sparse_moe.experts.103.w2", "model.layers.14.block_sparse_moe.experts.104.w2", "model.layers.14.block_sparse_moe.experts.105.w2", "model.layers.14.block_sparse_moe.experts.106.w2", "model.layers.14.block_sparse_moe.experts.107.w2", "model.layers.14.block_sparse_moe.experts.108.w2", "model.layers.14.block_sparse_moe.experts.109.w2", "model.layers.14.block_sparse_moe.experts.110.w2", "model.layers.14.block_sparse_moe.experts.111.w2", "model.layers.14.block_sparse_moe.experts.112.w2", "model.layers.14.block_sparse_moe.experts.113.w2", "model.layers.14.block_sparse_moe.experts.114.w2", "model.layers.14.block_sparse_moe.experts.115.w2", "model.layers.14.block_sparse_moe.experts.116.w2", "model.layers.14.block_sparse_moe.experts.117.w2", "model.layers.14.block_sparse_moe.experts.118.w2", "model.layers.14.block_sparse_moe.experts.119.w2", "model.layers.14.block_sparse_moe.experts.120.w2", "model.layers.14.block_sparse_moe.experts.121.w2", "model.layers.14.block_sparse_moe.experts.122.w2", "model.layers.14.block_sparse_moe.experts.123.w2", "model.layers.14.block_sparse_moe.experts.124.w2", "model.layers.14.block_sparse_moe.experts.125.w2", "model.layers.14.block_sparse_moe.experts.126.w2", "model.layers.14.block_sparse_moe.experts.127.w2", "model.layers.14.block_sparse_moe.experts.128.w2", "model.layers.14.block_sparse_moe.experts.129.w2", "model.layers.14.block_sparse_moe.experts.130.w2", "model.layers.14.block_sparse_moe.experts.131.w2", "model.layers.14.block_sparse_moe.experts.132.w2", "model.layers.14.block_sparse_moe.experts.133.w2", "model.layers.14.block_sparse_moe.experts.134.w2", "model.layers.14.block_sparse_moe.experts.135.w2", "model.layers.14.block_sparse_moe.experts.136.w2", "model.layers.14.block_sparse_moe.experts.137.w2", "model.layers.14.block_sparse_moe.experts.138.w2", "model.layers.14.block_sparse_moe.experts.139.w2", "model.layers.14.block_sparse_moe.experts.140.w2", "model.layers.14.block_sparse_moe.experts.141.w2", "model.layers.14.block_sparse_moe.experts.142.w2", "model.layers.14.block_sparse_moe.experts.143.w2", "model.layers.14.block_sparse_moe.experts.144.w2", "model.layers.14.block_sparse_moe.experts.145.w2", "model.layers.14.block_sparse_moe.experts.146.w2", "model.layers.14.block_sparse_moe.experts.147.w2", "model.layers.14.block_sparse_moe.experts.148.w2", "model.layers.14.block_sparse_moe.experts.149.w2", "model.layers.14.block_sparse_moe.experts.150.w2", "model.layers.14.block_sparse_moe.experts.151.w2", "model.layers.14.block_sparse_moe.experts.152.w2", "model.layers.14.block_sparse_moe.experts.153.w2", "model.layers.14.block_sparse_moe.experts.154.w2", "model.layers.14.block_sparse_moe.experts.155.w2", "model.layers.14.block_sparse_moe.experts.156.w2", "model.layers.14.block_sparse_moe.experts.157.w2", "model.layers.14.block_sparse_moe.experts.158.w2", "model.layers.14.block_sparse_moe.experts.159.w2", "model.layers.14.block_sparse_moe.experts.160.w2", "model.layers.14.block_sparse_moe.experts.161.w2", "model.layers.14.block_sparse_moe.experts.162.w2", "model.layers.14.block_sparse_moe.experts.163.w2", "model.layers.14.block_sparse_moe.experts.164.w2", "model.layers.14.block_sparse_moe.experts.165.w2", "model.layers.14.block_sparse_moe.experts.166.w2", "model.layers.14.block_sparse_moe.experts.167.w2", "model.layers.14.block_sparse_moe.experts.168.w2", "model.layers.14.block_sparse_moe.experts.169.w2", "model.layers.14.block_sparse_moe.experts.170.w2", "model.layers.14.block_sparse_moe.experts.171.w2", "model.layers.14.block_sparse_moe.experts.172.w2", "model.layers.14.block_sparse_moe.experts.173.w2", "model.layers.14.block_sparse_moe.experts.174.w2", "model.layers.14.block_sparse_moe.experts.175.w2", "model.layers.14.block_sparse_moe.experts.176.w2", "model.layers.14.block_sparse_moe.experts.177.w2", "model.layers.14.block_sparse_moe.experts.178.w2", "model.layers.14.block_sparse_moe.experts.179.w2", "model.layers.14.block_sparse_moe.experts.180.w2", "model.layers.14.block_sparse_moe.experts.181.w2", "model.layers.14.block_sparse_moe.experts.182.w2", "model.layers.14.block_sparse_moe.experts.183.w2", "model.layers.14.block_sparse_moe.experts.184.w2", "model.layers.14.block_sparse_moe.experts.185.w2", "model.layers.14.block_sparse_moe.experts.186.w2", "model.layers.14.block_sparse_moe.experts.187.w2", "model.layers.14.block_sparse_moe.experts.188.w2", "model.layers.14.block_sparse_moe.experts.189.w2", "model.layers.14.block_sparse_moe.experts.190.w2", "model.layers.14.block_sparse_moe.experts.191.w2", "model.layers.14.block_sparse_moe.experts.192.w2", "model.layers.14.block_sparse_moe.experts.193.w2", "model.layers.14.block_sparse_moe.experts.194.w2", "model.layers.14.block_sparse_moe.experts.195.w2", "model.layers.14.block_sparse_moe.experts.196.w2", "model.layers.14.block_sparse_moe.experts.197.w2", "model.layers.14.block_sparse_moe.experts.198.w2", "model.layers.14.block_sparse_moe.experts.199.w2", "model.layers.14.block_sparse_moe.experts.200.w2", "model.layers.14.block_sparse_moe.experts.201.w2", "model.layers.14.block_sparse_moe.experts.202.w2", "model.layers.14.block_sparse_moe.experts.203.w2", "model.layers.14.block_sparse_moe.experts.204.w2", "model.layers.14.block_sparse_moe.experts.205.w2", "model.layers.14.block_sparse_moe.experts.206.w2", "model.layers.14.block_sparse_moe.experts.207.w2", "model.layers.14.block_sparse_moe.experts.208.w2", "model.layers.14.block_sparse_moe.experts.209.w2", "model.layers.14.block_sparse_moe.experts.210.w2", "model.layers.14.block_sparse_moe.experts.211.w2", "model.layers.14.block_sparse_moe.experts.212.w2", "model.layers.14.block_sparse_moe.experts.213.w2", "model.layers.14.block_sparse_moe.experts.214.w2", "model.layers.14.block_sparse_moe.experts.215.w2", "model.layers.14.block_sparse_moe.experts.216.w2", "model.layers.14.block_sparse_moe.experts.217.w2", "model.layers.14.block_sparse_moe.experts.218.w2", "model.layers.14.block_sparse_moe.experts.219.w2", "model.layers.14.block_sparse_moe.experts.220.w2", "model.layers.14.block_sparse_moe.experts.221.w2", "model.layers.14.block_sparse_moe.experts.222.w2", "model.layers.14.block_sparse_moe.experts.223.w2", "model.layers.14.block_sparse_moe.experts.224.w2", "model.layers.14.block_sparse_moe.experts.225.w2", "model.layers.14.block_sparse_moe.experts.226.w2", "model.layers.14.block_sparse_moe.experts.227.w2", "model.layers.14.block_sparse_moe.experts.228.w2", "model.layers.14.block_sparse_moe.experts.229.w2", "model.layers.14.block_sparse_moe.experts.230.w2", "model.layers.14.block_sparse_moe.experts.231.w2", "model.layers.14.block_sparse_moe.experts.232.w2", "model.layers.14.block_sparse_moe.experts.233.w2", "model.layers.14.block_sparse_moe.experts.234.w2", "model.layers.14.block_sparse_moe.experts.235.w2", "model.layers.14.block_sparse_moe.experts.236.w2", "model.layers.14.block_sparse_moe.experts.237.w2", "model.layers.14.block_sparse_moe.experts.238.w2", "model.layers.14.block_sparse_moe.experts.239.w2", "model.layers.14.block_sparse_moe.experts.240.w2", "model.layers.14.block_sparse_moe.experts.241.w2", "model.layers.14.block_sparse_moe.experts.242.w2", "model.layers.14.block_sparse_moe.experts.243.w2", "model.layers.14.block_sparse_moe.experts.244.w2", "model.layers.14.block_sparse_moe.experts.245.w2", "model.layers.14.block_sparse_moe.experts.246.w2", "model.layers.14.block_sparse_moe.experts.247.w2", "model.layers.14.block_sparse_moe.experts.248.w2", "model.layers.14.block_sparse_moe.experts.249.w2", "model.layers.14.block_sparse_moe.experts.250.w2", "model.layers.14.block_sparse_moe.experts.251.w2", "model.layers.14.block_sparse_moe.experts.252.w2", "model.layers.14.block_sparse_moe.experts.253.w2", "model.layers.14.block_sparse_moe.experts.254.w2", "model.layers.14.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0017510041594505227, "dbits": 1207959552 } ] }, { "idx": 75, "layers": [ "model.layers.15.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00037499349564314166, "dbits": 18874368 } ] }, { "idx": 76, "layers": [ "model.layers.15.self_attn.k_proj", "model.layers.15.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00422836076468229, "dbits": 6291456 } ] }, { "idx": 77, "layers": [ "model.layers.15.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0015909221023321096, "dbits": 18874368 } ] }, { "idx": 78, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w1", "model.layers.15.block_sparse_moe.experts.1.w1", "model.layers.15.block_sparse_moe.experts.2.w1", "model.layers.15.block_sparse_moe.experts.3.w1", "model.layers.15.block_sparse_moe.experts.4.w1", "model.layers.15.block_sparse_moe.experts.5.w1", "model.layers.15.block_sparse_moe.experts.6.w1", "model.layers.15.block_sparse_moe.experts.7.w1", "model.layers.15.block_sparse_moe.experts.8.w1", "model.layers.15.block_sparse_moe.experts.9.w1", "model.layers.15.block_sparse_moe.experts.10.w1", "model.layers.15.block_sparse_moe.experts.11.w1", "model.layers.15.block_sparse_moe.experts.12.w1", "model.layers.15.block_sparse_moe.experts.13.w1", "model.layers.15.block_sparse_moe.experts.14.w1", "model.layers.15.block_sparse_moe.experts.15.w1", "model.layers.15.block_sparse_moe.experts.16.w1", "model.layers.15.block_sparse_moe.experts.17.w1", "model.layers.15.block_sparse_moe.experts.18.w1", "model.layers.15.block_sparse_moe.experts.19.w1", "model.layers.15.block_sparse_moe.experts.20.w1", "model.layers.15.block_sparse_moe.experts.21.w1", "model.layers.15.block_sparse_moe.experts.22.w1", "model.layers.15.block_sparse_moe.experts.23.w1", "model.layers.15.block_sparse_moe.experts.24.w1", "model.layers.15.block_sparse_moe.experts.25.w1", "model.layers.15.block_sparse_moe.experts.26.w1", "model.layers.15.block_sparse_moe.experts.27.w1", "model.layers.15.block_sparse_moe.experts.28.w1", "model.layers.15.block_sparse_moe.experts.29.w1", "model.layers.15.block_sparse_moe.experts.30.w1", "model.layers.15.block_sparse_moe.experts.31.w1", "model.layers.15.block_sparse_moe.experts.32.w1", "model.layers.15.block_sparse_moe.experts.33.w1", "model.layers.15.block_sparse_moe.experts.34.w1", "model.layers.15.block_sparse_moe.experts.35.w1", "model.layers.15.block_sparse_moe.experts.36.w1", "model.layers.15.block_sparse_moe.experts.37.w1", "model.layers.15.block_sparse_moe.experts.38.w1", "model.layers.15.block_sparse_moe.experts.39.w1", "model.layers.15.block_sparse_moe.experts.40.w1", "model.layers.15.block_sparse_moe.experts.41.w1", "model.layers.15.block_sparse_moe.experts.42.w1", "model.layers.15.block_sparse_moe.experts.43.w1", "model.layers.15.block_sparse_moe.experts.44.w1", "model.layers.15.block_sparse_moe.experts.45.w1", "model.layers.15.block_sparse_moe.experts.46.w1", "model.layers.15.block_sparse_moe.experts.47.w1", "model.layers.15.block_sparse_moe.experts.48.w1", "model.layers.15.block_sparse_moe.experts.49.w1", "model.layers.15.block_sparse_moe.experts.50.w1", "model.layers.15.block_sparse_moe.experts.51.w1", "model.layers.15.block_sparse_moe.experts.52.w1", "model.layers.15.block_sparse_moe.experts.53.w1", "model.layers.15.block_sparse_moe.experts.54.w1", "model.layers.15.block_sparse_moe.experts.55.w1", "model.layers.15.block_sparse_moe.experts.56.w1", "model.layers.15.block_sparse_moe.experts.57.w1", "model.layers.15.block_sparse_moe.experts.58.w1", "model.layers.15.block_sparse_moe.experts.59.w1", "model.layers.15.block_sparse_moe.experts.60.w1", "model.layers.15.block_sparse_moe.experts.61.w1", "model.layers.15.block_sparse_moe.experts.62.w1", "model.layers.15.block_sparse_moe.experts.63.w1", "model.layers.15.block_sparse_moe.experts.64.w1", "model.layers.15.block_sparse_moe.experts.65.w1", "model.layers.15.block_sparse_moe.experts.66.w1", "model.layers.15.block_sparse_moe.experts.67.w1", "model.layers.15.block_sparse_moe.experts.68.w1", "model.layers.15.block_sparse_moe.experts.69.w1", "model.layers.15.block_sparse_moe.experts.70.w1", "model.layers.15.block_sparse_moe.experts.71.w1", "model.layers.15.block_sparse_moe.experts.72.w1", "model.layers.15.block_sparse_moe.experts.73.w1", "model.layers.15.block_sparse_moe.experts.74.w1", "model.layers.15.block_sparse_moe.experts.75.w1", "model.layers.15.block_sparse_moe.experts.76.w1", "model.layers.15.block_sparse_moe.experts.77.w1", "model.layers.15.block_sparse_moe.experts.78.w1", "model.layers.15.block_sparse_moe.experts.79.w1", "model.layers.15.block_sparse_moe.experts.80.w1", "model.layers.15.block_sparse_moe.experts.81.w1", "model.layers.15.block_sparse_moe.experts.82.w1", "model.layers.15.block_sparse_moe.experts.83.w1", "model.layers.15.block_sparse_moe.experts.84.w1", "model.layers.15.block_sparse_moe.experts.85.w1", "model.layers.15.block_sparse_moe.experts.86.w1", "model.layers.15.block_sparse_moe.experts.87.w1", "model.layers.15.block_sparse_moe.experts.88.w1", "model.layers.15.block_sparse_moe.experts.89.w1", "model.layers.15.block_sparse_moe.experts.90.w1", "model.layers.15.block_sparse_moe.experts.91.w1", "model.layers.15.block_sparse_moe.experts.92.w1", "model.layers.15.block_sparse_moe.experts.93.w1", "model.layers.15.block_sparse_moe.experts.94.w1", "model.layers.15.block_sparse_moe.experts.95.w1", "model.layers.15.block_sparse_moe.experts.96.w1", "model.layers.15.block_sparse_moe.experts.97.w1", "model.layers.15.block_sparse_moe.experts.98.w1", "model.layers.15.block_sparse_moe.experts.99.w1", "model.layers.15.block_sparse_moe.experts.100.w1", "model.layers.15.block_sparse_moe.experts.101.w1", "model.layers.15.block_sparse_moe.experts.102.w1", "model.layers.15.block_sparse_moe.experts.103.w1", "model.layers.15.block_sparse_moe.experts.104.w1", "model.layers.15.block_sparse_moe.experts.105.w1", "model.layers.15.block_sparse_moe.experts.106.w1", "model.layers.15.block_sparse_moe.experts.107.w1", "model.layers.15.block_sparse_moe.experts.108.w1", "model.layers.15.block_sparse_moe.experts.109.w1", "model.layers.15.block_sparse_moe.experts.110.w1", "model.layers.15.block_sparse_moe.experts.111.w1", "model.layers.15.block_sparse_moe.experts.112.w1", "model.layers.15.block_sparse_moe.experts.113.w1", "model.layers.15.block_sparse_moe.experts.114.w1", "model.layers.15.block_sparse_moe.experts.115.w1", "model.layers.15.block_sparse_moe.experts.116.w1", "model.layers.15.block_sparse_moe.experts.117.w1", "model.layers.15.block_sparse_moe.experts.118.w1", "model.layers.15.block_sparse_moe.experts.119.w1", "model.layers.15.block_sparse_moe.experts.120.w1", "model.layers.15.block_sparse_moe.experts.121.w1", "model.layers.15.block_sparse_moe.experts.122.w1", "model.layers.15.block_sparse_moe.experts.123.w1", "model.layers.15.block_sparse_moe.experts.124.w1", "model.layers.15.block_sparse_moe.experts.125.w1", "model.layers.15.block_sparse_moe.experts.126.w1", "model.layers.15.block_sparse_moe.experts.127.w1", "model.layers.15.block_sparse_moe.experts.128.w1", "model.layers.15.block_sparse_moe.experts.129.w1", "model.layers.15.block_sparse_moe.experts.130.w1", "model.layers.15.block_sparse_moe.experts.131.w1", "model.layers.15.block_sparse_moe.experts.132.w1", "model.layers.15.block_sparse_moe.experts.133.w1", "model.layers.15.block_sparse_moe.experts.134.w1", "model.layers.15.block_sparse_moe.experts.135.w1", "model.layers.15.block_sparse_moe.experts.136.w1", "model.layers.15.block_sparse_moe.experts.137.w1", "model.layers.15.block_sparse_moe.experts.138.w1", "model.layers.15.block_sparse_moe.experts.139.w1", "model.layers.15.block_sparse_moe.experts.140.w1", "model.layers.15.block_sparse_moe.experts.141.w1", "model.layers.15.block_sparse_moe.experts.142.w1", "model.layers.15.block_sparse_moe.experts.143.w1", "model.layers.15.block_sparse_moe.experts.144.w1", "model.layers.15.block_sparse_moe.experts.145.w1", "model.layers.15.block_sparse_moe.experts.146.w1", "model.layers.15.block_sparse_moe.experts.147.w1", "model.layers.15.block_sparse_moe.experts.148.w1", "model.layers.15.block_sparse_moe.experts.149.w1", "model.layers.15.block_sparse_moe.experts.150.w1", "model.layers.15.block_sparse_moe.experts.151.w1", "model.layers.15.block_sparse_moe.experts.152.w1", "model.layers.15.block_sparse_moe.experts.153.w1", "model.layers.15.block_sparse_moe.experts.154.w1", "model.layers.15.block_sparse_moe.experts.155.w1", "model.layers.15.block_sparse_moe.experts.156.w1", "model.layers.15.block_sparse_moe.experts.157.w1", "model.layers.15.block_sparse_moe.experts.158.w1", "model.layers.15.block_sparse_moe.experts.159.w1", "model.layers.15.block_sparse_moe.experts.160.w1", "model.layers.15.block_sparse_moe.experts.161.w1", "model.layers.15.block_sparse_moe.experts.162.w1", "model.layers.15.block_sparse_moe.experts.163.w1", "model.layers.15.block_sparse_moe.experts.164.w1", "model.layers.15.block_sparse_moe.experts.165.w1", "model.layers.15.block_sparse_moe.experts.166.w1", "model.layers.15.block_sparse_moe.experts.167.w1", "model.layers.15.block_sparse_moe.experts.168.w1", "model.layers.15.block_sparse_moe.experts.169.w1", "model.layers.15.block_sparse_moe.experts.170.w1", "model.layers.15.block_sparse_moe.experts.171.w1", "model.layers.15.block_sparse_moe.experts.172.w1", "model.layers.15.block_sparse_moe.experts.173.w1", "model.layers.15.block_sparse_moe.experts.174.w1", "model.layers.15.block_sparse_moe.experts.175.w1", "model.layers.15.block_sparse_moe.experts.176.w1", "model.layers.15.block_sparse_moe.experts.177.w1", "model.layers.15.block_sparse_moe.experts.178.w1", "model.layers.15.block_sparse_moe.experts.179.w1", "model.layers.15.block_sparse_moe.experts.180.w1", "model.layers.15.block_sparse_moe.experts.181.w1", "model.layers.15.block_sparse_moe.experts.182.w1", "model.layers.15.block_sparse_moe.experts.183.w1", "model.layers.15.block_sparse_moe.experts.184.w1", "model.layers.15.block_sparse_moe.experts.185.w1", "model.layers.15.block_sparse_moe.experts.186.w1", "model.layers.15.block_sparse_moe.experts.187.w1", "model.layers.15.block_sparse_moe.experts.188.w1", "model.layers.15.block_sparse_moe.experts.189.w1", "model.layers.15.block_sparse_moe.experts.190.w1", "model.layers.15.block_sparse_moe.experts.191.w1", "model.layers.15.block_sparse_moe.experts.192.w1", "model.layers.15.block_sparse_moe.experts.193.w1", "model.layers.15.block_sparse_moe.experts.194.w1", "model.layers.15.block_sparse_moe.experts.195.w1", "model.layers.15.block_sparse_moe.experts.196.w1", "model.layers.15.block_sparse_moe.experts.197.w1", "model.layers.15.block_sparse_moe.experts.198.w1", "model.layers.15.block_sparse_moe.experts.199.w1", "model.layers.15.block_sparse_moe.experts.200.w1", "model.layers.15.block_sparse_moe.experts.201.w1", "model.layers.15.block_sparse_moe.experts.202.w1", "model.layers.15.block_sparse_moe.experts.203.w1", "model.layers.15.block_sparse_moe.experts.204.w1", "model.layers.15.block_sparse_moe.experts.205.w1", "model.layers.15.block_sparse_moe.experts.206.w1", "model.layers.15.block_sparse_moe.experts.207.w1", "model.layers.15.block_sparse_moe.experts.208.w1", "model.layers.15.block_sparse_moe.experts.209.w1", "model.layers.15.block_sparse_moe.experts.210.w1", "model.layers.15.block_sparse_moe.experts.211.w1", "model.layers.15.block_sparse_moe.experts.212.w1", "model.layers.15.block_sparse_moe.experts.213.w1", "model.layers.15.block_sparse_moe.experts.214.w1", "model.layers.15.block_sparse_moe.experts.215.w1", "model.layers.15.block_sparse_moe.experts.216.w1", "model.layers.15.block_sparse_moe.experts.217.w1", "model.layers.15.block_sparse_moe.experts.218.w1", "model.layers.15.block_sparse_moe.experts.219.w1", "model.layers.15.block_sparse_moe.experts.220.w1", "model.layers.15.block_sparse_moe.experts.221.w1", "model.layers.15.block_sparse_moe.experts.222.w1", "model.layers.15.block_sparse_moe.experts.223.w1", "model.layers.15.block_sparse_moe.experts.224.w1", "model.layers.15.block_sparse_moe.experts.225.w1", "model.layers.15.block_sparse_moe.experts.226.w1", "model.layers.15.block_sparse_moe.experts.227.w1", "model.layers.15.block_sparse_moe.experts.228.w1", "model.layers.15.block_sparse_moe.experts.229.w1", "model.layers.15.block_sparse_moe.experts.230.w1", "model.layers.15.block_sparse_moe.experts.231.w1", "model.layers.15.block_sparse_moe.experts.232.w1", "model.layers.15.block_sparse_moe.experts.233.w1", "model.layers.15.block_sparse_moe.experts.234.w1", "model.layers.15.block_sparse_moe.experts.235.w1", "model.layers.15.block_sparse_moe.experts.236.w1", "model.layers.15.block_sparse_moe.experts.237.w1", "model.layers.15.block_sparse_moe.experts.238.w1", "model.layers.15.block_sparse_moe.experts.239.w1", "model.layers.15.block_sparse_moe.experts.240.w1", "model.layers.15.block_sparse_moe.experts.241.w1", "model.layers.15.block_sparse_moe.experts.242.w1", "model.layers.15.block_sparse_moe.experts.243.w1", "model.layers.15.block_sparse_moe.experts.244.w1", "model.layers.15.block_sparse_moe.experts.245.w1", "model.layers.15.block_sparse_moe.experts.246.w1", "model.layers.15.block_sparse_moe.experts.247.w1", "model.layers.15.block_sparse_moe.experts.248.w1", "model.layers.15.block_sparse_moe.experts.249.w1", "model.layers.15.block_sparse_moe.experts.250.w1", "model.layers.15.block_sparse_moe.experts.251.w1", "model.layers.15.block_sparse_moe.experts.252.w1", "model.layers.15.block_sparse_moe.experts.253.w1", "model.layers.15.block_sparse_moe.experts.254.w1", "model.layers.15.block_sparse_moe.experts.255.w1", "model.layers.15.block_sparse_moe.experts.0.w3", "model.layers.15.block_sparse_moe.experts.1.w3", "model.layers.15.block_sparse_moe.experts.2.w3", "model.layers.15.block_sparse_moe.experts.3.w3", "model.layers.15.block_sparse_moe.experts.4.w3", "model.layers.15.block_sparse_moe.experts.5.w3", "model.layers.15.block_sparse_moe.experts.6.w3", "model.layers.15.block_sparse_moe.experts.7.w3", "model.layers.15.block_sparse_moe.experts.8.w3", "model.layers.15.block_sparse_moe.experts.9.w3", "model.layers.15.block_sparse_moe.experts.10.w3", "model.layers.15.block_sparse_moe.experts.11.w3", "model.layers.15.block_sparse_moe.experts.12.w3", "model.layers.15.block_sparse_moe.experts.13.w3", "model.layers.15.block_sparse_moe.experts.14.w3", "model.layers.15.block_sparse_moe.experts.15.w3", "model.layers.15.block_sparse_moe.experts.16.w3", "model.layers.15.block_sparse_moe.experts.17.w3", "model.layers.15.block_sparse_moe.experts.18.w3", "model.layers.15.block_sparse_moe.experts.19.w3", "model.layers.15.block_sparse_moe.experts.20.w3", "model.layers.15.block_sparse_moe.experts.21.w3", "model.layers.15.block_sparse_moe.experts.22.w3", "model.layers.15.block_sparse_moe.experts.23.w3", "model.layers.15.block_sparse_moe.experts.24.w3", "model.layers.15.block_sparse_moe.experts.25.w3", "model.layers.15.block_sparse_moe.experts.26.w3", "model.layers.15.block_sparse_moe.experts.27.w3", "model.layers.15.block_sparse_moe.experts.28.w3", "model.layers.15.block_sparse_moe.experts.29.w3", "model.layers.15.block_sparse_moe.experts.30.w3", "model.layers.15.block_sparse_moe.experts.31.w3", "model.layers.15.block_sparse_moe.experts.32.w3", "model.layers.15.block_sparse_moe.experts.33.w3", "model.layers.15.block_sparse_moe.experts.34.w3", "model.layers.15.block_sparse_moe.experts.35.w3", "model.layers.15.block_sparse_moe.experts.36.w3", "model.layers.15.block_sparse_moe.experts.37.w3", "model.layers.15.block_sparse_moe.experts.38.w3", "model.layers.15.block_sparse_moe.experts.39.w3", "model.layers.15.block_sparse_moe.experts.40.w3", "model.layers.15.block_sparse_moe.experts.41.w3", "model.layers.15.block_sparse_moe.experts.42.w3", "model.layers.15.block_sparse_moe.experts.43.w3", "model.layers.15.block_sparse_moe.experts.44.w3", "model.layers.15.block_sparse_moe.experts.45.w3", "model.layers.15.block_sparse_moe.experts.46.w3", "model.layers.15.block_sparse_moe.experts.47.w3", "model.layers.15.block_sparse_moe.experts.48.w3", "model.layers.15.block_sparse_moe.experts.49.w3", "model.layers.15.block_sparse_moe.experts.50.w3", "model.layers.15.block_sparse_moe.experts.51.w3", "model.layers.15.block_sparse_moe.experts.52.w3", "model.layers.15.block_sparse_moe.experts.53.w3", "model.layers.15.block_sparse_moe.experts.54.w3", "model.layers.15.block_sparse_moe.experts.55.w3", "model.layers.15.block_sparse_moe.experts.56.w3", "model.layers.15.block_sparse_moe.experts.57.w3", "model.layers.15.block_sparse_moe.experts.58.w3", "model.layers.15.block_sparse_moe.experts.59.w3", "model.layers.15.block_sparse_moe.experts.60.w3", "model.layers.15.block_sparse_moe.experts.61.w3", "model.layers.15.block_sparse_moe.experts.62.w3", "model.layers.15.block_sparse_moe.experts.63.w3", "model.layers.15.block_sparse_moe.experts.64.w3", "model.layers.15.block_sparse_moe.experts.65.w3", "model.layers.15.block_sparse_moe.experts.66.w3", "model.layers.15.block_sparse_moe.experts.67.w3", "model.layers.15.block_sparse_moe.experts.68.w3", "model.layers.15.block_sparse_moe.experts.69.w3", "model.layers.15.block_sparse_moe.experts.70.w3", "model.layers.15.block_sparse_moe.experts.71.w3", "model.layers.15.block_sparse_moe.experts.72.w3", "model.layers.15.block_sparse_moe.experts.73.w3", "model.layers.15.block_sparse_moe.experts.74.w3", "model.layers.15.block_sparse_moe.experts.75.w3", "model.layers.15.block_sparse_moe.experts.76.w3", "model.layers.15.block_sparse_moe.experts.77.w3", "model.layers.15.block_sparse_moe.experts.78.w3", "model.layers.15.block_sparse_moe.experts.79.w3", "model.layers.15.block_sparse_moe.experts.80.w3", "model.layers.15.block_sparse_moe.experts.81.w3", "model.layers.15.block_sparse_moe.experts.82.w3", "model.layers.15.block_sparse_moe.experts.83.w3", "model.layers.15.block_sparse_moe.experts.84.w3", "model.layers.15.block_sparse_moe.experts.85.w3", "model.layers.15.block_sparse_moe.experts.86.w3", "model.layers.15.block_sparse_moe.experts.87.w3", "model.layers.15.block_sparse_moe.experts.88.w3", "model.layers.15.block_sparse_moe.experts.89.w3", "model.layers.15.block_sparse_moe.experts.90.w3", "model.layers.15.block_sparse_moe.experts.91.w3", "model.layers.15.block_sparse_moe.experts.92.w3", "model.layers.15.block_sparse_moe.experts.93.w3", "model.layers.15.block_sparse_moe.experts.94.w3", "model.layers.15.block_sparse_moe.experts.95.w3", "model.layers.15.block_sparse_moe.experts.96.w3", "model.layers.15.block_sparse_moe.experts.97.w3", "model.layers.15.block_sparse_moe.experts.98.w3", "model.layers.15.block_sparse_moe.experts.99.w3", "model.layers.15.block_sparse_moe.experts.100.w3", "model.layers.15.block_sparse_moe.experts.101.w3", "model.layers.15.block_sparse_moe.experts.102.w3", "model.layers.15.block_sparse_moe.experts.103.w3", "model.layers.15.block_sparse_moe.experts.104.w3", "model.layers.15.block_sparse_moe.experts.105.w3", "model.layers.15.block_sparse_moe.experts.106.w3", "model.layers.15.block_sparse_moe.experts.107.w3", "model.layers.15.block_sparse_moe.experts.108.w3", "model.layers.15.block_sparse_moe.experts.109.w3", "model.layers.15.block_sparse_moe.experts.110.w3", "model.layers.15.block_sparse_moe.experts.111.w3", "model.layers.15.block_sparse_moe.experts.112.w3", "model.layers.15.block_sparse_moe.experts.113.w3", "model.layers.15.block_sparse_moe.experts.114.w3", "model.layers.15.block_sparse_moe.experts.115.w3", "model.layers.15.block_sparse_moe.experts.116.w3", "model.layers.15.block_sparse_moe.experts.117.w3", "model.layers.15.block_sparse_moe.experts.118.w3", "model.layers.15.block_sparse_moe.experts.119.w3", "model.layers.15.block_sparse_moe.experts.120.w3", "model.layers.15.block_sparse_moe.experts.121.w3", "model.layers.15.block_sparse_moe.experts.122.w3", "model.layers.15.block_sparse_moe.experts.123.w3", "model.layers.15.block_sparse_moe.experts.124.w3", "model.layers.15.block_sparse_moe.experts.125.w3", "model.layers.15.block_sparse_moe.experts.126.w3", "model.layers.15.block_sparse_moe.experts.127.w3", "model.layers.15.block_sparse_moe.experts.128.w3", "model.layers.15.block_sparse_moe.experts.129.w3", "model.layers.15.block_sparse_moe.experts.130.w3", "model.layers.15.block_sparse_moe.experts.131.w3", "model.layers.15.block_sparse_moe.experts.132.w3", "model.layers.15.block_sparse_moe.experts.133.w3", "model.layers.15.block_sparse_moe.experts.134.w3", "model.layers.15.block_sparse_moe.experts.135.w3", "model.layers.15.block_sparse_moe.experts.136.w3", "model.layers.15.block_sparse_moe.experts.137.w3", "model.layers.15.block_sparse_moe.experts.138.w3", "model.layers.15.block_sparse_moe.experts.139.w3", "model.layers.15.block_sparse_moe.experts.140.w3", "model.layers.15.block_sparse_moe.experts.141.w3", "model.layers.15.block_sparse_moe.experts.142.w3", "model.layers.15.block_sparse_moe.experts.143.w3", "model.layers.15.block_sparse_moe.experts.144.w3", "model.layers.15.block_sparse_moe.experts.145.w3", "model.layers.15.block_sparse_moe.experts.146.w3", "model.layers.15.block_sparse_moe.experts.147.w3", "model.layers.15.block_sparse_moe.experts.148.w3", "model.layers.15.block_sparse_moe.experts.149.w3", "model.layers.15.block_sparse_moe.experts.150.w3", "model.layers.15.block_sparse_moe.experts.151.w3", "model.layers.15.block_sparse_moe.experts.152.w3", "model.layers.15.block_sparse_moe.experts.153.w3", "model.layers.15.block_sparse_moe.experts.154.w3", "model.layers.15.block_sparse_moe.experts.155.w3", "model.layers.15.block_sparse_moe.experts.156.w3", "model.layers.15.block_sparse_moe.experts.157.w3", "model.layers.15.block_sparse_moe.experts.158.w3", "model.layers.15.block_sparse_moe.experts.159.w3", "model.layers.15.block_sparse_moe.experts.160.w3", "model.layers.15.block_sparse_moe.experts.161.w3", "model.layers.15.block_sparse_moe.experts.162.w3", "model.layers.15.block_sparse_moe.experts.163.w3", "model.layers.15.block_sparse_moe.experts.164.w3", "model.layers.15.block_sparse_moe.experts.165.w3", "model.layers.15.block_sparse_moe.experts.166.w3", "model.layers.15.block_sparse_moe.experts.167.w3", "model.layers.15.block_sparse_moe.experts.168.w3", "model.layers.15.block_sparse_moe.experts.169.w3", "model.layers.15.block_sparse_moe.experts.170.w3", "model.layers.15.block_sparse_moe.experts.171.w3", "model.layers.15.block_sparse_moe.experts.172.w3", "model.layers.15.block_sparse_moe.experts.173.w3", "model.layers.15.block_sparse_moe.experts.174.w3", "model.layers.15.block_sparse_moe.experts.175.w3", "model.layers.15.block_sparse_moe.experts.176.w3", "model.layers.15.block_sparse_moe.experts.177.w3", "model.layers.15.block_sparse_moe.experts.178.w3", "model.layers.15.block_sparse_moe.experts.179.w3", "model.layers.15.block_sparse_moe.experts.180.w3", "model.layers.15.block_sparse_moe.experts.181.w3", "model.layers.15.block_sparse_moe.experts.182.w3", "model.layers.15.block_sparse_moe.experts.183.w3", "model.layers.15.block_sparse_moe.experts.184.w3", "model.layers.15.block_sparse_moe.experts.185.w3", "model.layers.15.block_sparse_moe.experts.186.w3", "model.layers.15.block_sparse_moe.experts.187.w3", "model.layers.15.block_sparse_moe.experts.188.w3", "model.layers.15.block_sparse_moe.experts.189.w3", "model.layers.15.block_sparse_moe.experts.190.w3", "model.layers.15.block_sparse_moe.experts.191.w3", "model.layers.15.block_sparse_moe.experts.192.w3", "model.layers.15.block_sparse_moe.experts.193.w3", "model.layers.15.block_sparse_moe.experts.194.w3", "model.layers.15.block_sparse_moe.experts.195.w3", "model.layers.15.block_sparse_moe.experts.196.w3", "model.layers.15.block_sparse_moe.experts.197.w3", "model.layers.15.block_sparse_moe.experts.198.w3", "model.layers.15.block_sparse_moe.experts.199.w3", "model.layers.15.block_sparse_moe.experts.200.w3", "model.layers.15.block_sparse_moe.experts.201.w3", "model.layers.15.block_sparse_moe.experts.202.w3", "model.layers.15.block_sparse_moe.experts.203.w3", "model.layers.15.block_sparse_moe.experts.204.w3", "model.layers.15.block_sparse_moe.experts.205.w3", "model.layers.15.block_sparse_moe.experts.206.w3", "model.layers.15.block_sparse_moe.experts.207.w3", "model.layers.15.block_sparse_moe.experts.208.w3", "model.layers.15.block_sparse_moe.experts.209.w3", "model.layers.15.block_sparse_moe.experts.210.w3", "model.layers.15.block_sparse_moe.experts.211.w3", "model.layers.15.block_sparse_moe.experts.212.w3", "model.layers.15.block_sparse_moe.experts.213.w3", "model.layers.15.block_sparse_moe.experts.214.w3", "model.layers.15.block_sparse_moe.experts.215.w3", "model.layers.15.block_sparse_moe.experts.216.w3", "model.layers.15.block_sparse_moe.experts.217.w3", "model.layers.15.block_sparse_moe.experts.218.w3", "model.layers.15.block_sparse_moe.experts.219.w3", "model.layers.15.block_sparse_moe.experts.220.w3", "model.layers.15.block_sparse_moe.experts.221.w3", "model.layers.15.block_sparse_moe.experts.222.w3", "model.layers.15.block_sparse_moe.experts.223.w3", "model.layers.15.block_sparse_moe.experts.224.w3", "model.layers.15.block_sparse_moe.experts.225.w3", "model.layers.15.block_sparse_moe.experts.226.w3", "model.layers.15.block_sparse_moe.experts.227.w3", "model.layers.15.block_sparse_moe.experts.228.w3", "model.layers.15.block_sparse_moe.experts.229.w3", "model.layers.15.block_sparse_moe.experts.230.w3", "model.layers.15.block_sparse_moe.experts.231.w3", "model.layers.15.block_sparse_moe.experts.232.w3", "model.layers.15.block_sparse_moe.experts.233.w3", "model.layers.15.block_sparse_moe.experts.234.w3", "model.layers.15.block_sparse_moe.experts.235.w3", "model.layers.15.block_sparse_moe.experts.236.w3", "model.layers.15.block_sparse_moe.experts.237.w3", "model.layers.15.block_sparse_moe.experts.238.w3", "model.layers.15.block_sparse_moe.experts.239.w3", "model.layers.15.block_sparse_moe.experts.240.w3", "model.layers.15.block_sparse_moe.experts.241.w3", "model.layers.15.block_sparse_moe.experts.242.w3", "model.layers.15.block_sparse_moe.experts.243.w3", "model.layers.15.block_sparse_moe.experts.244.w3", "model.layers.15.block_sparse_moe.experts.245.w3", "model.layers.15.block_sparse_moe.experts.246.w3", "model.layers.15.block_sparse_moe.experts.247.w3", "model.layers.15.block_sparse_moe.experts.248.w3", "model.layers.15.block_sparse_moe.experts.249.w3", "model.layers.15.block_sparse_moe.experts.250.w3", "model.layers.15.block_sparse_moe.experts.251.w3", "model.layers.15.block_sparse_moe.experts.252.w3", "model.layers.15.block_sparse_moe.experts.253.w3", "model.layers.15.block_sparse_moe.experts.254.w3", "model.layers.15.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00027364958077669144, "dbits": 2415919104 } ] }, { "idx": 79, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w2", "model.layers.15.block_sparse_moe.experts.1.w2", "model.layers.15.block_sparse_moe.experts.2.w2", "model.layers.15.block_sparse_moe.experts.3.w2", "model.layers.15.block_sparse_moe.experts.4.w2", "model.layers.15.block_sparse_moe.experts.5.w2", "model.layers.15.block_sparse_moe.experts.6.w2", "model.layers.15.block_sparse_moe.experts.7.w2", "model.layers.15.block_sparse_moe.experts.8.w2", "model.layers.15.block_sparse_moe.experts.9.w2", "model.layers.15.block_sparse_moe.experts.10.w2", "model.layers.15.block_sparse_moe.experts.11.w2", "model.layers.15.block_sparse_moe.experts.12.w2", "model.layers.15.block_sparse_moe.experts.13.w2", "model.layers.15.block_sparse_moe.experts.14.w2", "model.layers.15.block_sparse_moe.experts.15.w2", "model.layers.15.block_sparse_moe.experts.16.w2", "model.layers.15.block_sparse_moe.experts.17.w2", "model.layers.15.block_sparse_moe.experts.18.w2", "model.layers.15.block_sparse_moe.experts.19.w2", "model.layers.15.block_sparse_moe.experts.20.w2", "model.layers.15.block_sparse_moe.experts.21.w2", "model.layers.15.block_sparse_moe.experts.22.w2", "model.layers.15.block_sparse_moe.experts.23.w2", "model.layers.15.block_sparse_moe.experts.24.w2", "model.layers.15.block_sparse_moe.experts.25.w2", "model.layers.15.block_sparse_moe.experts.26.w2", "model.layers.15.block_sparse_moe.experts.27.w2", "model.layers.15.block_sparse_moe.experts.28.w2", "model.layers.15.block_sparse_moe.experts.29.w2", "model.layers.15.block_sparse_moe.experts.30.w2", "model.layers.15.block_sparse_moe.experts.31.w2", "model.layers.15.block_sparse_moe.experts.32.w2", "model.layers.15.block_sparse_moe.experts.33.w2", "model.layers.15.block_sparse_moe.experts.34.w2", "model.layers.15.block_sparse_moe.experts.35.w2", "model.layers.15.block_sparse_moe.experts.36.w2", "model.layers.15.block_sparse_moe.experts.37.w2", "model.layers.15.block_sparse_moe.experts.38.w2", "model.layers.15.block_sparse_moe.experts.39.w2", "model.layers.15.block_sparse_moe.experts.40.w2", "model.layers.15.block_sparse_moe.experts.41.w2", "model.layers.15.block_sparse_moe.experts.42.w2", "model.layers.15.block_sparse_moe.experts.43.w2", "model.layers.15.block_sparse_moe.experts.44.w2", "model.layers.15.block_sparse_moe.experts.45.w2", "model.layers.15.block_sparse_moe.experts.46.w2", "model.layers.15.block_sparse_moe.experts.47.w2", "model.layers.15.block_sparse_moe.experts.48.w2", "model.layers.15.block_sparse_moe.experts.49.w2", "model.layers.15.block_sparse_moe.experts.50.w2", "model.layers.15.block_sparse_moe.experts.51.w2", "model.layers.15.block_sparse_moe.experts.52.w2", "model.layers.15.block_sparse_moe.experts.53.w2", "model.layers.15.block_sparse_moe.experts.54.w2", "model.layers.15.block_sparse_moe.experts.55.w2", "model.layers.15.block_sparse_moe.experts.56.w2", "model.layers.15.block_sparse_moe.experts.57.w2", "model.layers.15.block_sparse_moe.experts.58.w2", "model.layers.15.block_sparse_moe.experts.59.w2", "model.layers.15.block_sparse_moe.experts.60.w2", "model.layers.15.block_sparse_moe.experts.61.w2", "model.layers.15.block_sparse_moe.experts.62.w2", "model.layers.15.block_sparse_moe.experts.63.w2", "model.layers.15.block_sparse_moe.experts.64.w2", "model.layers.15.block_sparse_moe.experts.65.w2", "model.layers.15.block_sparse_moe.experts.66.w2", "model.layers.15.block_sparse_moe.experts.67.w2", "model.layers.15.block_sparse_moe.experts.68.w2", "model.layers.15.block_sparse_moe.experts.69.w2", "model.layers.15.block_sparse_moe.experts.70.w2", "model.layers.15.block_sparse_moe.experts.71.w2", "model.layers.15.block_sparse_moe.experts.72.w2", "model.layers.15.block_sparse_moe.experts.73.w2", "model.layers.15.block_sparse_moe.experts.74.w2", "model.layers.15.block_sparse_moe.experts.75.w2", "model.layers.15.block_sparse_moe.experts.76.w2", "model.layers.15.block_sparse_moe.experts.77.w2", "model.layers.15.block_sparse_moe.experts.78.w2", "model.layers.15.block_sparse_moe.experts.79.w2", "model.layers.15.block_sparse_moe.experts.80.w2", "model.layers.15.block_sparse_moe.experts.81.w2", "model.layers.15.block_sparse_moe.experts.82.w2", "model.layers.15.block_sparse_moe.experts.83.w2", "model.layers.15.block_sparse_moe.experts.84.w2", "model.layers.15.block_sparse_moe.experts.85.w2", "model.layers.15.block_sparse_moe.experts.86.w2", "model.layers.15.block_sparse_moe.experts.87.w2", "model.layers.15.block_sparse_moe.experts.88.w2", "model.layers.15.block_sparse_moe.experts.89.w2", "model.layers.15.block_sparse_moe.experts.90.w2", "model.layers.15.block_sparse_moe.experts.91.w2", "model.layers.15.block_sparse_moe.experts.92.w2", "model.layers.15.block_sparse_moe.experts.93.w2", "model.layers.15.block_sparse_moe.experts.94.w2", "model.layers.15.block_sparse_moe.experts.95.w2", "model.layers.15.block_sparse_moe.experts.96.w2", "model.layers.15.block_sparse_moe.experts.97.w2", "model.layers.15.block_sparse_moe.experts.98.w2", "model.layers.15.block_sparse_moe.experts.99.w2", "model.layers.15.block_sparse_moe.experts.100.w2", "model.layers.15.block_sparse_moe.experts.101.w2", "model.layers.15.block_sparse_moe.experts.102.w2", "model.layers.15.block_sparse_moe.experts.103.w2", "model.layers.15.block_sparse_moe.experts.104.w2", "model.layers.15.block_sparse_moe.experts.105.w2", "model.layers.15.block_sparse_moe.experts.106.w2", "model.layers.15.block_sparse_moe.experts.107.w2", "model.layers.15.block_sparse_moe.experts.108.w2", "model.layers.15.block_sparse_moe.experts.109.w2", "model.layers.15.block_sparse_moe.experts.110.w2", "model.layers.15.block_sparse_moe.experts.111.w2", "model.layers.15.block_sparse_moe.experts.112.w2", "model.layers.15.block_sparse_moe.experts.113.w2", "model.layers.15.block_sparse_moe.experts.114.w2", "model.layers.15.block_sparse_moe.experts.115.w2", "model.layers.15.block_sparse_moe.experts.116.w2", "model.layers.15.block_sparse_moe.experts.117.w2", "model.layers.15.block_sparse_moe.experts.118.w2", "model.layers.15.block_sparse_moe.experts.119.w2", "model.layers.15.block_sparse_moe.experts.120.w2", "model.layers.15.block_sparse_moe.experts.121.w2", "model.layers.15.block_sparse_moe.experts.122.w2", "model.layers.15.block_sparse_moe.experts.123.w2", "model.layers.15.block_sparse_moe.experts.124.w2", "model.layers.15.block_sparse_moe.experts.125.w2", "model.layers.15.block_sparse_moe.experts.126.w2", "model.layers.15.block_sparse_moe.experts.127.w2", "model.layers.15.block_sparse_moe.experts.128.w2", "model.layers.15.block_sparse_moe.experts.129.w2", "model.layers.15.block_sparse_moe.experts.130.w2", "model.layers.15.block_sparse_moe.experts.131.w2", "model.layers.15.block_sparse_moe.experts.132.w2", "model.layers.15.block_sparse_moe.experts.133.w2", "model.layers.15.block_sparse_moe.experts.134.w2", "model.layers.15.block_sparse_moe.experts.135.w2", "model.layers.15.block_sparse_moe.experts.136.w2", "model.layers.15.block_sparse_moe.experts.137.w2", "model.layers.15.block_sparse_moe.experts.138.w2", "model.layers.15.block_sparse_moe.experts.139.w2", "model.layers.15.block_sparse_moe.experts.140.w2", "model.layers.15.block_sparse_moe.experts.141.w2", "model.layers.15.block_sparse_moe.experts.142.w2", "model.layers.15.block_sparse_moe.experts.143.w2", "model.layers.15.block_sparse_moe.experts.144.w2", "model.layers.15.block_sparse_moe.experts.145.w2", "model.layers.15.block_sparse_moe.experts.146.w2", "model.layers.15.block_sparse_moe.experts.147.w2", "model.layers.15.block_sparse_moe.experts.148.w2", "model.layers.15.block_sparse_moe.experts.149.w2", "model.layers.15.block_sparse_moe.experts.150.w2", "model.layers.15.block_sparse_moe.experts.151.w2", "model.layers.15.block_sparse_moe.experts.152.w2", "model.layers.15.block_sparse_moe.experts.153.w2", "model.layers.15.block_sparse_moe.experts.154.w2", "model.layers.15.block_sparse_moe.experts.155.w2", "model.layers.15.block_sparse_moe.experts.156.w2", "model.layers.15.block_sparse_moe.experts.157.w2", "model.layers.15.block_sparse_moe.experts.158.w2", "model.layers.15.block_sparse_moe.experts.159.w2", "model.layers.15.block_sparse_moe.experts.160.w2", "model.layers.15.block_sparse_moe.experts.161.w2", "model.layers.15.block_sparse_moe.experts.162.w2", "model.layers.15.block_sparse_moe.experts.163.w2", "model.layers.15.block_sparse_moe.experts.164.w2", "model.layers.15.block_sparse_moe.experts.165.w2", "model.layers.15.block_sparse_moe.experts.166.w2", "model.layers.15.block_sparse_moe.experts.167.w2", "model.layers.15.block_sparse_moe.experts.168.w2", "model.layers.15.block_sparse_moe.experts.169.w2", "model.layers.15.block_sparse_moe.experts.170.w2", "model.layers.15.block_sparse_moe.experts.171.w2", "model.layers.15.block_sparse_moe.experts.172.w2", "model.layers.15.block_sparse_moe.experts.173.w2", "model.layers.15.block_sparse_moe.experts.174.w2", "model.layers.15.block_sparse_moe.experts.175.w2", "model.layers.15.block_sparse_moe.experts.176.w2", "model.layers.15.block_sparse_moe.experts.177.w2", "model.layers.15.block_sparse_moe.experts.178.w2", "model.layers.15.block_sparse_moe.experts.179.w2", "model.layers.15.block_sparse_moe.experts.180.w2", "model.layers.15.block_sparse_moe.experts.181.w2", "model.layers.15.block_sparse_moe.experts.182.w2", "model.layers.15.block_sparse_moe.experts.183.w2", "model.layers.15.block_sparse_moe.experts.184.w2", "model.layers.15.block_sparse_moe.experts.185.w2", "model.layers.15.block_sparse_moe.experts.186.w2", "model.layers.15.block_sparse_moe.experts.187.w2", "model.layers.15.block_sparse_moe.experts.188.w2", "model.layers.15.block_sparse_moe.experts.189.w2", "model.layers.15.block_sparse_moe.experts.190.w2", "model.layers.15.block_sparse_moe.experts.191.w2", "model.layers.15.block_sparse_moe.experts.192.w2", "model.layers.15.block_sparse_moe.experts.193.w2", "model.layers.15.block_sparse_moe.experts.194.w2", "model.layers.15.block_sparse_moe.experts.195.w2", "model.layers.15.block_sparse_moe.experts.196.w2", "model.layers.15.block_sparse_moe.experts.197.w2", "model.layers.15.block_sparse_moe.experts.198.w2", "model.layers.15.block_sparse_moe.experts.199.w2", "model.layers.15.block_sparse_moe.experts.200.w2", "model.layers.15.block_sparse_moe.experts.201.w2", "model.layers.15.block_sparse_moe.experts.202.w2", "model.layers.15.block_sparse_moe.experts.203.w2", "model.layers.15.block_sparse_moe.experts.204.w2", "model.layers.15.block_sparse_moe.experts.205.w2", "model.layers.15.block_sparse_moe.experts.206.w2", "model.layers.15.block_sparse_moe.experts.207.w2", "model.layers.15.block_sparse_moe.experts.208.w2", "model.layers.15.block_sparse_moe.experts.209.w2", "model.layers.15.block_sparse_moe.experts.210.w2", "model.layers.15.block_sparse_moe.experts.211.w2", "model.layers.15.block_sparse_moe.experts.212.w2", "model.layers.15.block_sparse_moe.experts.213.w2", "model.layers.15.block_sparse_moe.experts.214.w2", "model.layers.15.block_sparse_moe.experts.215.w2", "model.layers.15.block_sparse_moe.experts.216.w2", "model.layers.15.block_sparse_moe.experts.217.w2", "model.layers.15.block_sparse_moe.experts.218.w2", "model.layers.15.block_sparse_moe.experts.219.w2", "model.layers.15.block_sparse_moe.experts.220.w2", "model.layers.15.block_sparse_moe.experts.221.w2", "model.layers.15.block_sparse_moe.experts.222.w2", "model.layers.15.block_sparse_moe.experts.223.w2", "model.layers.15.block_sparse_moe.experts.224.w2", "model.layers.15.block_sparse_moe.experts.225.w2", "model.layers.15.block_sparse_moe.experts.226.w2", "model.layers.15.block_sparse_moe.experts.227.w2", "model.layers.15.block_sparse_moe.experts.228.w2", "model.layers.15.block_sparse_moe.experts.229.w2", "model.layers.15.block_sparse_moe.experts.230.w2", "model.layers.15.block_sparse_moe.experts.231.w2", "model.layers.15.block_sparse_moe.experts.232.w2", "model.layers.15.block_sparse_moe.experts.233.w2", "model.layers.15.block_sparse_moe.experts.234.w2", "model.layers.15.block_sparse_moe.experts.235.w2", "model.layers.15.block_sparse_moe.experts.236.w2", "model.layers.15.block_sparse_moe.experts.237.w2", "model.layers.15.block_sparse_moe.experts.238.w2", "model.layers.15.block_sparse_moe.experts.239.w2", "model.layers.15.block_sparse_moe.experts.240.w2", "model.layers.15.block_sparse_moe.experts.241.w2", "model.layers.15.block_sparse_moe.experts.242.w2", "model.layers.15.block_sparse_moe.experts.243.w2", "model.layers.15.block_sparse_moe.experts.244.w2", "model.layers.15.block_sparse_moe.experts.245.w2", "model.layers.15.block_sparse_moe.experts.246.w2", "model.layers.15.block_sparse_moe.experts.247.w2", "model.layers.15.block_sparse_moe.experts.248.w2", "model.layers.15.block_sparse_moe.experts.249.w2", "model.layers.15.block_sparse_moe.experts.250.w2", "model.layers.15.block_sparse_moe.experts.251.w2", "model.layers.15.block_sparse_moe.experts.252.w2", "model.layers.15.block_sparse_moe.experts.253.w2", "model.layers.15.block_sparse_moe.experts.254.w2", "model.layers.15.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00015001464635133743, "dbits": 1207959552 } ] }, { "idx": 80, "layers": [ "model.layers.16.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00010438673198222281, "dbits": 18874368 } ] }, { "idx": 81, "layers": [ "model.layers.16.self_attn.k_proj", "model.layers.16.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0006949452683329554, "dbits": 6291456 } ] }, { "idx": 82, "layers": [ "model.layers.16.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0012572562322020475, "dbits": 18874368 } ] }, { "idx": 83, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w1", "model.layers.16.block_sparse_moe.experts.1.w1", "model.layers.16.block_sparse_moe.experts.2.w1", "model.layers.16.block_sparse_moe.experts.3.w1", "model.layers.16.block_sparse_moe.experts.4.w1", "model.layers.16.block_sparse_moe.experts.5.w1", "model.layers.16.block_sparse_moe.experts.6.w1", "model.layers.16.block_sparse_moe.experts.7.w1", "model.layers.16.block_sparse_moe.experts.8.w1", "model.layers.16.block_sparse_moe.experts.9.w1", "model.layers.16.block_sparse_moe.experts.10.w1", "model.layers.16.block_sparse_moe.experts.11.w1", "model.layers.16.block_sparse_moe.experts.12.w1", "model.layers.16.block_sparse_moe.experts.13.w1", "model.layers.16.block_sparse_moe.experts.14.w1", "model.layers.16.block_sparse_moe.experts.15.w1", "model.layers.16.block_sparse_moe.experts.16.w1", "model.layers.16.block_sparse_moe.experts.17.w1", "model.layers.16.block_sparse_moe.experts.18.w1", "model.layers.16.block_sparse_moe.experts.19.w1", "model.layers.16.block_sparse_moe.experts.20.w1", "model.layers.16.block_sparse_moe.experts.21.w1", "model.layers.16.block_sparse_moe.experts.22.w1", "model.layers.16.block_sparse_moe.experts.23.w1", "model.layers.16.block_sparse_moe.experts.24.w1", "model.layers.16.block_sparse_moe.experts.25.w1", "model.layers.16.block_sparse_moe.experts.26.w1", "model.layers.16.block_sparse_moe.experts.27.w1", "model.layers.16.block_sparse_moe.experts.28.w1", "model.layers.16.block_sparse_moe.experts.29.w1", "model.layers.16.block_sparse_moe.experts.30.w1", "model.layers.16.block_sparse_moe.experts.31.w1", "model.layers.16.block_sparse_moe.experts.32.w1", "model.layers.16.block_sparse_moe.experts.33.w1", "model.layers.16.block_sparse_moe.experts.34.w1", "model.layers.16.block_sparse_moe.experts.35.w1", "model.layers.16.block_sparse_moe.experts.36.w1", "model.layers.16.block_sparse_moe.experts.37.w1", "model.layers.16.block_sparse_moe.experts.38.w1", "model.layers.16.block_sparse_moe.experts.39.w1", "model.layers.16.block_sparse_moe.experts.40.w1", "model.layers.16.block_sparse_moe.experts.41.w1", "model.layers.16.block_sparse_moe.experts.42.w1", "model.layers.16.block_sparse_moe.experts.43.w1", "model.layers.16.block_sparse_moe.experts.44.w1", "model.layers.16.block_sparse_moe.experts.45.w1", "model.layers.16.block_sparse_moe.experts.46.w1", "model.layers.16.block_sparse_moe.experts.47.w1", "model.layers.16.block_sparse_moe.experts.48.w1", "model.layers.16.block_sparse_moe.experts.49.w1", "model.layers.16.block_sparse_moe.experts.50.w1", "model.layers.16.block_sparse_moe.experts.51.w1", "model.layers.16.block_sparse_moe.experts.52.w1", "model.layers.16.block_sparse_moe.experts.53.w1", "model.layers.16.block_sparse_moe.experts.54.w1", "model.layers.16.block_sparse_moe.experts.55.w1", "model.layers.16.block_sparse_moe.experts.56.w1", "model.layers.16.block_sparse_moe.experts.57.w1", "model.layers.16.block_sparse_moe.experts.58.w1", "model.layers.16.block_sparse_moe.experts.59.w1", "model.layers.16.block_sparse_moe.experts.60.w1", "model.layers.16.block_sparse_moe.experts.61.w1", "model.layers.16.block_sparse_moe.experts.62.w1", "model.layers.16.block_sparse_moe.experts.63.w1", "model.layers.16.block_sparse_moe.experts.64.w1", "model.layers.16.block_sparse_moe.experts.65.w1", "model.layers.16.block_sparse_moe.experts.66.w1", "model.layers.16.block_sparse_moe.experts.67.w1", "model.layers.16.block_sparse_moe.experts.68.w1", "model.layers.16.block_sparse_moe.experts.69.w1", "model.layers.16.block_sparse_moe.experts.70.w1", "model.layers.16.block_sparse_moe.experts.71.w1", "model.layers.16.block_sparse_moe.experts.72.w1", "model.layers.16.block_sparse_moe.experts.73.w1", "model.layers.16.block_sparse_moe.experts.74.w1", "model.layers.16.block_sparse_moe.experts.75.w1", "model.layers.16.block_sparse_moe.experts.76.w1", "model.layers.16.block_sparse_moe.experts.77.w1", "model.layers.16.block_sparse_moe.experts.78.w1", "model.layers.16.block_sparse_moe.experts.79.w1", "model.layers.16.block_sparse_moe.experts.80.w1", "model.layers.16.block_sparse_moe.experts.81.w1", "model.layers.16.block_sparse_moe.experts.82.w1", "model.layers.16.block_sparse_moe.experts.83.w1", "model.layers.16.block_sparse_moe.experts.84.w1", "model.layers.16.block_sparse_moe.experts.85.w1", "model.layers.16.block_sparse_moe.experts.86.w1", "model.layers.16.block_sparse_moe.experts.87.w1", "model.layers.16.block_sparse_moe.experts.88.w1", "model.layers.16.block_sparse_moe.experts.89.w1", "model.layers.16.block_sparse_moe.experts.90.w1", "model.layers.16.block_sparse_moe.experts.91.w1", "model.layers.16.block_sparse_moe.experts.92.w1", "model.layers.16.block_sparse_moe.experts.93.w1", "model.layers.16.block_sparse_moe.experts.94.w1", "model.layers.16.block_sparse_moe.experts.95.w1", "model.layers.16.block_sparse_moe.experts.96.w1", "model.layers.16.block_sparse_moe.experts.97.w1", "model.layers.16.block_sparse_moe.experts.98.w1", "model.layers.16.block_sparse_moe.experts.99.w1", "model.layers.16.block_sparse_moe.experts.100.w1", "model.layers.16.block_sparse_moe.experts.101.w1", "model.layers.16.block_sparse_moe.experts.102.w1", "model.layers.16.block_sparse_moe.experts.103.w1", "model.layers.16.block_sparse_moe.experts.104.w1", "model.layers.16.block_sparse_moe.experts.105.w1", "model.layers.16.block_sparse_moe.experts.106.w1", "model.layers.16.block_sparse_moe.experts.107.w1", "model.layers.16.block_sparse_moe.experts.108.w1", "model.layers.16.block_sparse_moe.experts.109.w1", "model.layers.16.block_sparse_moe.experts.110.w1", "model.layers.16.block_sparse_moe.experts.111.w1", "model.layers.16.block_sparse_moe.experts.112.w1", "model.layers.16.block_sparse_moe.experts.113.w1", "model.layers.16.block_sparse_moe.experts.114.w1", "model.layers.16.block_sparse_moe.experts.115.w1", "model.layers.16.block_sparse_moe.experts.116.w1", "model.layers.16.block_sparse_moe.experts.117.w1", "model.layers.16.block_sparse_moe.experts.118.w1", "model.layers.16.block_sparse_moe.experts.119.w1", "model.layers.16.block_sparse_moe.experts.120.w1", "model.layers.16.block_sparse_moe.experts.121.w1", "model.layers.16.block_sparse_moe.experts.122.w1", "model.layers.16.block_sparse_moe.experts.123.w1", "model.layers.16.block_sparse_moe.experts.124.w1", "model.layers.16.block_sparse_moe.experts.125.w1", "model.layers.16.block_sparse_moe.experts.126.w1", "model.layers.16.block_sparse_moe.experts.127.w1", "model.layers.16.block_sparse_moe.experts.128.w1", "model.layers.16.block_sparse_moe.experts.129.w1", "model.layers.16.block_sparse_moe.experts.130.w1", "model.layers.16.block_sparse_moe.experts.131.w1", "model.layers.16.block_sparse_moe.experts.132.w1", "model.layers.16.block_sparse_moe.experts.133.w1", "model.layers.16.block_sparse_moe.experts.134.w1", "model.layers.16.block_sparse_moe.experts.135.w1", "model.layers.16.block_sparse_moe.experts.136.w1", "model.layers.16.block_sparse_moe.experts.137.w1", "model.layers.16.block_sparse_moe.experts.138.w1", "model.layers.16.block_sparse_moe.experts.139.w1", "model.layers.16.block_sparse_moe.experts.140.w1", "model.layers.16.block_sparse_moe.experts.141.w1", "model.layers.16.block_sparse_moe.experts.142.w1", "model.layers.16.block_sparse_moe.experts.143.w1", "model.layers.16.block_sparse_moe.experts.144.w1", "model.layers.16.block_sparse_moe.experts.145.w1", "model.layers.16.block_sparse_moe.experts.146.w1", "model.layers.16.block_sparse_moe.experts.147.w1", "model.layers.16.block_sparse_moe.experts.148.w1", "model.layers.16.block_sparse_moe.experts.149.w1", "model.layers.16.block_sparse_moe.experts.150.w1", "model.layers.16.block_sparse_moe.experts.151.w1", "model.layers.16.block_sparse_moe.experts.152.w1", "model.layers.16.block_sparse_moe.experts.153.w1", "model.layers.16.block_sparse_moe.experts.154.w1", "model.layers.16.block_sparse_moe.experts.155.w1", "model.layers.16.block_sparse_moe.experts.156.w1", "model.layers.16.block_sparse_moe.experts.157.w1", "model.layers.16.block_sparse_moe.experts.158.w1", "model.layers.16.block_sparse_moe.experts.159.w1", "model.layers.16.block_sparse_moe.experts.160.w1", "model.layers.16.block_sparse_moe.experts.161.w1", "model.layers.16.block_sparse_moe.experts.162.w1", "model.layers.16.block_sparse_moe.experts.163.w1", "model.layers.16.block_sparse_moe.experts.164.w1", "model.layers.16.block_sparse_moe.experts.165.w1", "model.layers.16.block_sparse_moe.experts.166.w1", "model.layers.16.block_sparse_moe.experts.167.w1", "model.layers.16.block_sparse_moe.experts.168.w1", "model.layers.16.block_sparse_moe.experts.169.w1", "model.layers.16.block_sparse_moe.experts.170.w1", "model.layers.16.block_sparse_moe.experts.171.w1", "model.layers.16.block_sparse_moe.experts.172.w1", "model.layers.16.block_sparse_moe.experts.173.w1", "model.layers.16.block_sparse_moe.experts.174.w1", "model.layers.16.block_sparse_moe.experts.175.w1", "model.layers.16.block_sparse_moe.experts.176.w1", "model.layers.16.block_sparse_moe.experts.177.w1", "model.layers.16.block_sparse_moe.experts.178.w1", "model.layers.16.block_sparse_moe.experts.179.w1", "model.layers.16.block_sparse_moe.experts.180.w1", "model.layers.16.block_sparse_moe.experts.181.w1", "model.layers.16.block_sparse_moe.experts.182.w1", "model.layers.16.block_sparse_moe.experts.183.w1", "model.layers.16.block_sparse_moe.experts.184.w1", "model.layers.16.block_sparse_moe.experts.185.w1", "model.layers.16.block_sparse_moe.experts.186.w1", "model.layers.16.block_sparse_moe.experts.187.w1", "model.layers.16.block_sparse_moe.experts.188.w1", "model.layers.16.block_sparse_moe.experts.189.w1", "model.layers.16.block_sparse_moe.experts.190.w1", "model.layers.16.block_sparse_moe.experts.191.w1", "model.layers.16.block_sparse_moe.experts.192.w1", "model.layers.16.block_sparse_moe.experts.193.w1", "model.layers.16.block_sparse_moe.experts.194.w1", "model.layers.16.block_sparse_moe.experts.195.w1", "model.layers.16.block_sparse_moe.experts.196.w1", "model.layers.16.block_sparse_moe.experts.197.w1", "model.layers.16.block_sparse_moe.experts.198.w1", "model.layers.16.block_sparse_moe.experts.199.w1", "model.layers.16.block_sparse_moe.experts.200.w1", "model.layers.16.block_sparse_moe.experts.201.w1", "model.layers.16.block_sparse_moe.experts.202.w1", "model.layers.16.block_sparse_moe.experts.203.w1", "model.layers.16.block_sparse_moe.experts.204.w1", "model.layers.16.block_sparse_moe.experts.205.w1", "model.layers.16.block_sparse_moe.experts.206.w1", "model.layers.16.block_sparse_moe.experts.207.w1", "model.layers.16.block_sparse_moe.experts.208.w1", "model.layers.16.block_sparse_moe.experts.209.w1", "model.layers.16.block_sparse_moe.experts.210.w1", "model.layers.16.block_sparse_moe.experts.211.w1", "model.layers.16.block_sparse_moe.experts.212.w1", "model.layers.16.block_sparse_moe.experts.213.w1", "model.layers.16.block_sparse_moe.experts.214.w1", "model.layers.16.block_sparse_moe.experts.215.w1", "model.layers.16.block_sparse_moe.experts.216.w1", "model.layers.16.block_sparse_moe.experts.217.w1", "model.layers.16.block_sparse_moe.experts.218.w1", "model.layers.16.block_sparse_moe.experts.219.w1", "model.layers.16.block_sparse_moe.experts.220.w1", "model.layers.16.block_sparse_moe.experts.221.w1", "model.layers.16.block_sparse_moe.experts.222.w1", "model.layers.16.block_sparse_moe.experts.223.w1", "model.layers.16.block_sparse_moe.experts.224.w1", "model.layers.16.block_sparse_moe.experts.225.w1", "model.layers.16.block_sparse_moe.experts.226.w1", "model.layers.16.block_sparse_moe.experts.227.w1", "model.layers.16.block_sparse_moe.experts.228.w1", "model.layers.16.block_sparse_moe.experts.229.w1", "model.layers.16.block_sparse_moe.experts.230.w1", "model.layers.16.block_sparse_moe.experts.231.w1", "model.layers.16.block_sparse_moe.experts.232.w1", "model.layers.16.block_sparse_moe.experts.233.w1", "model.layers.16.block_sparse_moe.experts.234.w1", "model.layers.16.block_sparse_moe.experts.235.w1", "model.layers.16.block_sparse_moe.experts.236.w1", "model.layers.16.block_sparse_moe.experts.237.w1", "model.layers.16.block_sparse_moe.experts.238.w1", "model.layers.16.block_sparse_moe.experts.239.w1", "model.layers.16.block_sparse_moe.experts.240.w1", "model.layers.16.block_sparse_moe.experts.241.w1", "model.layers.16.block_sparse_moe.experts.242.w1", "model.layers.16.block_sparse_moe.experts.243.w1", "model.layers.16.block_sparse_moe.experts.244.w1", "model.layers.16.block_sparse_moe.experts.245.w1", "model.layers.16.block_sparse_moe.experts.246.w1", "model.layers.16.block_sparse_moe.experts.247.w1", "model.layers.16.block_sparse_moe.experts.248.w1", "model.layers.16.block_sparse_moe.experts.249.w1", "model.layers.16.block_sparse_moe.experts.250.w1", "model.layers.16.block_sparse_moe.experts.251.w1", "model.layers.16.block_sparse_moe.experts.252.w1", "model.layers.16.block_sparse_moe.experts.253.w1", "model.layers.16.block_sparse_moe.experts.254.w1", "model.layers.16.block_sparse_moe.experts.255.w1", "model.layers.16.block_sparse_moe.experts.0.w3", "model.layers.16.block_sparse_moe.experts.1.w3", "model.layers.16.block_sparse_moe.experts.2.w3", "model.layers.16.block_sparse_moe.experts.3.w3", "model.layers.16.block_sparse_moe.experts.4.w3", "model.layers.16.block_sparse_moe.experts.5.w3", "model.layers.16.block_sparse_moe.experts.6.w3", "model.layers.16.block_sparse_moe.experts.7.w3", "model.layers.16.block_sparse_moe.experts.8.w3", "model.layers.16.block_sparse_moe.experts.9.w3", "model.layers.16.block_sparse_moe.experts.10.w3", "model.layers.16.block_sparse_moe.experts.11.w3", "model.layers.16.block_sparse_moe.experts.12.w3", "model.layers.16.block_sparse_moe.experts.13.w3", "model.layers.16.block_sparse_moe.experts.14.w3", "model.layers.16.block_sparse_moe.experts.15.w3", "model.layers.16.block_sparse_moe.experts.16.w3", "model.layers.16.block_sparse_moe.experts.17.w3", "model.layers.16.block_sparse_moe.experts.18.w3", "model.layers.16.block_sparse_moe.experts.19.w3", "model.layers.16.block_sparse_moe.experts.20.w3", "model.layers.16.block_sparse_moe.experts.21.w3", "model.layers.16.block_sparse_moe.experts.22.w3", "model.layers.16.block_sparse_moe.experts.23.w3", "model.layers.16.block_sparse_moe.experts.24.w3", "model.layers.16.block_sparse_moe.experts.25.w3", "model.layers.16.block_sparse_moe.experts.26.w3", "model.layers.16.block_sparse_moe.experts.27.w3", "model.layers.16.block_sparse_moe.experts.28.w3", "model.layers.16.block_sparse_moe.experts.29.w3", "model.layers.16.block_sparse_moe.experts.30.w3", "model.layers.16.block_sparse_moe.experts.31.w3", "model.layers.16.block_sparse_moe.experts.32.w3", "model.layers.16.block_sparse_moe.experts.33.w3", "model.layers.16.block_sparse_moe.experts.34.w3", "model.layers.16.block_sparse_moe.experts.35.w3", "model.layers.16.block_sparse_moe.experts.36.w3", "model.layers.16.block_sparse_moe.experts.37.w3", "model.layers.16.block_sparse_moe.experts.38.w3", "model.layers.16.block_sparse_moe.experts.39.w3", "model.layers.16.block_sparse_moe.experts.40.w3", "model.layers.16.block_sparse_moe.experts.41.w3", "model.layers.16.block_sparse_moe.experts.42.w3", "model.layers.16.block_sparse_moe.experts.43.w3", "model.layers.16.block_sparse_moe.experts.44.w3", "model.layers.16.block_sparse_moe.experts.45.w3", "model.layers.16.block_sparse_moe.experts.46.w3", "model.layers.16.block_sparse_moe.experts.47.w3", "model.layers.16.block_sparse_moe.experts.48.w3", "model.layers.16.block_sparse_moe.experts.49.w3", "model.layers.16.block_sparse_moe.experts.50.w3", "model.layers.16.block_sparse_moe.experts.51.w3", "model.layers.16.block_sparse_moe.experts.52.w3", "model.layers.16.block_sparse_moe.experts.53.w3", "model.layers.16.block_sparse_moe.experts.54.w3", "model.layers.16.block_sparse_moe.experts.55.w3", "model.layers.16.block_sparse_moe.experts.56.w3", "model.layers.16.block_sparse_moe.experts.57.w3", "model.layers.16.block_sparse_moe.experts.58.w3", "model.layers.16.block_sparse_moe.experts.59.w3", "model.layers.16.block_sparse_moe.experts.60.w3", "model.layers.16.block_sparse_moe.experts.61.w3", "model.layers.16.block_sparse_moe.experts.62.w3", "model.layers.16.block_sparse_moe.experts.63.w3", "model.layers.16.block_sparse_moe.experts.64.w3", "model.layers.16.block_sparse_moe.experts.65.w3", "model.layers.16.block_sparse_moe.experts.66.w3", "model.layers.16.block_sparse_moe.experts.67.w3", "model.layers.16.block_sparse_moe.experts.68.w3", "model.layers.16.block_sparse_moe.experts.69.w3", "model.layers.16.block_sparse_moe.experts.70.w3", "model.layers.16.block_sparse_moe.experts.71.w3", "model.layers.16.block_sparse_moe.experts.72.w3", "model.layers.16.block_sparse_moe.experts.73.w3", "model.layers.16.block_sparse_moe.experts.74.w3", "model.layers.16.block_sparse_moe.experts.75.w3", "model.layers.16.block_sparse_moe.experts.76.w3", "model.layers.16.block_sparse_moe.experts.77.w3", "model.layers.16.block_sparse_moe.experts.78.w3", "model.layers.16.block_sparse_moe.experts.79.w3", "model.layers.16.block_sparse_moe.experts.80.w3", "model.layers.16.block_sparse_moe.experts.81.w3", "model.layers.16.block_sparse_moe.experts.82.w3", "model.layers.16.block_sparse_moe.experts.83.w3", "model.layers.16.block_sparse_moe.experts.84.w3", "model.layers.16.block_sparse_moe.experts.85.w3", "model.layers.16.block_sparse_moe.experts.86.w3", "model.layers.16.block_sparse_moe.experts.87.w3", "model.layers.16.block_sparse_moe.experts.88.w3", "model.layers.16.block_sparse_moe.experts.89.w3", "model.layers.16.block_sparse_moe.experts.90.w3", "model.layers.16.block_sparse_moe.experts.91.w3", "model.layers.16.block_sparse_moe.experts.92.w3", "model.layers.16.block_sparse_moe.experts.93.w3", "model.layers.16.block_sparse_moe.experts.94.w3", "model.layers.16.block_sparse_moe.experts.95.w3", "model.layers.16.block_sparse_moe.experts.96.w3", "model.layers.16.block_sparse_moe.experts.97.w3", "model.layers.16.block_sparse_moe.experts.98.w3", "model.layers.16.block_sparse_moe.experts.99.w3", "model.layers.16.block_sparse_moe.experts.100.w3", "model.layers.16.block_sparse_moe.experts.101.w3", "model.layers.16.block_sparse_moe.experts.102.w3", "model.layers.16.block_sparse_moe.experts.103.w3", "model.layers.16.block_sparse_moe.experts.104.w3", "model.layers.16.block_sparse_moe.experts.105.w3", "model.layers.16.block_sparse_moe.experts.106.w3", "model.layers.16.block_sparse_moe.experts.107.w3", "model.layers.16.block_sparse_moe.experts.108.w3", "model.layers.16.block_sparse_moe.experts.109.w3", "model.layers.16.block_sparse_moe.experts.110.w3", "model.layers.16.block_sparse_moe.experts.111.w3", "model.layers.16.block_sparse_moe.experts.112.w3", "model.layers.16.block_sparse_moe.experts.113.w3", "model.layers.16.block_sparse_moe.experts.114.w3", "model.layers.16.block_sparse_moe.experts.115.w3", "model.layers.16.block_sparse_moe.experts.116.w3", "model.layers.16.block_sparse_moe.experts.117.w3", "model.layers.16.block_sparse_moe.experts.118.w3", "model.layers.16.block_sparse_moe.experts.119.w3", "model.layers.16.block_sparse_moe.experts.120.w3", "model.layers.16.block_sparse_moe.experts.121.w3", "model.layers.16.block_sparse_moe.experts.122.w3", "model.layers.16.block_sparse_moe.experts.123.w3", "model.layers.16.block_sparse_moe.experts.124.w3", "model.layers.16.block_sparse_moe.experts.125.w3", "model.layers.16.block_sparse_moe.experts.126.w3", "model.layers.16.block_sparse_moe.experts.127.w3", "model.layers.16.block_sparse_moe.experts.128.w3", "model.layers.16.block_sparse_moe.experts.129.w3", "model.layers.16.block_sparse_moe.experts.130.w3", "model.layers.16.block_sparse_moe.experts.131.w3", "model.layers.16.block_sparse_moe.experts.132.w3", "model.layers.16.block_sparse_moe.experts.133.w3", "model.layers.16.block_sparse_moe.experts.134.w3", "model.layers.16.block_sparse_moe.experts.135.w3", "model.layers.16.block_sparse_moe.experts.136.w3", "model.layers.16.block_sparse_moe.experts.137.w3", "model.layers.16.block_sparse_moe.experts.138.w3", "model.layers.16.block_sparse_moe.experts.139.w3", "model.layers.16.block_sparse_moe.experts.140.w3", "model.layers.16.block_sparse_moe.experts.141.w3", "model.layers.16.block_sparse_moe.experts.142.w3", "model.layers.16.block_sparse_moe.experts.143.w3", "model.layers.16.block_sparse_moe.experts.144.w3", "model.layers.16.block_sparse_moe.experts.145.w3", "model.layers.16.block_sparse_moe.experts.146.w3", "model.layers.16.block_sparse_moe.experts.147.w3", "model.layers.16.block_sparse_moe.experts.148.w3", "model.layers.16.block_sparse_moe.experts.149.w3", "model.layers.16.block_sparse_moe.experts.150.w3", "model.layers.16.block_sparse_moe.experts.151.w3", "model.layers.16.block_sparse_moe.experts.152.w3", "model.layers.16.block_sparse_moe.experts.153.w3", "model.layers.16.block_sparse_moe.experts.154.w3", "model.layers.16.block_sparse_moe.experts.155.w3", "model.layers.16.block_sparse_moe.experts.156.w3", "model.layers.16.block_sparse_moe.experts.157.w3", "model.layers.16.block_sparse_moe.experts.158.w3", "model.layers.16.block_sparse_moe.experts.159.w3", "model.layers.16.block_sparse_moe.experts.160.w3", "model.layers.16.block_sparse_moe.experts.161.w3", "model.layers.16.block_sparse_moe.experts.162.w3", "model.layers.16.block_sparse_moe.experts.163.w3", "model.layers.16.block_sparse_moe.experts.164.w3", "model.layers.16.block_sparse_moe.experts.165.w3", "model.layers.16.block_sparse_moe.experts.166.w3", "model.layers.16.block_sparse_moe.experts.167.w3", "model.layers.16.block_sparse_moe.experts.168.w3", "model.layers.16.block_sparse_moe.experts.169.w3", "model.layers.16.block_sparse_moe.experts.170.w3", "model.layers.16.block_sparse_moe.experts.171.w3", "model.layers.16.block_sparse_moe.experts.172.w3", "model.layers.16.block_sparse_moe.experts.173.w3", "model.layers.16.block_sparse_moe.experts.174.w3", "model.layers.16.block_sparse_moe.experts.175.w3", "model.layers.16.block_sparse_moe.experts.176.w3", "model.layers.16.block_sparse_moe.experts.177.w3", "model.layers.16.block_sparse_moe.experts.178.w3", "model.layers.16.block_sparse_moe.experts.179.w3", "model.layers.16.block_sparse_moe.experts.180.w3", "model.layers.16.block_sparse_moe.experts.181.w3", "model.layers.16.block_sparse_moe.experts.182.w3", "model.layers.16.block_sparse_moe.experts.183.w3", "model.layers.16.block_sparse_moe.experts.184.w3", "model.layers.16.block_sparse_moe.experts.185.w3", "model.layers.16.block_sparse_moe.experts.186.w3", "model.layers.16.block_sparse_moe.experts.187.w3", "model.layers.16.block_sparse_moe.experts.188.w3", "model.layers.16.block_sparse_moe.experts.189.w3", "model.layers.16.block_sparse_moe.experts.190.w3", "model.layers.16.block_sparse_moe.experts.191.w3", "model.layers.16.block_sparse_moe.experts.192.w3", "model.layers.16.block_sparse_moe.experts.193.w3", "model.layers.16.block_sparse_moe.experts.194.w3", "model.layers.16.block_sparse_moe.experts.195.w3", "model.layers.16.block_sparse_moe.experts.196.w3", "model.layers.16.block_sparse_moe.experts.197.w3", "model.layers.16.block_sparse_moe.experts.198.w3", "model.layers.16.block_sparse_moe.experts.199.w3", "model.layers.16.block_sparse_moe.experts.200.w3", "model.layers.16.block_sparse_moe.experts.201.w3", "model.layers.16.block_sparse_moe.experts.202.w3", "model.layers.16.block_sparse_moe.experts.203.w3", "model.layers.16.block_sparse_moe.experts.204.w3", "model.layers.16.block_sparse_moe.experts.205.w3", "model.layers.16.block_sparse_moe.experts.206.w3", "model.layers.16.block_sparse_moe.experts.207.w3", "model.layers.16.block_sparse_moe.experts.208.w3", "model.layers.16.block_sparse_moe.experts.209.w3", "model.layers.16.block_sparse_moe.experts.210.w3", "model.layers.16.block_sparse_moe.experts.211.w3", "model.layers.16.block_sparse_moe.experts.212.w3", "model.layers.16.block_sparse_moe.experts.213.w3", "model.layers.16.block_sparse_moe.experts.214.w3", "model.layers.16.block_sparse_moe.experts.215.w3", "model.layers.16.block_sparse_moe.experts.216.w3", "model.layers.16.block_sparse_moe.experts.217.w3", "model.layers.16.block_sparse_moe.experts.218.w3", "model.layers.16.block_sparse_moe.experts.219.w3", "model.layers.16.block_sparse_moe.experts.220.w3", "model.layers.16.block_sparse_moe.experts.221.w3", "model.layers.16.block_sparse_moe.experts.222.w3", "model.layers.16.block_sparse_moe.experts.223.w3", "model.layers.16.block_sparse_moe.experts.224.w3", "model.layers.16.block_sparse_moe.experts.225.w3", "model.layers.16.block_sparse_moe.experts.226.w3", "model.layers.16.block_sparse_moe.experts.227.w3", "model.layers.16.block_sparse_moe.experts.228.w3", "model.layers.16.block_sparse_moe.experts.229.w3", "model.layers.16.block_sparse_moe.experts.230.w3", "model.layers.16.block_sparse_moe.experts.231.w3", "model.layers.16.block_sparse_moe.experts.232.w3", "model.layers.16.block_sparse_moe.experts.233.w3", "model.layers.16.block_sparse_moe.experts.234.w3", "model.layers.16.block_sparse_moe.experts.235.w3", "model.layers.16.block_sparse_moe.experts.236.w3", "model.layers.16.block_sparse_moe.experts.237.w3", "model.layers.16.block_sparse_moe.experts.238.w3", "model.layers.16.block_sparse_moe.experts.239.w3", "model.layers.16.block_sparse_moe.experts.240.w3", "model.layers.16.block_sparse_moe.experts.241.w3", "model.layers.16.block_sparse_moe.experts.242.w3", "model.layers.16.block_sparse_moe.experts.243.w3", "model.layers.16.block_sparse_moe.experts.244.w3", "model.layers.16.block_sparse_moe.experts.245.w3", "model.layers.16.block_sparse_moe.experts.246.w3", "model.layers.16.block_sparse_moe.experts.247.w3", "model.layers.16.block_sparse_moe.experts.248.w3", "model.layers.16.block_sparse_moe.experts.249.w3", "model.layers.16.block_sparse_moe.experts.250.w3", "model.layers.16.block_sparse_moe.experts.251.w3", "model.layers.16.block_sparse_moe.experts.252.w3", "model.layers.16.block_sparse_moe.experts.253.w3", "model.layers.16.block_sparse_moe.experts.254.w3", "model.layers.16.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00010107439011335095, "dbits": 2415919104 } ] }, { "idx": 84, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w2", "model.layers.16.block_sparse_moe.experts.1.w2", "model.layers.16.block_sparse_moe.experts.2.w2", "model.layers.16.block_sparse_moe.experts.3.w2", "model.layers.16.block_sparse_moe.experts.4.w2", "model.layers.16.block_sparse_moe.experts.5.w2", "model.layers.16.block_sparse_moe.experts.6.w2", "model.layers.16.block_sparse_moe.experts.7.w2", "model.layers.16.block_sparse_moe.experts.8.w2", "model.layers.16.block_sparse_moe.experts.9.w2", "model.layers.16.block_sparse_moe.experts.10.w2", "model.layers.16.block_sparse_moe.experts.11.w2", "model.layers.16.block_sparse_moe.experts.12.w2", "model.layers.16.block_sparse_moe.experts.13.w2", "model.layers.16.block_sparse_moe.experts.14.w2", "model.layers.16.block_sparse_moe.experts.15.w2", "model.layers.16.block_sparse_moe.experts.16.w2", "model.layers.16.block_sparse_moe.experts.17.w2", "model.layers.16.block_sparse_moe.experts.18.w2", "model.layers.16.block_sparse_moe.experts.19.w2", "model.layers.16.block_sparse_moe.experts.20.w2", "model.layers.16.block_sparse_moe.experts.21.w2", "model.layers.16.block_sparse_moe.experts.22.w2", "model.layers.16.block_sparse_moe.experts.23.w2", "model.layers.16.block_sparse_moe.experts.24.w2", "model.layers.16.block_sparse_moe.experts.25.w2", "model.layers.16.block_sparse_moe.experts.26.w2", "model.layers.16.block_sparse_moe.experts.27.w2", "model.layers.16.block_sparse_moe.experts.28.w2", "model.layers.16.block_sparse_moe.experts.29.w2", "model.layers.16.block_sparse_moe.experts.30.w2", "model.layers.16.block_sparse_moe.experts.31.w2", "model.layers.16.block_sparse_moe.experts.32.w2", "model.layers.16.block_sparse_moe.experts.33.w2", "model.layers.16.block_sparse_moe.experts.34.w2", "model.layers.16.block_sparse_moe.experts.35.w2", "model.layers.16.block_sparse_moe.experts.36.w2", "model.layers.16.block_sparse_moe.experts.37.w2", "model.layers.16.block_sparse_moe.experts.38.w2", "model.layers.16.block_sparse_moe.experts.39.w2", "model.layers.16.block_sparse_moe.experts.40.w2", "model.layers.16.block_sparse_moe.experts.41.w2", "model.layers.16.block_sparse_moe.experts.42.w2", "model.layers.16.block_sparse_moe.experts.43.w2", "model.layers.16.block_sparse_moe.experts.44.w2", "model.layers.16.block_sparse_moe.experts.45.w2", "model.layers.16.block_sparse_moe.experts.46.w2", "model.layers.16.block_sparse_moe.experts.47.w2", "model.layers.16.block_sparse_moe.experts.48.w2", "model.layers.16.block_sparse_moe.experts.49.w2", "model.layers.16.block_sparse_moe.experts.50.w2", "model.layers.16.block_sparse_moe.experts.51.w2", "model.layers.16.block_sparse_moe.experts.52.w2", "model.layers.16.block_sparse_moe.experts.53.w2", "model.layers.16.block_sparse_moe.experts.54.w2", "model.layers.16.block_sparse_moe.experts.55.w2", "model.layers.16.block_sparse_moe.experts.56.w2", "model.layers.16.block_sparse_moe.experts.57.w2", "model.layers.16.block_sparse_moe.experts.58.w2", "model.layers.16.block_sparse_moe.experts.59.w2", "model.layers.16.block_sparse_moe.experts.60.w2", "model.layers.16.block_sparse_moe.experts.61.w2", "model.layers.16.block_sparse_moe.experts.62.w2", "model.layers.16.block_sparse_moe.experts.63.w2", "model.layers.16.block_sparse_moe.experts.64.w2", "model.layers.16.block_sparse_moe.experts.65.w2", "model.layers.16.block_sparse_moe.experts.66.w2", "model.layers.16.block_sparse_moe.experts.67.w2", "model.layers.16.block_sparse_moe.experts.68.w2", "model.layers.16.block_sparse_moe.experts.69.w2", "model.layers.16.block_sparse_moe.experts.70.w2", "model.layers.16.block_sparse_moe.experts.71.w2", "model.layers.16.block_sparse_moe.experts.72.w2", "model.layers.16.block_sparse_moe.experts.73.w2", "model.layers.16.block_sparse_moe.experts.74.w2", "model.layers.16.block_sparse_moe.experts.75.w2", "model.layers.16.block_sparse_moe.experts.76.w2", "model.layers.16.block_sparse_moe.experts.77.w2", "model.layers.16.block_sparse_moe.experts.78.w2", "model.layers.16.block_sparse_moe.experts.79.w2", "model.layers.16.block_sparse_moe.experts.80.w2", "model.layers.16.block_sparse_moe.experts.81.w2", "model.layers.16.block_sparse_moe.experts.82.w2", "model.layers.16.block_sparse_moe.experts.83.w2", "model.layers.16.block_sparse_moe.experts.84.w2", "model.layers.16.block_sparse_moe.experts.85.w2", "model.layers.16.block_sparse_moe.experts.86.w2", "model.layers.16.block_sparse_moe.experts.87.w2", "model.layers.16.block_sparse_moe.experts.88.w2", "model.layers.16.block_sparse_moe.experts.89.w2", "model.layers.16.block_sparse_moe.experts.90.w2", "model.layers.16.block_sparse_moe.experts.91.w2", "model.layers.16.block_sparse_moe.experts.92.w2", "model.layers.16.block_sparse_moe.experts.93.w2", "model.layers.16.block_sparse_moe.experts.94.w2", "model.layers.16.block_sparse_moe.experts.95.w2", "model.layers.16.block_sparse_moe.experts.96.w2", "model.layers.16.block_sparse_moe.experts.97.w2", "model.layers.16.block_sparse_moe.experts.98.w2", "model.layers.16.block_sparse_moe.experts.99.w2", "model.layers.16.block_sparse_moe.experts.100.w2", "model.layers.16.block_sparse_moe.experts.101.w2", "model.layers.16.block_sparse_moe.experts.102.w2", "model.layers.16.block_sparse_moe.experts.103.w2", "model.layers.16.block_sparse_moe.experts.104.w2", "model.layers.16.block_sparse_moe.experts.105.w2", "model.layers.16.block_sparse_moe.experts.106.w2", "model.layers.16.block_sparse_moe.experts.107.w2", "model.layers.16.block_sparse_moe.experts.108.w2", "model.layers.16.block_sparse_moe.experts.109.w2", "model.layers.16.block_sparse_moe.experts.110.w2", "model.layers.16.block_sparse_moe.experts.111.w2", "model.layers.16.block_sparse_moe.experts.112.w2", "model.layers.16.block_sparse_moe.experts.113.w2", "model.layers.16.block_sparse_moe.experts.114.w2", "model.layers.16.block_sparse_moe.experts.115.w2", "model.layers.16.block_sparse_moe.experts.116.w2", "model.layers.16.block_sparse_moe.experts.117.w2", "model.layers.16.block_sparse_moe.experts.118.w2", "model.layers.16.block_sparse_moe.experts.119.w2", "model.layers.16.block_sparse_moe.experts.120.w2", "model.layers.16.block_sparse_moe.experts.121.w2", "model.layers.16.block_sparse_moe.experts.122.w2", "model.layers.16.block_sparse_moe.experts.123.w2", "model.layers.16.block_sparse_moe.experts.124.w2", "model.layers.16.block_sparse_moe.experts.125.w2", "model.layers.16.block_sparse_moe.experts.126.w2", "model.layers.16.block_sparse_moe.experts.127.w2", "model.layers.16.block_sparse_moe.experts.128.w2", "model.layers.16.block_sparse_moe.experts.129.w2", "model.layers.16.block_sparse_moe.experts.130.w2", "model.layers.16.block_sparse_moe.experts.131.w2", "model.layers.16.block_sparse_moe.experts.132.w2", "model.layers.16.block_sparse_moe.experts.133.w2", "model.layers.16.block_sparse_moe.experts.134.w2", "model.layers.16.block_sparse_moe.experts.135.w2", "model.layers.16.block_sparse_moe.experts.136.w2", "model.layers.16.block_sparse_moe.experts.137.w2", "model.layers.16.block_sparse_moe.experts.138.w2", "model.layers.16.block_sparse_moe.experts.139.w2", "model.layers.16.block_sparse_moe.experts.140.w2", "model.layers.16.block_sparse_moe.experts.141.w2", "model.layers.16.block_sparse_moe.experts.142.w2", "model.layers.16.block_sparse_moe.experts.143.w2", "model.layers.16.block_sparse_moe.experts.144.w2", "model.layers.16.block_sparse_moe.experts.145.w2", "model.layers.16.block_sparse_moe.experts.146.w2", "model.layers.16.block_sparse_moe.experts.147.w2", "model.layers.16.block_sparse_moe.experts.148.w2", "model.layers.16.block_sparse_moe.experts.149.w2", "model.layers.16.block_sparse_moe.experts.150.w2", "model.layers.16.block_sparse_moe.experts.151.w2", "model.layers.16.block_sparse_moe.experts.152.w2", "model.layers.16.block_sparse_moe.experts.153.w2", "model.layers.16.block_sparse_moe.experts.154.w2", "model.layers.16.block_sparse_moe.experts.155.w2", "model.layers.16.block_sparse_moe.experts.156.w2", "model.layers.16.block_sparse_moe.experts.157.w2", "model.layers.16.block_sparse_moe.experts.158.w2", "model.layers.16.block_sparse_moe.experts.159.w2", "model.layers.16.block_sparse_moe.experts.160.w2", "model.layers.16.block_sparse_moe.experts.161.w2", "model.layers.16.block_sparse_moe.experts.162.w2", "model.layers.16.block_sparse_moe.experts.163.w2", "model.layers.16.block_sparse_moe.experts.164.w2", "model.layers.16.block_sparse_moe.experts.165.w2", "model.layers.16.block_sparse_moe.experts.166.w2", "model.layers.16.block_sparse_moe.experts.167.w2", "model.layers.16.block_sparse_moe.experts.168.w2", "model.layers.16.block_sparse_moe.experts.169.w2", "model.layers.16.block_sparse_moe.experts.170.w2", "model.layers.16.block_sparse_moe.experts.171.w2", "model.layers.16.block_sparse_moe.experts.172.w2", "model.layers.16.block_sparse_moe.experts.173.w2", "model.layers.16.block_sparse_moe.experts.174.w2", "model.layers.16.block_sparse_moe.experts.175.w2", "model.layers.16.block_sparse_moe.experts.176.w2", "model.layers.16.block_sparse_moe.experts.177.w2", "model.layers.16.block_sparse_moe.experts.178.w2", "model.layers.16.block_sparse_moe.experts.179.w2", "model.layers.16.block_sparse_moe.experts.180.w2", "model.layers.16.block_sparse_moe.experts.181.w2", "model.layers.16.block_sparse_moe.experts.182.w2", "model.layers.16.block_sparse_moe.experts.183.w2", "model.layers.16.block_sparse_moe.experts.184.w2", "model.layers.16.block_sparse_moe.experts.185.w2", "model.layers.16.block_sparse_moe.experts.186.w2", "model.layers.16.block_sparse_moe.experts.187.w2", "model.layers.16.block_sparse_moe.experts.188.w2", "model.layers.16.block_sparse_moe.experts.189.w2", "model.layers.16.block_sparse_moe.experts.190.w2", "model.layers.16.block_sparse_moe.experts.191.w2", "model.layers.16.block_sparse_moe.experts.192.w2", "model.layers.16.block_sparse_moe.experts.193.w2", "model.layers.16.block_sparse_moe.experts.194.w2", "model.layers.16.block_sparse_moe.experts.195.w2", "model.layers.16.block_sparse_moe.experts.196.w2", "model.layers.16.block_sparse_moe.experts.197.w2", "model.layers.16.block_sparse_moe.experts.198.w2", "model.layers.16.block_sparse_moe.experts.199.w2", "model.layers.16.block_sparse_moe.experts.200.w2", "model.layers.16.block_sparse_moe.experts.201.w2", "model.layers.16.block_sparse_moe.experts.202.w2", "model.layers.16.block_sparse_moe.experts.203.w2", "model.layers.16.block_sparse_moe.experts.204.w2", "model.layers.16.block_sparse_moe.experts.205.w2", "model.layers.16.block_sparse_moe.experts.206.w2", "model.layers.16.block_sparse_moe.experts.207.w2", "model.layers.16.block_sparse_moe.experts.208.w2", "model.layers.16.block_sparse_moe.experts.209.w2", "model.layers.16.block_sparse_moe.experts.210.w2", "model.layers.16.block_sparse_moe.experts.211.w2", "model.layers.16.block_sparse_moe.experts.212.w2", "model.layers.16.block_sparse_moe.experts.213.w2", "model.layers.16.block_sparse_moe.experts.214.w2", "model.layers.16.block_sparse_moe.experts.215.w2", "model.layers.16.block_sparse_moe.experts.216.w2", "model.layers.16.block_sparse_moe.experts.217.w2", "model.layers.16.block_sparse_moe.experts.218.w2", "model.layers.16.block_sparse_moe.experts.219.w2", "model.layers.16.block_sparse_moe.experts.220.w2", "model.layers.16.block_sparse_moe.experts.221.w2", "model.layers.16.block_sparse_moe.experts.222.w2", "model.layers.16.block_sparse_moe.experts.223.w2", "model.layers.16.block_sparse_moe.experts.224.w2", "model.layers.16.block_sparse_moe.experts.225.w2", "model.layers.16.block_sparse_moe.experts.226.w2", "model.layers.16.block_sparse_moe.experts.227.w2", "model.layers.16.block_sparse_moe.experts.228.w2", "model.layers.16.block_sparse_moe.experts.229.w2", "model.layers.16.block_sparse_moe.experts.230.w2", "model.layers.16.block_sparse_moe.experts.231.w2", "model.layers.16.block_sparse_moe.experts.232.w2", "model.layers.16.block_sparse_moe.experts.233.w2", "model.layers.16.block_sparse_moe.experts.234.w2", "model.layers.16.block_sparse_moe.experts.235.w2", "model.layers.16.block_sparse_moe.experts.236.w2", "model.layers.16.block_sparse_moe.experts.237.w2", "model.layers.16.block_sparse_moe.experts.238.w2", "model.layers.16.block_sparse_moe.experts.239.w2", "model.layers.16.block_sparse_moe.experts.240.w2", "model.layers.16.block_sparse_moe.experts.241.w2", "model.layers.16.block_sparse_moe.experts.242.w2", "model.layers.16.block_sparse_moe.experts.243.w2", "model.layers.16.block_sparse_moe.experts.244.w2", "model.layers.16.block_sparse_moe.experts.245.w2", "model.layers.16.block_sparse_moe.experts.246.w2", "model.layers.16.block_sparse_moe.experts.247.w2", "model.layers.16.block_sparse_moe.experts.248.w2", "model.layers.16.block_sparse_moe.experts.249.w2", "model.layers.16.block_sparse_moe.experts.250.w2", "model.layers.16.block_sparse_moe.experts.251.w2", "model.layers.16.block_sparse_moe.experts.252.w2", "model.layers.16.block_sparse_moe.experts.253.w2", "model.layers.16.block_sparse_moe.experts.254.w2", "model.layers.16.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008892768993973704, "dbits": 1207959552 } ] }, { "idx": 85, "layers": [ "model.layers.17.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0005369452759623472, "dbits": 18874368 } ] }, { "idx": 86, "layers": [ "model.layers.17.self_attn.k_proj", "model.layers.17.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0018879417330026627, "dbits": 6291456 } ] }, { "idx": 87, "layers": [ "model.layers.17.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0012793347239494351, "dbits": 18874368 } ] }, { "idx": 88, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w1", "model.layers.17.block_sparse_moe.experts.1.w1", "model.layers.17.block_sparse_moe.experts.2.w1", "model.layers.17.block_sparse_moe.experts.3.w1", "model.layers.17.block_sparse_moe.experts.4.w1", "model.layers.17.block_sparse_moe.experts.5.w1", "model.layers.17.block_sparse_moe.experts.6.w1", "model.layers.17.block_sparse_moe.experts.7.w1", "model.layers.17.block_sparse_moe.experts.8.w1", "model.layers.17.block_sparse_moe.experts.9.w1", "model.layers.17.block_sparse_moe.experts.10.w1", "model.layers.17.block_sparse_moe.experts.11.w1", "model.layers.17.block_sparse_moe.experts.12.w1", "model.layers.17.block_sparse_moe.experts.13.w1", "model.layers.17.block_sparse_moe.experts.14.w1", "model.layers.17.block_sparse_moe.experts.15.w1", "model.layers.17.block_sparse_moe.experts.16.w1", "model.layers.17.block_sparse_moe.experts.17.w1", "model.layers.17.block_sparse_moe.experts.18.w1", "model.layers.17.block_sparse_moe.experts.19.w1", "model.layers.17.block_sparse_moe.experts.20.w1", "model.layers.17.block_sparse_moe.experts.21.w1", "model.layers.17.block_sparse_moe.experts.22.w1", "model.layers.17.block_sparse_moe.experts.23.w1", "model.layers.17.block_sparse_moe.experts.24.w1", "model.layers.17.block_sparse_moe.experts.25.w1", "model.layers.17.block_sparse_moe.experts.26.w1", "model.layers.17.block_sparse_moe.experts.27.w1", "model.layers.17.block_sparse_moe.experts.28.w1", "model.layers.17.block_sparse_moe.experts.29.w1", "model.layers.17.block_sparse_moe.experts.30.w1", "model.layers.17.block_sparse_moe.experts.31.w1", "model.layers.17.block_sparse_moe.experts.32.w1", "model.layers.17.block_sparse_moe.experts.33.w1", "model.layers.17.block_sparse_moe.experts.34.w1", "model.layers.17.block_sparse_moe.experts.35.w1", "model.layers.17.block_sparse_moe.experts.36.w1", "model.layers.17.block_sparse_moe.experts.37.w1", "model.layers.17.block_sparse_moe.experts.38.w1", "model.layers.17.block_sparse_moe.experts.39.w1", "model.layers.17.block_sparse_moe.experts.40.w1", "model.layers.17.block_sparse_moe.experts.41.w1", "model.layers.17.block_sparse_moe.experts.42.w1", "model.layers.17.block_sparse_moe.experts.43.w1", "model.layers.17.block_sparse_moe.experts.44.w1", "model.layers.17.block_sparse_moe.experts.45.w1", "model.layers.17.block_sparse_moe.experts.46.w1", "model.layers.17.block_sparse_moe.experts.47.w1", "model.layers.17.block_sparse_moe.experts.48.w1", "model.layers.17.block_sparse_moe.experts.49.w1", "model.layers.17.block_sparse_moe.experts.50.w1", "model.layers.17.block_sparse_moe.experts.51.w1", "model.layers.17.block_sparse_moe.experts.52.w1", "model.layers.17.block_sparse_moe.experts.53.w1", "model.layers.17.block_sparse_moe.experts.54.w1", "model.layers.17.block_sparse_moe.experts.55.w1", "model.layers.17.block_sparse_moe.experts.56.w1", "model.layers.17.block_sparse_moe.experts.57.w1", "model.layers.17.block_sparse_moe.experts.58.w1", "model.layers.17.block_sparse_moe.experts.59.w1", "model.layers.17.block_sparse_moe.experts.60.w1", "model.layers.17.block_sparse_moe.experts.61.w1", "model.layers.17.block_sparse_moe.experts.62.w1", "model.layers.17.block_sparse_moe.experts.63.w1", "model.layers.17.block_sparse_moe.experts.64.w1", "model.layers.17.block_sparse_moe.experts.65.w1", "model.layers.17.block_sparse_moe.experts.66.w1", "model.layers.17.block_sparse_moe.experts.67.w1", "model.layers.17.block_sparse_moe.experts.68.w1", "model.layers.17.block_sparse_moe.experts.69.w1", "model.layers.17.block_sparse_moe.experts.70.w1", "model.layers.17.block_sparse_moe.experts.71.w1", "model.layers.17.block_sparse_moe.experts.72.w1", "model.layers.17.block_sparse_moe.experts.73.w1", "model.layers.17.block_sparse_moe.experts.74.w1", "model.layers.17.block_sparse_moe.experts.75.w1", "model.layers.17.block_sparse_moe.experts.76.w1", "model.layers.17.block_sparse_moe.experts.77.w1", "model.layers.17.block_sparse_moe.experts.78.w1", "model.layers.17.block_sparse_moe.experts.79.w1", "model.layers.17.block_sparse_moe.experts.80.w1", "model.layers.17.block_sparse_moe.experts.81.w1", "model.layers.17.block_sparse_moe.experts.82.w1", "model.layers.17.block_sparse_moe.experts.83.w1", "model.layers.17.block_sparse_moe.experts.84.w1", "model.layers.17.block_sparse_moe.experts.85.w1", "model.layers.17.block_sparse_moe.experts.86.w1", "model.layers.17.block_sparse_moe.experts.87.w1", "model.layers.17.block_sparse_moe.experts.88.w1", "model.layers.17.block_sparse_moe.experts.89.w1", "model.layers.17.block_sparse_moe.experts.90.w1", "model.layers.17.block_sparse_moe.experts.91.w1", "model.layers.17.block_sparse_moe.experts.92.w1", "model.layers.17.block_sparse_moe.experts.93.w1", "model.layers.17.block_sparse_moe.experts.94.w1", "model.layers.17.block_sparse_moe.experts.95.w1", "model.layers.17.block_sparse_moe.experts.96.w1", "model.layers.17.block_sparse_moe.experts.97.w1", "model.layers.17.block_sparse_moe.experts.98.w1", "model.layers.17.block_sparse_moe.experts.99.w1", "model.layers.17.block_sparse_moe.experts.100.w1", "model.layers.17.block_sparse_moe.experts.101.w1", "model.layers.17.block_sparse_moe.experts.102.w1", "model.layers.17.block_sparse_moe.experts.103.w1", "model.layers.17.block_sparse_moe.experts.104.w1", "model.layers.17.block_sparse_moe.experts.105.w1", "model.layers.17.block_sparse_moe.experts.106.w1", "model.layers.17.block_sparse_moe.experts.107.w1", "model.layers.17.block_sparse_moe.experts.108.w1", "model.layers.17.block_sparse_moe.experts.109.w1", "model.layers.17.block_sparse_moe.experts.110.w1", "model.layers.17.block_sparse_moe.experts.111.w1", "model.layers.17.block_sparse_moe.experts.112.w1", "model.layers.17.block_sparse_moe.experts.113.w1", "model.layers.17.block_sparse_moe.experts.114.w1", "model.layers.17.block_sparse_moe.experts.115.w1", "model.layers.17.block_sparse_moe.experts.116.w1", "model.layers.17.block_sparse_moe.experts.117.w1", "model.layers.17.block_sparse_moe.experts.118.w1", "model.layers.17.block_sparse_moe.experts.119.w1", "model.layers.17.block_sparse_moe.experts.120.w1", "model.layers.17.block_sparse_moe.experts.121.w1", "model.layers.17.block_sparse_moe.experts.122.w1", "model.layers.17.block_sparse_moe.experts.123.w1", "model.layers.17.block_sparse_moe.experts.124.w1", "model.layers.17.block_sparse_moe.experts.125.w1", "model.layers.17.block_sparse_moe.experts.126.w1", "model.layers.17.block_sparse_moe.experts.127.w1", "model.layers.17.block_sparse_moe.experts.128.w1", "model.layers.17.block_sparse_moe.experts.129.w1", "model.layers.17.block_sparse_moe.experts.130.w1", "model.layers.17.block_sparse_moe.experts.131.w1", "model.layers.17.block_sparse_moe.experts.132.w1", "model.layers.17.block_sparse_moe.experts.133.w1", "model.layers.17.block_sparse_moe.experts.134.w1", "model.layers.17.block_sparse_moe.experts.135.w1", "model.layers.17.block_sparse_moe.experts.136.w1", "model.layers.17.block_sparse_moe.experts.137.w1", "model.layers.17.block_sparse_moe.experts.138.w1", "model.layers.17.block_sparse_moe.experts.139.w1", "model.layers.17.block_sparse_moe.experts.140.w1", "model.layers.17.block_sparse_moe.experts.141.w1", "model.layers.17.block_sparse_moe.experts.142.w1", "model.layers.17.block_sparse_moe.experts.143.w1", "model.layers.17.block_sparse_moe.experts.144.w1", "model.layers.17.block_sparse_moe.experts.145.w1", "model.layers.17.block_sparse_moe.experts.146.w1", "model.layers.17.block_sparse_moe.experts.147.w1", "model.layers.17.block_sparse_moe.experts.148.w1", "model.layers.17.block_sparse_moe.experts.149.w1", "model.layers.17.block_sparse_moe.experts.150.w1", "model.layers.17.block_sparse_moe.experts.151.w1", "model.layers.17.block_sparse_moe.experts.152.w1", "model.layers.17.block_sparse_moe.experts.153.w1", "model.layers.17.block_sparse_moe.experts.154.w1", "model.layers.17.block_sparse_moe.experts.155.w1", "model.layers.17.block_sparse_moe.experts.156.w1", "model.layers.17.block_sparse_moe.experts.157.w1", "model.layers.17.block_sparse_moe.experts.158.w1", "model.layers.17.block_sparse_moe.experts.159.w1", "model.layers.17.block_sparse_moe.experts.160.w1", "model.layers.17.block_sparse_moe.experts.161.w1", "model.layers.17.block_sparse_moe.experts.162.w1", "model.layers.17.block_sparse_moe.experts.163.w1", "model.layers.17.block_sparse_moe.experts.164.w1", "model.layers.17.block_sparse_moe.experts.165.w1", "model.layers.17.block_sparse_moe.experts.166.w1", "model.layers.17.block_sparse_moe.experts.167.w1", "model.layers.17.block_sparse_moe.experts.168.w1", "model.layers.17.block_sparse_moe.experts.169.w1", "model.layers.17.block_sparse_moe.experts.170.w1", "model.layers.17.block_sparse_moe.experts.171.w1", "model.layers.17.block_sparse_moe.experts.172.w1", "model.layers.17.block_sparse_moe.experts.173.w1", "model.layers.17.block_sparse_moe.experts.174.w1", "model.layers.17.block_sparse_moe.experts.175.w1", "model.layers.17.block_sparse_moe.experts.176.w1", "model.layers.17.block_sparse_moe.experts.177.w1", "model.layers.17.block_sparse_moe.experts.178.w1", "model.layers.17.block_sparse_moe.experts.179.w1", "model.layers.17.block_sparse_moe.experts.180.w1", "model.layers.17.block_sparse_moe.experts.181.w1", "model.layers.17.block_sparse_moe.experts.182.w1", "model.layers.17.block_sparse_moe.experts.183.w1", "model.layers.17.block_sparse_moe.experts.184.w1", "model.layers.17.block_sparse_moe.experts.185.w1", "model.layers.17.block_sparse_moe.experts.186.w1", "model.layers.17.block_sparse_moe.experts.187.w1", "model.layers.17.block_sparse_moe.experts.188.w1", "model.layers.17.block_sparse_moe.experts.189.w1", "model.layers.17.block_sparse_moe.experts.190.w1", "model.layers.17.block_sparse_moe.experts.191.w1", "model.layers.17.block_sparse_moe.experts.192.w1", "model.layers.17.block_sparse_moe.experts.193.w1", "model.layers.17.block_sparse_moe.experts.194.w1", "model.layers.17.block_sparse_moe.experts.195.w1", "model.layers.17.block_sparse_moe.experts.196.w1", "model.layers.17.block_sparse_moe.experts.197.w1", "model.layers.17.block_sparse_moe.experts.198.w1", "model.layers.17.block_sparse_moe.experts.199.w1", "model.layers.17.block_sparse_moe.experts.200.w1", "model.layers.17.block_sparse_moe.experts.201.w1", "model.layers.17.block_sparse_moe.experts.202.w1", "model.layers.17.block_sparse_moe.experts.203.w1", "model.layers.17.block_sparse_moe.experts.204.w1", "model.layers.17.block_sparse_moe.experts.205.w1", "model.layers.17.block_sparse_moe.experts.206.w1", "model.layers.17.block_sparse_moe.experts.207.w1", "model.layers.17.block_sparse_moe.experts.208.w1", "model.layers.17.block_sparse_moe.experts.209.w1", "model.layers.17.block_sparse_moe.experts.210.w1", "model.layers.17.block_sparse_moe.experts.211.w1", "model.layers.17.block_sparse_moe.experts.212.w1", "model.layers.17.block_sparse_moe.experts.213.w1", "model.layers.17.block_sparse_moe.experts.214.w1", "model.layers.17.block_sparse_moe.experts.215.w1", "model.layers.17.block_sparse_moe.experts.216.w1", "model.layers.17.block_sparse_moe.experts.217.w1", "model.layers.17.block_sparse_moe.experts.218.w1", "model.layers.17.block_sparse_moe.experts.219.w1", "model.layers.17.block_sparse_moe.experts.220.w1", "model.layers.17.block_sparse_moe.experts.221.w1", "model.layers.17.block_sparse_moe.experts.222.w1", "model.layers.17.block_sparse_moe.experts.223.w1", "model.layers.17.block_sparse_moe.experts.224.w1", "model.layers.17.block_sparse_moe.experts.225.w1", "model.layers.17.block_sparse_moe.experts.226.w1", "model.layers.17.block_sparse_moe.experts.227.w1", "model.layers.17.block_sparse_moe.experts.228.w1", "model.layers.17.block_sparse_moe.experts.229.w1", "model.layers.17.block_sparse_moe.experts.230.w1", "model.layers.17.block_sparse_moe.experts.231.w1", "model.layers.17.block_sparse_moe.experts.232.w1", "model.layers.17.block_sparse_moe.experts.233.w1", "model.layers.17.block_sparse_moe.experts.234.w1", "model.layers.17.block_sparse_moe.experts.235.w1", "model.layers.17.block_sparse_moe.experts.236.w1", "model.layers.17.block_sparse_moe.experts.237.w1", "model.layers.17.block_sparse_moe.experts.238.w1", "model.layers.17.block_sparse_moe.experts.239.w1", "model.layers.17.block_sparse_moe.experts.240.w1", "model.layers.17.block_sparse_moe.experts.241.w1", "model.layers.17.block_sparse_moe.experts.242.w1", "model.layers.17.block_sparse_moe.experts.243.w1", "model.layers.17.block_sparse_moe.experts.244.w1", "model.layers.17.block_sparse_moe.experts.245.w1", "model.layers.17.block_sparse_moe.experts.246.w1", "model.layers.17.block_sparse_moe.experts.247.w1", "model.layers.17.block_sparse_moe.experts.248.w1", "model.layers.17.block_sparse_moe.experts.249.w1", "model.layers.17.block_sparse_moe.experts.250.w1", "model.layers.17.block_sparse_moe.experts.251.w1", "model.layers.17.block_sparse_moe.experts.252.w1", "model.layers.17.block_sparse_moe.experts.253.w1", "model.layers.17.block_sparse_moe.experts.254.w1", "model.layers.17.block_sparse_moe.experts.255.w1", "model.layers.17.block_sparse_moe.experts.0.w3", "model.layers.17.block_sparse_moe.experts.1.w3", "model.layers.17.block_sparse_moe.experts.2.w3", "model.layers.17.block_sparse_moe.experts.3.w3", "model.layers.17.block_sparse_moe.experts.4.w3", "model.layers.17.block_sparse_moe.experts.5.w3", "model.layers.17.block_sparse_moe.experts.6.w3", "model.layers.17.block_sparse_moe.experts.7.w3", "model.layers.17.block_sparse_moe.experts.8.w3", "model.layers.17.block_sparse_moe.experts.9.w3", "model.layers.17.block_sparse_moe.experts.10.w3", "model.layers.17.block_sparse_moe.experts.11.w3", "model.layers.17.block_sparse_moe.experts.12.w3", "model.layers.17.block_sparse_moe.experts.13.w3", "model.layers.17.block_sparse_moe.experts.14.w3", "model.layers.17.block_sparse_moe.experts.15.w3", "model.layers.17.block_sparse_moe.experts.16.w3", "model.layers.17.block_sparse_moe.experts.17.w3", "model.layers.17.block_sparse_moe.experts.18.w3", "model.layers.17.block_sparse_moe.experts.19.w3", "model.layers.17.block_sparse_moe.experts.20.w3", "model.layers.17.block_sparse_moe.experts.21.w3", "model.layers.17.block_sparse_moe.experts.22.w3", "model.layers.17.block_sparse_moe.experts.23.w3", "model.layers.17.block_sparse_moe.experts.24.w3", "model.layers.17.block_sparse_moe.experts.25.w3", "model.layers.17.block_sparse_moe.experts.26.w3", "model.layers.17.block_sparse_moe.experts.27.w3", "model.layers.17.block_sparse_moe.experts.28.w3", "model.layers.17.block_sparse_moe.experts.29.w3", "model.layers.17.block_sparse_moe.experts.30.w3", "model.layers.17.block_sparse_moe.experts.31.w3", "model.layers.17.block_sparse_moe.experts.32.w3", "model.layers.17.block_sparse_moe.experts.33.w3", "model.layers.17.block_sparse_moe.experts.34.w3", "model.layers.17.block_sparse_moe.experts.35.w3", "model.layers.17.block_sparse_moe.experts.36.w3", "model.layers.17.block_sparse_moe.experts.37.w3", "model.layers.17.block_sparse_moe.experts.38.w3", "model.layers.17.block_sparse_moe.experts.39.w3", "model.layers.17.block_sparse_moe.experts.40.w3", "model.layers.17.block_sparse_moe.experts.41.w3", "model.layers.17.block_sparse_moe.experts.42.w3", "model.layers.17.block_sparse_moe.experts.43.w3", "model.layers.17.block_sparse_moe.experts.44.w3", "model.layers.17.block_sparse_moe.experts.45.w3", "model.layers.17.block_sparse_moe.experts.46.w3", "model.layers.17.block_sparse_moe.experts.47.w3", "model.layers.17.block_sparse_moe.experts.48.w3", "model.layers.17.block_sparse_moe.experts.49.w3", "model.layers.17.block_sparse_moe.experts.50.w3", "model.layers.17.block_sparse_moe.experts.51.w3", "model.layers.17.block_sparse_moe.experts.52.w3", "model.layers.17.block_sparse_moe.experts.53.w3", "model.layers.17.block_sparse_moe.experts.54.w3", "model.layers.17.block_sparse_moe.experts.55.w3", "model.layers.17.block_sparse_moe.experts.56.w3", "model.layers.17.block_sparse_moe.experts.57.w3", "model.layers.17.block_sparse_moe.experts.58.w3", "model.layers.17.block_sparse_moe.experts.59.w3", "model.layers.17.block_sparse_moe.experts.60.w3", "model.layers.17.block_sparse_moe.experts.61.w3", "model.layers.17.block_sparse_moe.experts.62.w3", "model.layers.17.block_sparse_moe.experts.63.w3", "model.layers.17.block_sparse_moe.experts.64.w3", "model.layers.17.block_sparse_moe.experts.65.w3", "model.layers.17.block_sparse_moe.experts.66.w3", "model.layers.17.block_sparse_moe.experts.67.w3", "model.layers.17.block_sparse_moe.experts.68.w3", "model.layers.17.block_sparse_moe.experts.69.w3", "model.layers.17.block_sparse_moe.experts.70.w3", "model.layers.17.block_sparse_moe.experts.71.w3", "model.layers.17.block_sparse_moe.experts.72.w3", "model.layers.17.block_sparse_moe.experts.73.w3", "model.layers.17.block_sparse_moe.experts.74.w3", "model.layers.17.block_sparse_moe.experts.75.w3", "model.layers.17.block_sparse_moe.experts.76.w3", "model.layers.17.block_sparse_moe.experts.77.w3", "model.layers.17.block_sparse_moe.experts.78.w3", "model.layers.17.block_sparse_moe.experts.79.w3", "model.layers.17.block_sparse_moe.experts.80.w3", "model.layers.17.block_sparse_moe.experts.81.w3", "model.layers.17.block_sparse_moe.experts.82.w3", "model.layers.17.block_sparse_moe.experts.83.w3", "model.layers.17.block_sparse_moe.experts.84.w3", "model.layers.17.block_sparse_moe.experts.85.w3", "model.layers.17.block_sparse_moe.experts.86.w3", "model.layers.17.block_sparse_moe.experts.87.w3", "model.layers.17.block_sparse_moe.experts.88.w3", "model.layers.17.block_sparse_moe.experts.89.w3", "model.layers.17.block_sparse_moe.experts.90.w3", "model.layers.17.block_sparse_moe.experts.91.w3", "model.layers.17.block_sparse_moe.experts.92.w3", "model.layers.17.block_sparse_moe.experts.93.w3", "model.layers.17.block_sparse_moe.experts.94.w3", "model.layers.17.block_sparse_moe.experts.95.w3", "model.layers.17.block_sparse_moe.experts.96.w3", "model.layers.17.block_sparse_moe.experts.97.w3", "model.layers.17.block_sparse_moe.experts.98.w3", "model.layers.17.block_sparse_moe.experts.99.w3", "model.layers.17.block_sparse_moe.experts.100.w3", "model.layers.17.block_sparse_moe.experts.101.w3", "model.layers.17.block_sparse_moe.experts.102.w3", "model.layers.17.block_sparse_moe.experts.103.w3", "model.layers.17.block_sparse_moe.experts.104.w3", "model.layers.17.block_sparse_moe.experts.105.w3", "model.layers.17.block_sparse_moe.experts.106.w3", "model.layers.17.block_sparse_moe.experts.107.w3", "model.layers.17.block_sparse_moe.experts.108.w3", "model.layers.17.block_sparse_moe.experts.109.w3", "model.layers.17.block_sparse_moe.experts.110.w3", "model.layers.17.block_sparse_moe.experts.111.w3", "model.layers.17.block_sparse_moe.experts.112.w3", "model.layers.17.block_sparse_moe.experts.113.w3", "model.layers.17.block_sparse_moe.experts.114.w3", "model.layers.17.block_sparse_moe.experts.115.w3", "model.layers.17.block_sparse_moe.experts.116.w3", "model.layers.17.block_sparse_moe.experts.117.w3", "model.layers.17.block_sparse_moe.experts.118.w3", "model.layers.17.block_sparse_moe.experts.119.w3", "model.layers.17.block_sparse_moe.experts.120.w3", "model.layers.17.block_sparse_moe.experts.121.w3", "model.layers.17.block_sparse_moe.experts.122.w3", "model.layers.17.block_sparse_moe.experts.123.w3", "model.layers.17.block_sparse_moe.experts.124.w3", "model.layers.17.block_sparse_moe.experts.125.w3", "model.layers.17.block_sparse_moe.experts.126.w3", "model.layers.17.block_sparse_moe.experts.127.w3", "model.layers.17.block_sparse_moe.experts.128.w3", "model.layers.17.block_sparse_moe.experts.129.w3", "model.layers.17.block_sparse_moe.experts.130.w3", "model.layers.17.block_sparse_moe.experts.131.w3", "model.layers.17.block_sparse_moe.experts.132.w3", "model.layers.17.block_sparse_moe.experts.133.w3", "model.layers.17.block_sparse_moe.experts.134.w3", "model.layers.17.block_sparse_moe.experts.135.w3", "model.layers.17.block_sparse_moe.experts.136.w3", "model.layers.17.block_sparse_moe.experts.137.w3", "model.layers.17.block_sparse_moe.experts.138.w3", "model.layers.17.block_sparse_moe.experts.139.w3", "model.layers.17.block_sparse_moe.experts.140.w3", "model.layers.17.block_sparse_moe.experts.141.w3", "model.layers.17.block_sparse_moe.experts.142.w3", "model.layers.17.block_sparse_moe.experts.143.w3", "model.layers.17.block_sparse_moe.experts.144.w3", "model.layers.17.block_sparse_moe.experts.145.w3", "model.layers.17.block_sparse_moe.experts.146.w3", "model.layers.17.block_sparse_moe.experts.147.w3", "model.layers.17.block_sparse_moe.experts.148.w3", "model.layers.17.block_sparse_moe.experts.149.w3", "model.layers.17.block_sparse_moe.experts.150.w3", "model.layers.17.block_sparse_moe.experts.151.w3", "model.layers.17.block_sparse_moe.experts.152.w3", "model.layers.17.block_sparse_moe.experts.153.w3", "model.layers.17.block_sparse_moe.experts.154.w3", "model.layers.17.block_sparse_moe.experts.155.w3", "model.layers.17.block_sparse_moe.experts.156.w3", "model.layers.17.block_sparse_moe.experts.157.w3", "model.layers.17.block_sparse_moe.experts.158.w3", "model.layers.17.block_sparse_moe.experts.159.w3", "model.layers.17.block_sparse_moe.experts.160.w3", "model.layers.17.block_sparse_moe.experts.161.w3", "model.layers.17.block_sparse_moe.experts.162.w3", "model.layers.17.block_sparse_moe.experts.163.w3", "model.layers.17.block_sparse_moe.experts.164.w3", "model.layers.17.block_sparse_moe.experts.165.w3", "model.layers.17.block_sparse_moe.experts.166.w3", "model.layers.17.block_sparse_moe.experts.167.w3", "model.layers.17.block_sparse_moe.experts.168.w3", "model.layers.17.block_sparse_moe.experts.169.w3", "model.layers.17.block_sparse_moe.experts.170.w3", "model.layers.17.block_sparse_moe.experts.171.w3", "model.layers.17.block_sparse_moe.experts.172.w3", "model.layers.17.block_sparse_moe.experts.173.w3", "model.layers.17.block_sparse_moe.experts.174.w3", "model.layers.17.block_sparse_moe.experts.175.w3", "model.layers.17.block_sparse_moe.experts.176.w3", "model.layers.17.block_sparse_moe.experts.177.w3", "model.layers.17.block_sparse_moe.experts.178.w3", "model.layers.17.block_sparse_moe.experts.179.w3", "model.layers.17.block_sparse_moe.experts.180.w3", "model.layers.17.block_sparse_moe.experts.181.w3", "model.layers.17.block_sparse_moe.experts.182.w3", "model.layers.17.block_sparse_moe.experts.183.w3", "model.layers.17.block_sparse_moe.experts.184.w3", "model.layers.17.block_sparse_moe.experts.185.w3", "model.layers.17.block_sparse_moe.experts.186.w3", "model.layers.17.block_sparse_moe.experts.187.w3", "model.layers.17.block_sparse_moe.experts.188.w3", "model.layers.17.block_sparse_moe.experts.189.w3", "model.layers.17.block_sparse_moe.experts.190.w3", "model.layers.17.block_sparse_moe.experts.191.w3", "model.layers.17.block_sparse_moe.experts.192.w3", "model.layers.17.block_sparse_moe.experts.193.w3", "model.layers.17.block_sparse_moe.experts.194.w3", "model.layers.17.block_sparse_moe.experts.195.w3", "model.layers.17.block_sparse_moe.experts.196.w3", "model.layers.17.block_sparse_moe.experts.197.w3", "model.layers.17.block_sparse_moe.experts.198.w3", "model.layers.17.block_sparse_moe.experts.199.w3", "model.layers.17.block_sparse_moe.experts.200.w3", "model.layers.17.block_sparse_moe.experts.201.w3", "model.layers.17.block_sparse_moe.experts.202.w3", "model.layers.17.block_sparse_moe.experts.203.w3", "model.layers.17.block_sparse_moe.experts.204.w3", "model.layers.17.block_sparse_moe.experts.205.w3", "model.layers.17.block_sparse_moe.experts.206.w3", "model.layers.17.block_sparse_moe.experts.207.w3", "model.layers.17.block_sparse_moe.experts.208.w3", "model.layers.17.block_sparse_moe.experts.209.w3", "model.layers.17.block_sparse_moe.experts.210.w3", "model.layers.17.block_sparse_moe.experts.211.w3", "model.layers.17.block_sparse_moe.experts.212.w3", "model.layers.17.block_sparse_moe.experts.213.w3", "model.layers.17.block_sparse_moe.experts.214.w3", "model.layers.17.block_sparse_moe.experts.215.w3", "model.layers.17.block_sparse_moe.experts.216.w3", "model.layers.17.block_sparse_moe.experts.217.w3", "model.layers.17.block_sparse_moe.experts.218.w3", "model.layers.17.block_sparse_moe.experts.219.w3", "model.layers.17.block_sparse_moe.experts.220.w3", "model.layers.17.block_sparse_moe.experts.221.w3", "model.layers.17.block_sparse_moe.experts.222.w3", "model.layers.17.block_sparse_moe.experts.223.w3", "model.layers.17.block_sparse_moe.experts.224.w3", "model.layers.17.block_sparse_moe.experts.225.w3", "model.layers.17.block_sparse_moe.experts.226.w3", "model.layers.17.block_sparse_moe.experts.227.w3", "model.layers.17.block_sparse_moe.experts.228.w3", "model.layers.17.block_sparse_moe.experts.229.w3", "model.layers.17.block_sparse_moe.experts.230.w3", "model.layers.17.block_sparse_moe.experts.231.w3", "model.layers.17.block_sparse_moe.experts.232.w3", "model.layers.17.block_sparse_moe.experts.233.w3", "model.layers.17.block_sparse_moe.experts.234.w3", "model.layers.17.block_sparse_moe.experts.235.w3", "model.layers.17.block_sparse_moe.experts.236.w3", "model.layers.17.block_sparse_moe.experts.237.w3", "model.layers.17.block_sparse_moe.experts.238.w3", "model.layers.17.block_sparse_moe.experts.239.w3", "model.layers.17.block_sparse_moe.experts.240.w3", "model.layers.17.block_sparse_moe.experts.241.w3", "model.layers.17.block_sparse_moe.experts.242.w3", "model.layers.17.block_sparse_moe.experts.243.w3", "model.layers.17.block_sparse_moe.experts.244.w3", "model.layers.17.block_sparse_moe.experts.245.w3", "model.layers.17.block_sparse_moe.experts.246.w3", "model.layers.17.block_sparse_moe.experts.247.w3", "model.layers.17.block_sparse_moe.experts.248.w3", "model.layers.17.block_sparse_moe.experts.249.w3", "model.layers.17.block_sparse_moe.experts.250.w3", "model.layers.17.block_sparse_moe.experts.251.w3", "model.layers.17.block_sparse_moe.experts.252.w3", "model.layers.17.block_sparse_moe.experts.253.w3", "model.layers.17.block_sparse_moe.experts.254.w3", "model.layers.17.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.001835440471768382, "dbits": 2415919104 } ] }, { "idx": 89, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w2", "model.layers.17.block_sparse_moe.experts.1.w2", "model.layers.17.block_sparse_moe.experts.2.w2", "model.layers.17.block_sparse_moe.experts.3.w2", "model.layers.17.block_sparse_moe.experts.4.w2", "model.layers.17.block_sparse_moe.experts.5.w2", "model.layers.17.block_sparse_moe.experts.6.w2", "model.layers.17.block_sparse_moe.experts.7.w2", "model.layers.17.block_sparse_moe.experts.8.w2", "model.layers.17.block_sparse_moe.experts.9.w2", "model.layers.17.block_sparse_moe.experts.10.w2", "model.layers.17.block_sparse_moe.experts.11.w2", "model.layers.17.block_sparse_moe.experts.12.w2", "model.layers.17.block_sparse_moe.experts.13.w2", "model.layers.17.block_sparse_moe.experts.14.w2", "model.layers.17.block_sparse_moe.experts.15.w2", "model.layers.17.block_sparse_moe.experts.16.w2", "model.layers.17.block_sparse_moe.experts.17.w2", "model.layers.17.block_sparse_moe.experts.18.w2", "model.layers.17.block_sparse_moe.experts.19.w2", "model.layers.17.block_sparse_moe.experts.20.w2", "model.layers.17.block_sparse_moe.experts.21.w2", "model.layers.17.block_sparse_moe.experts.22.w2", "model.layers.17.block_sparse_moe.experts.23.w2", "model.layers.17.block_sparse_moe.experts.24.w2", "model.layers.17.block_sparse_moe.experts.25.w2", "model.layers.17.block_sparse_moe.experts.26.w2", "model.layers.17.block_sparse_moe.experts.27.w2", "model.layers.17.block_sparse_moe.experts.28.w2", "model.layers.17.block_sparse_moe.experts.29.w2", "model.layers.17.block_sparse_moe.experts.30.w2", "model.layers.17.block_sparse_moe.experts.31.w2", "model.layers.17.block_sparse_moe.experts.32.w2", "model.layers.17.block_sparse_moe.experts.33.w2", "model.layers.17.block_sparse_moe.experts.34.w2", "model.layers.17.block_sparse_moe.experts.35.w2", "model.layers.17.block_sparse_moe.experts.36.w2", "model.layers.17.block_sparse_moe.experts.37.w2", "model.layers.17.block_sparse_moe.experts.38.w2", "model.layers.17.block_sparse_moe.experts.39.w2", "model.layers.17.block_sparse_moe.experts.40.w2", "model.layers.17.block_sparse_moe.experts.41.w2", "model.layers.17.block_sparse_moe.experts.42.w2", "model.layers.17.block_sparse_moe.experts.43.w2", "model.layers.17.block_sparse_moe.experts.44.w2", "model.layers.17.block_sparse_moe.experts.45.w2", "model.layers.17.block_sparse_moe.experts.46.w2", "model.layers.17.block_sparse_moe.experts.47.w2", "model.layers.17.block_sparse_moe.experts.48.w2", "model.layers.17.block_sparse_moe.experts.49.w2", "model.layers.17.block_sparse_moe.experts.50.w2", "model.layers.17.block_sparse_moe.experts.51.w2", "model.layers.17.block_sparse_moe.experts.52.w2", "model.layers.17.block_sparse_moe.experts.53.w2", "model.layers.17.block_sparse_moe.experts.54.w2", "model.layers.17.block_sparse_moe.experts.55.w2", "model.layers.17.block_sparse_moe.experts.56.w2", "model.layers.17.block_sparse_moe.experts.57.w2", "model.layers.17.block_sparse_moe.experts.58.w2", "model.layers.17.block_sparse_moe.experts.59.w2", "model.layers.17.block_sparse_moe.experts.60.w2", "model.layers.17.block_sparse_moe.experts.61.w2", "model.layers.17.block_sparse_moe.experts.62.w2", "model.layers.17.block_sparse_moe.experts.63.w2", "model.layers.17.block_sparse_moe.experts.64.w2", "model.layers.17.block_sparse_moe.experts.65.w2", "model.layers.17.block_sparse_moe.experts.66.w2", "model.layers.17.block_sparse_moe.experts.67.w2", "model.layers.17.block_sparse_moe.experts.68.w2", "model.layers.17.block_sparse_moe.experts.69.w2", "model.layers.17.block_sparse_moe.experts.70.w2", "model.layers.17.block_sparse_moe.experts.71.w2", "model.layers.17.block_sparse_moe.experts.72.w2", "model.layers.17.block_sparse_moe.experts.73.w2", "model.layers.17.block_sparse_moe.experts.74.w2", "model.layers.17.block_sparse_moe.experts.75.w2", "model.layers.17.block_sparse_moe.experts.76.w2", "model.layers.17.block_sparse_moe.experts.77.w2", "model.layers.17.block_sparse_moe.experts.78.w2", "model.layers.17.block_sparse_moe.experts.79.w2", "model.layers.17.block_sparse_moe.experts.80.w2", "model.layers.17.block_sparse_moe.experts.81.w2", "model.layers.17.block_sparse_moe.experts.82.w2", "model.layers.17.block_sparse_moe.experts.83.w2", "model.layers.17.block_sparse_moe.experts.84.w2", "model.layers.17.block_sparse_moe.experts.85.w2", "model.layers.17.block_sparse_moe.experts.86.w2", "model.layers.17.block_sparse_moe.experts.87.w2", "model.layers.17.block_sparse_moe.experts.88.w2", "model.layers.17.block_sparse_moe.experts.89.w2", "model.layers.17.block_sparse_moe.experts.90.w2", "model.layers.17.block_sparse_moe.experts.91.w2", "model.layers.17.block_sparse_moe.experts.92.w2", "model.layers.17.block_sparse_moe.experts.93.w2", "model.layers.17.block_sparse_moe.experts.94.w2", "model.layers.17.block_sparse_moe.experts.95.w2", "model.layers.17.block_sparse_moe.experts.96.w2", "model.layers.17.block_sparse_moe.experts.97.w2", "model.layers.17.block_sparse_moe.experts.98.w2", "model.layers.17.block_sparse_moe.experts.99.w2", "model.layers.17.block_sparse_moe.experts.100.w2", "model.layers.17.block_sparse_moe.experts.101.w2", "model.layers.17.block_sparse_moe.experts.102.w2", "model.layers.17.block_sparse_moe.experts.103.w2", "model.layers.17.block_sparse_moe.experts.104.w2", "model.layers.17.block_sparse_moe.experts.105.w2", "model.layers.17.block_sparse_moe.experts.106.w2", "model.layers.17.block_sparse_moe.experts.107.w2", "model.layers.17.block_sparse_moe.experts.108.w2", "model.layers.17.block_sparse_moe.experts.109.w2", "model.layers.17.block_sparse_moe.experts.110.w2", "model.layers.17.block_sparse_moe.experts.111.w2", "model.layers.17.block_sparse_moe.experts.112.w2", "model.layers.17.block_sparse_moe.experts.113.w2", "model.layers.17.block_sparse_moe.experts.114.w2", "model.layers.17.block_sparse_moe.experts.115.w2", "model.layers.17.block_sparse_moe.experts.116.w2", "model.layers.17.block_sparse_moe.experts.117.w2", "model.layers.17.block_sparse_moe.experts.118.w2", "model.layers.17.block_sparse_moe.experts.119.w2", "model.layers.17.block_sparse_moe.experts.120.w2", "model.layers.17.block_sparse_moe.experts.121.w2", "model.layers.17.block_sparse_moe.experts.122.w2", "model.layers.17.block_sparse_moe.experts.123.w2", "model.layers.17.block_sparse_moe.experts.124.w2", "model.layers.17.block_sparse_moe.experts.125.w2", "model.layers.17.block_sparse_moe.experts.126.w2", "model.layers.17.block_sparse_moe.experts.127.w2", "model.layers.17.block_sparse_moe.experts.128.w2", "model.layers.17.block_sparse_moe.experts.129.w2", "model.layers.17.block_sparse_moe.experts.130.w2", "model.layers.17.block_sparse_moe.experts.131.w2", "model.layers.17.block_sparse_moe.experts.132.w2", "model.layers.17.block_sparse_moe.experts.133.w2", "model.layers.17.block_sparse_moe.experts.134.w2", "model.layers.17.block_sparse_moe.experts.135.w2", "model.layers.17.block_sparse_moe.experts.136.w2", "model.layers.17.block_sparse_moe.experts.137.w2", "model.layers.17.block_sparse_moe.experts.138.w2", "model.layers.17.block_sparse_moe.experts.139.w2", "model.layers.17.block_sparse_moe.experts.140.w2", "model.layers.17.block_sparse_moe.experts.141.w2", "model.layers.17.block_sparse_moe.experts.142.w2", "model.layers.17.block_sparse_moe.experts.143.w2", "model.layers.17.block_sparse_moe.experts.144.w2", "model.layers.17.block_sparse_moe.experts.145.w2", "model.layers.17.block_sparse_moe.experts.146.w2", "model.layers.17.block_sparse_moe.experts.147.w2", "model.layers.17.block_sparse_moe.experts.148.w2", "model.layers.17.block_sparse_moe.experts.149.w2", "model.layers.17.block_sparse_moe.experts.150.w2", "model.layers.17.block_sparse_moe.experts.151.w2", "model.layers.17.block_sparse_moe.experts.152.w2", "model.layers.17.block_sparse_moe.experts.153.w2", "model.layers.17.block_sparse_moe.experts.154.w2", "model.layers.17.block_sparse_moe.experts.155.w2", "model.layers.17.block_sparse_moe.experts.156.w2", "model.layers.17.block_sparse_moe.experts.157.w2", "model.layers.17.block_sparse_moe.experts.158.w2", "model.layers.17.block_sparse_moe.experts.159.w2", "model.layers.17.block_sparse_moe.experts.160.w2", "model.layers.17.block_sparse_moe.experts.161.w2", "model.layers.17.block_sparse_moe.experts.162.w2", "model.layers.17.block_sparse_moe.experts.163.w2", "model.layers.17.block_sparse_moe.experts.164.w2", "model.layers.17.block_sparse_moe.experts.165.w2", "model.layers.17.block_sparse_moe.experts.166.w2", "model.layers.17.block_sparse_moe.experts.167.w2", "model.layers.17.block_sparse_moe.experts.168.w2", "model.layers.17.block_sparse_moe.experts.169.w2", "model.layers.17.block_sparse_moe.experts.170.w2", "model.layers.17.block_sparse_moe.experts.171.w2", "model.layers.17.block_sparse_moe.experts.172.w2", "model.layers.17.block_sparse_moe.experts.173.w2", "model.layers.17.block_sparse_moe.experts.174.w2", "model.layers.17.block_sparse_moe.experts.175.w2", "model.layers.17.block_sparse_moe.experts.176.w2", "model.layers.17.block_sparse_moe.experts.177.w2", "model.layers.17.block_sparse_moe.experts.178.w2", "model.layers.17.block_sparse_moe.experts.179.w2", "model.layers.17.block_sparse_moe.experts.180.w2", "model.layers.17.block_sparse_moe.experts.181.w2", "model.layers.17.block_sparse_moe.experts.182.w2", "model.layers.17.block_sparse_moe.experts.183.w2", "model.layers.17.block_sparse_moe.experts.184.w2", "model.layers.17.block_sparse_moe.experts.185.w2", "model.layers.17.block_sparse_moe.experts.186.w2", "model.layers.17.block_sparse_moe.experts.187.w2", "model.layers.17.block_sparse_moe.experts.188.w2", "model.layers.17.block_sparse_moe.experts.189.w2", "model.layers.17.block_sparse_moe.experts.190.w2", "model.layers.17.block_sparse_moe.experts.191.w2", "model.layers.17.block_sparse_moe.experts.192.w2", "model.layers.17.block_sparse_moe.experts.193.w2", "model.layers.17.block_sparse_moe.experts.194.w2", "model.layers.17.block_sparse_moe.experts.195.w2", "model.layers.17.block_sparse_moe.experts.196.w2", "model.layers.17.block_sparse_moe.experts.197.w2", "model.layers.17.block_sparse_moe.experts.198.w2", "model.layers.17.block_sparse_moe.experts.199.w2", "model.layers.17.block_sparse_moe.experts.200.w2", "model.layers.17.block_sparse_moe.experts.201.w2", "model.layers.17.block_sparse_moe.experts.202.w2", "model.layers.17.block_sparse_moe.experts.203.w2", "model.layers.17.block_sparse_moe.experts.204.w2", "model.layers.17.block_sparse_moe.experts.205.w2", "model.layers.17.block_sparse_moe.experts.206.w2", "model.layers.17.block_sparse_moe.experts.207.w2", "model.layers.17.block_sparse_moe.experts.208.w2", "model.layers.17.block_sparse_moe.experts.209.w2", "model.layers.17.block_sparse_moe.experts.210.w2", "model.layers.17.block_sparse_moe.experts.211.w2", "model.layers.17.block_sparse_moe.experts.212.w2", "model.layers.17.block_sparse_moe.experts.213.w2", "model.layers.17.block_sparse_moe.experts.214.w2", "model.layers.17.block_sparse_moe.experts.215.w2", "model.layers.17.block_sparse_moe.experts.216.w2", "model.layers.17.block_sparse_moe.experts.217.w2", "model.layers.17.block_sparse_moe.experts.218.w2", "model.layers.17.block_sparse_moe.experts.219.w2", "model.layers.17.block_sparse_moe.experts.220.w2", "model.layers.17.block_sparse_moe.experts.221.w2", "model.layers.17.block_sparse_moe.experts.222.w2", "model.layers.17.block_sparse_moe.experts.223.w2", "model.layers.17.block_sparse_moe.experts.224.w2", "model.layers.17.block_sparse_moe.experts.225.w2", "model.layers.17.block_sparse_moe.experts.226.w2", "model.layers.17.block_sparse_moe.experts.227.w2", "model.layers.17.block_sparse_moe.experts.228.w2", "model.layers.17.block_sparse_moe.experts.229.w2", "model.layers.17.block_sparse_moe.experts.230.w2", "model.layers.17.block_sparse_moe.experts.231.w2", "model.layers.17.block_sparse_moe.experts.232.w2", "model.layers.17.block_sparse_moe.experts.233.w2", "model.layers.17.block_sparse_moe.experts.234.w2", "model.layers.17.block_sparse_moe.experts.235.w2", "model.layers.17.block_sparse_moe.experts.236.w2", "model.layers.17.block_sparse_moe.experts.237.w2", "model.layers.17.block_sparse_moe.experts.238.w2", "model.layers.17.block_sparse_moe.experts.239.w2", "model.layers.17.block_sparse_moe.experts.240.w2", "model.layers.17.block_sparse_moe.experts.241.w2", "model.layers.17.block_sparse_moe.experts.242.w2", "model.layers.17.block_sparse_moe.experts.243.w2", "model.layers.17.block_sparse_moe.experts.244.w2", "model.layers.17.block_sparse_moe.experts.245.w2", "model.layers.17.block_sparse_moe.experts.246.w2", "model.layers.17.block_sparse_moe.experts.247.w2", "model.layers.17.block_sparse_moe.experts.248.w2", "model.layers.17.block_sparse_moe.experts.249.w2", "model.layers.17.block_sparse_moe.experts.250.w2", "model.layers.17.block_sparse_moe.experts.251.w2", "model.layers.17.block_sparse_moe.experts.252.w2", "model.layers.17.block_sparse_moe.experts.253.w2", "model.layers.17.block_sparse_moe.experts.254.w2", "model.layers.17.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0008641559630632317, "dbits": 1207959552 } ] }, { "idx": 90, "layers": [ "model.layers.18.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0004465365782380104, "dbits": 18874368 } ] }, { "idx": 91, "layers": [ "model.layers.18.self_attn.k_proj", "model.layers.18.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0004380812868475942, "dbits": 6291456 } ] }, { "idx": 92, "layers": [ "model.layers.18.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0028056774288415937, "dbits": 18874368 } ] }, { "idx": 93, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w1", "model.layers.18.block_sparse_moe.experts.1.w1", "model.layers.18.block_sparse_moe.experts.2.w1", "model.layers.18.block_sparse_moe.experts.3.w1", "model.layers.18.block_sparse_moe.experts.4.w1", "model.layers.18.block_sparse_moe.experts.5.w1", "model.layers.18.block_sparse_moe.experts.6.w1", "model.layers.18.block_sparse_moe.experts.7.w1", "model.layers.18.block_sparse_moe.experts.8.w1", "model.layers.18.block_sparse_moe.experts.9.w1", "model.layers.18.block_sparse_moe.experts.10.w1", "model.layers.18.block_sparse_moe.experts.11.w1", "model.layers.18.block_sparse_moe.experts.12.w1", "model.layers.18.block_sparse_moe.experts.13.w1", "model.layers.18.block_sparse_moe.experts.14.w1", "model.layers.18.block_sparse_moe.experts.15.w1", "model.layers.18.block_sparse_moe.experts.16.w1", "model.layers.18.block_sparse_moe.experts.17.w1", "model.layers.18.block_sparse_moe.experts.18.w1", "model.layers.18.block_sparse_moe.experts.19.w1", "model.layers.18.block_sparse_moe.experts.20.w1", "model.layers.18.block_sparse_moe.experts.21.w1", "model.layers.18.block_sparse_moe.experts.22.w1", "model.layers.18.block_sparse_moe.experts.23.w1", "model.layers.18.block_sparse_moe.experts.24.w1", "model.layers.18.block_sparse_moe.experts.25.w1", "model.layers.18.block_sparse_moe.experts.26.w1", "model.layers.18.block_sparse_moe.experts.27.w1", "model.layers.18.block_sparse_moe.experts.28.w1", "model.layers.18.block_sparse_moe.experts.29.w1", "model.layers.18.block_sparse_moe.experts.30.w1", "model.layers.18.block_sparse_moe.experts.31.w1", "model.layers.18.block_sparse_moe.experts.32.w1", "model.layers.18.block_sparse_moe.experts.33.w1", "model.layers.18.block_sparse_moe.experts.34.w1", "model.layers.18.block_sparse_moe.experts.35.w1", "model.layers.18.block_sparse_moe.experts.36.w1", "model.layers.18.block_sparse_moe.experts.37.w1", "model.layers.18.block_sparse_moe.experts.38.w1", "model.layers.18.block_sparse_moe.experts.39.w1", "model.layers.18.block_sparse_moe.experts.40.w1", "model.layers.18.block_sparse_moe.experts.41.w1", "model.layers.18.block_sparse_moe.experts.42.w1", "model.layers.18.block_sparse_moe.experts.43.w1", "model.layers.18.block_sparse_moe.experts.44.w1", "model.layers.18.block_sparse_moe.experts.45.w1", "model.layers.18.block_sparse_moe.experts.46.w1", "model.layers.18.block_sparse_moe.experts.47.w1", "model.layers.18.block_sparse_moe.experts.48.w1", "model.layers.18.block_sparse_moe.experts.49.w1", "model.layers.18.block_sparse_moe.experts.50.w1", "model.layers.18.block_sparse_moe.experts.51.w1", "model.layers.18.block_sparse_moe.experts.52.w1", "model.layers.18.block_sparse_moe.experts.53.w1", "model.layers.18.block_sparse_moe.experts.54.w1", "model.layers.18.block_sparse_moe.experts.55.w1", "model.layers.18.block_sparse_moe.experts.56.w1", "model.layers.18.block_sparse_moe.experts.57.w1", "model.layers.18.block_sparse_moe.experts.58.w1", "model.layers.18.block_sparse_moe.experts.59.w1", "model.layers.18.block_sparse_moe.experts.60.w1", "model.layers.18.block_sparse_moe.experts.61.w1", "model.layers.18.block_sparse_moe.experts.62.w1", "model.layers.18.block_sparse_moe.experts.63.w1", "model.layers.18.block_sparse_moe.experts.64.w1", "model.layers.18.block_sparse_moe.experts.65.w1", "model.layers.18.block_sparse_moe.experts.66.w1", "model.layers.18.block_sparse_moe.experts.67.w1", "model.layers.18.block_sparse_moe.experts.68.w1", "model.layers.18.block_sparse_moe.experts.69.w1", "model.layers.18.block_sparse_moe.experts.70.w1", "model.layers.18.block_sparse_moe.experts.71.w1", "model.layers.18.block_sparse_moe.experts.72.w1", "model.layers.18.block_sparse_moe.experts.73.w1", "model.layers.18.block_sparse_moe.experts.74.w1", "model.layers.18.block_sparse_moe.experts.75.w1", "model.layers.18.block_sparse_moe.experts.76.w1", "model.layers.18.block_sparse_moe.experts.77.w1", "model.layers.18.block_sparse_moe.experts.78.w1", "model.layers.18.block_sparse_moe.experts.79.w1", "model.layers.18.block_sparse_moe.experts.80.w1", "model.layers.18.block_sparse_moe.experts.81.w1", "model.layers.18.block_sparse_moe.experts.82.w1", "model.layers.18.block_sparse_moe.experts.83.w1", "model.layers.18.block_sparse_moe.experts.84.w1", "model.layers.18.block_sparse_moe.experts.85.w1", "model.layers.18.block_sparse_moe.experts.86.w1", "model.layers.18.block_sparse_moe.experts.87.w1", "model.layers.18.block_sparse_moe.experts.88.w1", "model.layers.18.block_sparse_moe.experts.89.w1", "model.layers.18.block_sparse_moe.experts.90.w1", "model.layers.18.block_sparse_moe.experts.91.w1", "model.layers.18.block_sparse_moe.experts.92.w1", "model.layers.18.block_sparse_moe.experts.93.w1", "model.layers.18.block_sparse_moe.experts.94.w1", "model.layers.18.block_sparse_moe.experts.95.w1", "model.layers.18.block_sparse_moe.experts.96.w1", "model.layers.18.block_sparse_moe.experts.97.w1", "model.layers.18.block_sparse_moe.experts.98.w1", "model.layers.18.block_sparse_moe.experts.99.w1", "model.layers.18.block_sparse_moe.experts.100.w1", "model.layers.18.block_sparse_moe.experts.101.w1", "model.layers.18.block_sparse_moe.experts.102.w1", "model.layers.18.block_sparse_moe.experts.103.w1", "model.layers.18.block_sparse_moe.experts.104.w1", "model.layers.18.block_sparse_moe.experts.105.w1", "model.layers.18.block_sparse_moe.experts.106.w1", "model.layers.18.block_sparse_moe.experts.107.w1", "model.layers.18.block_sparse_moe.experts.108.w1", "model.layers.18.block_sparse_moe.experts.109.w1", "model.layers.18.block_sparse_moe.experts.110.w1", "model.layers.18.block_sparse_moe.experts.111.w1", "model.layers.18.block_sparse_moe.experts.112.w1", "model.layers.18.block_sparse_moe.experts.113.w1", "model.layers.18.block_sparse_moe.experts.114.w1", "model.layers.18.block_sparse_moe.experts.115.w1", "model.layers.18.block_sparse_moe.experts.116.w1", "model.layers.18.block_sparse_moe.experts.117.w1", "model.layers.18.block_sparse_moe.experts.118.w1", "model.layers.18.block_sparse_moe.experts.119.w1", "model.layers.18.block_sparse_moe.experts.120.w1", "model.layers.18.block_sparse_moe.experts.121.w1", "model.layers.18.block_sparse_moe.experts.122.w1", "model.layers.18.block_sparse_moe.experts.123.w1", "model.layers.18.block_sparse_moe.experts.124.w1", "model.layers.18.block_sparse_moe.experts.125.w1", "model.layers.18.block_sparse_moe.experts.126.w1", "model.layers.18.block_sparse_moe.experts.127.w1", "model.layers.18.block_sparse_moe.experts.128.w1", "model.layers.18.block_sparse_moe.experts.129.w1", "model.layers.18.block_sparse_moe.experts.130.w1", "model.layers.18.block_sparse_moe.experts.131.w1", "model.layers.18.block_sparse_moe.experts.132.w1", "model.layers.18.block_sparse_moe.experts.133.w1", "model.layers.18.block_sparse_moe.experts.134.w1", "model.layers.18.block_sparse_moe.experts.135.w1", "model.layers.18.block_sparse_moe.experts.136.w1", "model.layers.18.block_sparse_moe.experts.137.w1", "model.layers.18.block_sparse_moe.experts.138.w1", "model.layers.18.block_sparse_moe.experts.139.w1", "model.layers.18.block_sparse_moe.experts.140.w1", "model.layers.18.block_sparse_moe.experts.141.w1", "model.layers.18.block_sparse_moe.experts.142.w1", "model.layers.18.block_sparse_moe.experts.143.w1", "model.layers.18.block_sparse_moe.experts.144.w1", "model.layers.18.block_sparse_moe.experts.145.w1", "model.layers.18.block_sparse_moe.experts.146.w1", "model.layers.18.block_sparse_moe.experts.147.w1", "model.layers.18.block_sparse_moe.experts.148.w1", "model.layers.18.block_sparse_moe.experts.149.w1", "model.layers.18.block_sparse_moe.experts.150.w1", "model.layers.18.block_sparse_moe.experts.151.w1", "model.layers.18.block_sparse_moe.experts.152.w1", "model.layers.18.block_sparse_moe.experts.153.w1", "model.layers.18.block_sparse_moe.experts.154.w1", "model.layers.18.block_sparse_moe.experts.155.w1", "model.layers.18.block_sparse_moe.experts.156.w1", "model.layers.18.block_sparse_moe.experts.157.w1", "model.layers.18.block_sparse_moe.experts.158.w1", "model.layers.18.block_sparse_moe.experts.159.w1", "model.layers.18.block_sparse_moe.experts.160.w1", "model.layers.18.block_sparse_moe.experts.161.w1", "model.layers.18.block_sparse_moe.experts.162.w1", "model.layers.18.block_sparse_moe.experts.163.w1", "model.layers.18.block_sparse_moe.experts.164.w1", "model.layers.18.block_sparse_moe.experts.165.w1", "model.layers.18.block_sparse_moe.experts.166.w1", "model.layers.18.block_sparse_moe.experts.167.w1", "model.layers.18.block_sparse_moe.experts.168.w1", "model.layers.18.block_sparse_moe.experts.169.w1", "model.layers.18.block_sparse_moe.experts.170.w1", "model.layers.18.block_sparse_moe.experts.171.w1", "model.layers.18.block_sparse_moe.experts.172.w1", "model.layers.18.block_sparse_moe.experts.173.w1", "model.layers.18.block_sparse_moe.experts.174.w1", "model.layers.18.block_sparse_moe.experts.175.w1", "model.layers.18.block_sparse_moe.experts.176.w1", "model.layers.18.block_sparse_moe.experts.177.w1", "model.layers.18.block_sparse_moe.experts.178.w1", "model.layers.18.block_sparse_moe.experts.179.w1", "model.layers.18.block_sparse_moe.experts.180.w1", "model.layers.18.block_sparse_moe.experts.181.w1", "model.layers.18.block_sparse_moe.experts.182.w1", "model.layers.18.block_sparse_moe.experts.183.w1", "model.layers.18.block_sparse_moe.experts.184.w1", "model.layers.18.block_sparse_moe.experts.185.w1", "model.layers.18.block_sparse_moe.experts.186.w1", "model.layers.18.block_sparse_moe.experts.187.w1", "model.layers.18.block_sparse_moe.experts.188.w1", "model.layers.18.block_sparse_moe.experts.189.w1", "model.layers.18.block_sparse_moe.experts.190.w1", "model.layers.18.block_sparse_moe.experts.191.w1", "model.layers.18.block_sparse_moe.experts.192.w1", "model.layers.18.block_sparse_moe.experts.193.w1", "model.layers.18.block_sparse_moe.experts.194.w1", "model.layers.18.block_sparse_moe.experts.195.w1", "model.layers.18.block_sparse_moe.experts.196.w1", "model.layers.18.block_sparse_moe.experts.197.w1", "model.layers.18.block_sparse_moe.experts.198.w1", "model.layers.18.block_sparse_moe.experts.199.w1", "model.layers.18.block_sparse_moe.experts.200.w1", "model.layers.18.block_sparse_moe.experts.201.w1", "model.layers.18.block_sparse_moe.experts.202.w1", "model.layers.18.block_sparse_moe.experts.203.w1", "model.layers.18.block_sparse_moe.experts.204.w1", "model.layers.18.block_sparse_moe.experts.205.w1", "model.layers.18.block_sparse_moe.experts.206.w1", "model.layers.18.block_sparse_moe.experts.207.w1", "model.layers.18.block_sparse_moe.experts.208.w1", "model.layers.18.block_sparse_moe.experts.209.w1", "model.layers.18.block_sparse_moe.experts.210.w1", "model.layers.18.block_sparse_moe.experts.211.w1", "model.layers.18.block_sparse_moe.experts.212.w1", "model.layers.18.block_sparse_moe.experts.213.w1", "model.layers.18.block_sparse_moe.experts.214.w1", "model.layers.18.block_sparse_moe.experts.215.w1", "model.layers.18.block_sparse_moe.experts.216.w1", "model.layers.18.block_sparse_moe.experts.217.w1", "model.layers.18.block_sparse_moe.experts.218.w1", "model.layers.18.block_sparse_moe.experts.219.w1", "model.layers.18.block_sparse_moe.experts.220.w1", "model.layers.18.block_sparse_moe.experts.221.w1", "model.layers.18.block_sparse_moe.experts.222.w1", "model.layers.18.block_sparse_moe.experts.223.w1", "model.layers.18.block_sparse_moe.experts.224.w1", "model.layers.18.block_sparse_moe.experts.225.w1", "model.layers.18.block_sparse_moe.experts.226.w1", "model.layers.18.block_sparse_moe.experts.227.w1", "model.layers.18.block_sparse_moe.experts.228.w1", "model.layers.18.block_sparse_moe.experts.229.w1", "model.layers.18.block_sparse_moe.experts.230.w1", "model.layers.18.block_sparse_moe.experts.231.w1", "model.layers.18.block_sparse_moe.experts.232.w1", "model.layers.18.block_sparse_moe.experts.233.w1", "model.layers.18.block_sparse_moe.experts.234.w1", "model.layers.18.block_sparse_moe.experts.235.w1", "model.layers.18.block_sparse_moe.experts.236.w1", "model.layers.18.block_sparse_moe.experts.237.w1", "model.layers.18.block_sparse_moe.experts.238.w1", "model.layers.18.block_sparse_moe.experts.239.w1", "model.layers.18.block_sparse_moe.experts.240.w1", "model.layers.18.block_sparse_moe.experts.241.w1", "model.layers.18.block_sparse_moe.experts.242.w1", "model.layers.18.block_sparse_moe.experts.243.w1", "model.layers.18.block_sparse_moe.experts.244.w1", "model.layers.18.block_sparse_moe.experts.245.w1", "model.layers.18.block_sparse_moe.experts.246.w1", "model.layers.18.block_sparse_moe.experts.247.w1", "model.layers.18.block_sparse_moe.experts.248.w1", "model.layers.18.block_sparse_moe.experts.249.w1", "model.layers.18.block_sparse_moe.experts.250.w1", "model.layers.18.block_sparse_moe.experts.251.w1", "model.layers.18.block_sparse_moe.experts.252.w1", "model.layers.18.block_sparse_moe.experts.253.w1", "model.layers.18.block_sparse_moe.experts.254.w1", "model.layers.18.block_sparse_moe.experts.255.w1", "model.layers.18.block_sparse_moe.experts.0.w3", "model.layers.18.block_sparse_moe.experts.1.w3", "model.layers.18.block_sparse_moe.experts.2.w3", "model.layers.18.block_sparse_moe.experts.3.w3", "model.layers.18.block_sparse_moe.experts.4.w3", "model.layers.18.block_sparse_moe.experts.5.w3", "model.layers.18.block_sparse_moe.experts.6.w3", "model.layers.18.block_sparse_moe.experts.7.w3", "model.layers.18.block_sparse_moe.experts.8.w3", "model.layers.18.block_sparse_moe.experts.9.w3", "model.layers.18.block_sparse_moe.experts.10.w3", "model.layers.18.block_sparse_moe.experts.11.w3", "model.layers.18.block_sparse_moe.experts.12.w3", "model.layers.18.block_sparse_moe.experts.13.w3", "model.layers.18.block_sparse_moe.experts.14.w3", "model.layers.18.block_sparse_moe.experts.15.w3", "model.layers.18.block_sparse_moe.experts.16.w3", "model.layers.18.block_sparse_moe.experts.17.w3", "model.layers.18.block_sparse_moe.experts.18.w3", "model.layers.18.block_sparse_moe.experts.19.w3", "model.layers.18.block_sparse_moe.experts.20.w3", "model.layers.18.block_sparse_moe.experts.21.w3", "model.layers.18.block_sparse_moe.experts.22.w3", "model.layers.18.block_sparse_moe.experts.23.w3", "model.layers.18.block_sparse_moe.experts.24.w3", "model.layers.18.block_sparse_moe.experts.25.w3", "model.layers.18.block_sparse_moe.experts.26.w3", "model.layers.18.block_sparse_moe.experts.27.w3", "model.layers.18.block_sparse_moe.experts.28.w3", "model.layers.18.block_sparse_moe.experts.29.w3", "model.layers.18.block_sparse_moe.experts.30.w3", "model.layers.18.block_sparse_moe.experts.31.w3", "model.layers.18.block_sparse_moe.experts.32.w3", "model.layers.18.block_sparse_moe.experts.33.w3", "model.layers.18.block_sparse_moe.experts.34.w3", "model.layers.18.block_sparse_moe.experts.35.w3", "model.layers.18.block_sparse_moe.experts.36.w3", "model.layers.18.block_sparse_moe.experts.37.w3", "model.layers.18.block_sparse_moe.experts.38.w3", "model.layers.18.block_sparse_moe.experts.39.w3", "model.layers.18.block_sparse_moe.experts.40.w3", "model.layers.18.block_sparse_moe.experts.41.w3", "model.layers.18.block_sparse_moe.experts.42.w3", "model.layers.18.block_sparse_moe.experts.43.w3", "model.layers.18.block_sparse_moe.experts.44.w3", "model.layers.18.block_sparse_moe.experts.45.w3", "model.layers.18.block_sparse_moe.experts.46.w3", "model.layers.18.block_sparse_moe.experts.47.w3", "model.layers.18.block_sparse_moe.experts.48.w3", "model.layers.18.block_sparse_moe.experts.49.w3", "model.layers.18.block_sparse_moe.experts.50.w3", "model.layers.18.block_sparse_moe.experts.51.w3", "model.layers.18.block_sparse_moe.experts.52.w3", "model.layers.18.block_sparse_moe.experts.53.w3", "model.layers.18.block_sparse_moe.experts.54.w3", "model.layers.18.block_sparse_moe.experts.55.w3", "model.layers.18.block_sparse_moe.experts.56.w3", "model.layers.18.block_sparse_moe.experts.57.w3", "model.layers.18.block_sparse_moe.experts.58.w3", "model.layers.18.block_sparse_moe.experts.59.w3", "model.layers.18.block_sparse_moe.experts.60.w3", "model.layers.18.block_sparse_moe.experts.61.w3", "model.layers.18.block_sparse_moe.experts.62.w3", "model.layers.18.block_sparse_moe.experts.63.w3", "model.layers.18.block_sparse_moe.experts.64.w3", "model.layers.18.block_sparse_moe.experts.65.w3", "model.layers.18.block_sparse_moe.experts.66.w3", "model.layers.18.block_sparse_moe.experts.67.w3", "model.layers.18.block_sparse_moe.experts.68.w3", "model.layers.18.block_sparse_moe.experts.69.w3", "model.layers.18.block_sparse_moe.experts.70.w3", "model.layers.18.block_sparse_moe.experts.71.w3", "model.layers.18.block_sparse_moe.experts.72.w3", "model.layers.18.block_sparse_moe.experts.73.w3", "model.layers.18.block_sparse_moe.experts.74.w3", "model.layers.18.block_sparse_moe.experts.75.w3", "model.layers.18.block_sparse_moe.experts.76.w3", "model.layers.18.block_sparse_moe.experts.77.w3", "model.layers.18.block_sparse_moe.experts.78.w3", "model.layers.18.block_sparse_moe.experts.79.w3", "model.layers.18.block_sparse_moe.experts.80.w3", "model.layers.18.block_sparse_moe.experts.81.w3", "model.layers.18.block_sparse_moe.experts.82.w3", "model.layers.18.block_sparse_moe.experts.83.w3", "model.layers.18.block_sparse_moe.experts.84.w3", "model.layers.18.block_sparse_moe.experts.85.w3", "model.layers.18.block_sparse_moe.experts.86.w3", "model.layers.18.block_sparse_moe.experts.87.w3", "model.layers.18.block_sparse_moe.experts.88.w3", "model.layers.18.block_sparse_moe.experts.89.w3", "model.layers.18.block_sparse_moe.experts.90.w3", "model.layers.18.block_sparse_moe.experts.91.w3", "model.layers.18.block_sparse_moe.experts.92.w3", "model.layers.18.block_sparse_moe.experts.93.w3", "model.layers.18.block_sparse_moe.experts.94.w3", "model.layers.18.block_sparse_moe.experts.95.w3", "model.layers.18.block_sparse_moe.experts.96.w3", "model.layers.18.block_sparse_moe.experts.97.w3", "model.layers.18.block_sparse_moe.experts.98.w3", "model.layers.18.block_sparse_moe.experts.99.w3", "model.layers.18.block_sparse_moe.experts.100.w3", "model.layers.18.block_sparse_moe.experts.101.w3", "model.layers.18.block_sparse_moe.experts.102.w3", "model.layers.18.block_sparse_moe.experts.103.w3", "model.layers.18.block_sparse_moe.experts.104.w3", "model.layers.18.block_sparse_moe.experts.105.w3", "model.layers.18.block_sparse_moe.experts.106.w3", "model.layers.18.block_sparse_moe.experts.107.w3", "model.layers.18.block_sparse_moe.experts.108.w3", "model.layers.18.block_sparse_moe.experts.109.w3", "model.layers.18.block_sparse_moe.experts.110.w3", "model.layers.18.block_sparse_moe.experts.111.w3", "model.layers.18.block_sparse_moe.experts.112.w3", "model.layers.18.block_sparse_moe.experts.113.w3", "model.layers.18.block_sparse_moe.experts.114.w3", "model.layers.18.block_sparse_moe.experts.115.w3", "model.layers.18.block_sparse_moe.experts.116.w3", "model.layers.18.block_sparse_moe.experts.117.w3", "model.layers.18.block_sparse_moe.experts.118.w3", "model.layers.18.block_sparse_moe.experts.119.w3", "model.layers.18.block_sparse_moe.experts.120.w3", "model.layers.18.block_sparse_moe.experts.121.w3", "model.layers.18.block_sparse_moe.experts.122.w3", "model.layers.18.block_sparse_moe.experts.123.w3", "model.layers.18.block_sparse_moe.experts.124.w3", "model.layers.18.block_sparse_moe.experts.125.w3", "model.layers.18.block_sparse_moe.experts.126.w3", "model.layers.18.block_sparse_moe.experts.127.w3", "model.layers.18.block_sparse_moe.experts.128.w3", "model.layers.18.block_sparse_moe.experts.129.w3", "model.layers.18.block_sparse_moe.experts.130.w3", "model.layers.18.block_sparse_moe.experts.131.w3", "model.layers.18.block_sparse_moe.experts.132.w3", "model.layers.18.block_sparse_moe.experts.133.w3", "model.layers.18.block_sparse_moe.experts.134.w3", "model.layers.18.block_sparse_moe.experts.135.w3", "model.layers.18.block_sparse_moe.experts.136.w3", "model.layers.18.block_sparse_moe.experts.137.w3", "model.layers.18.block_sparse_moe.experts.138.w3", "model.layers.18.block_sparse_moe.experts.139.w3", "model.layers.18.block_sparse_moe.experts.140.w3", "model.layers.18.block_sparse_moe.experts.141.w3", "model.layers.18.block_sparse_moe.experts.142.w3", "model.layers.18.block_sparse_moe.experts.143.w3", "model.layers.18.block_sparse_moe.experts.144.w3", "model.layers.18.block_sparse_moe.experts.145.w3", "model.layers.18.block_sparse_moe.experts.146.w3", "model.layers.18.block_sparse_moe.experts.147.w3", "model.layers.18.block_sparse_moe.experts.148.w3", "model.layers.18.block_sparse_moe.experts.149.w3", "model.layers.18.block_sparse_moe.experts.150.w3", "model.layers.18.block_sparse_moe.experts.151.w3", "model.layers.18.block_sparse_moe.experts.152.w3", "model.layers.18.block_sparse_moe.experts.153.w3", "model.layers.18.block_sparse_moe.experts.154.w3", "model.layers.18.block_sparse_moe.experts.155.w3", "model.layers.18.block_sparse_moe.experts.156.w3", "model.layers.18.block_sparse_moe.experts.157.w3", "model.layers.18.block_sparse_moe.experts.158.w3", "model.layers.18.block_sparse_moe.experts.159.w3", "model.layers.18.block_sparse_moe.experts.160.w3", "model.layers.18.block_sparse_moe.experts.161.w3", "model.layers.18.block_sparse_moe.experts.162.w3", "model.layers.18.block_sparse_moe.experts.163.w3", "model.layers.18.block_sparse_moe.experts.164.w3", "model.layers.18.block_sparse_moe.experts.165.w3", "model.layers.18.block_sparse_moe.experts.166.w3", "model.layers.18.block_sparse_moe.experts.167.w3", "model.layers.18.block_sparse_moe.experts.168.w3", "model.layers.18.block_sparse_moe.experts.169.w3", "model.layers.18.block_sparse_moe.experts.170.w3", "model.layers.18.block_sparse_moe.experts.171.w3", "model.layers.18.block_sparse_moe.experts.172.w3", "model.layers.18.block_sparse_moe.experts.173.w3", "model.layers.18.block_sparse_moe.experts.174.w3", "model.layers.18.block_sparse_moe.experts.175.w3", "model.layers.18.block_sparse_moe.experts.176.w3", "model.layers.18.block_sparse_moe.experts.177.w3", "model.layers.18.block_sparse_moe.experts.178.w3", "model.layers.18.block_sparse_moe.experts.179.w3", "model.layers.18.block_sparse_moe.experts.180.w3", "model.layers.18.block_sparse_moe.experts.181.w3", "model.layers.18.block_sparse_moe.experts.182.w3", "model.layers.18.block_sparse_moe.experts.183.w3", "model.layers.18.block_sparse_moe.experts.184.w3", "model.layers.18.block_sparse_moe.experts.185.w3", "model.layers.18.block_sparse_moe.experts.186.w3", "model.layers.18.block_sparse_moe.experts.187.w3", "model.layers.18.block_sparse_moe.experts.188.w3", "model.layers.18.block_sparse_moe.experts.189.w3", "model.layers.18.block_sparse_moe.experts.190.w3", "model.layers.18.block_sparse_moe.experts.191.w3", "model.layers.18.block_sparse_moe.experts.192.w3", "model.layers.18.block_sparse_moe.experts.193.w3", "model.layers.18.block_sparse_moe.experts.194.w3", "model.layers.18.block_sparse_moe.experts.195.w3", "model.layers.18.block_sparse_moe.experts.196.w3", "model.layers.18.block_sparse_moe.experts.197.w3", "model.layers.18.block_sparse_moe.experts.198.w3", "model.layers.18.block_sparse_moe.experts.199.w3", "model.layers.18.block_sparse_moe.experts.200.w3", "model.layers.18.block_sparse_moe.experts.201.w3", "model.layers.18.block_sparse_moe.experts.202.w3", "model.layers.18.block_sparse_moe.experts.203.w3", "model.layers.18.block_sparse_moe.experts.204.w3", "model.layers.18.block_sparse_moe.experts.205.w3", "model.layers.18.block_sparse_moe.experts.206.w3", "model.layers.18.block_sparse_moe.experts.207.w3", "model.layers.18.block_sparse_moe.experts.208.w3", "model.layers.18.block_sparse_moe.experts.209.w3", "model.layers.18.block_sparse_moe.experts.210.w3", "model.layers.18.block_sparse_moe.experts.211.w3", "model.layers.18.block_sparse_moe.experts.212.w3", "model.layers.18.block_sparse_moe.experts.213.w3", "model.layers.18.block_sparse_moe.experts.214.w3", "model.layers.18.block_sparse_moe.experts.215.w3", "model.layers.18.block_sparse_moe.experts.216.w3", "model.layers.18.block_sparse_moe.experts.217.w3", "model.layers.18.block_sparse_moe.experts.218.w3", "model.layers.18.block_sparse_moe.experts.219.w3", "model.layers.18.block_sparse_moe.experts.220.w3", "model.layers.18.block_sparse_moe.experts.221.w3", "model.layers.18.block_sparse_moe.experts.222.w3", "model.layers.18.block_sparse_moe.experts.223.w3", "model.layers.18.block_sparse_moe.experts.224.w3", "model.layers.18.block_sparse_moe.experts.225.w3", "model.layers.18.block_sparse_moe.experts.226.w3", "model.layers.18.block_sparse_moe.experts.227.w3", "model.layers.18.block_sparse_moe.experts.228.w3", "model.layers.18.block_sparse_moe.experts.229.w3", "model.layers.18.block_sparse_moe.experts.230.w3", "model.layers.18.block_sparse_moe.experts.231.w3", "model.layers.18.block_sparse_moe.experts.232.w3", "model.layers.18.block_sparse_moe.experts.233.w3", "model.layers.18.block_sparse_moe.experts.234.w3", "model.layers.18.block_sparse_moe.experts.235.w3", "model.layers.18.block_sparse_moe.experts.236.w3", "model.layers.18.block_sparse_moe.experts.237.w3", "model.layers.18.block_sparse_moe.experts.238.w3", "model.layers.18.block_sparse_moe.experts.239.w3", "model.layers.18.block_sparse_moe.experts.240.w3", "model.layers.18.block_sparse_moe.experts.241.w3", "model.layers.18.block_sparse_moe.experts.242.w3", "model.layers.18.block_sparse_moe.experts.243.w3", "model.layers.18.block_sparse_moe.experts.244.w3", "model.layers.18.block_sparse_moe.experts.245.w3", "model.layers.18.block_sparse_moe.experts.246.w3", "model.layers.18.block_sparse_moe.experts.247.w3", "model.layers.18.block_sparse_moe.experts.248.w3", "model.layers.18.block_sparse_moe.experts.249.w3", "model.layers.18.block_sparse_moe.experts.250.w3", "model.layers.18.block_sparse_moe.experts.251.w3", "model.layers.18.block_sparse_moe.experts.252.w3", "model.layers.18.block_sparse_moe.experts.253.w3", "model.layers.18.block_sparse_moe.experts.254.w3", "model.layers.18.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00012325961142778397, "dbits": 2415919104 } ] }, { "idx": 94, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w2", "model.layers.18.block_sparse_moe.experts.1.w2", "model.layers.18.block_sparse_moe.experts.2.w2", "model.layers.18.block_sparse_moe.experts.3.w2", "model.layers.18.block_sparse_moe.experts.4.w2", "model.layers.18.block_sparse_moe.experts.5.w2", "model.layers.18.block_sparse_moe.experts.6.w2", "model.layers.18.block_sparse_moe.experts.7.w2", "model.layers.18.block_sparse_moe.experts.8.w2", "model.layers.18.block_sparse_moe.experts.9.w2", "model.layers.18.block_sparse_moe.experts.10.w2", "model.layers.18.block_sparse_moe.experts.11.w2", "model.layers.18.block_sparse_moe.experts.12.w2", "model.layers.18.block_sparse_moe.experts.13.w2", "model.layers.18.block_sparse_moe.experts.14.w2", "model.layers.18.block_sparse_moe.experts.15.w2", "model.layers.18.block_sparse_moe.experts.16.w2", "model.layers.18.block_sparse_moe.experts.17.w2", "model.layers.18.block_sparse_moe.experts.18.w2", "model.layers.18.block_sparse_moe.experts.19.w2", "model.layers.18.block_sparse_moe.experts.20.w2", "model.layers.18.block_sparse_moe.experts.21.w2", "model.layers.18.block_sparse_moe.experts.22.w2", "model.layers.18.block_sparse_moe.experts.23.w2", "model.layers.18.block_sparse_moe.experts.24.w2", "model.layers.18.block_sparse_moe.experts.25.w2", "model.layers.18.block_sparse_moe.experts.26.w2", "model.layers.18.block_sparse_moe.experts.27.w2", "model.layers.18.block_sparse_moe.experts.28.w2", "model.layers.18.block_sparse_moe.experts.29.w2", "model.layers.18.block_sparse_moe.experts.30.w2", "model.layers.18.block_sparse_moe.experts.31.w2", "model.layers.18.block_sparse_moe.experts.32.w2", "model.layers.18.block_sparse_moe.experts.33.w2", "model.layers.18.block_sparse_moe.experts.34.w2", "model.layers.18.block_sparse_moe.experts.35.w2", "model.layers.18.block_sparse_moe.experts.36.w2", "model.layers.18.block_sparse_moe.experts.37.w2", "model.layers.18.block_sparse_moe.experts.38.w2", "model.layers.18.block_sparse_moe.experts.39.w2", "model.layers.18.block_sparse_moe.experts.40.w2", "model.layers.18.block_sparse_moe.experts.41.w2", "model.layers.18.block_sparse_moe.experts.42.w2", "model.layers.18.block_sparse_moe.experts.43.w2", "model.layers.18.block_sparse_moe.experts.44.w2", "model.layers.18.block_sparse_moe.experts.45.w2", "model.layers.18.block_sparse_moe.experts.46.w2", "model.layers.18.block_sparse_moe.experts.47.w2", "model.layers.18.block_sparse_moe.experts.48.w2", "model.layers.18.block_sparse_moe.experts.49.w2", "model.layers.18.block_sparse_moe.experts.50.w2", "model.layers.18.block_sparse_moe.experts.51.w2", "model.layers.18.block_sparse_moe.experts.52.w2", "model.layers.18.block_sparse_moe.experts.53.w2", "model.layers.18.block_sparse_moe.experts.54.w2", "model.layers.18.block_sparse_moe.experts.55.w2", "model.layers.18.block_sparse_moe.experts.56.w2", "model.layers.18.block_sparse_moe.experts.57.w2", "model.layers.18.block_sparse_moe.experts.58.w2", "model.layers.18.block_sparse_moe.experts.59.w2", "model.layers.18.block_sparse_moe.experts.60.w2", "model.layers.18.block_sparse_moe.experts.61.w2", "model.layers.18.block_sparse_moe.experts.62.w2", "model.layers.18.block_sparse_moe.experts.63.w2", "model.layers.18.block_sparse_moe.experts.64.w2", "model.layers.18.block_sparse_moe.experts.65.w2", "model.layers.18.block_sparse_moe.experts.66.w2", "model.layers.18.block_sparse_moe.experts.67.w2", "model.layers.18.block_sparse_moe.experts.68.w2", "model.layers.18.block_sparse_moe.experts.69.w2", "model.layers.18.block_sparse_moe.experts.70.w2", "model.layers.18.block_sparse_moe.experts.71.w2", "model.layers.18.block_sparse_moe.experts.72.w2", "model.layers.18.block_sparse_moe.experts.73.w2", "model.layers.18.block_sparse_moe.experts.74.w2", "model.layers.18.block_sparse_moe.experts.75.w2", "model.layers.18.block_sparse_moe.experts.76.w2", "model.layers.18.block_sparse_moe.experts.77.w2", "model.layers.18.block_sparse_moe.experts.78.w2", "model.layers.18.block_sparse_moe.experts.79.w2", "model.layers.18.block_sparse_moe.experts.80.w2", "model.layers.18.block_sparse_moe.experts.81.w2", "model.layers.18.block_sparse_moe.experts.82.w2", "model.layers.18.block_sparse_moe.experts.83.w2", "model.layers.18.block_sparse_moe.experts.84.w2", "model.layers.18.block_sparse_moe.experts.85.w2", "model.layers.18.block_sparse_moe.experts.86.w2", "model.layers.18.block_sparse_moe.experts.87.w2", "model.layers.18.block_sparse_moe.experts.88.w2", "model.layers.18.block_sparse_moe.experts.89.w2", "model.layers.18.block_sparse_moe.experts.90.w2", "model.layers.18.block_sparse_moe.experts.91.w2", "model.layers.18.block_sparse_moe.experts.92.w2", "model.layers.18.block_sparse_moe.experts.93.w2", "model.layers.18.block_sparse_moe.experts.94.w2", "model.layers.18.block_sparse_moe.experts.95.w2", "model.layers.18.block_sparse_moe.experts.96.w2", "model.layers.18.block_sparse_moe.experts.97.w2", "model.layers.18.block_sparse_moe.experts.98.w2", "model.layers.18.block_sparse_moe.experts.99.w2", "model.layers.18.block_sparse_moe.experts.100.w2", "model.layers.18.block_sparse_moe.experts.101.w2", "model.layers.18.block_sparse_moe.experts.102.w2", "model.layers.18.block_sparse_moe.experts.103.w2", "model.layers.18.block_sparse_moe.experts.104.w2", "model.layers.18.block_sparse_moe.experts.105.w2", "model.layers.18.block_sparse_moe.experts.106.w2", "model.layers.18.block_sparse_moe.experts.107.w2", "model.layers.18.block_sparse_moe.experts.108.w2", "model.layers.18.block_sparse_moe.experts.109.w2", "model.layers.18.block_sparse_moe.experts.110.w2", "model.layers.18.block_sparse_moe.experts.111.w2", "model.layers.18.block_sparse_moe.experts.112.w2", "model.layers.18.block_sparse_moe.experts.113.w2", "model.layers.18.block_sparse_moe.experts.114.w2", "model.layers.18.block_sparse_moe.experts.115.w2", "model.layers.18.block_sparse_moe.experts.116.w2", "model.layers.18.block_sparse_moe.experts.117.w2", "model.layers.18.block_sparse_moe.experts.118.w2", "model.layers.18.block_sparse_moe.experts.119.w2", "model.layers.18.block_sparse_moe.experts.120.w2", "model.layers.18.block_sparse_moe.experts.121.w2", "model.layers.18.block_sparse_moe.experts.122.w2", "model.layers.18.block_sparse_moe.experts.123.w2", "model.layers.18.block_sparse_moe.experts.124.w2", "model.layers.18.block_sparse_moe.experts.125.w2", "model.layers.18.block_sparse_moe.experts.126.w2", "model.layers.18.block_sparse_moe.experts.127.w2", "model.layers.18.block_sparse_moe.experts.128.w2", "model.layers.18.block_sparse_moe.experts.129.w2", "model.layers.18.block_sparse_moe.experts.130.w2", "model.layers.18.block_sparse_moe.experts.131.w2", "model.layers.18.block_sparse_moe.experts.132.w2", "model.layers.18.block_sparse_moe.experts.133.w2", "model.layers.18.block_sparse_moe.experts.134.w2", "model.layers.18.block_sparse_moe.experts.135.w2", "model.layers.18.block_sparse_moe.experts.136.w2", "model.layers.18.block_sparse_moe.experts.137.w2", "model.layers.18.block_sparse_moe.experts.138.w2", "model.layers.18.block_sparse_moe.experts.139.w2", "model.layers.18.block_sparse_moe.experts.140.w2", "model.layers.18.block_sparse_moe.experts.141.w2", "model.layers.18.block_sparse_moe.experts.142.w2", "model.layers.18.block_sparse_moe.experts.143.w2", "model.layers.18.block_sparse_moe.experts.144.w2", "model.layers.18.block_sparse_moe.experts.145.w2", "model.layers.18.block_sparse_moe.experts.146.w2", "model.layers.18.block_sparse_moe.experts.147.w2", "model.layers.18.block_sparse_moe.experts.148.w2", "model.layers.18.block_sparse_moe.experts.149.w2", "model.layers.18.block_sparse_moe.experts.150.w2", "model.layers.18.block_sparse_moe.experts.151.w2", "model.layers.18.block_sparse_moe.experts.152.w2", "model.layers.18.block_sparse_moe.experts.153.w2", "model.layers.18.block_sparse_moe.experts.154.w2", "model.layers.18.block_sparse_moe.experts.155.w2", "model.layers.18.block_sparse_moe.experts.156.w2", "model.layers.18.block_sparse_moe.experts.157.w2", "model.layers.18.block_sparse_moe.experts.158.w2", "model.layers.18.block_sparse_moe.experts.159.w2", "model.layers.18.block_sparse_moe.experts.160.w2", "model.layers.18.block_sparse_moe.experts.161.w2", "model.layers.18.block_sparse_moe.experts.162.w2", "model.layers.18.block_sparse_moe.experts.163.w2", "model.layers.18.block_sparse_moe.experts.164.w2", "model.layers.18.block_sparse_moe.experts.165.w2", "model.layers.18.block_sparse_moe.experts.166.w2", "model.layers.18.block_sparse_moe.experts.167.w2", "model.layers.18.block_sparse_moe.experts.168.w2", "model.layers.18.block_sparse_moe.experts.169.w2", "model.layers.18.block_sparse_moe.experts.170.w2", "model.layers.18.block_sparse_moe.experts.171.w2", "model.layers.18.block_sparse_moe.experts.172.w2", "model.layers.18.block_sparse_moe.experts.173.w2", "model.layers.18.block_sparse_moe.experts.174.w2", "model.layers.18.block_sparse_moe.experts.175.w2", "model.layers.18.block_sparse_moe.experts.176.w2", "model.layers.18.block_sparse_moe.experts.177.w2", "model.layers.18.block_sparse_moe.experts.178.w2", "model.layers.18.block_sparse_moe.experts.179.w2", "model.layers.18.block_sparse_moe.experts.180.w2", "model.layers.18.block_sparse_moe.experts.181.w2", "model.layers.18.block_sparse_moe.experts.182.w2", "model.layers.18.block_sparse_moe.experts.183.w2", "model.layers.18.block_sparse_moe.experts.184.w2", "model.layers.18.block_sparse_moe.experts.185.w2", "model.layers.18.block_sparse_moe.experts.186.w2", "model.layers.18.block_sparse_moe.experts.187.w2", "model.layers.18.block_sparse_moe.experts.188.w2", "model.layers.18.block_sparse_moe.experts.189.w2", "model.layers.18.block_sparse_moe.experts.190.w2", "model.layers.18.block_sparse_moe.experts.191.w2", "model.layers.18.block_sparse_moe.experts.192.w2", "model.layers.18.block_sparse_moe.experts.193.w2", "model.layers.18.block_sparse_moe.experts.194.w2", "model.layers.18.block_sparse_moe.experts.195.w2", "model.layers.18.block_sparse_moe.experts.196.w2", "model.layers.18.block_sparse_moe.experts.197.w2", "model.layers.18.block_sparse_moe.experts.198.w2", "model.layers.18.block_sparse_moe.experts.199.w2", "model.layers.18.block_sparse_moe.experts.200.w2", "model.layers.18.block_sparse_moe.experts.201.w2", "model.layers.18.block_sparse_moe.experts.202.w2", "model.layers.18.block_sparse_moe.experts.203.w2", "model.layers.18.block_sparse_moe.experts.204.w2", "model.layers.18.block_sparse_moe.experts.205.w2", "model.layers.18.block_sparse_moe.experts.206.w2", "model.layers.18.block_sparse_moe.experts.207.w2", "model.layers.18.block_sparse_moe.experts.208.w2", "model.layers.18.block_sparse_moe.experts.209.w2", "model.layers.18.block_sparse_moe.experts.210.w2", "model.layers.18.block_sparse_moe.experts.211.w2", "model.layers.18.block_sparse_moe.experts.212.w2", "model.layers.18.block_sparse_moe.experts.213.w2", "model.layers.18.block_sparse_moe.experts.214.w2", "model.layers.18.block_sparse_moe.experts.215.w2", "model.layers.18.block_sparse_moe.experts.216.w2", "model.layers.18.block_sparse_moe.experts.217.w2", "model.layers.18.block_sparse_moe.experts.218.w2", "model.layers.18.block_sparse_moe.experts.219.w2", "model.layers.18.block_sparse_moe.experts.220.w2", "model.layers.18.block_sparse_moe.experts.221.w2", "model.layers.18.block_sparse_moe.experts.222.w2", "model.layers.18.block_sparse_moe.experts.223.w2", "model.layers.18.block_sparse_moe.experts.224.w2", "model.layers.18.block_sparse_moe.experts.225.w2", "model.layers.18.block_sparse_moe.experts.226.w2", "model.layers.18.block_sparse_moe.experts.227.w2", "model.layers.18.block_sparse_moe.experts.228.w2", "model.layers.18.block_sparse_moe.experts.229.w2", "model.layers.18.block_sparse_moe.experts.230.w2", "model.layers.18.block_sparse_moe.experts.231.w2", "model.layers.18.block_sparse_moe.experts.232.w2", "model.layers.18.block_sparse_moe.experts.233.w2", "model.layers.18.block_sparse_moe.experts.234.w2", "model.layers.18.block_sparse_moe.experts.235.w2", "model.layers.18.block_sparse_moe.experts.236.w2", "model.layers.18.block_sparse_moe.experts.237.w2", "model.layers.18.block_sparse_moe.experts.238.w2", "model.layers.18.block_sparse_moe.experts.239.w2", "model.layers.18.block_sparse_moe.experts.240.w2", "model.layers.18.block_sparse_moe.experts.241.w2", "model.layers.18.block_sparse_moe.experts.242.w2", "model.layers.18.block_sparse_moe.experts.243.w2", "model.layers.18.block_sparse_moe.experts.244.w2", "model.layers.18.block_sparse_moe.experts.245.w2", "model.layers.18.block_sparse_moe.experts.246.w2", "model.layers.18.block_sparse_moe.experts.247.w2", "model.layers.18.block_sparse_moe.experts.248.w2", "model.layers.18.block_sparse_moe.experts.249.w2", "model.layers.18.block_sparse_moe.experts.250.w2", "model.layers.18.block_sparse_moe.experts.251.w2", "model.layers.18.block_sparse_moe.experts.252.w2", "model.layers.18.block_sparse_moe.experts.253.w2", "model.layers.18.block_sparse_moe.experts.254.w2", "model.layers.18.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0008078016340732491, "dbits": 1207959552 } ] }, { "idx": 95, "layers": [ "model.layers.19.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0005781086161732729, "dbits": 18874368 } ] }, { "idx": 96, "layers": [ "model.layers.19.self_attn.k_proj", "model.layers.19.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0030252562835812596, "dbits": 6291456 } ] }, { "idx": 97, "layers": [ "model.layers.19.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001945628225803367, "dbits": 18874368 } ] }, { "idx": 98, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w1", "model.layers.19.block_sparse_moe.experts.1.w1", "model.layers.19.block_sparse_moe.experts.2.w1", "model.layers.19.block_sparse_moe.experts.3.w1", "model.layers.19.block_sparse_moe.experts.4.w1", "model.layers.19.block_sparse_moe.experts.5.w1", "model.layers.19.block_sparse_moe.experts.6.w1", "model.layers.19.block_sparse_moe.experts.7.w1", "model.layers.19.block_sparse_moe.experts.8.w1", "model.layers.19.block_sparse_moe.experts.9.w1", "model.layers.19.block_sparse_moe.experts.10.w1", "model.layers.19.block_sparse_moe.experts.11.w1", "model.layers.19.block_sparse_moe.experts.12.w1", "model.layers.19.block_sparse_moe.experts.13.w1", "model.layers.19.block_sparse_moe.experts.14.w1", "model.layers.19.block_sparse_moe.experts.15.w1", "model.layers.19.block_sparse_moe.experts.16.w1", "model.layers.19.block_sparse_moe.experts.17.w1", "model.layers.19.block_sparse_moe.experts.18.w1", "model.layers.19.block_sparse_moe.experts.19.w1", "model.layers.19.block_sparse_moe.experts.20.w1", "model.layers.19.block_sparse_moe.experts.21.w1", "model.layers.19.block_sparse_moe.experts.22.w1", "model.layers.19.block_sparse_moe.experts.23.w1", "model.layers.19.block_sparse_moe.experts.24.w1", "model.layers.19.block_sparse_moe.experts.25.w1", "model.layers.19.block_sparse_moe.experts.26.w1", "model.layers.19.block_sparse_moe.experts.27.w1", "model.layers.19.block_sparse_moe.experts.28.w1", "model.layers.19.block_sparse_moe.experts.29.w1", "model.layers.19.block_sparse_moe.experts.30.w1", "model.layers.19.block_sparse_moe.experts.31.w1", "model.layers.19.block_sparse_moe.experts.32.w1", "model.layers.19.block_sparse_moe.experts.33.w1", "model.layers.19.block_sparse_moe.experts.34.w1", "model.layers.19.block_sparse_moe.experts.35.w1", "model.layers.19.block_sparse_moe.experts.36.w1", "model.layers.19.block_sparse_moe.experts.37.w1", "model.layers.19.block_sparse_moe.experts.38.w1", "model.layers.19.block_sparse_moe.experts.39.w1", "model.layers.19.block_sparse_moe.experts.40.w1", "model.layers.19.block_sparse_moe.experts.41.w1", "model.layers.19.block_sparse_moe.experts.42.w1", "model.layers.19.block_sparse_moe.experts.43.w1", "model.layers.19.block_sparse_moe.experts.44.w1", "model.layers.19.block_sparse_moe.experts.45.w1", "model.layers.19.block_sparse_moe.experts.46.w1", "model.layers.19.block_sparse_moe.experts.47.w1", "model.layers.19.block_sparse_moe.experts.48.w1", "model.layers.19.block_sparse_moe.experts.49.w1", "model.layers.19.block_sparse_moe.experts.50.w1", "model.layers.19.block_sparse_moe.experts.51.w1", "model.layers.19.block_sparse_moe.experts.52.w1", "model.layers.19.block_sparse_moe.experts.53.w1", "model.layers.19.block_sparse_moe.experts.54.w1", "model.layers.19.block_sparse_moe.experts.55.w1", "model.layers.19.block_sparse_moe.experts.56.w1", "model.layers.19.block_sparse_moe.experts.57.w1", "model.layers.19.block_sparse_moe.experts.58.w1", "model.layers.19.block_sparse_moe.experts.59.w1", "model.layers.19.block_sparse_moe.experts.60.w1", "model.layers.19.block_sparse_moe.experts.61.w1", "model.layers.19.block_sparse_moe.experts.62.w1", "model.layers.19.block_sparse_moe.experts.63.w1", "model.layers.19.block_sparse_moe.experts.64.w1", "model.layers.19.block_sparse_moe.experts.65.w1", "model.layers.19.block_sparse_moe.experts.66.w1", "model.layers.19.block_sparse_moe.experts.67.w1", "model.layers.19.block_sparse_moe.experts.68.w1", "model.layers.19.block_sparse_moe.experts.69.w1", "model.layers.19.block_sparse_moe.experts.70.w1", "model.layers.19.block_sparse_moe.experts.71.w1", "model.layers.19.block_sparse_moe.experts.72.w1", "model.layers.19.block_sparse_moe.experts.73.w1", "model.layers.19.block_sparse_moe.experts.74.w1", "model.layers.19.block_sparse_moe.experts.75.w1", "model.layers.19.block_sparse_moe.experts.76.w1", "model.layers.19.block_sparse_moe.experts.77.w1", "model.layers.19.block_sparse_moe.experts.78.w1", "model.layers.19.block_sparse_moe.experts.79.w1", "model.layers.19.block_sparse_moe.experts.80.w1", "model.layers.19.block_sparse_moe.experts.81.w1", "model.layers.19.block_sparse_moe.experts.82.w1", "model.layers.19.block_sparse_moe.experts.83.w1", "model.layers.19.block_sparse_moe.experts.84.w1", "model.layers.19.block_sparse_moe.experts.85.w1", "model.layers.19.block_sparse_moe.experts.86.w1", "model.layers.19.block_sparse_moe.experts.87.w1", "model.layers.19.block_sparse_moe.experts.88.w1", "model.layers.19.block_sparse_moe.experts.89.w1", "model.layers.19.block_sparse_moe.experts.90.w1", "model.layers.19.block_sparse_moe.experts.91.w1", "model.layers.19.block_sparse_moe.experts.92.w1", "model.layers.19.block_sparse_moe.experts.93.w1", "model.layers.19.block_sparse_moe.experts.94.w1", "model.layers.19.block_sparse_moe.experts.95.w1", "model.layers.19.block_sparse_moe.experts.96.w1", "model.layers.19.block_sparse_moe.experts.97.w1", "model.layers.19.block_sparse_moe.experts.98.w1", "model.layers.19.block_sparse_moe.experts.99.w1", "model.layers.19.block_sparse_moe.experts.100.w1", "model.layers.19.block_sparse_moe.experts.101.w1", "model.layers.19.block_sparse_moe.experts.102.w1", "model.layers.19.block_sparse_moe.experts.103.w1", "model.layers.19.block_sparse_moe.experts.104.w1", "model.layers.19.block_sparse_moe.experts.105.w1", "model.layers.19.block_sparse_moe.experts.106.w1", "model.layers.19.block_sparse_moe.experts.107.w1", "model.layers.19.block_sparse_moe.experts.108.w1", "model.layers.19.block_sparse_moe.experts.109.w1", "model.layers.19.block_sparse_moe.experts.110.w1", "model.layers.19.block_sparse_moe.experts.111.w1", "model.layers.19.block_sparse_moe.experts.112.w1", "model.layers.19.block_sparse_moe.experts.113.w1", "model.layers.19.block_sparse_moe.experts.114.w1", "model.layers.19.block_sparse_moe.experts.115.w1", "model.layers.19.block_sparse_moe.experts.116.w1", "model.layers.19.block_sparse_moe.experts.117.w1", "model.layers.19.block_sparse_moe.experts.118.w1", "model.layers.19.block_sparse_moe.experts.119.w1", "model.layers.19.block_sparse_moe.experts.120.w1", "model.layers.19.block_sparse_moe.experts.121.w1", "model.layers.19.block_sparse_moe.experts.122.w1", "model.layers.19.block_sparse_moe.experts.123.w1", "model.layers.19.block_sparse_moe.experts.124.w1", "model.layers.19.block_sparse_moe.experts.125.w1", "model.layers.19.block_sparse_moe.experts.126.w1", "model.layers.19.block_sparse_moe.experts.127.w1", "model.layers.19.block_sparse_moe.experts.128.w1", "model.layers.19.block_sparse_moe.experts.129.w1", "model.layers.19.block_sparse_moe.experts.130.w1", "model.layers.19.block_sparse_moe.experts.131.w1", "model.layers.19.block_sparse_moe.experts.132.w1", "model.layers.19.block_sparse_moe.experts.133.w1", "model.layers.19.block_sparse_moe.experts.134.w1", "model.layers.19.block_sparse_moe.experts.135.w1", "model.layers.19.block_sparse_moe.experts.136.w1", "model.layers.19.block_sparse_moe.experts.137.w1", "model.layers.19.block_sparse_moe.experts.138.w1", "model.layers.19.block_sparse_moe.experts.139.w1", "model.layers.19.block_sparse_moe.experts.140.w1", "model.layers.19.block_sparse_moe.experts.141.w1", "model.layers.19.block_sparse_moe.experts.142.w1", "model.layers.19.block_sparse_moe.experts.143.w1", "model.layers.19.block_sparse_moe.experts.144.w1", "model.layers.19.block_sparse_moe.experts.145.w1", "model.layers.19.block_sparse_moe.experts.146.w1", "model.layers.19.block_sparse_moe.experts.147.w1", "model.layers.19.block_sparse_moe.experts.148.w1", "model.layers.19.block_sparse_moe.experts.149.w1", "model.layers.19.block_sparse_moe.experts.150.w1", "model.layers.19.block_sparse_moe.experts.151.w1", "model.layers.19.block_sparse_moe.experts.152.w1", "model.layers.19.block_sparse_moe.experts.153.w1", "model.layers.19.block_sparse_moe.experts.154.w1", "model.layers.19.block_sparse_moe.experts.155.w1", "model.layers.19.block_sparse_moe.experts.156.w1", "model.layers.19.block_sparse_moe.experts.157.w1", "model.layers.19.block_sparse_moe.experts.158.w1", "model.layers.19.block_sparse_moe.experts.159.w1", "model.layers.19.block_sparse_moe.experts.160.w1", "model.layers.19.block_sparse_moe.experts.161.w1", "model.layers.19.block_sparse_moe.experts.162.w1", "model.layers.19.block_sparse_moe.experts.163.w1", "model.layers.19.block_sparse_moe.experts.164.w1", "model.layers.19.block_sparse_moe.experts.165.w1", "model.layers.19.block_sparse_moe.experts.166.w1", "model.layers.19.block_sparse_moe.experts.167.w1", "model.layers.19.block_sparse_moe.experts.168.w1", "model.layers.19.block_sparse_moe.experts.169.w1", "model.layers.19.block_sparse_moe.experts.170.w1", "model.layers.19.block_sparse_moe.experts.171.w1", "model.layers.19.block_sparse_moe.experts.172.w1", "model.layers.19.block_sparse_moe.experts.173.w1", "model.layers.19.block_sparse_moe.experts.174.w1", "model.layers.19.block_sparse_moe.experts.175.w1", "model.layers.19.block_sparse_moe.experts.176.w1", "model.layers.19.block_sparse_moe.experts.177.w1", "model.layers.19.block_sparse_moe.experts.178.w1", "model.layers.19.block_sparse_moe.experts.179.w1", "model.layers.19.block_sparse_moe.experts.180.w1", "model.layers.19.block_sparse_moe.experts.181.w1", "model.layers.19.block_sparse_moe.experts.182.w1", "model.layers.19.block_sparse_moe.experts.183.w1", "model.layers.19.block_sparse_moe.experts.184.w1", "model.layers.19.block_sparse_moe.experts.185.w1", "model.layers.19.block_sparse_moe.experts.186.w1", "model.layers.19.block_sparse_moe.experts.187.w1", "model.layers.19.block_sparse_moe.experts.188.w1", "model.layers.19.block_sparse_moe.experts.189.w1", "model.layers.19.block_sparse_moe.experts.190.w1", "model.layers.19.block_sparse_moe.experts.191.w1", "model.layers.19.block_sparse_moe.experts.192.w1", "model.layers.19.block_sparse_moe.experts.193.w1", "model.layers.19.block_sparse_moe.experts.194.w1", "model.layers.19.block_sparse_moe.experts.195.w1", "model.layers.19.block_sparse_moe.experts.196.w1", "model.layers.19.block_sparse_moe.experts.197.w1", "model.layers.19.block_sparse_moe.experts.198.w1", "model.layers.19.block_sparse_moe.experts.199.w1", "model.layers.19.block_sparse_moe.experts.200.w1", "model.layers.19.block_sparse_moe.experts.201.w1", "model.layers.19.block_sparse_moe.experts.202.w1", "model.layers.19.block_sparse_moe.experts.203.w1", "model.layers.19.block_sparse_moe.experts.204.w1", "model.layers.19.block_sparse_moe.experts.205.w1", "model.layers.19.block_sparse_moe.experts.206.w1", "model.layers.19.block_sparse_moe.experts.207.w1", "model.layers.19.block_sparse_moe.experts.208.w1", "model.layers.19.block_sparse_moe.experts.209.w1", "model.layers.19.block_sparse_moe.experts.210.w1", "model.layers.19.block_sparse_moe.experts.211.w1", "model.layers.19.block_sparse_moe.experts.212.w1", "model.layers.19.block_sparse_moe.experts.213.w1", "model.layers.19.block_sparse_moe.experts.214.w1", "model.layers.19.block_sparse_moe.experts.215.w1", "model.layers.19.block_sparse_moe.experts.216.w1", "model.layers.19.block_sparse_moe.experts.217.w1", "model.layers.19.block_sparse_moe.experts.218.w1", "model.layers.19.block_sparse_moe.experts.219.w1", "model.layers.19.block_sparse_moe.experts.220.w1", "model.layers.19.block_sparse_moe.experts.221.w1", "model.layers.19.block_sparse_moe.experts.222.w1", "model.layers.19.block_sparse_moe.experts.223.w1", "model.layers.19.block_sparse_moe.experts.224.w1", "model.layers.19.block_sparse_moe.experts.225.w1", "model.layers.19.block_sparse_moe.experts.226.w1", "model.layers.19.block_sparse_moe.experts.227.w1", "model.layers.19.block_sparse_moe.experts.228.w1", "model.layers.19.block_sparse_moe.experts.229.w1", "model.layers.19.block_sparse_moe.experts.230.w1", "model.layers.19.block_sparse_moe.experts.231.w1", "model.layers.19.block_sparse_moe.experts.232.w1", "model.layers.19.block_sparse_moe.experts.233.w1", "model.layers.19.block_sparse_moe.experts.234.w1", "model.layers.19.block_sparse_moe.experts.235.w1", "model.layers.19.block_sparse_moe.experts.236.w1", "model.layers.19.block_sparse_moe.experts.237.w1", "model.layers.19.block_sparse_moe.experts.238.w1", "model.layers.19.block_sparse_moe.experts.239.w1", "model.layers.19.block_sparse_moe.experts.240.w1", "model.layers.19.block_sparse_moe.experts.241.w1", "model.layers.19.block_sparse_moe.experts.242.w1", "model.layers.19.block_sparse_moe.experts.243.w1", "model.layers.19.block_sparse_moe.experts.244.w1", "model.layers.19.block_sparse_moe.experts.245.w1", "model.layers.19.block_sparse_moe.experts.246.w1", "model.layers.19.block_sparse_moe.experts.247.w1", "model.layers.19.block_sparse_moe.experts.248.w1", "model.layers.19.block_sparse_moe.experts.249.w1", "model.layers.19.block_sparse_moe.experts.250.w1", "model.layers.19.block_sparse_moe.experts.251.w1", "model.layers.19.block_sparse_moe.experts.252.w1", "model.layers.19.block_sparse_moe.experts.253.w1", "model.layers.19.block_sparse_moe.experts.254.w1", "model.layers.19.block_sparse_moe.experts.255.w1", "model.layers.19.block_sparse_moe.experts.0.w3", "model.layers.19.block_sparse_moe.experts.1.w3", "model.layers.19.block_sparse_moe.experts.2.w3", "model.layers.19.block_sparse_moe.experts.3.w3", "model.layers.19.block_sparse_moe.experts.4.w3", "model.layers.19.block_sparse_moe.experts.5.w3", "model.layers.19.block_sparse_moe.experts.6.w3", "model.layers.19.block_sparse_moe.experts.7.w3", "model.layers.19.block_sparse_moe.experts.8.w3", "model.layers.19.block_sparse_moe.experts.9.w3", "model.layers.19.block_sparse_moe.experts.10.w3", "model.layers.19.block_sparse_moe.experts.11.w3", "model.layers.19.block_sparse_moe.experts.12.w3", "model.layers.19.block_sparse_moe.experts.13.w3", "model.layers.19.block_sparse_moe.experts.14.w3", "model.layers.19.block_sparse_moe.experts.15.w3", "model.layers.19.block_sparse_moe.experts.16.w3", "model.layers.19.block_sparse_moe.experts.17.w3", "model.layers.19.block_sparse_moe.experts.18.w3", "model.layers.19.block_sparse_moe.experts.19.w3", "model.layers.19.block_sparse_moe.experts.20.w3", "model.layers.19.block_sparse_moe.experts.21.w3", "model.layers.19.block_sparse_moe.experts.22.w3", "model.layers.19.block_sparse_moe.experts.23.w3", "model.layers.19.block_sparse_moe.experts.24.w3", "model.layers.19.block_sparse_moe.experts.25.w3", "model.layers.19.block_sparse_moe.experts.26.w3", "model.layers.19.block_sparse_moe.experts.27.w3", "model.layers.19.block_sparse_moe.experts.28.w3", "model.layers.19.block_sparse_moe.experts.29.w3", "model.layers.19.block_sparse_moe.experts.30.w3", "model.layers.19.block_sparse_moe.experts.31.w3", "model.layers.19.block_sparse_moe.experts.32.w3", "model.layers.19.block_sparse_moe.experts.33.w3", "model.layers.19.block_sparse_moe.experts.34.w3", "model.layers.19.block_sparse_moe.experts.35.w3", "model.layers.19.block_sparse_moe.experts.36.w3", "model.layers.19.block_sparse_moe.experts.37.w3", "model.layers.19.block_sparse_moe.experts.38.w3", "model.layers.19.block_sparse_moe.experts.39.w3", "model.layers.19.block_sparse_moe.experts.40.w3", "model.layers.19.block_sparse_moe.experts.41.w3", "model.layers.19.block_sparse_moe.experts.42.w3", "model.layers.19.block_sparse_moe.experts.43.w3", "model.layers.19.block_sparse_moe.experts.44.w3", "model.layers.19.block_sparse_moe.experts.45.w3", "model.layers.19.block_sparse_moe.experts.46.w3", "model.layers.19.block_sparse_moe.experts.47.w3", "model.layers.19.block_sparse_moe.experts.48.w3", "model.layers.19.block_sparse_moe.experts.49.w3", "model.layers.19.block_sparse_moe.experts.50.w3", "model.layers.19.block_sparse_moe.experts.51.w3", "model.layers.19.block_sparse_moe.experts.52.w3", "model.layers.19.block_sparse_moe.experts.53.w3", "model.layers.19.block_sparse_moe.experts.54.w3", "model.layers.19.block_sparse_moe.experts.55.w3", "model.layers.19.block_sparse_moe.experts.56.w3", "model.layers.19.block_sparse_moe.experts.57.w3", "model.layers.19.block_sparse_moe.experts.58.w3", "model.layers.19.block_sparse_moe.experts.59.w3", "model.layers.19.block_sparse_moe.experts.60.w3", "model.layers.19.block_sparse_moe.experts.61.w3", "model.layers.19.block_sparse_moe.experts.62.w3", "model.layers.19.block_sparse_moe.experts.63.w3", "model.layers.19.block_sparse_moe.experts.64.w3", "model.layers.19.block_sparse_moe.experts.65.w3", "model.layers.19.block_sparse_moe.experts.66.w3", "model.layers.19.block_sparse_moe.experts.67.w3", "model.layers.19.block_sparse_moe.experts.68.w3", "model.layers.19.block_sparse_moe.experts.69.w3", "model.layers.19.block_sparse_moe.experts.70.w3", "model.layers.19.block_sparse_moe.experts.71.w3", "model.layers.19.block_sparse_moe.experts.72.w3", "model.layers.19.block_sparse_moe.experts.73.w3", "model.layers.19.block_sparse_moe.experts.74.w3", "model.layers.19.block_sparse_moe.experts.75.w3", "model.layers.19.block_sparse_moe.experts.76.w3", "model.layers.19.block_sparse_moe.experts.77.w3", "model.layers.19.block_sparse_moe.experts.78.w3", "model.layers.19.block_sparse_moe.experts.79.w3", "model.layers.19.block_sparse_moe.experts.80.w3", "model.layers.19.block_sparse_moe.experts.81.w3", "model.layers.19.block_sparse_moe.experts.82.w3", "model.layers.19.block_sparse_moe.experts.83.w3", "model.layers.19.block_sparse_moe.experts.84.w3", "model.layers.19.block_sparse_moe.experts.85.w3", "model.layers.19.block_sparse_moe.experts.86.w3", "model.layers.19.block_sparse_moe.experts.87.w3", "model.layers.19.block_sparse_moe.experts.88.w3", "model.layers.19.block_sparse_moe.experts.89.w3", "model.layers.19.block_sparse_moe.experts.90.w3", "model.layers.19.block_sparse_moe.experts.91.w3", "model.layers.19.block_sparse_moe.experts.92.w3", "model.layers.19.block_sparse_moe.experts.93.w3", "model.layers.19.block_sparse_moe.experts.94.w3", "model.layers.19.block_sparse_moe.experts.95.w3", "model.layers.19.block_sparse_moe.experts.96.w3", "model.layers.19.block_sparse_moe.experts.97.w3", "model.layers.19.block_sparse_moe.experts.98.w3", "model.layers.19.block_sparse_moe.experts.99.w3", "model.layers.19.block_sparse_moe.experts.100.w3", "model.layers.19.block_sparse_moe.experts.101.w3", "model.layers.19.block_sparse_moe.experts.102.w3", "model.layers.19.block_sparse_moe.experts.103.w3", "model.layers.19.block_sparse_moe.experts.104.w3", "model.layers.19.block_sparse_moe.experts.105.w3", "model.layers.19.block_sparse_moe.experts.106.w3", "model.layers.19.block_sparse_moe.experts.107.w3", "model.layers.19.block_sparse_moe.experts.108.w3", "model.layers.19.block_sparse_moe.experts.109.w3", "model.layers.19.block_sparse_moe.experts.110.w3", "model.layers.19.block_sparse_moe.experts.111.w3", "model.layers.19.block_sparse_moe.experts.112.w3", "model.layers.19.block_sparse_moe.experts.113.w3", "model.layers.19.block_sparse_moe.experts.114.w3", "model.layers.19.block_sparse_moe.experts.115.w3", "model.layers.19.block_sparse_moe.experts.116.w3", "model.layers.19.block_sparse_moe.experts.117.w3", "model.layers.19.block_sparse_moe.experts.118.w3", "model.layers.19.block_sparse_moe.experts.119.w3", "model.layers.19.block_sparse_moe.experts.120.w3", "model.layers.19.block_sparse_moe.experts.121.w3", "model.layers.19.block_sparse_moe.experts.122.w3", "model.layers.19.block_sparse_moe.experts.123.w3", "model.layers.19.block_sparse_moe.experts.124.w3", "model.layers.19.block_sparse_moe.experts.125.w3", "model.layers.19.block_sparse_moe.experts.126.w3", "model.layers.19.block_sparse_moe.experts.127.w3", "model.layers.19.block_sparse_moe.experts.128.w3", "model.layers.19.block_sparse_moe.experts.129.w3", "model.layers.19.block_sparse_moe.experts.130.w3", "model.layers.19.block_sparse_moe.experts.131.w3", "model.layers.19.block_sparse_moe.experts.132.w3", "model.layers.19.block_sparse_moe.experts.133.w3", "model.layers.19.block_sparse_moe.experts.134.w3", "model.layers.19.block_sparse_moe.experts.135.w3", "model.layers.19.block_sparse_moe.experts.136.w3", "model.layers.19.block_sparse_moe.experts.137.w3", "model.layers.19.block_sparse_moe.experts.138.w3", "model.layers.19.block_sparse_moe.experts.139.w3", "model.layers.19.block_sparse_moe.experts.140.w3", "model.layers.19.block_sparse_moe.experts.141.w3", "model.layers.19.block_sparse_moe.experts.142.w3", "model.layers.19.block_sparse_moe.experts.143.w3", "model.layers.19.block_sparse_moe.experts.144.w3", "model.layers.19.block_sparse_moe.experts.145.w3", "model.layers.19.block_sparse_moe.experts.146.w3", "model.layers.19.block_sparse_moe.experts.147.w3", "model.layers.19.block_sparse_moe.experts.148.w3", "model.layers.19.block_sparse_moe.experts.149.w3", "model.layers.19.block_sparse_moe.experts.150.w3", "model.layers.19.block_sparse_moe.experts.151.w3", "model.layers.19.block_sparse_moe.experts.152.w3", "model.layers.19.block_sparse_moe.experts.153.w3", "model.layers.19.block_sparse_moe.experts.154.w3", "model.layers.19.block_sparse_moe.experts.155.w3", "model.layers.19.block_sparse_moe.experts.156.w3", "model.layers.19.block_sparse_moe.experts.157.w3", "model.layers.19.block_sparse_moe.experts.158.w3", "model.layers.19.block_sparse_moe.experts.159.w3", "model.layers.19.block_sparse_moe.experts.160.w3", "model.layers.19.block_sparse_moe.experts.161.w3", "model.layers.19.block_sparse_moe.experts.162.w3", "model.layers.19.block_sparse_moe.experts.163.w3", "model.layers.19.block_sparse_moe.experts.164.w3", "model.layers.19.block_sparse_moe.experts.165.w3", "model.layers.19.block_sparse_moe.experts.166.w3", "model.layers.19.block_sparse_moe.experts.167.w3", "model.layers.19.block_sparse_moe.experts.168.w3", "model.layers.19.block_sparse_moe.experts.169.w3", "model.layers.19.block_sparse_moe.experts.170.w3", "model.layers.19.block_sparse_moe.experts.171.w3", "model.layers.19.block_sparse_moe.experts.172.w3", "model.layers.19.block_sparse_moe.experts.173.w3", "model.layers.19.block_sparse_moe.experts.174.w3", "model.layers.19.block_sparse_moe.experts.175.w3", "model.layers.19.block_sparse_moe.experts.176.w3", "model.layers.19.block_sparse_moe.experts.177.w3", "model.layers.19.block_sparse_moe.experts.178.w3", "model.layers.19.block_sparse_moe.experts.179.w3", "model.layers.19.block_sparse_moe.experts.180.w3", "model.layers.19.block_sparse_moe.experts.181.w3", "model.layers.19.block_sparse_moe.experts.182.w3", "model.layers.19.block_sparse_moe.experts.183.w3", "model.layers.19.block_sparse_moe.experts.184.w3", "model.layers.19.block_sparse_moe.experts.185.w3", "model.layers.19.block_sparse_moe.experts.186.w3", "model.layers.19.block_sparse_moe.experts.187.w3", "model.layers.19.block_sparse_moe.experts.188.w3", "model.layers.19.block_sparse_moe.experts.189.w3", "model.layers.19.block_sparse_moe.experts.190.w3", "model.layers.19.block_sparse_moe.experts.191.w3", "model.layers.19.block_sparse_moe.experts.192.w3", "model.layers.19.block_sparse_moe.experts.193.w3", "model.layers.19.block_sparse_moe.experts.194.w3", "model.layers.19.block_sparse_moe.experts.195.w3", "model.layers.19.block_sparse_moe.experts.196.w3", "model.layers.19.block_sparse_moe.experts.197.w3", "model.layers.19.block_sparse_moe.experts.198.w3", "model.layers.19.block_sparse_moe.experts.199.w3", "model.layers.19.block_sparse_moe.experts.200.w3", "model.layers.19.block_sparse_moe.experts.201.w3", "model.layers.19.block_sparse_moe.experts.202.w3", "model.layers.19.block_sparse_moe.experts.203.w3", "model.layers.19.block_sparse_moe.experts.204.w3", "model.layers.19.block_sparse_moe.experts.205.w3", "model.layers.19.block_sparse_moe.experts.206.w3", "model.layers.19.block_sparse_moe.experts.207.w3", "model.layers.19.block_sparse_moe.experts.208.w3", "model.layers.19.block_sparse_moe.experts.209.w3", "model.layers.19.block_sparse_moe.experts.210.w3", "model.layers.19.block_sparse_moe.experts.211.w3", "model.layers.19.block_sparse_moe.experts.212.w3", "model.layers.19.block_sparse_moe.experts.213.w3", "model.layers.19.block_sparse_moe.experts.214.w3", "model.layers.19.block_sparse_moe.experts.215.w3", "model.layers.19.block_sparse_moe.experts.216.w3", "model.layers.19.block_sparse_moe.experts.217.w3", "model.layers.19.block_sparse_moe.experts.218.w3", "model.layers.19.block_sparse_moe.experts.219.w3", "model.layers.19.block_sparse_moe.experts.220.w3", "model.layers.19.block_sparse_moe.experts.221.w3", "model.layers.19.block_sparse_moe.experts.222.w3", "model.layers.19.block_sparse_moe.experts.223.w3", "model.layers.19.block_sparse_moe.experts.224.w3", "model.layers.19.block_sparse_moe.experts.225.w3", "model.layers.19.block_sparse_moe.experts.226.w3", "model.layers.19.block_sparse_moe.experts.227.w3", "model.layers.19.block_sparse_moe.experts.228.w3", "model.layers.19.block_sparse_moe.experts.229.w3", "model.layers.19.block_sparse_moe.experts.230.w3", "model.layers.19.block_sparse_moe.experts.231.w3", "model.layers.19.block_sparse_moe.experts.232.w3", "model.layers.19.block_sparse_moe.experts.233.w3", "model.layers.19.block_sparse_moe.experts.234.w3", "model.layers.19.block_sparse_moe.experts.235.w3", "model.layers.19.block_sparse_moe.experts.236.w3", "model.layers.19.block_sparse_moe.experts.237.w3", "model.layers.19.block_sparse_moe.experts.238.w3", "model.layers.19.block_sparse_moe.experts.239.w3", "model.layers.19.block_sparse_moe.experts.240.w3", "model.layers.19.block_sparse_moe.experts.241.w3", "model.layers.19.block_sparse_moe.experts.242.w3", "model.layers.19.block_sparse_moe.experts.243.w3", "model.layers.19.block_sparse_moe.experts.244.w3", "model.layers.19.block_sparse_moe.experts.245.w3", "model.layers.19.block_sparse_moe.experts.246.w3", "model.layers.19.block_sparse_moe.experts.247.w3", "model.layers.19.block_sparse_moe.experts.248.w3", "model.layers.19.block_sparse_moe.experts.249.w3", "model.layers.19.block_sparse_moe.experts.250.w3", "model.layers.19.block_sparse_moe.experts.251.w3", "model.layers.19.block_sparse_moe.experts.252.w3", "model.layers.19.block_sparse_moe.experts.253.w3", "model.layers.19.block_sparse_moe.experts.254.w3", "model.layers.19.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00026499759405851364, "dbits": 2415919104 } ] }, { "idx": 99, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w2", "model.layers.19.block_sparse_moe.experts.1.w2", "model.layers.19.block_sparse_moe.experts.2.w2", "model.layers.19.block_sparse_moe.experts.3.w2", "model.layers.19.block_sparse_moe.experts.4.w2", "model.layers.19.block_sparse_moe.experts.5.w2", "model.layers.19.block_sparse_moe.experts.6.w2", "model.layers.19.block_sparse_moe.experts.7.w2", "model.layers.19.block_sparse_moe.experts.8.w2", "model.layers.19.block_sparse_moe.experts.9.w2", "model.layers.19.block_sparse_moe.experts.10.w2", "model.layers.19.block_sparse_moe.experts.11.w2", "model.layers.19.block_sparse_moe.experts.12.w2", "model.layers.19.block_sparse_moe.experts.13.w2", "model.layers.19.block_sparse_moe.experts.14.w2", "model.layers.19.block_sparse_moe.experts.15.w2", "model.layers.19.block_sparse_moe.experts.16.w2", "model.layers.19.block_sparse_moe.experts.17.w2", "model.layers.19.block_sparse_moe.experts.18.w2", "model.layers.19.block_sparse_moe.experts.19.w2", "model.layers.19.block_sparse_moe.experts.20.w2", "model.layers.19.block_sparse_moe.experts.21.w2", "model.layers.19.block_sparse_moe.experts.22.w2", "model.layers.19.block_sparse_moe.experts.23.w2", "model.layers.19.block_sparse_moe.experts.24.w2", "model.layers.19.block_sparse_moe.experts.25.w2", "model.layers.19.block_sparse_moe.experts.26.w2", "model.layers.19.block_sparse_moe.experts.27.w2", "model.layers.19.block_sparse_moe.experts.28.w2", "model.layers.19.block_sparse_moe.experts.29.w2", "model.layers.19.block_sparse_moe.experts.30.w2", "model.layers.19.block_sparse_moe.experts.31.w2", "model.layers.19.block_sparse_moe.experts.32.w2", "model.layers.19.block_sparse_moe.experts.33.w2", "model.layers.19.block_sparse_moe.experts.34.w2", "model.layers.19.block_sparse_moe.experts.35.w2", "model.layers.19.block_sparse_moe.experts.36.w2", "model.layers.19.block_sparse_moe.experts.37.w2", "model.layers.19.block_sparse_moe.experts.38.w2", "model.layers.19.block_sparse_moe.experts.39.w2", "model.layers.19.block_sparse_moe.experts.40.w2", "model.layers.19.block_sparse_moe.experts.41.w2", "model.layers.19.block_sparse_moe.experts.42.w2", "model.layers.19.block_sparse_moe.experts.43.w2", "model.layers.19.block_sparse_moe.experts.44.w2", "model.layers.19.block_sparse_moe.experts.45.w2", "model.layers.19.block_sparse_moe.experts.46.w2", "model.layers.19.block_sparse_moe.experts.47.w2", "model.layers.19.block_sparse_moe.experts.48.w2", "model.layers.19.block_sparse_moe.experts.49.w2", "model.layers.19.block_sparse_moe.experts.50.w2", "model.layers.19.block_sparse_moe.experts.51.w2", "model.layers.19.block_sparse_moe.experts.52.w2", "model.layers.19.block_sparse_moe.experts.53.w2", "model.layers.19.block_sparse_moe.experts.54.w2", "model.layers.19.block_sparse_moe.experts.55.w2", "model.layers.19.block_sparse_moe.experts.56.w2", "model.layers.19.block_sparse_moe.experts.57.w2", "model.layers.19.block_sparse_moe.experts.58.w2", "model.layers.19.block_sparse_moe.experts.59.w2", "model.layers.19.block_sparse_moe.experts.60.w2", "model.layers.19.block_sparse_moe.experts.61.w2", "model.layers.19.block_sparse_moe.experts.62.w2", "model.layers.19.block_sparse_moe.experts.63.w2", "model.layers.19.block_sparse_moe.experts.64.w2", "model.layers.19.block_sparse_moe.experts.65.w2", "model.layers.19.block_sparse_moe.experts.66.w2", "model.layers.19.block_sparse_moe.experts.67.w2", "model.layers.19.block_sparse_moe.experts.68.w2", "model.layers.19.block_sparse_moe.experts.69.w2", "model.layers.19.block_sparse_moe.experts.70.w2", "model.layers.19.block_sparse_moe.experts.71.w2", "model.layers.19.block_sparse_moe.experts.72.w2", "model.layers.19.block_sparse_moe.experts.73.w2", "model.layers.19.block_sparse_moe.experts.74.w2", "model.layers.19.block_sparse_moe.experts.75.w2", "model.layers.19.block_sparse_moe.experts.76.w2", "model.layers.19.block_sparse_moe.experts.77.w2", "model.layers.19.block_sparse_moe.experts.78.w2", "model.layers.19.block_sparse_moe.experts.79.w2", "model.layers.19.block_sparse_moe.experts.80.w2", "model.layers.19.block_sparse_moe.experts.81.w2", "model.layers.19.block_sparse_moe.experts.82.w2", "model.layers.19.block_sparse_moe.experts.83.w2", "model.layers.19.block_sparse_moe.experts.84.w2", "model.layers.19.block_sparse_moe.experts.85.w2", "model.layers.19.block_sparse_moe.experts.86.w2", "model.layers.19.block_sparse_moe.experts.87.w2", "model.layers.19.block_sparse_moe.experts.88.w2", "model.layers.19.block_sparse_moe.experts.89.w2", "model.layers.19.block_sparse_moe.experts.90.w2", "model.layers.19.block_sparse_moe.experts.91.w2", "model.layers.19.block_sparse_moe.experts.92.w2", "model.layers.19.block_sparse_moe.experts.93.w2", "model.layers.19.block_sparse_moe.experts.94.w2", "model.layers.19.block_sparse_moe.experts.95.w2", "model.layers.19.block_sparse_moe.experts.96.w2", "model.layers.19.block_sparse_moe.experts.97.w2", "model.layers.19.block_sparse_moe.experts.98.w2", "model.layers.19.block_sparse_moe.experts.99.w2", "model.layers.19.block_sparse_moe.experts.100.w2", "model.layers.19.block_sparse_moe.experts.101.w2", "model.layers.19.block_sparse_moe.experts.102.w2", "model.layers.19.block_sparse_moe.experts.103.w2", "model.layers.19.block_sparse_moe.experts.104.w2", "model.layers.19.block_sparse_moe.experts.105.w2", "model.layers.19.block_sparse_moe.experts.106.w2", "model.layers.19.block_sparse_moe.experts.107.w2", "model.layers.19.block_sparse_moe.experts.108.w2", "model.layers.19.block_sparse_moe.experts.109.w2", "model.layers.19.block_sparse_moe.experts.110.w2", "model.layers.19.block_sparse_moe.experts.111.w2", "model.layers.19.block_sparse_moe.experts.112.w2", "model.layers.19.block_sparse_moe.experts.113.w2", "model.layers.19.block_sparse_moe.experts.114.w2", "model.layers.19.block_sparse_moe.experts.115.w2", "model.layers.19.block_sparse_moe.experts.116.w2", "model.layers.19.block_sparse_moe.experts.117.w2", "model.layers.19.block_sparse_moe.experts.118.w2", "model.layers.19.block_sparse_moe.experts.119.w2", "model.layers.19.block_sparse_moe.experts.120.w2", "model.layers.19.block_sparse_moe.experts.121.w2", "model.layers.19.block_sparse_moe.experts.122.w2", "model.layers.19.block_sparse_moe.experts.123.w2", "model.layers.19.block_sparse_moe.experts.124.w2", "model.layers.19.block_sparse_moe.experts.125.w2", "model.layers.19.block_sparse_moe.experts.126.w2", "model.layers.19.block_sparse_moe.experts.127.w2", "model.layers.19.block_sparse_moe.experts.128.w2", "model.layers.19.block_sparse_moe.experts.129.w2", "model.layers.19.block_sparse_moe.experts.130.w2", "model.layers.19.block_sparse_moe.experts.131.w2", "model.layers.19.block_sparse_moe.experts.132.w2", "model.layers.19.block_sparse_moe.experts.133.w2", "model.layers.19.block_sparse_moe.experts.134.w2", "model.layers.19.block_sparse_moe.experts.135.w2", "model.layers.19.block_sparse_moe.experts.136.w2", "model.layers.19.block_sparse_moe.experts.137.w2", "model.layers.19.block_sparse_moe.experts.138.w2", "model.layers.19.block_sparse_moe.experts.139.w2", "model.layers.19.block_sparse_moe.experts.140.w2", "model.layers.19.block_sparse_moe.experts.141.w2", "model.layers.19.block_sparse_moe.experts.142.w2", "model.layers.19.block_sparse_moe.experts.143.w2", "model.layers.19.block_sparse_moe.experts.144.w2", "model.layers.19.block_sparse_moe.experts.145.w2", "model.layers.19.block_sparse_moe.experts.146.w2", "model.layers.19.block_sparse_moe.experts.147.w2", "model.layers.19.block_sparse_moe.experts.148.w2", "model.layers.19.block_sparse_moe.experts.149.w2", "model.layers.19.block_sparse_moe.experts.150.w2", "model.layers.19.block_sparse_moe.experts.151.w2", "model.layers.19.block_sparse_moe.experts.152.w2", "model.layers.19.block_sparse_moe.experts.153.w2", "model.layers.19.block_sparse_moe.experts.154.w2", "model.layers.19.block_sparse_moe.experts.155.w2", "model.layers.19.block_sparse_moe.experts.156.w2", "model.layers.19.block_sparse_moe.experts.157.w2", "model.layers.19.block_sparse_moe.experts.158.w2", "model.layers.19.block_sparse_moe.experts.159.w2", "model.layers.19.block_sparse_moe.experts.160.w2", "model.layers.19.block_sparse_moe.experts.161.w2", "model.layers.19.block_sparse_moe.experts.162.w2", "model.layers.19.block_sparse_moe.experts.163.w2", "model.layers.19.block_sparse_moe.experts.164.w2", "model.layers.19.block_sparse_moe.experts.165.w2", "model.layers.19.block_sparse_moe.experts.166.w2", "model.layers.19.block_sparse_moe.experts.167.w2", "model.layers.19.block_sparse_moe.experts.168.w2", "model.layers.19.block_sparse_moe.experts.169.w2", "model.layers.19.block_sparse_moe.experts.170.w2", "model.layers.19.block_sparse_moe.experts.171.w2", "model.layers.19.block_sparse_moe.experts.172.w2", "model.layers.19.block_sparse_moe.experts.173.w2", "model.layers.19.block_sparse_moe.experts.174.w2", "model.layers.19.block_sparse_moe.experts.175.w2", "model.layers.19.block_sparse_moe.experts.176.w2", "model.layers.19.block_sparse_moe.experts.177.w2", "model.layers.19.block_sparse_moe.experts.178.w2", "model.layers.19.block_sparse_moe.experts.179.w2", "model.layers.19.block_sparse_moe.experts.180.w2", "model.layers.19.block_sparse_moe.experts.181.w2", "model.layers.19.block_sparse_moe.experts.182.w2", "model.layers.19.block_sparse_moe.experts.183.w2", "model.layers.19.block_sparse_moe.experts.184.w2", "model.layers.19.block_sparse_moe.experts.185.w2", "model.layers.19.block_sparse_moe.experts.186.w2", "model.layers.19.block_sparse_moe.experts.187.w2", "model.layers.19.block_sparse_moe.experts.188.w2", "model.layers.19.block_sparse_moe.experts.189.w2", "model.layers.19.block_sparse_moe.experts.190.w2", "model.layers.19.block_sparse_moe.experts.191.w2", "model.layers.19.block_sparse_moe.experts.192.w2", "model.layers.19.block_sparse_moe.experts.193.w2", "model.layers.19.block_sparse_moe.experts.194.w2", "model.layers.19.block_sparse_moe.experts.195.w2", "model.layers.19.block_sparse_moe.experts.196.w2", "model.layers.19.block_sparse_moe.experts.197.w2", "model.layers.19.block_sparse_moe.experts.198.w2", "model.layers.19.block_sparse_moe.experts.199.w2", "model.layers.19.block_sparse_moe.experts.200.w2", "model.layers.19.block_sparse_moe.experts.201.w2", "model.layers.19.block_sparse_moe.experts.202.w2", "model.layers.19.block_sparse_moe.experts.203.w2", "model.layers.19.block_sparse_moe.experts.204.w2", "model.layers.19.block_sparse_moe.experts.205.w2", "model.layers.19.block_sparse_moe.experts.206.w2", "model.layers.19.block_sparse_moe.experts.207.w2", "model.layers.19.block_sparse_moe.experts.208.w2", "model.layers.19.block_sparse_moe.experts.209.w2", "model.layers.19.block_sparse_moe.experts.210.w2", "model.layers.19.block_sparse_moe.experts.211.w2", "model.layers.19.block_sparse_moe.experts.212.w2", "model.layers.19.block_sparse_moe.experts.213.w2", "model.layers.19.block_sparse_moe.experts.214.w2", "model.layers.19.block_sparse_moe.experts.215.w2", "model.layers.19.block_sparse_moe.experts.216.w2", "model.layers.19.block_sparse_moe.experts.217.w2", "model.layers.19.block_sparse_moe.experts.218.w2", "model.layers.19.block_sparse_moe.experts.219.w2", "model.layers.19.block_sparse_moe.experts.220.w2", "model.layers.19.block_sparse_moe.experts.221.w2", "model.layers.19.block_sparse_moe.experts.222.w2", "model.layers.19.block_sparse_moe.experts.223.w2", "model.layers.19.block_sparse_moe.experts.224.w2", "model.layers.19.block_sparse_moe.experts.225.w2", "model.layers.19.block_sparse_moe.experts.226.w2", "model.layers.19.block_sparse_moe.experts.227.w2", "model.layers.19.block_sparse_moe.experts.228.w2", "model.layers.19.block_sparse_moe.experts.229.w2", "model.layers.19.block_sparse_moe.experts.230.w2", "model.layers.19.block_sparse_moe.experts.231.w2", "model.layers.19.block_sparse_moe.experts.232.w2", "model.layers.19.block_sparse_moe.experts.233.w2", "model.layers.19.block_sparse_moe.experts.234.w2", "model.layers.19.block_sparse_moe.experts.235.w2", "model.layers.19.block_sparse_moe.experts.236.w2", "model.layers.19.block_sparse_moe.experts.237.w2", "model.layers.19.block_sparse_moe.experts.238.w2", "model.layers.19.block_sparse_moe.experts.239.w2", "model.layers.19.block_sparse_moe.experts.240.w2", "model.layers.19.block_sparse_moe.experts.241.w2", "model.layers.19.block_sparse_moe.experts.242.w2", "model.layers.19.block_sparse_moe.experts.243.w2", "model.layers.19.block_sparse_moe.experts.244.w2", "model.layers.19.block_sparse_moe.experts.245.w2", "model.layers.19.block_sparse_moe.experts.246.w2", "model.layers.19.block_sparse_moe.experts.247.w2", "model.layers.19.block_sparse_moe.experts.248.w2", "model.layers.19.block_sparse_moe.experts.249.w2", "model.layers.19.block_sparse_moe.experts.250.w2", "model.layers.19.block_sparse_moe.experts.251.w2", "model.layers.19.block_sparse_moe.experts.252.w2", "model.layers.19.block_sparse_moe.experts.253.w2", "model.layers.19.block_sparse_moe.experts.254.w2", "model.layers.19.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0005179138854145976, "dbits": 1207959552 } ] }, { "idx": 100, "layers": [ "model.layers.20.self_attn.q_proj" ], "candidates": [ { "dkld": -6.721112877131064e-05, "dbits": 18874368 } ] }, { "idx": 101, "layers": [ "model.layers.20.self_attn.k_proj", "model.layers.20.self_attn.v_proj" ], "candidates": [ { "dkld": 0.002236079983413214, "dbits": 6291456 } ] }, { "idx": 102, "layers": [ "model.layers.20.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0009257145226001656, "dbits": 18874368 } ] }, { "idx": 103, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w1", "model.layers.20.block_sparse_moe.experts.1.w1", "model.layers.20.block_sparse_moe.experts.2.w1", "model.layers.20.block_sparse_moe.experts.3.w1", "model.layers.20.block_sparse_moe.experts.4.w1", "model.layers.20.block_sparse_moe.experts.5.w1", "model.layers.20.block_sparse_moe.experts.6.w1", "model.layers.20.block_sparse_moe.experts.7.w1", "model.layers.20.block_sparse_moe.experts.8.w1", "model.layers.20.block_sparse_moe.experts.9.w1", "model.layers.20.block_sparse_moe.experts.10.w1", "model.layers.20.block_sparse_moe.experts.11.w1", "model.layers.20.block_sparse_moe.experts.12.w1", "model.layers.20.block_sparse_moe.experts.13.w1", "model.layers.20.block_sparse_moe.experts.14.w1", "model.layers.20.block_sparse_moe.experts.15.w1", "model.layers.20.block_sparse_moe.experts.16.w1", "model.layers.20.block_sparse_moe.experts.17.w1", "model.layers.20.block_sparse_moe.experts.18.w1", "model.layers.20.block_sparse_moe.experts.19.w1", "model.layers.20.block_sparse_moe.experts.20.w1", "model.layers.20.block_sparse_moe.experts.21.w1", "model.layers.20.block_sparse_moe.experts.22.w1", "model.layers.20.block_sparse_moe.experts.23.w1", "model.layers.20.block_sparse_moe.experts.24.w1", "model.layers.20.block_sparse_moe.experts.25.w1", "model.layers.20.block_sparse_moe.experts.26.w1", "model.layers.20.block_sparse_moe.experts.27.w1", "model.layers.20.block_sparse_moe.experts.28.w1", "model.layers.20.block_sparse_moe.experts.29.w1", "model.layers.20.block_sparse_moe.experts.30.w1", "model.layers.20.block_sparse_moe.experts.31.w1", "model.layers.20.block_sparse_moe.experts.32.w1", "model.layers.20.block_sparse_moe.experts.33.w1", "model.layers.20.block_sparse_moe.experts.34.w1", "model.layers.20.block_sparse_moe.experts.35.w1", "model.layers.20.block_sparse_moe.experts.36.w1", "model.layers.20.block_sparse_moe.experts.37.w1", "model.layers.20.block_sparse_moe.experts.38.w1", "model.layers.20.block_sparse_moe.experts.39.w1", "model.layers.20.block_sparse_moe.experts.40.w1", "model.layers.20.block_sparse_moe.experts.41.w1", "model.layers.20.block_sparse_moe.experts.42.w1", "model.layers.20.block_sparse_moe.experts.43.w1", "model.layers.20.block_sparse_moe.experts.44.w1", "model.layers.20.block_sparse_moe.experts.45.w1", "model.layers.20.block_sparse_moe.experts.46.w1", "model.layers.20.block_sparse_moe.experts.47.w1", "model.layers.20.block_sparse_moe.experts.48.w1", "model.layers.20.block_sparse_moe.experts.49.w1", "model.layers.20.block_sparse_moe.experts.50.w1", "model.layers.20.block_sparse_moe.experts.51.w1", "model.layers.20.block_sparse_moe.experts.52.w1", "model.layers.20.block_sparse_moe.experts.53.w1", "model.layers.20.block_sparse_moe.experts.54.w1", "model.layers.20.block_sparse_moe.experts.55.w1", "model.layers.20.block_sparse_moe.experts.56.w1", "model.layers.20.block_sparse_moe.experts.57.w1", "model.layers.20.block_sparse_moe.experts.58.w1", "model.layers.20.block_sparse_moe.experts.59.w1", "model.layers.20.block_sparse_moe.experts.60.w1", "model.layers.20.block_sparse_moe.experts.61.w1", "model.layers.20.block_sparse_moe.experts.62.w1", "model.layers.20.block_sparse_moe.experts.63.w1", "model.layers.20.block_sparse_moe.experts.64.w1", "model.layers.20.block_sparse_moe.experts.65.w1", "model.layers.20.block_sparse_moe.experts.66.w1", "model.layers.20.block_sparse_moe.experts.67.w1", "model.layers.20.block_sparse_moe.experts.68.w1", "model.layers.20.block_sparse_moe.experts.69.w1", "model.layers.20.block_sparse_moe.experts.70.w1", "model.layers.20.block_sparse_moe.experts.71.w1", "model.layers.20.block_sparse_moe.experts.72.w1", "model.layers.20.block_sparse_moe.experts.73.w1", "model.layers.20.block_sparse_moe.experts.74.w1", "model.layers.20.block_sparse_moe.experts.75.w1", "model.layers.20.block_sparse_moe.experts.76.w1", "model.layers.20.block_sparse_moe.experts.77.w1", "model.layers.20.block_sparse_moe.experts.78.w1", "model.layers.20.block_sparse_moe.experts.79.w1", "model.layers.20.block_sparse_moe.experts.80.w1", "model.layers.20.block_sparse_moe.experts.81.w1", "model.layers.20.block_sparse_moe.experts.82.w1", "model.layers.20.block_sparse_moe.experts.83.w1", "model.layers.20.block_sparse_moe.experts.84.w1", "model.layers.20.block_sparse_moe.experts.85.w1", "model.layers.20.block_sparse_moe.experts.86.w1", "model.layers.20.block_sparse_moe.experts.87.w1", "model.layers.20.block_sparse_moe.experts.88.w1", "model.layers.20.block_sparse_moe.experts.89.w1", "model.layers.20.block_sparse_moe.experts.90.w1", "model.layers.20.block_sparse_moe.experts.91.w1", "model.layers.20.block_sparse_moe.experts.92.w1", "model.layers.20.block_sparse_moe.experts.93.w1", "model.layers.20.block_sparse_moe.experts.94.w1", "model.layers.20.block_sparse_moe.experts.95.w1", "model.layers.20.block_sparse_moe.experts.96.w1", "model.layers.20.block_sparse_moe.experts.97.w1", "model.layers.20.block_sparse_moe.experts.98.w1", "model.layers.20.block_sparse_moe.experts.99.w1", "model.layers.20.block_sparse_moe.experts.100.w1", "model.layers.20.block_sparse_moe.experts.101.w1", "model.layers.20.block_sparse_moe.experts.102.w1", "model.layers.20.block_sparse_moe.experts.103.w1", "model.layers.20.block_sparse_moe.experts.104.w1", "model.layers.20.block_sparse_moe.experts.105.w1", "model.layers.20.block_sparse_moe.experts.106.w1", "model.layers.20.block_sparse_moe.experts.107.w1", "model.layers.20.block_sparse_moe.experts.108.w1", "model.layers.20.block_sparse_moe.experts.109.w1", "model.layers.20.block_sparse_moe.experts.110.w1", "model.layers.20.block_sparse_moe.experts.111.w1", "model.layers.20.block_sparse_moe.experts.112.w1", "model.layers.20.block_sparse_moe.experts.113.w1", "model.layers.20.block_sparse_moe.experts.114.w1", "model.layers.20.block_sparse_moe.experts.115.w1", "model.layers.20.block_sparse_moe.experts.116.w1", "model.layers.20.block_sparse_moe.experts.117.w1", "model.layers.20.block_sparse_moe.experts.118.w1", "model.layers.20.block_sparse_moe.experts.119.w1", "model.layers.20.block_sparse_moe.experts.120.w1", "model.layers.20.block_sparse_moe.experts.121.w1", "model.layers.20.block_sparse_moe.experts.122.w1", "model.layers.20.block_sparse_moe.experts.123.w1", "model.layers.20.block_sparse_moe.experts.124.w1", "model.layers.20.block_sparse_moe.experts.125.w1", "model.layers.20.block_sparse_moe.experts.126.w1", "model.layers.20.block_sparse_moe.experts.127.w1", "model.layers.20.block_sparse_moe.experts.128.w1", "model.layers.20.block_sparse_moe.experts.129.w1", "model.layers.20.block_sparse_moe.experts.130.w1", "model.layers.20.block_sparse_moe.experts.131.w1", "model.layers.20.block_sparse_moe.experts.132.w1", "model.layers.20.block_sparse_moe.experts.133.w1", "model.layers.20.block_sparse_moe.experts.134.w1", "model.layers.20.block_sparse_moe.experts.135.w1", "model.layers.20.block_sparse_moe.experts.136.w1", "model.layers.20.block_sparse_moe.experts.137.w1", "model.layers.20.block_sparse_moe.experts.138.w1", "model.layers.20.block_sparse_moe.experts.139.w1", "model.layers.20.block_sparse_moe.experts.140.w1", "model.layers.20.block_sparse_moe.experts.141.w1", "model.layers.20.block_sparse_moe.experts.142.w1", "model.layers.20.block_sparse_moe.experts.143.w1", "model.layers.20.block_sparse_moe.experts.144.w1", "model.layers.20.block_sparse_moe.experts.145.w1", "model.layers.20.block_sparse_moe.experts.146.w1", "model.layers.20.block_sparse_moe.experts.147.w1", "model.layers.20.block_sparse_moe.experts.148.w1", "model.layers.20.block_sparse_moe.experts.149.w1", "model.layers.20.block_sparse_moe.experts.150.w1", "model.layers.20.block_sparse_moe.experts.151.w1", "model.layers.20.block_sparse_moe.experts.152.w1", "model.layers.20.block_sparse_moe.experts.153.w1", "model.layers.20.block_sparse_moe.experts.154.w1", "model.layers.20.block_sparse_moe.experts.155.w1", "model.layers.20.block_sparse_moe.experts.156.w1", "model.layers.20.block_sparse_moe.experts.157.w1", "model.layers.20.block_sparse_moe.experts.158.w1", "model.layers.20.block_sparse_moe.experts.159.w1", "model.layers.20.block_sparse_moe.experts.160.w1", "model.layers.20.block_sparse_moe.experts.161.w1", "model.layers.20.block_sparse_moe.experts.162.w1", "model.layers.20.block_sparse_moe.experts.163.w1", "model.layers.20.block_sparse_moe.experts.164.w1", "model.layers.20.block_sparse_moe.experts.165.w1", "model.layers.20.block_sparse_moe.experts.166.w1", "model.layers.20.block_sparse_moe.experts.167.w1", "model.layers.20.block_sparse_moe.experts.168.w1", "model.layers.20.block_sparse_moe.experts.169.w1", "model.layers.20.block_sparse_moe.experts.170.w1", "model.layers.20.block_sparse_moe.experts.171.w1", "model.layers.20.block_sparse_moe.experts.172.w1", "model.layers.20.block_sparse_moe.experts.173.w1", "model.layers.20.block_sparse_moe.experts.174.w1", "model.layers.20.block_sparse_moe.experts.175.w1", "model.layers.20.block_sparse_moe.experts.176.w1", "model.layers.20.block_sparse_moe.experts.177.w1", "model.layers.20.block_sparse_moe.experts.178.w1", "model.layers.20.block_sparse_moe.experts.179.w1", "model.layers.20.block_sparse_moe.experts.180.w1", "model.layers.20.block_sparse_moe.experts.181.w1", "model.layers.20.block_sparse_moe.experts.182.w1", "model.layers.20.block_sparse_moe.experts.183.w1", "model.layers.20.block_sparse_moe.experts.184.w1", "model.layers.20.block_sparse_moe.experts.185.w1", "model.layers.20.block_sparse_moe.experts.186.w1", "model.layers.20.block_sparse_moe.experts.187.w1", "model.layers.20.block_sparse_moe.experts.188.w1", "model.layers.20.block_sparse_moe.experts.189.w1", "model.layers.20.block_sparse_moe.experts.190.w1", "model.layers.20.block_sparse_moe.experts.191.w1", "model.layers.20.block_sparse_moe.experts.192.w1", "model.layers.20.block_sparse_moe.experts.193.w1", "model.layers.20.block_sparse_moe.experts.194.w1", "model.layers.20.block_sparse_moe.experts.195.w1", "model.layers.20.block_sparse_moe.experts.196.w1", "model.layers.20.block_sparse_moe.experts.197.w1", "model.layers.20.block_sparse_moe.experts.198.w1", "model.layers.20.block_sparse_moe.experts.199.w1", "model.layers.20.block_sparse_moe.experts.200.w1", "model.layers.20.block_sparse_moe.experts.201.w1", "model.layers.20.block_sparse_moe.experts.202.w1", "model.layers.20.block_sparse_moe.experts.203.w1", "model.layers.20.block_sparse_moe.experts.204.w1", "model.layers.20.block_sparse_moe.experts.205.w1", "model.layers.20.block_sparse_moe.experts.206.w1", "model.layers.20.block_sparse_moe.experts.207.w1", "model.layers.20.block_sparse_moe.experts.208.w1", "model.layers.20.block_sparse_moe.experts.209.w1", "model.layers.20.block_sparse_moe.experts.210.w1", "model.layers.20.block_sparse_moe.experts.211.w1", "model.layers.20.block_sparse_moe.experts.212.w1", "model.layers.20.block_sparse_moe.experts.213.w1", "model.layers.20.block_sparse_moe.experts.214.w1", "model.layers.20.block_sparse_moe.experts.215.w1", "model.layers.20.block_sparse_moe.experts.216.w1", "model.layers.20.block_sparse_moe.experts.217.w1", "model.layers.20.block_sparse_moe.experts.218.w1", "model.layers.20.block_sparse_moe.experts.219.w1", "model.layers.20.block_sparse_moe.experts.220.w1", "model.layers.20.block_sparse_moe.experts.221.w1", "model.layers.20.block_sparse_moe.experts.222.w1", "model.layers.20.block_sparse_moe.experts.223.w1", "model.layers.20.block_sparse_moe.experts.224.w1", "model.layers.20.block_sparse_moe.experts.225.w1", "model.layers.20.block_sparse_moe.experts.226.w1", "model.layers.20.block_sparse_moe.experts.227.w1", "model.layers.20.block_sparse_moe.experts.228.w1", "model.layers.20.block_sparse_moe.experts.229.w1", "model.layers.20.block_sparse_moe.experts.230.w1", "model.layers.20.block_sparse_moe.experts.231.w1", "model.layers.20.block_sparse_moe.experts.232.w1", "model.layers.20.block_sparse_moe.experts.233.w1", "model.layers.20.block_sparse_moe.experts.234.w1", "model.layers.20.block_sparse_moe.experts.235.w1", "model.layers.20.block_sparse_moe.experts.236.w1", "model.layers.20.block_sparse_moe.experts.237.w1", "model.layers.20.block_sparse_moe.experts.238.w1", "model.layers.20.block_sparse_moe.experts.239.w1", "model.layers.20.block_sparse_moe.experts.240.w1", "model.layers.20.block_sparse_moe.experts.241.w1", "model.layers.20.block_sparse_moe.experts.242.w1", "model.layers.20.block_sparse_moe.experts.243.w1", "model.layers.20.block_sparse_moe.experts.244.w1", "model.layers.20.block_sparse_moe.experts.245.w1", "model.layers.20.block_sparse_moe.experts.246.w1", "model.layers.20.block_sparse_moe.experts.247.w1", "model.layers.20.block_sparse_moe.experts.248.w1", "model.layers.20.block_sparse_moe.experts.249.w1", "model.layers.20.block_sparse_moe.experts.250.w1", "model.layers.20.block_sparse_moe.experts.251.w1", "model.layers.20.block_sparse_moe.experts.252.w1", "model.layers.20.block_sparse_moe.experts.253.w1", "model.layers.20.block_sparse_moe.experts.254.w1", "model.layers.20.block_sparse_moe.experts.255.w1", "model.layers.20.block_sparse_moe.experts.0.w3", "model.layers.20.block_sparse_moe.experts.1.w3", "model.layers.20.block_sparse_moe.experts.2.w3", "model.layers.20.block_sparse_moe.experts.3.w3", "model.layers.20.block_sparse_moe.experts.4.w3", "model.layers.20.block_sparse_moe.experts.5.w3", "model.layers.20.block_sparse_moe.experts.6.w3", "model.layers.20.block_sparse_moe.experts.7.w3", "model.layers.20.block_sparse_moe.experts.8.w3", "model.layers.20.block_sparse_moe.experts.9.w3", "model.layers.20.block_sparse_moe.experts.10.w3", "model.layers.20.block_sparse_moe.experts.11.w3", "model.layers.20.block_sparse_moe.experts.12.w3", "model.layers.20.block_sparse_moe.experts.13.w3", "model.layers.20.block_sparse_moe.experts.14.w3", "model.layers.20.block_sparse_moe.experts.15.w3", "model.layers.20.block_sparse_moe.experts.16.w3", "model.layers.20.block_sparse_moe.experts.17.w3", "model.layers.20.block_sparse_moe.experts.18.w3", "model.layers.20.block_sparse_moe.experts.19.w3", "model.layers.20.block_sparse_moe.experts.20.w3", "model.layers.20.block_sparse_moe.experts.21.w3", "model.layers.20.block_sparse_moe.experts.22.w3", "model.layers.20.block_sparse_moe.experts.23.w3", "model.layers.20.block_sparse_moe.experts.24.w3", "model.layers.20.block_sparse_moe.experts.25.w3", "model.layers.20.block_sparse_moe.experts.26.w3", "model.layers.20.block_sparse_moe.experts.27.w3", "model.layers.20.block_sparse_moe.experts.28.w3", "model.layers.20.block_sparse_moe.experts.29.w3", "model.layers.20.block_sparse_moe.experts.30.w3", "model.layers.20.block_sparse_moe.experts.31.w3", "model.layers.20.block_sparse_moe.experts.32.w3", "model.layers.20.block_sparse_moe.experts.33.w3", "model.layers.20.block_sparse_moe.experts.34.w3", "model.layers.20.block_sparse_moe.experts.35.w3", "model.layers.20.block_sparse_moe.experts.36.w3", "model.layers.20.block_sparse_moe.experts.37.w3", "model.layers.20.block_sparse_moe.experts.38.w3", "model.layers.20.block_sparse_moe.experts.39.w3", "model.layers.20.block_sparse_moe.experts.40.w3", "model.layers.20.block_sparse_moe.experts.41.w3", "model.layers.20.block_sparse_moe.experts.42.w3", "model.layers.20.block_sparse_moe.experts.43.w3", "model.layers.20.block_sparse_moe.experts.44.w3", "model.layers.20.block_sparse_moe.experts.45.w3", "model.layers.20.block_sparse_moe.experts.46.w3", "model.layers.20.block_sparse_moe.experts.47.w3", "model.layers.20.block_sparse_moe.experts.48.w3", "model.layers.20.block_sparse_moe.experts.49.w3", "model.layers.20.block_sparse_moe.experts.50.w3", "model.layers.20.block_sparse_moe.experts.51.w3", "model.layers.20.block_sparse_moe.experts.52.w3", "model.layers.20.block_sparse_moe.experts.53.w3", "model.layers.20.block_sparse_moe.experts.54.w3", "model.layers.20.block_sparse_moe.experts.55.w3", "model.layers.20.block_sparse_moe.experts.56.w3", "model.layers.20.block_sparse_moe.experts.57.w3", "model.layers.20.block_sparse_moe.experts.58.w3", "model.layers.20.block_sparse_moe.experts.59.w3", "model.layers.20.block_sparse_moe.experts.60.w3", "model.layers.20.block_sparse_moe.experts.61.w3", "model.layers.20.block_sparse_moe.experts.62.w3", "model.layers.20.block_sparse_moe.experts.63.w3", "model.layers.20.block_sparse_moe.experts.64.w3", "model.layers.20.block_sparse_moe.experts.65.w3", "model.layers.20.block_sparse_moe.experts.66.w3", "model.layers.20.block_sparse_moe.experts.67.w3", "model.layers.20.block_sparse_moe.experts.68.w3", "model.layers.20.block_sparse_moe.experts.69.w3", "model.layers.20.block_sparse_moe.experts.70.w3", "model.layers.20.block_sparse_moe.experts.71.w3", "model.layers.20.block_sparse_moe.experts.72.w3", "model.layers.20.block_sparse_moe.experts.73.w3", "model.layers.20.block_sparse_moe.experts.74.w3", "model.layers.20.block_sparse_moe.experts.75.w3", "model.layers.20.block_sparse_moe.experts.76.w3", "model.layers.20.block_sparse_moe.experts.77.w3", "model.layers.20.block_sparse_moe.experts.78.w3", "model.layers.20.block_sparse_moe.experts.79.w3", "model.layers.20.block_sparse_moe.experts.80.w3", "model.layers.20.block_sparse_moe.experts.81.w3", "model.layers.20.block_sparse_moe.experts.82.w3", "model.layers.20.block_sparse_moe.experts.83.w3", "model.layers.20.block_sparse_moe.experts.84.w3", "model.layers.20.block_sparse_moe.experts.85.w3", "model.layers.20.block_sparse_moe.experts.86.w3", "model.layers.20.block_sparse_moe.experts.87.w3", "model.layers.20.block_sparse_moe.experts.88.w3", "model.layers.20.block_sparse_moe.experts.89.w3", "model.layers.20.block_sparse_moe.experts.90.w3", "model.layers.20.block_sparse_moe.experts.91.w3", "model.layers.20.block_sparse_moe.experts.92.w3", "model.layers.20.block_sparse_moe.experts.93.w3", "model.layers.20.block_sparse_moe.experts.94.w3", "model.layers.20.block_sparse_moe.experts.95.w3", "model.layers.20.block_sparse_moe.experts.96.w3", "model.layers.20.block_sparse_moe.experts.97.w3", "model.layers.20.block_sparse_moe.experts.98.w3", "model.layers.20.block_sparse_moe.experts.99.w3", "model.layers.20.block_sparse_moe.experts.100.w3", "model.layers.20.block_sparse_moe.experts.101.w3", "model.layers.20.block_sparse_moe.experts.102.w3", "model.layers.20.block_sparse_moe.experts.103.w3", "model.layers.20.block_sparse_moe.experts.104.w3", "model.layers.20.block_sparse_moe.experts.105.w3", "model.layers.20.block_sparse_moe.experts.106.w3", "model.layers.20.block_sparse_moe.experts.107.w3", "model.layers.20.block_sparse_moe.experts.108.w3", "model.layers.20.block_sparse_moe.experts.109.w3", "model.layers.20.block_sparse_moe.experts.110.w3", "model.layers.20.block_sparse_moe.experts.111.w3", "model.layers.20.block_sparse_moe.experts.112.w3", "model.layers.20.block_sparse_moe.experts.113.w3", "model.layers.20.block_sparse_moe.experts.114.w3", "model.layers.20.block_sparse_moe.experts.115.w3", "model.layers.20.block_sparse_moe.experts.116.w3", "model.layers.20.block_sparse_moe.experts.117.w3", "model.layers.20.block_sparse_moe.experts.118.w3", "model.layers.20.block_sparse_moe.experts.119.w3", "model.layers.20.block_sparse_moe.experts.120.w3", "model.layers.20.block_sparse_moe.experts.121.w3", "model.layers.20.block_sparse_moe.experts.122.w3", "model.layers.20.block_sparse_moe.experts.123.w3", "model.layers.20.block_sparse_moe.experts.124.w3", "model.layers.20.block_sparse_moe.experts.125.w3", "model.layers.20.block_sparse_moe.experts.126.w3", "model.layers.20.block_sparse_moe.experts.127.w3", "model.layers.20.block_sparse_moe.experts.128.w3", "model.layers.20.block_sparse_moe.experts.129.w3", "model.layers.20.block_sparse_moe.experts.130.w3", "model.layers.20.block_sparse_moe.experts.131.w3", "model.layers.20.block_sparse_moe.experts.132.w3", "model.layers.20.block_sparse_moe.experts.133.w3", "model.layers.20.block_sparse_moe.experts.134.w3", "model.layers.20.block_sparse_moe.experts.135.w3", "model.layers.20.block_sparse_moe.experts.136.w3", "model.layers.20.block_sparse_moe.experts.137.w3", "model.layers.20.block_sparse_moe.experts.138.w3", "model.layers.20.block_sparse_moe.experts.139.w3", "model.layers.20.block_sparse_moe.experts.140.w3", "model.layers.20.block_sparse_moe.experts.141.w3", "model.layers.20.block_sparse_moe.experts.142.w3", "model.layers.20.block_sparse_moe.experts.143.w3", "model.layers.20.block_sparse_moe.experts.144.w3", "model.layers.20.block_sparse_moe.experts.145.w3", "model.layers.20.block_sparse_moe.experts.146.w3", "model.layers.20.block_sparse_moe.experts.147.w3", "model.layers.20.block_sparse_moe.experts.148.w3", "model.layers.20.block_sparse_moe.experts.149.w3", "model.layers.20.block_sparse_moe.experts.150.w3", "model.layers.20.block_sparse_moe.experts.151.w3", "model.layers.20.block_sparse_moe.experts.152.w3", "model.layers.20.block_sparse_moe.experts.153.w3", "model.layers.20.block_sparse_moe.experts.154.w3", "model.layers.20.block_sparse_moe.experts.155.w3", "model.layers.20.block_sparse_moe.experts.156.w3", "model.layers.20.block_sparse_moe.experts.157.w3", "model.layers.20.block_sparse_moe.experts.158.w3", "model.layers.20.block_sparse_moe.experts.159.w3", "model.layers.20.block_sparse_moe.experts.160.w3", "model.layers.20.block_sparse_moe.experts.161.w3", "model.layers.20.block_sparse_moe.experts.162.w3", "model.layers.20.block_sparse_moe.experts.163.w3", "model.layers.20.block_sparse_moe.experts.164.w3", "model.layers.20.block_sparse_moe.experts.165.w3", "model.layers.20.block_sparse_moe.experts.166.w3", "model.layers.20.block_sparse_moe.experts.167.w3", "model.layers.20.block_sparse_moe.experts.168.w3", "model.layers.20.block_sparse_moe.experts.169.w3", "model.layers.20.block_sparse_moe.experts.170.w3", "model.layers.20.block_sparse_moe.experts.171.w3", "model.layers.20.block_sparse_moe.experts.172.w3", "model.layers.20.block_sparse_moe.experts.173.w3", "model.layers.20.block_sparse_moe.experts.174.w3", "model.layers.20.block_sparse_moe.experts.175.w3", "model.layers.20.block_sparse_moe.experts.176.w3", "model.layers.20.block_sparse_moe.experts.177.w3", "model.layers.20.block_sparse_moe.experts.178.w3", "model.layers.20.block_sparse_moe.experts.179.w3", "model.layers.20.block_sparse_moe.experts.180.w3", "model.layers.20.block_sparse_moe.experts.181.w3", "model.layers.20.block_sparse_moe.experts.182.w3", "model.layers.20.block_sparse_moe.experts.183.w3", "model.layers.20.block_sparse_moe.experts.184.w3", "model.layers.20.block_sparse_moe.experts.185.w3", "model.layers.20.block_sparse_moe.experts.186.w3", "model.layers.20.block_sparse_moe.experts.187.w3", "model.layers.20.block_sparse_moe.experts.188.w3", "model.layers.20.block_sparse_moe.experts.189.w3", "model.layers.20.block_sparse_moe.experts.190.w3", "model.layers.20.block_sparse_moe.experts.191.w3", "model.layers.20.block_sparse_moe.experts.192.w3", "model.layers.20.block_sparse_moe.experts.193.w3", "model.layers.20.block_sparse_moe.experts.194.w3", "model.layers.20.block_sparse_moe.experts.195.w3", "model.layers.20.block_sparse_moe.experts.196.w3", "model.layers.20.block_sparse_moe.experts.197.w3", "model.layers.20.block_sparse_moe.experts.198.w3", "model.layers.20.block_sparse_moe.experts.199.w3", "model.layers.20.block_sparse_moe.experts.200.w3", "model.layers.20.block_sparse_moe.experts.201.w3", "model.layers.20.block_sparse_moe.experts.202.w3", "model.layers.20.block_sparse_moe.experts.203.w3", "model.layers.20.block_sparse_moe.experts.204.w3", "model.layers.20.block_sparse_moe.experts.205.w3", "model.layers.20.block_sparse_moe.experts.206.w3", "model.layers.20.block_sparse_moe.experts.207.w3", "model.layers.20.block_sparse_moe.experts.208.w3", "model.layers.20.block_sparse_moe.experts.209.w3", "model.layers.20.block_sparse_moe.experts.210.w3", "model.layers.20.block_sparse_moe.experts.211.w3", "model.layers.20.block_sparse_moe.experts.212.w3", "model.layers.20.block_sparse_moe.experts.213.w3", "model.layers.20.block_sparse_moe.experts.214.w3", "model.layers.20.block_sparse_moe.experts.215.w3", "model.layers.20.block_sparse_moe.experts.216.w3", "model.layers.20.block_sparse_moe.experts.217.w3", "model.layers.20.block_sparse_moe.experts.218.w3", "model.layers.20.block_sparse_moe.experts.219.w3", "model.layers.20.block_sparse_moe.experts.220.w3", "model.layers.20.block_sparse_moe.experts.221.w3", "model.layers.20.block_sparse_moe.experts.222.w3", "model.layers.20.block_sparse_moe.experts.223.w3", "model.layers.20.block_sparse_moe.experts.224.w3", "model.layers.20.block_sparse_moe.experts.225.w3", "model.layers.20.block_sparse_moe.experts.226.w3", "model.layers.20.block_sparse_moe.experts.227.w3", "model.layers.20.block_sparse_moe.experts.228.w3", "model.layers.20.block_sparse_moe.experts.229.w3", "model.layers.20.block_sparse_moe.experts.230.w3", "model.layers.20.block_sparse_moe.experts.231.w3", "model.layers.20.block_sparse_moe.experts.232.w3", "model.layers.20.block_sparse_moe.experts.233.w3", "model.layers.20.block_sparse_moe.experts.234.w3", "model.layers.20.block_sparse_moe.experts.235.w3", "model.layers.20.block_sparse_moe.experts.236.w3", "model.layers.20.block_sparse_moe.experts.237.w3", "model.layers.20.block_sparse_moe.experts.238.w3", "model.layers.20.block_sparse_moe.experts.239.w3", "model.layers.20.block_sparse_moe.experts.240.w3", "model.layers.20.block_sparse_moe.experts.241.w3", "model.layers.20.block_sparse_moe.experts.242.w3", "model.layers.20.block_sparse_moe.experts.243.w3", "model.layers.20.block_sparse_moe.experts.244.w3", "model.layers.20.block_sparse_moe.experts.245.w3", "model.layers.20.block_sparse_moe.experts.246.w3", "model.layers.20.block_sparse_moe.experts.247.w3", "model.layers.20.block_sparse_moe.experts.248.w3", "model.layers.20.block_sparse_moe.experts.249.w3", "model.layers.20.block_sparse_moe.experts.250.w3", "model.layers.20.block_sparse_moe.experts.251.w3", "model.layers.20.block_sparse_moe.experts.252.w3", "model.layers.20.block_sparse_moe.experts.253.w3", "model.layers.20.block_sparse_moe.experts.254.w3", "model.layers.20.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00046541895717382153, "dbits": 2415919104 } ] }, { "idx": 104, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w2", "model.layers.20.block_sparse_moe.experts.1.w2", "model.layers.20.block_sparse_moe.experts.2.w2", "model.layers.20.block_sparse_moe.experts.3.w2", "model.layers.20.block_sparse_moe.experts.4.w2", "model.layers.20.block_sparse_moe.experts.5.w2", "model.layers.20.block_sparse_moe.experts.6.w2", "model.layers.20.block_sparse_moe.experts.7.w2", "model.layers.20.block_sparse_moe.experts.8.w2", "model.layers.20.block_sparse_moe.experts.9.w2", "model.layers.20.block_sparse_moe.experts.10.w2", "model.layers.20.block_sparse_moe.experts.11.w2", "model.layers.20.block_sparse_moe.experts.12.w2", "model.layers.20.block_sparse_moe.experts.13.w2", "model.layers.20.block_sparse_moe.experts.14.w2", "model.layers.20.block_sparse_moe.experts.15.w2", "model.layers.20.block_sparse_moe.experts.16.w2", "model.layers.20.block_sparse_moe.experts.17.w2", "model.layers.20.block_sparse_moe.experts.18.w2", "model.layers.20.block_sparse_moe.experts.19.w2", "model.layers.20.block_sparse_moe.experts.20.w2", "model.layers.20.block_sparse_moe.experts.21.w2", "model.layers.20.block_sparse_moe.experts.22.w2", "model.layers.20.block_sparse_moe.experts.23.w2", "model.layers.20.block_sparse_moe.experts.24.w2", "model.layers.20.block_sparse_moe.experts.25.w2", "model.layers.20.block_sparse_moe.experts.26.w2", "model.layers.20.block_sparse_moe.experts.27.w2", "model.layers.20.block_sparse_moe.experts.28.w2", "model.layers.20.block_sparse_moe.experts.29.w2", "model.layers.20.block_sparse_moe.experts.30.w2", "model.layers.20.block_sparse_moe.experts.31.w2", "model.layers.20.block_sparse_moe.experts.32.w2", "model.layers.20.block_sparse_moe.experts.33.w2", "model.layers.20.block_sparse_moe.experts.34.w2", "model.layers.20.block_sparse_moe.experts.35.w2", "model.layers.20.block_sparse_moe.experts.36.w2", "model.layers.20.block_sparse_moe.experts.37.w2", "model.layers.20.block_sparse_moe.experts.38.w2", "model.layers.20.block_sparse_moe.experts.39.w2", "model.layers.20.block_sparse_moe.experts.40.w2", "model.layers.20.block_sparse_moe.experts.41.w2", "model.layers.20.block_sparse_moe.experts.42.w2", "model.layers.20.block_sparse_moe.experts.43.w2", "model.layers.20.block_sparse_moe.experts.44.w2", "model.layers.20.block_sparse_moe.experts.45.w2", "model.layers.20.block_sparse_moe.experts.46.w2", "model.layers.20.block_sparse_moe.experts.47.w2", "model.layers.20.block_sparse_moe.experts.48.w2", "model.layers.20.block_sparse_moe.experts.49.w2", "model.layers.20.block_sparse_moe.experts.50.w2", "model.layers.20.block_sparse_moe.experts.51.w2", "model.layers.20.block_sparse_moe.experts.52.w2", "model.layers.20.block_sparse_moe.experts.53.w2", "model.layers.20.block_sparse_moe.experts.54.w2", "model.layers.20.block_sparse_moe.experts.55.w2", "model.layers.20.block_sparse_moe.experts.56.w2", "model.layers.20.block_sparse_moe.experts.57.w2", "model.layers.20.block_sparse_moe.experts.58.w2", "model.layers.20.block_sparse_moe.experts.59.w2", "model.layers.20.block_sparse_moe.experts.60.w2", "model.layers.20.block_sparse_moe.experts.61.w2", "model.layers.20.block_sparse_moe.experts.62.w2", "model.layers.20.block_sparse_moe.experts.63.w2", "model.layers.20.block_sparse_moe.experts.64.w2", "model.layers.20.block_sparse_moe.experts.65.w2", "model.layers.20.block_sparse_moe.experts.66.w2", "model.layers.20.block_sparse_moe.experts.67.w2", "model.layers.20.block_sparse_moe.experts.68.w2", "model.layers.20.block_sparse_moe.experts.69.w2", "model.layers.20.block_sparse_moe.experts.70.w2", "model.layers.20.block_sparse_moe.experts.71.w2", "model.layers.20.block_sparse_moe.experts.72.w2", "model.layers.20.block_sparse_moe.experts.73.w2", "model.layers.20.block_sparse_moe.experts.74.w2", "model.layers.20.block_sparse_moe.experts.75.w2", "model.layers.20.block_sparse_moe.experts.76.w2", "model.layers.20.block_sparse_moe.experts.77.w2", "model.layers.20.block_sparse_moe.experts.78.w2", "model.layers.20.block_sparse_moe.experts.79.w2", "model.layers.20.block_sparse_moe.experts.80.w2", "model.layers.20.block_sparse_moe.experts.81.w2", "model.layers.20.block_sparse_moe.experts.82.w2", "model.layers.20.block_sparse_moe.experts.83.w2", "model.layers.20.block_sparse_moe.experts.84.w2", "model.layers.20.block_sparse_moe.experts.85.w2", "model.layers.20.block_sparse_moe.experts.86.w2", "model.layers.20.block_sparse_moe.experts.87.w2", "model.layers.20.block_sparse_moe.experts.88.w2", "model.layers.20.block_sparse_moe.experts.89.w2", "model.layers.20.block_sparse_moe.experts.90.w2", "model.layers.20.block_sparse_moe.experts.91.w2", "model.layers.20.block_sparse_moe.experts.92.w2", "model.layers.20.block_sparse_moe.experts.93.w2", "model.layers.20.block_sparse_moe.experts.94.w2", "model.layers.20.block_sparse_moe.experts.95.w2", "model.layers.20.block_sparse_moe.experts.96.w2", "model.layers.20.block_sparse_moe.experts.97.w2", "model.layers.20.block_sparse_moe.experts.98.w2", "model.layers.20.block_sparse_moe.experts.99.w2", "model.layers.20.block_sparse_moe.experts.100.w2", "model.layers.20.block_sparse_moe.experts.101.w2", "model.layers.20.block_sparse_moe.experts.102.w2", "model.layers.20.block_sparse_moe.experts.103.w2", "model.layers.20.block_sparse_moe.experts.104.w2", "model.layers.20.block_sparse_moe.experts.105.w2", "model.layers.20.block_sparse_moe.experts.106.w2", "model.layers.20.block_sparse_moe.experts.107.w2", "model.layers.20.block_sparse_moe.experts.108.w2", "model.layers.20.block_sparse_moe.experts.109.w2", "model.layers.20.block_sparse_moe.experts.110.w2", "model.layers.20.block_sparse_moe.experts.111.w2", "model.layers.20.block_sparse_moe.experts.112.w2", "model.layers.20.block_sparse_moe.experts.113.w2", "model.layers.20.block_sparse_moe.experts.114.w2", "model.layers.20.block_sparse_moe.experts.115.w2", "model.layers.20.block_sparse_moe.experts.116.w2", "model.layers.20.block_sparse_moe.experts.117.w2", "model.layers.20.block_sparse_moe.experts.118.w2", "model.layers.20.block_sparse_moe.experts.119.w2", "model.layers.20.block_sparse_moe.experts.120.w2", "model.layers.20.block_sparse_moe.experts.121.w2", "model.layers.20.block_sparse_moe.experts.122.w2", "model.layers.20.block_sparse_moe.experts.123.w2", "model.layers.20.block_sparse_moe.experts.124.w2", "model.layers.20.block_sparse_moe.experts.125.w2", "model.layers.20.block_sparse_moe.experts.126.w2", "model.layers.20.block_sparse_moe.experts.127.w2", "model.layers.20.block_sparse_moe.experts.128.w2", "model.layers.20.block_sparse_moe.experts.129.w2", "model.layers.20.block_sparse_moe.experts.130.w2", "model.layers.20.block_sparse_moe.experts.131.w2", "model.layers.20.block_sparse_moe.experts.132.w2", "model.layers.20.block_sparse_moe.experts.133.w2", "model.layers.20.block_sparse_moe.experts.134.w2", "model.layers.20.block_sparse_moe.experts.135.w2", "model.layers.20.block_sparse_moe.experts.136.w2", "model.layers.20.block_sparse_moe.experts.137.w2", "model.layers.20.block_sparse_moe.experts.138.w2", "model.layers.20.block_sparse_moe.experts.139.w2", "model.layers.20.block_sparse_moe.experts.140.w2", "model.layers.20.block_sparse_moe.experts.141.w2", "model.layers.20.block_sparse_moe.experts.142.w2", "model.layers.20.block_sparse_moe.experts.143.w2", "model.layers.20.block_sparse_moe.experts.144.w2", "model.layers.20.block_sparse_moe.experts.145.w2", "model.layers.20.block_sparse_moe.experts.146.w2", "model.layers.20.block_sparse_moe.experts.147.w2", "model.layers.20.block_sparse_moe.experts.148.w2", "model.layers.20.block_sparse_moe.experts.149.w2", "model.layers.20.block_sparse_moe.experts.150.w2", "model.layers.20.block_sparse_moe.experts.151.w2", "model.layers.20.block_sparse_moe.experts.152.w2", "model.layers.20.block_sparse_moe.experts.153.w2", "model.layers.20.block_sparse_moe.experts.154.w2", "model.layers.20.block_sparse_moe.experts.155.w2", "model.layers.20.block_sparse_moe.experts.156.w2", "model.layers.20.block_sparse_moe.experts.157.w2", "model.layers.20.block_sparse_moe.experts.158.w2", "model.layers.20.block_sparse_moe.experts.159.w2", "model.layers.20.block_sparse_moe.experts.160.w2", "model.layers.20.block_sparse_moe.experts.161.w2", "model.layers.20.block_sparse_moe.experts.162.w2", "model.layers.20.block_sparse_moe.experts.163.w2", "model.layers.20.block_sparse_moe.experts.164.w2", "model.layers.20.block_sparse_moe.experts.165.w2", "model.layers.20.block_sparse_moe.experts.166.w2", "model.layers.20.block_sparse_moe.experts.167.w2", "model.layers.20.block_sparse_moe.experts.168.w2", "model.layers.20.block_sparse_moe.experts.169.w2", "model.layers.20.block_sparse_moe.experts.170.w2", "model.layers.20.block_sparse_moe.experts.171.w2", "model.layers.20.block_sparse_moe.experts.172.w2", "model.layers.20.block_sparse_moe.experts.173.w2", "model.layers.20.block_sparse_moe.experts.174.w2", "model.layers.20.block_sparse_moe.experts.175.w2", "model.layers.20.block_sparse_moe.experts.176.w2", "model.layers.20.block_sparse_moe.experts.177.w2", "model.layers.20.block_sparse_moe.experts.178.w2", "model.layers.20.block_sparse_moe.experts.179.w2", "model.layers.20.block_sparse_moe.experts.180.w2", "model.layers.20.block_sparse_moe.experts.181.w2", "model.layers.20.block_sparse_moe.experts.182.w2", "model.layers.20.block_sparse_moe.experts.183.w2", "model.layers.20.block_sparse_moe.experts.184.w2", "model.layers.20.block_sparse_moe.experts.185.w2", "model.layers.20.block_sparse_moe.experts.186.w2", "model.layers.20.block_sparse_moe.experts.187.w2", "model.layers.20.block_sparse_moe.experts.188.w2", "model.layers.20.block_sparse_moe.experts.189.w2", "model.layers.20.block_sparse_moe.experts.190.w2", "model.layers.20.block_sparse_moe.experts.191.w2", "model.layers.20.block_sparse_moe.experts.192.w2", "model.layers.20.block_sparse_moe.experts.193.w2", "model.layers.20.block_sparse_moe.experts.194.w2", "model.layers.20.block_sparse_moe.experts.195.w2", "model.layers.20.block_sparse_moe.experts.196.w2", "model.layers.20.block_sparse_moe.experts.197.w2", "model.layers.20.block_sparse_moe.experts.198.w2", "model.layers.20.block_sparse_moe.experts.199.w2", "model.layers.20.block_sparse_moe.experts.200.w2", "model.layers.20.block_sparse_moe.experts.201.w2", "model.layers.20.block_sparse_moe.experts.202.w2", "model.layers.20.block_sparse_moe.experts.203.w2", "model.layers.20.block_sparse_moe.experts.204.w2", "model.layers.20.block_sparse_moe.experts.205.w2", "model.layers.20.block_sparse_moe.experts.206.w2", "model.layers.20.block_sparse_moe.experts.207.w2", "model.layers.20.block_sparse_moe.experts.208.w2", "model.layers.20.block_sparse_moe.experts.209.w2", "model.layers.20.block_sparse_moe.experts.210.w2", "model.layers.20.block_sparse_moe.experts.211.w2", "model.layers.20.block_sparse_moe.experts.212.w2", "model.layers.20.block_sparse_moe.experts.213.w2", "model.layers.20.block_sparse_moe.experts.214.w2", "model.layers.20.block_sparse_moe.experts.215.w2", "model.layers.20.block_sparse_moe.experts.216.w2", "model.layers.20.block_sparse_moe.experts.217.w2", "model.layers.20.block_sparse_moe.experts.218.w2", "model.layers.20.block_sparse_moe.experts.219.w2", "model.layers.20.block_sparse_moe.experts.220.w2", "model.layers.20.block_sparse_moe.experts.221.w2", "model.layers.20.block_sparse_moe.experts.222.w2", "model.layers.20.block_sparse_moe.experts.223.w2", "model.layers.20.block_sparse_moe.experts.224.w2", "model.layers.20.block_sparse_moe.experts.225.w2", "model.layers.20.block_sparse_moe.experts.226.w2", "model.layers.20.block_sparse_moe.experts.227.w2", "model.layers.20.block_sparse_moe.experts.228.w2", "model.layers.20.block_sparse_moe.experts.229.w2", "model.layers.20.block_sparse_moe.experts.230.w2", "model.layers.20.block_sparse_moe.experts.231.w2", "model.layers.20.block_sparse_moe.experts.232.w2", "model.layers.20.block_sparse_moe.experts.233.w2", "model.layers.20.block_sparse_moe.experts.234.w2", "model.layers.20.block_sparse_moe.experts.235.w2", "model.layers.20.block_sparse_moe.experts.236.w2", "model.layers.20.block_sparse_moe.experts.237.w2", "model.layers.20.block_sparse_moe.experts.238.w2", "model.layers.20.block_sparse_moe.experts.239.w2", "model.layers.20.block_sparse_moe.experts.240.w2", "model.layers.20.block_sparse_moe.experts.241.w2", "model.layers.20.block_sparse_moe.experts.242.w2", "model.layers.20.block_sparse_moe.experts.243.w2", "model.layers.20.block_sparse_moe.experts.244.w2", "model.layers.20.block_sparse_moe.experts.245.w2", "model.layers.20.block_sparse_moe.experts.246.w2", "model.layers.20.block_sparse_moe.experts.247.w2", "model.layers.20.block_sparse_moe.experts.248.w2", "model.layers.20.block_sparse_moe.experts.249.w2", "model.layers.20.block_sparse_moe.experts.250.w2", "model.layers.20.block_sparse_moe.experts.251.w2", "model.layers.20.block_sparse_moe.experts.252.w2", "model.layers.20.block_sparse_moe.experts.253.w2", "model.layers.20.block_sparse_moe.experts.254.w2", "model.layers.20.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0005308231338858604, "dbits": 1207959552 } ] }, { "idx": 105, "layers": [ "model.layers.21.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0008922645822167369, "dbits": 18874368 } ] }, { "idx": 106, "layers": [ "model.layers.21.self_attn.k_proj", "model.layers.21.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00108431838452816, "dbits": 6291456 } ] }, { "idx": 107, "layers": [ "model.layers.21.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0007731694728136118, "dbits": 18874368 } ] }, { "idx": 108, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w1", "model.layers.21.block_sparse_moe.experts.1.w1", "model.layers.21.block_sparse_moe.experts.2.w1", "model.layers.21.block_sparse_moe.experts.3.w1", "model.layers.21.block_sparse_moe.experts.4.w1", "model.layers.21.block_sparse_moe.experts.5.w1", "model.layers.21.block_sparse_moe.experts.6.w1", "model.layers.21.block_sparse_moe.experts.7.w1", "model.layers.21.block_sparse_moe.experts.8.w1", "model.layers.21.block_sparse_moe.experts.9.w1", "model.layers.21.block_sparse_moe.experts.10.w1", "model.layers.21.block_sparse_moe.experts.11.w1", "model.layers.21.block_sparse_moe.experts.12.w1", "model.layers.21.block_sparse_moe.experts.13.w1", "model.layers.21.block_sparse_moe.experts.14.w1", "model.layers.21.block_sparse_moe.experts.15.w1", "model.layers.21.block_sparse_moe.experts.16.w1", "model.layers.21.block_sparse_moe.experts.17.w1", "model.layers.21.block_sparse_moe.experts.18.w1", "model.layers.21.block_sparse_moe.experts.19.w1", "model.layers.21.block_sparse_moe.experts.20.w1", "model.layers.21.block_sparse_moe.experts.21.w1", "model.layers.21.block_sparse_moe.experts.22.w1", "model.layers.21.block_sparse_moe.experts.23.w1", "model.layers.21.block_sparse_moe.experts.24.w1", "model.layers.21.block_sparse_moe.experts.25.w1", "model.layers.21.block_sparse_moe.experts.26.w1", "model.layers.21.block_sparse_moe.experts.27.w1", "model.layers.21.block_sparse_moe.experts.28.w1", "model.layers.21.block_sparse_moe.experts.29.w1", "model.layers.21.block_sparse_moe.experts.30.w1", "model.layers.21.block_sparse_moe.experts.31.w1", "model.layers.21.block_sparse_moe.experts.32.w1", "model.layers.21.block_sparse_moe.experts.33.w1", "model.layers.21.block_sparse_moe.experts.34.w1", "model.layers.21.block_sparse_moe.experts.35.w1", "model.layers.21.block_sparse_moe.experts.36.w1", "model.layers.21.block_sparse_moe.experts.37.w1", "model.layers.21.block_sparse_moe.experts.38.w1", "model.layers.21.block_sparse_moe.experts.39.w1", "model.layers.21.block_sparse_moe.experts.40.w1", "model.layers.21.block_sparse_moe.experts.41.w1", "model.layers.21.block_sparse_moe.experts.42.w1", "model.layers.21.block_sparse_moe.experts.43.w1", "model.layers.21.block_sparse_moe.experts.44.w1", "model.layers.21.block_sparse_moe.experts.45.w1", "model.layers.21.block_sparse_moe.experts.46.w1", "model.layers.21.block_sparse_moe.experts.47.w1", "model.layers.21.block_sparse_moe.experts.48.w1", "model.layers.21.block_sparse_moe.experts.49.w1", "model.layers.21.block_sparse_moe.experts.50.w1", "model.layers.21.block_sparse_moe.experts.51.w1", "model.layers.21.block_sparse_moe.experts.52.w1", "model.layers.21.block_sparse_moe.experts.53.w1", "model.layers.21.block_sparse_moe.experts.54.w1", "model.layers.21.block_sparse_moe.experts.55.w1", "model.layers.21.block_sparse_moe.experts.56.w1", "model.layers.21.block_sparse_moe.experts.57.w1", "model.layers.21.block_sparse_moe.experts.58.w1", "model.layers.21.block_sparse_moe.experts.59.w1", "model.layers.21.block_sparse_moe.experts.60.w1", "model.layers.21.block_sparse_moe.experts.61.w1", "model.layers.21.block_sparse_moe.experts.62.w1", "model.layers.21.block_sparse_moe.experts.63.w1", "model.layers.21.block_sparse_moe.experts.64.w1", "model.layers.21.block_sparse_moe.experts.65.w1", "model.layers.21.block_sparse_moe.experts.66.w1", "model.layers.21.block_sparse_moe.experts.67.w1", "model.layers.21.block_sparse_moe.experts.68.w1", "model.layers.21.block_sparse_moe.experts.69.w1", "model.layers.21.block_sparse_moe.experts.70.w1", "model.layers.21.block_sparse_moe.experts.71.w1", "model.layers.21.block_sparse_moe.experts.72.w1", "model.layers.21.block_sparse_moe.experts.73.w1", "model.layers.21.block_sparse_moe.experts.74.w1", "model.layers.21.block_sparse_moe.experts.75.w1", "model.layers.21.block_sparse_moe.experts.76.w1", "model.layers.21.block_sparse_moe.experts.77.w1", "model.layers.21.block_sparse_moe.experts.78.w1", "model.layers.21.block_sparse_moe.experts.79.w1", "model.layers.21.block_sparse_moe.experts.80.w1", "model.layers.21.block_sparse_moe.experts.81.w1", "model.layers.21.block_sparse_moe.experts.82.w1", "model.layers.21.block_sparse_moe.experts.83.w1", "model.layers.21.block_sparse_moe.experts.84.w1", "model.layers.21.block_sparse_moe.experts.85.w1", "model.layers.21.block_sparse_moe.experts.86.w1", "model.layers.21.block_sparse_moe.experts.87.w1", "model.layers.21.block_sparse_moe.experts.88.w1", "model.layers.21.block_sparse_moe.experts.89.w1", "model.layers.21.block_sparse_moe.experts.90.w1", "model.layers.21.block_sparse_moe.experts.91.w1", "model.layers.21.block_sparse_moe.experts.92.w1", "model.layers.21.block_sparse_moe.experts.93.w1", "model.layers.21.block_sparse_moe.experts.94.w1", "model.layers.21.block_sparse_moe.experts.95.w1", "model.layers.21.block_sparse_moe.experts.96.w1", "model.layers.21.block_sparse_moe.experts.97.w1", "model.layers.21.block_sparse_moe.experts.98.w1", "model.layers.21.block_sparse_moe.experts.99.w1", "model.layers.21.block_sparse_moe.experts.100.w1", "model.layers.21.block_sparse_moe.experts.101.w1", "model.layers.21.block_sparse_moe.experts.102.w1", "model.layers.21.block_sparse_moe.experts.103.w1", "model.layers.21.block_sparse_moe.experts.104.w1", "model.layers.21.block_sparse_moe.experts.105.w1", "model.layers.21.block_sparse_moe.experts.106.w1", "model.layers.21.block_sparse_moe.experts.107.w1", "model.layers.21.block_sparse_moe.experts.108.w1", "model.layers.21.block_sparse_moe.experts.109.w1", "model.layers.21.block_sparse_moe.experts.110.w1", "model.layers.21.block_sparse_moe.experts.111.w1", "model.layers.21.block_sparse_moe.experts.112.w1", "model.layers.21.block_sparse_moe.experts.113.w1", "model.layers.21.block_sparse_moe.experts.114.w1", "model.layers.21.block_sparse_moe.experts.115.w1", "model.layers.21.block_sparse_moe.experts.116.w1", "model.layers.21.block_sparse_moe.experts.117.w1", "model.layers.21.block_sparse_moe.experts.118.w1", "model.layers.21.block_sparse_moe.experts.119.w1", "model.layers.21.block_sparse_moe.experts.120.w1", "model.layers.21.block_sparse_moe.experts.121.w1", "model.layers.21.block_sparse_moe.experts.122.w1", "model.layers.21.block_sparse_moe.experts.123.w1", "model.layers.21.block_sparse_moe.experts.124.w1", "model.layers.21.block_sparse_moe.experts.125.w1", "model.layers.21.block_sparse_moe.experts.126.w1", "model.layers.21.block_sparse_moe.experts.127.w1", "model.layers.21.block_sparse_moe.experts.128.w1", "model.layers.21.block_sparse_moe.experts.129.w1", "model.layers.21.block_sparse_moe.experts.130.w1", "model.layers.21.block_sparse_moe.experts.131.w1", "model.layers.21.block_sparse_moe.experts.132.w1", "model.layers.21.block_sparse_moe.experts.133.w1", "model.layers.21.block_sparse_moe.experts.134.w1", "model.layers.21.block_sparse_moe.experts.135.w1", "model.layers.21.block_sparse_moe.experts.136.w1", "model.layers.21.block_sparse_moe.experts.137.w1", "model.layers.21.block_sparse_moe.experts.138.w1", "model.layers.21.block_sparse_moe.experts.139.w1", "model.layers.21.block_sparse_moe.experts.140.w1", "model.layers.21.block_sparse_moe.experts.141.w1", "model.layers.21.block_sparse_moe.experts.142.w1", "model.layers.21.block_sparse_moe.experts.143.w1", "model.layers.21.block_sparse_moe.experts.144.w1", "model.layers.21.block_sparse_moe.experts.145.w1", "model.layers.21.block_sparse_moe.experts.146.w1", "model.layers.21.block_sparse_moe.experts.147.w1", "model.layers.21.block_sparse_moe.experts.148.w1", "model.layers.21.block_sparse_moe.experts.149.w1", "model.layers.21.block_sparse_moe.experts.150.w1", "model.layers.21.block_sparse_moe.experts.151.w1", "model.layers.21.block_sparse_moe.experts.152.w1", "model.layers.21.block_sparse_moe.experts.153.w1", "model.layers.21.block_sparse_moe.experts.154.w1", "model.layers.21.block_sparse_moe.experts.155.w1", "model.layers.21.block_sparse_moe.experts.156.w1", "model.layers.21.block_sparse_moe.experts.157.w1", "model.layers.21.block_sparse_moe.experts.158.w1", "model.layers.21.block_sparse_moe.experts.159.w1", "model.layers.21.block_sparse_moe.experts.160.w1", "model.layers.21.block_sparse_moe.experts.161.w1", "model.layers.21.block_sparse_moe.experts.162.w1", "model.layers.21.block_sparse_moe.experts.163.w1", "model.layers.21.block_sparse_moe.experts.164.w1", "model.layers.21.block_sparse_moe.experts.165.w1", "model.layers.21.block_sparse_moe.experts.166.w1", "model.layers.21.block_sparse_moe.experts.167.w1", "model.layers.21.block_sparse_moe.experts.168.w1", "model.layers.21.block_sparse_moe.experts.169.w1", "model.layers.21.block_sparse_moe.experts.170.w1", "model.layers.21.block_sparse_moe.experts.171.w1", "model.layers.21.block_sparse_moe.experts.172.w1", "model.layers.21.block_sparse_moe.experts.173.w1", "model.layers.21.block_sparse_moe.experts.174.w1", "model.layers.21.block_sparse_moe.experts.175.w1", "model.layers.21.block_sparse_moe.experts.176.w1", "model.layers.21.block_sparse_moe.experts.177.w1", "model.layers.21.block_sparse_moe.experts.178.w1", "model.layers.21.block_sparse_moe.experts.179.w1", "model.layers.21.block_sparse_moe.experts.180.w1", "model.layers.21.block_sparse_moe.experts.181.w1", "model.layers.21.block_sparse_moe.experts.182.w1", "model.layers.21.block_sparse_moe.experts.183.w1", "model.layers.21.block_sparse_moe.experts.184.w1", "model.layers.21.block_sparse_moe.experts.185.w1", "model.layers.21.block_sparse_moe.experts.186.w1", "model.layers.21.block_sparse_moe.experts.187.w1", "model.layers.21.block_sparse_moe.experts.188.w1", "model.layers.21.block_sparse_moe.experts.189.w1", "model.layers.21.block_sparse_moe.experts.190.w1", "model.layers.21.block_sparse_moe.experts.191.w1", "model.layers.21.block_sparse_moe.experts.192.w1", "model.layers.21.block_sparse_moe.experts.193.w1", "model.layers.21.block_sparse_moe.experts.194.w1", "model.layers.21.block_sparse_moe.experts.195.w1", "model.layers.21.block_sparse_moe.experts.196.w1", "model.layers.21.block_sparse_moe.experts.197.w1", "model.layers.21.block_sparse_moe.experts.198.w1", "model.layers.21.block_sparse_moe.experts.199.w1", "model.layers.21.block_sparse_moe.experts.200.w1", "model.layers.21.block_sparse_moe.experts.201.w1", "model.layers.21.block_sparse_moe.experts.202.w1", "model.layers.21.block_sparse_moe.experts.203.w1", "model.layers.21.block_sparse_moe.experts.204.w1", "model.layers.21.block_sparse_moe.experts.205.w1", "model.layers.21.block_sparse_moe.experts.206.w1", "model.layers.21.block_sparse_moe.experts.207.w1", "model.layers.21.block_sparse_moe.experts.208.w1", "model.layers.21.block_sparse_moe.experts.209.w1", "model.layers.21.block_sparse_moe.experts.210.w1", "model.layers.21.block_sparse_moe.experts.211.w1", "model.layers.21.block_sparse_moe.experts.212.w1", "model.layers.21.block_sparse_moe.experts.213.w1", "model.layers.21.block_sparse_moe.experts.214.w1", "model.layers.21.block_sparse_moe.experts.215.w1", "model.layers.21.block_sparse_moe.experts.216.w1", "model.layers.21.block_sparse_moe.experts.217.w1", "model.layers.21.block_sparse_moe.experts.218.w1", "model.layers.21.block_sparse_moe.experts.219.w1", "model.layers.21.block_sparse_moe.experts.220.w1", "model.layers.21.block_sparse_moe.experts.221.w1", "model.layers.21.block_sparse_moe.experts.222.w1", "model.layers.21.block_sparse_moe.experts.223.w1", "model.layers.21.block_sparse_moe.experts.224.w1", "model.layers.21.block_sparse_moe.experts.225.w1", "model.layers.21.block_sparse_moe.experts.226.w1", "model.layers.21.block_sparse_moe.experts.227.w1", "model.layers.21.block_sparse_moe.experts.228.w1", "model.layers.21.block_sparse_moe.experts.229.w1", "model.layers.21.block_sparse_moe.experts.230.w1", "model.layers.21.block_sparse_moe.experts.231.w1", "model.layers.21.block_sparse_moe.experts.232.w1", "model.layers.21.block_sparse_moe.experts.233.w1", "model.layers.21.block_sparse_moe.experts.234.w1", "model.layers.21.block_sparse_moe.experts.235.w1", "model.layers.21.block_sparse_moe.experts.236.w1", "model.layers.21.block_sparse_moe.experts.237.w1", "model.layers.21.block_sparse_moe.experts.238.w1", "model.layers.21.block_sparse_moe.experts.239.w1", "model.layers.21.block_sparse_moe.experts.240.w1", "model.layers.21.block_sparse_moe.experts.241.w1", "model.layers.21.block_sparse_moe.experts.242.w1", "model.layers.21.block_sparse_moe.experts.243.w1", "model.layers.21.block_sparse_moe.experts.244.w1", "model.layers.21.block_sparse_moe.experts.245.w1", "model.layers.21.block_sparse_moe.experts.246.w1", "model.layers.21.block_sparse_moe.experts.247.w1", "model.layers.21.block_sparse_moe.experts.248.w1", "model.layers.21.block_sparse_moe.experts.249.w1", "model.layers.21.block_sparse_moe.experts.250.w1", "model.layers.21.block_sparse_moe.experts.251.w1", "model.layers.21.block_sparse_moe.experts.252.w1", "model.layers.21.block_sparse_moe.experts.253.w1", "model.layers.21.block_sparse_moe.experts.254.w1", "model.layers.21.block_sparse_moe.experts.255.w1", "model.layers.21.block_sparse_moe.experts.0.w3", "model.layers.21.block_sparse_moe.experts.1.w3", "model.layers.21.block_sparse_moe.experts.2.w3", "model.layers.21.block_sparse_moe.experts.3.w3", "model.layers.21.block_sparse_moe.experts.4.w3", "model.layers.21.block_sparse_moe.experts.5.w3", "model.layers.21.block_sparse_moe.experts.6.w3", "model.layers.21.block_sparse_moe.experts.7.w3", "model.layers.21.block_sparse_moe.experts.8.w3", "model.layers.21.block_sparse_moe.experts.9.w3", "model.layers.21.block_sparse_moe.experts.10.w3", "model.layers.21.block_sparse_moe.experts.11.w3", "model.layers.21.block_sparse_moe.experts.12.w3", "model.layers.21.block_sparse_moe.experts.13.w3", "model.layers.21.block_sparse_moe.experts.14.w3", "model.layers.21.block_sparse_moe.experts.15.w3", "model.layers.21.block_sparse_moe.experts.16.w3", "model.layers.21.block_sparse_moe.experts.17.w3", "model.layers.21.block_sparse_moe.experts.18.w3", "model.layers.21.block_sparse_moe.experts.19.w3", "model.layers.21.block_sparse_moe.experts.20.w3", "model.layers.21.block_sparse_moe.experts.21.w3", "model.layers.21.block_sparse_moe.experts.22.w3", "model.layers.21.block_sparse_moe.experts.23.w3", "model.layers.21.block_sparse_moe.experts.24.w3", "model.layers.21.block_sparse_moe.experts.25.w3", "model.layers.21.block_sparse_moe.experts.26.w3", "model.layers.21.block_sparse_moe.experts.27.w3", "model.layers.21.block_sparse_moe.experts.28.w3", "model.layers.21.block_sparse_moe.experts.29.w3", "model.layers.21.block_sparse_moe.experts.30.w3", "model.layers.21.block_sparse_moe.experts.31.w3", "model.layers.21.block_sparse_moe.experts.32.w3", "model.layers.21.block_sparse_moe.experts.33.w3", "model.layers.21.block_sparse_moe.experts.34.w3", "model.layers.21.block_sparse_moe.experts.35.w3", "model.layers.21.block_sparse_moe.experts.36.w3", "model.layers.21.block_sparse_moe.experts.37.w3", "model.layers.21.block_sparse_moe.experts.38.w3", "model.layers.21.block_sparse_moe.experts.39.w3", "model.layers.21.block_sparse_moe.experts.40.w3", "model.layers.21.block_sparse_moe.experts.41.w3", "model.layers.21.block_sparse_moe.experts.42.w3", "model.layers.21.block_sparse_moe.experts.43.w3", "model.layers.21.block_sparse_moe.experts.44.w3", "model.layers.21.block_sparse_moe.experts.45.w3", "model.layers.21.block_sparse_moe.experts.46.w3", "model.layers.21.block_sparse_moe.experts.47.w3", "model.layers.21.block_sparse_moe.experts.48.w3", "model.layers.21.block_sparse_moe.experts.49.w3", "model.layers.21.block_sparse_moe.experts.50.w3", "model.layers.21.block_sparse_moe.experts.51.w3", "model.layers.21.block_sparse_moe.experts.52.w3", "model.layers.21.block_sparse_moe.experts.53.w3", "model.layers.21.block_sparse_moe.experts.54.w3", "model.layers.21.block_sparse_moe.experts.55.w3", "model.layers.21.block_sparse_moe.experts.56.w3", "model.layers.21.block_sparse_moe.experts.57.w3", "model.layers.21.block_sparse_moe.experts.58.w3", "model.layers.21.block_sparse_moe.experts.59.w3", "model.layers.21.block_sparse_moe.experts.60.w3", "model.layers.21.block_sparse_moe.experts.61.w3", "model.layers.21.block_sparse_moe.experts.62.w3", "model.layers.21.block_sparse_moe.experts.63.w3", "model.layers.21.block_sparse_moe.experts.64.w3", "model.layers.21.block_sparse_moe.experts.65.w3", "model.layers.21.block_sparse_moe.experts.66.w3", "model.layers.21.block_sparse_moe.experts.67.w3", "model.layers.21.block_sparse_moe.experts.68.w3", "model.layers.21.block_sparse_moe.experts.69.w3", "model.layers.21.block_sparse_moe.experts.70.w3", "model.layers.21.block_sparse_moe.experts.71.w3", "model.layers.21.block_sparse_moe.experts.72.w3", "model.layers.21.block_sparse_moe.experts.73.w3", "model.layers.21.block_sparse_moe.experts.74.w3", "model.layers.21.block_sparse_moe.experts.75.w3", "model.layers.21.block_sparse_moe.experts.76.w3", "model.layers.21.block_sparse_moe.experts.77.w3", "model.layers.21.block_sparse_moe.experts.78.w3", "model.layers.21.block_sparse_moe.experts.79.w3", "model.layers.21.block_sparse_moe.experts.80.w3", "model.layers.21.block_sparse_moe.experts.81.w3", "model.layers.21.block_sparse_moe.experts.82.w3", "model.layers.21.block_sparse_moe.experts.83.w3", "model.layers.21.block_sparse_moe.experts.84.w3", "model.layers.21.block_sparse_moe.experts.85.w3", "model.layers.21.block_sparse_moe.experts.86.w3", "model.layers.21.block_sparse_moe.experts.87.w3", "model.layers.21.block_sparse_moe.experts.88.w3", "model.layers.21.block_sparse_moe.experts.89.w3", "model.layers.21.block_sparse_moe.experts.90.w3", "model.layers.21.block_sparse_moe.experts.91.w3", "model.layers.21.block_sparse_moe.experts.92.w3", "model.layers.21.block_sparse_moe.experts.93.w3", "model.layers.21.block_sparse_moe.experts.94.w3", "model.layers.21.block_sparse_moe.experts.95.w3", "model.layers.21.block_sparse_moe.experts.96.w3", "model.layers.21.block_sparse_moe.experts.97.w3", "model.layers.21.block_sparse_moe.experts.98.w3", "model.layers.21.block_sparse_moe.experts.99.w3", "model.layers.21.block_sparse_moe.experts.100.w3", "model.layers.21.block_sparse_moe.experts.101.w3", "model.layers.21.block_sparse_moe.experts.102.w3", "model.layers.21.block_sparse_moe.experts.103.w3", "model.layers.21.block_sparse_moe.experts.104.w3", "model.layers.21.block_sparse_moe.experts.105.w3", "model.layers.21.block_sparse_moe.experts.106.w3", "model.layers.21.block_sparse_moe.experts.107.w3", "model.layers.21.block_sparse_moe.experts.108.w3", "model.layers.21.block_sparse_moe.experts.109.w3", "model.layers.21.block_sparse_moe.experts.110.w3", "model.layers.21.block_sparse_moe.experts.111.w3", "model.layers.21.block_sparse_moe.experts.112.w3", "model.layers.21.block_sparse_moe.experts.113.w3", "model.layers.21.block_sparse_moe.experts.114.w3", "model.layers.21.block_sparse_moe.experts.115.w3", "model.layers.21.block_sparse_moe.experts.116.w3", "model.layers.21.block_sparse_moe.experts.117.w3", "model.layers.21.block_sparse_moe.experts.118.w3", "model.layers.21.block_sparse_moe.experts.119.w3", "model.layers.21.block_sparse_moe.experts.120.w3", "model.layers.21.block_sparse_moe.experts.121.w3", "model.layers.21.block_sparse_moe.experts.122.w3", "model.layers.21.block_sparse_moe.experts.123.w3", "model.layers.21.block_sparse_moe.experts.124.w3", "model.layers.21.block_sparse_moe.experts.125.w3", "model.layers.21.block_sparse_moe.experts.126.w3", "model.layers.21.block_sparse_moe.experts.127.w3", "model.layers.21.block_sparse_moe.experts.128.w3", "model.layers.21.block_sparse_moe.experts.129.w3", "model.layers.21.block_sparse_moe.experts.130.w3", "model.layers.21.block_sparse_moe.experts.131.w3", "model.layers.21.block_sparse_moe.experts.132.w3", "model.layers.21.block_sparse_moe.experts.133.w3", "model.layers.21.block_sparse_moe.experts.134.w3", "model.layers.21.block_sparse_moe.experts.135.w3", "model.layers.21.block_sparse_moe.experts.136.w3", "model.layers.21.block_sparse_moe.experts.137.w3", "model.layers.21.block_sparse_moe.experts.138.w3", "model.layers.21.block_sparse_moe.experts.139.w3", "model.layers.21.block_sparse_moe.experts.140.w3", "model.layers.21.block_sparse_moe.experts.141.w3", "model.layers.21.block_sparse_moe.experts.142.w3", "model.layers.21.block_sparse_moe.experts.143.w3", "model.layers.21.block_sparse_moe.experts.144.w3", "model.layers.21.block_sparse_moe.experts.145.w3", "model.layers.21.block_sparse_moe.experts.146.w3", "model.layers.21.block_sparse_moe.experts.147.w3", "model.layers.21.block_sparse_moe.experts.148.w3", "model.layers.21.block_sparse_moe.experts.149.w3", "model.layers.21.block_sparse_moe.experts.150.w3", "model.layers.21.block_sparse_moe.experts.151.w3", "model.layers.21.block_sparse_moe.experts.152.w3", "model.layers.21.block_sparse_moe.experts.153.w3", "model.layers.21.block_sparse_moe.experts.154.w3", "model.layers.21.block_sparse_moe.experts.155.w3", "model.layers.21.block_sparse_moe.experts.156.w3", "model.layers.21.block_sparse_moe.experts.157.w3", "model.layers.21.block_sparse_moe.experts.158.w3", "model.layers.21.block_sparse_moe.experts.159.w3", "model.layers.21.block_sparse_moe.experts.160.w3", "model.layers.21.block_sparse_moe.experts.161.w3", "model.layers.21.block_sparse_moe.experts.162.w3", "model.layers.21.block_sparse_moe.experts.163.w3", "model.layers.21.block_sparse_moe.experts.164.w3", "model.layers.21.block_sparse_moe.experts.165.w3", "model.layers.21.block_sparse_moe.experts.166.w3", "model.layers.21.block_sparse_moe.experts.167.w3", "model.layers.21.block_sparse_moe.experts.168.w3", "model.layers.21.block_sparse_moe.experts.169.w3", "model.layers.21.block_sparse_moe.experts.170.w3", "model.layers.21.block_sparse_moe.experts.171.w3", "model.layers.21.block_sparse_moe.experts.172.w3", "model.layers.21.block_sparse_moe.experts.173.w3", "model.layers.21.block_sparse_moe.experts.174.w3", "model.layers.21.block_sparse_moe.experts.175.w3", "model.layers.21.block_sparse_moe.experts.176.w3", "model.layers.21.block_sparse_moe.experts.177.w3", "model.layers.21.block_sparse_moe.experts.178.w3", "model.layers.21.block_sparse_moe.experts.179.w3", "model.layers.21.block_sparse_moe.experts.180.w3", "model.layers.21.block_sparse_moe.experts.181.w3", "model.layers.21.block_sparse_moe.experts.182.w3", "model.layers.21.block_sparse_moe.experts.183.w3", "model.layers.21.block_sparse_moe.experts.184.w3", "model.layers.21.block_sparse_moe.experts.185.w3", "model.layers.21.block_sparse_moe.experts.186.w3", "model.layers.21.block_sparse_moe.experts.187.w3", "model.layers.21.block_sparse_moe.experts.188.w3", "model.layers.21.block_sparse_moe.experts.189.w3", "model.layers.21.block_sparse_moe.experts.190.w3", "model.layers.21.block_sparse_moe.experts.191.w3", "model.layers.21.block_sparse_moe.experts.192.w3", "model.layers.21.block_sparse_moe.experts.193.w3", "model.layers.21.block_sparse_moe.experts.194.w3", "model.layers.21.block_sparse_moe.experts.195.w3", "model.layers.21.block_sparse_moe.experts.196.w3", "model.layers.21.block_sparse_moe.experts.197.w3", "model.layers.21.block_sparse_moe.experts.198.w3", "model.layers.21.block_sparse_moe.experts.199.w3", "model.layers.21.block_sparse_moe.experts.200.w3", "model.layers.21.block_sparse_moe.experts.201.w3", "model.layers.21.block_sparse_moe.experts.202.w3", "model.layers.21.block_sparse_moe.experts.203.w3", "model.layers.21.block_sparse_moe.experts.204.w3", "model.layers.21.block_sparse_moe.experts.205.w3", "model.layers.21.block_sparse_moe.experts.206.w3", "model.layers.21.block_sparse_moe.experts.207.w3", "model.layers.21.block_sparse_moe.experts.208.w3", "model.layers.21.block_sparse_moe.experts.209.w3", "model.layers.21.block_sparse_moe.experts.210.w3", "model.layers.21.block_sparse_moe.experts.211.w3", "model.layers.21.block_sparse_moe.experts.212.w3", "model.layers.21.block_sparse_moe.experts.213.w3", "model.layers.21.block_sparse_moe.experts.214.w3", "model.layers.21.block_sparse_moe.experts.215.w3", "model.layers.21.block_sparse_moe.experts.216.w3", "model.layers.21.block_sparse_moe.experts.217.w3", "model.layers.21.block_sparse_moe.experts.218.w3", "model.layers.21.block_sparse_moe.experts.219.w3", "model.layers.21.block_sparse_moe.experts.220.w3", "model.layers.21.block_sparse_moe.experts.221.w3", "model.layers.21.block_sparse_moe.experts.222.w3", "model.layers.21.block_sparse_moe.experts.223.w3", "model.layers.21.block_sparse_moe.experts.224.w3", "model.layers.21.block_sparse_moe.experts.225.w3", "model.layers.21.block_sparse_moe.experts.226.w3", "model.layers.21.block_sparse_moe.experts.227.w3", "model.layers.21.block_sparse_moe.experts.228.w3", "model.layers.21.block_sparse_moe.experts.229.w3", "model.layers.21.block_sparse_moe.experts.230.w3", "model.layers.21.block_sparse_moe.experts.231.w3", "model.layers.21.block_sparse_moe.experts.232.w3", "model.layers.21.block_sparse_moe.experts.233.w3", "model.layers.21.block_sparse_moe.experts.234.w3", "model.layers.21.block_sparse_moe.experts.235.w3", "model.layers.21.block_sparse_moe.experts.236.w3", "model.layers.21.block_sparse_moe.experts.237.w3", "model.layers.21.block_sparse_moe.experts.238.w3", "model.layers.21.block_sparse_moe.experts.239.w3", "model.layers.21.block_sparse_moe.experts.240.w3", "model.layers.21.block_sparse_moe.experts.241.w3", "model.layers.21.block_sparse_moe.experts.242.w3", "model.layers.21.block_sparse_moe.experts.243.w3", "model.layers.21.block_sparse_moe.experts.244.w3", "model.layers.21.block_sparse_moe.experts.245.w3", "model.layers.21.block_sparse_moe.experts.246.w3", "model.layers.21.block_sparse_moe.experts.247.w3", "model.layers.21.block_sparse_moe.experts.248.w3", "model.layers.21.block_sparse_moe.experts.249.w3", "model.layers.21.block_sparse_moe.experts.250.w3", "model.layers.21.block_sparse_moe.experts.251.w3", "model.layers.21.block_sparse_moe.experts.252.w3", "model.layers.21.block_sparse_moe.experts.253.w3", "model.layers.21.block_sparse_moe.experts.254.w3", "model.layers.21.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0004665486514568301, "dbits": 2415919104 } ] }, { "idx": 109, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w2", "model.layers.21.block_sparse_moe.experts.1.w2", "model.layers.21.block_sparse_moe.experts.2.w2", "model.layers.21.block_sparse_moe.experts.3.w2", "model.layers.21.block_sparse_moe.experts.4.w2", "model.layers.21.block_sparse_moe.experts.5.w2", "model.layers.21.block_sparse_moe.experts.6.w2", "model.layers.21.block_sparse_moe.experts.7.w2", "model.layers.21.block_sparse_moe.experts.8.w2", "model.layers.21.block_sparse_moe.experts.9.w2", "model.layers.21.block_sparse_moe.experts.10.w2", "model.layers.21.block_sparse_moe.experts.11.w2", "model.layers.21.block_sparse_moe.experts.12.w2", "model.layers.21.block_sparse_moe.experts.13.w2", "model.layers.21.block_sparse_moe.experts.14.w2", "model.layers.21.block_sparse_moe.experts.15.w2", "model.layers.21.block_sparse_moe.experts.16.w2", "model.layers.21.block_sparse_moe.experts.17.w2", "model.layers.21.block_sparse_moe.experts.18.w2", "model.layers.21.block_sparse_moe.experts.19.w2", "model.layers.21.block_sparse_moe.experts.20.w2", "model.layers.21.block_sparse_moe.experts.21.w2", "model.layers.21.block_sparse_moe.experts.22.w2", "model.layers.21.block_sparse_moe.experts.23.w2", "model.layers.21.block_sparse_moe.experts.24.w2", "model.layers.21.block_sparse_moe.experts.25.w2", "model.layers.21.block_sparse_moe.experts.26.w2", "model.layers.21.block_sparse_moe.experts.27.w2", "model.layers.21.block_sparse_moe.experts.28.w2", "model.layers.21.block_sparse_moe.experts.29.w2", "model.layers.21.block_sparse_moe.experts.30.w2", "model.layers.21.block_sparse_moe.experts.31.w2", "model.layers.21.block_sparse_moe.experts.32.w2", "model.layers.21.block_sparse_moe.experts.33.w2", "model.layers.21.block_sparse_moe.experts.34.w2", "model.layers.21.block_sparse_moe.experts.35.w2", "model.layers.21.block_sparse_moe.experts.36.w2", "model.layers.21.block_sparse_moe.experts.37.w2", "model.layers.21.block_sparse_moe.experts.38.w2", "model.layers.21.block_sparse_moe.experts.39.w2", "model.layers.21.block_sparse_moe.experts.40.w2", "model.layers.21.block_sparse_moe.experts.41.w2", "model.layers.21.block_sparse_moe.experts.42.w2", "model.layers.21.block_sparse_moe.experts.43.w2", "model.layers.21.block_sparse_moe.experts.44.w2", "model.layers.21.block_sparse_moe.experts.45.w2", "model.layers.21.block_sparse_moe.experts.46.w2", "model.layers.21.block_sparse_moe.experts.47.w2", "model.layers.21.block_sparse_moe.experts.48.w2", "model.layers.21.block_sparse_moe.experts.49.w2", "model.layers.21.block_sparse_moe.experts.50.w2", "model.layers.21.block_sparse_moe.experts.51.w2", "model.layers.21.block_sparse_moe.experts.52.w2", "model.layers.21.block_sparse_moe.experts.53.w2", "model.layers.21.block_sparse_moe.experts.54.w2", "model.layers.21.block_sparse_moe.experts.55.w2", "model.layers.21.block_sparse_moe.experts.56.w2", "model.layers.21.block_sparse_moe.experts.57.w2", "model.layers.21.block_sparse_moe.experts.58.w2", "model.layers.21.block_sparse_moe.experts.59.w2", "model.layers.21.block_sparse_moe.experts.60.w2", "model.layers.21.block_sparse_moe.experts.61.w2", "model.layers.21.block_sparse_moe.experts.62.w2", "model.layers.21.block_sparse_moe.experts.63.w2", "model.layers.21.block_sparse_moe.experts.64.w2", "model.layers.21.block_sparse_moe.experts.65.w2", "model.layers.21.block_sparse_moe.experts.66.w2", "model.layers.21.block_sparse_moe.experts.67.w2", "model.layers.21.block_sparse_moe.experts.68.w2", "model.layers.21.block_sparse_moe.experts.69.w2", "model.layers.21.block_sparse_moe.experts.70.w2", "model.layers.21.block_sparse_moe.experts.71.w2", "model.layers.21.block_sparse_moe.experts.72.w2", "model.layers.21.block_sparse_moe.experts.73.w2", "model.layers.21.block_sparse_moe.experts.74.w2", "model.layers.21.block_sparse_moe.experts.75.w2", "model.layers.21.block_sparse_moe.experts.76.w2", "model.layers.21.block_sparse_moe.experts.77.w2", "model.layers.21.block_sparse_moe.experts.78.w2", "model.layers.21.block_sparse_moe.experts.79.w2", "model.layers.21.block_sparse_moe.experts.80.w2", "model.layers.21.block_sparse_moe.experts.81.w2", "model.layers.21.block_sparse_moe.experts.82.w2", "model.layers.21.block_sparse_moe.experts.83.w2", "model.layers.21.block_sparse_moe.experts.84.w2", "model.layers.21.block_sparse_moe.experts.85.w2", "model.layers.21.block_sparse_moe.experts.86.w2", "model.layers.21.block_sparse_moe.experts.87.w2", "model.layers.21.block_sparse_moe.experts.88.w2", "model.layers.21.block_sparse_moe.experts.89.w2", "model.layers.21.block_sparse_moe.experts.90.w2", "model.layers.21.block_sparse_moe.experts.91.w2", "model.layers.21.block_sparse_moe.experts.92.w2", "model.layers.21.block_sparse_moe.experts.93.w2", "model.layers.21.block_sparse_moe.experts.94.w2", "model.layers.21.block_sparse_moe.experts.95.w2", "model.layers.21.block_sparse_moe.experts.96.w2", "model.layers.21.block_sparse_moe.experts.97.w2", "model.layers.21.block_sparse_moe.experts.98.w2", "model.layers.21.block_sparse_moe.experts.99.w2", "model.layers.21.block_sparse_moe.experts.100.w2", "model.layers.21.block_sparse_moe.experts.101.w2", "model.layers.21.block_sparse_moe.experts.102.w2", "model.layers.21.block_sparse_moe.experts.103.w2", "model.layers.21.block_sparse_moe.experts.104.w2", "model.layers.21.block_sparse_moe.experts.105.w2", "model.layers.21.block_sparse_moe.experts.106.w2", "model.layers.21.block_sparse_moe.experts.107.w2", "model.layers.21.block_sparse_moe.experts.108.w2", "model.layers.21.block_sparse_moe.experts.109.w2", "model.layers.21.block_sparse_moe.experts.110.w2", "model.layers.21.block_sparse_moe.experts.111.w2", "model.layers.21.block_sparse_moe.experts.112.w2", "model.layers.21.block_sparse_moe.experts.113.w2", "model.layers.21.block_sparse_moe.experts.114.w2", "model.layers.21.block_sparse_moe.experts.115.w2", "model.layers.21.block_sparse_moe.experts.116.w2", "model.layers.21.block_sparse_moe.experts.117.w2", "model.layers.21.block_sparse_moe.experts.118.w2", "model.layers.21.block_sparse_moe.experts.119.w2", "model.layers.21.block_sparse_moe.experts.120.w2", "model.layers.21.block_sparse_moe.experts.121.w2", "model.layers.21.block_sparse_moe.experts.122.w2", "model.layers.21.block_sparse_moe.experts.123.w2", "model.layers.21.block_sparse_moe.experts.124.w2", "model.layers.21.block_sparse_moe.experts.125.w2", "model.layers.21.block_sparse_moe.experts.126.w2", "model.layers.21.block_sparse_moe.experts.127.w2", "model.layers.21.block_sparse_moe.experts.128.w2", "model.layers.21.block_sparse_moe.experts.129.w2", "model.layers.21.block_sparse_moe.experts.130.w2", "model.layers.21.block_sparse_moe.experts.131.w2", "model.layers.21.block_sparse_moe.experts.132.w2", "model.layers.21.block_sparse_moe.experts.133.w2", "model.layers.21.block_sparse_moe.experts.134.w2", "model.layers.21.block_sparse_moe.experts.135.w2", "model.layers.21.block_sparse_moe.experts.136.w2", "model.layers.21.block_sparse_moe.experts.137.w2", "model.layers.21.block_sparse_moe.experts.138.w2", "model.layers.21.block_sparse_moe.experts.139.w2", "model.layers.21.block_sparse_moe.experts.140.w2", "model.layers.21.block_sparse_moe.experts.141.w2", "model.layers.21.block_sparse_moe.experts.142.w2", "model.layers.21.block_sparse_moe.experts.143.w2", "model.layers.21.block_sparse_moe.experts.144.w2", "model.layers.21.block_sparse_moe.experts.145.w2", "model.layers.21.block_sparse_moe.experts.146.w2", "model.layers.21.block_sparse_moe.experts.147.w2", "model.layers.21.block_sparse_moe.experts.148.w2", "model.layers.21.block_sparse_moe.experts.149.w2", "model.layers.21.block_sparse_moe.experts.150.w2", "model.layers.21.block_sparse_moe.experts.151.w2", "model.layers.21.block_sparse_moe.experts.152.w2", "model.layers.21.block_sparse_moe.experts.153.w2", "model.layers.21.block_sparse_moe.experts.154.w2", "model.layers.21.block_sparse_moe.experts.155.w2", "model.layers.21.block_sparse_moe.experts.156.w2", "model.layers.21.block_sparse_moe.experts.157.w2", "model.layers.21.block_sparse_moe.experts.158.w2", "model.layers.21.block_sparse_moe.experts.159.w2", "model.layers.21.block_sparse_moe.experts.160.w2", "model.layers.21.block_sparse_moe.experts.161.w2", "model.layers.21.block_sparse_moe.experts.162.w2", "model.layers.21.block_sparse_moe.experts.163.w2", "model.layers.21.block_sparse_moe.experts.164.w2", "model.layers.21.block_sparse_moe.experts.165.w2", "model.layers.21.block_sparse_moe.experts.166.w2", "model.layers.21.block_sparse_moe.experts.167.w2", "model.layers.21.block_sparse_moe.experts.168.w2", "model.layers.21.block_sparse_moe.experts.169.w2", "model.layers.21.block_sparse_moe.experts.170.w2", "model.layers.21.block_sparse_moe.experts.171.w2", "model.layers.21.block_sparse_moe.experts.172.w2", "model.layers.21.block_sparse_moe.experts.173.w2", "model.layers.21.block_sparse_moe.experts.174.w2", "model.layers.21.block_sparse_moe.experts.175.w2", "model.layers.21.block_sparse_moe.experts.176.w2", "model.layers.21.block_sparse_moe.experts.177.w2", "model.layers.21.block_sparse_moe.experts.178.w2", "model.layers.21.block_sparse_moe.experts.179.w2", "model.layers.21.block_sparse_moe.experts.180.w2", "model.layers.21.block_sparse_moe.experts.181.w2", "model.layers.21.block_sparse_moe.experts.182.w2", "model.layers.21.block_sparse_moe.experts.183.w2", "model.layers.21.block_sparse_moe.experts.184.w2", "model.layers.21.block_sparse_moe.experts.185.w2", "model.layers.21.block_sparse_moe.experts.186.w2", "model.layers.21.block_sparse_moe.experts.187.w2", "model.layers.21.block_sparse_moe.experts.188.w2", "model.layers.21.block_sparse_moe.experts.189.w2", "model.layers.21.block_sparse_moe.experts.190.w2", "model.layers.21.block_sparse_moe.experts.191.w2", "model.layers.21.block_sparse_moe.experts.192.w2", "model.layers.21.block_sparse_moe.experts.193.w2", "model.layers.21.block_sparse_moe.experts.194.w2", "model.layers.21.block_sparse_moe.experts.195.w2", "model.layers.21.block_sparse_moe.experts.196.w2", "model.layers.21.block_sparse_moe.experts.197.w2", "model.layers.21.block_sparse_moe.experts.198.w2", "model.layers.21.block_sparse_moe.experts.199.w2", "model.layers.21.block_sparse_moe.experts.200.w2", "model.layers.21.block_sparse_moe.experts.201.w2", "model.layers.21.block_sparse_moe.experts.202.w2", "model.layers.21.block_sparse_moe.experts.203.w2", "model.layers.21.block_sparse_moe.experts.204.w2", "model.layers.21.block_sparse_moe.experts.205.w2", "model.layers.21.block_sparse_moe.experts.206.w2", "model.layers.21.block_sparse_moe.experts.207.w2", "model.layers.21.block_sparse_moe.experts.208.w2", "model.layers.21.block_sparse_moe.experts.209.w2", "model.layers.21.block_sparse_moe.experts.210.w2", "model.layers.21.block_sparse_moe.experts.211.w2", "model.layers.21.block_sparse_moe.experts.212.w2", "model.layers.21.block_sparse_moe.experts.213.w2", "model.layers.21.block_sparse_moe.experts.214.w2", "model.layers.21.block_sparse_moe.experts.215.w2", "model.layers.21.block_sparse_moe.experts.216.w2", "model.layers.21.block_sparse_moe.experts.217.w2", "model.layers.21.block_sparse_moe.experts.218.w2", "model.layers.21.block_sparse_moe.experts.219.w2", "model.layers.21.block_sparse_moe.experts.220.w2", "model.layers.21.block_sparse_moe.experts.221.w2", "model.layers.21.block_sparse_moe.experts.222.w2", "model.layers.21.block_sparse_moe.experts.223.w2", "model.layers.21.block_sparse_moe.experts.224.w2", "model.layers.21.block_sparse_moe.experts.225.w2", "model.layers.21.block_sparse_moe.experts.226.w2", "model.layers.21.block_sparse_moe.experts.227.w2", "model.layers.21.block_sparse_moe.experts.228.w2", "model.layers.21.block_sparse_moe.experts.229.w2", "model.layers.21.block_sparse_moe.experts.230.w2", "model.layers.21.block_sparse_moe.experts.231.w2", "model.layers.21.block_sparse_moe.experts.232.w2", "model.layers.21.block_sparse_moe.experts.233.w2", "model.layers.21.block_sparse_moe.experts.234.w2", "model.layers.21.block_sparse_moe.experts.235.w2", "model.layers.21.block_sparse_moe.experts.236.w2", "model.layers.21.block_sparse_moe.experts.237.w2", "model.layers.21.block_sparse_moe.experts.238.w2", "model.layers.21.block_sparse_moe.experts.239.w2", "model.layers.21.block_sparse_moe.experts.240.w2", "model.layers.21.block_sparse_moe.experts.241.w2", "model.layers.21.block_sparse_moe.experts.242.w2", "model.layers.21.block_sparse_moe.experts.243.w2", "model.layers.21.block_sparse_moe.experts.244.w2", "model.layers.21.block_sparse_moe.experts.245.w2", "model.layers.21.block_sparse_moe.experts.246.w2", "model.layers.21.block_sparse_moe.experts.247.w2", "model.layers.21.block_sparse_moe.experts.248.w2", "model.layers.21.block_sparse_moe.experts.249.w2", "model.layers.21.block_sparse_moe.experts.250.w2", "model.layers.21.block_sparse_moe.experts.251.w2", "model.layers.21.block_sparse_moe.experts.252.w2", "model.layers.21.block_sparse_moe.experts.253.w2", "model.layers.21.block_sparse_moe.experts.254.w2", "model.layers.21.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006702013313770322, "dbits": 1207959552 } ] }, { "idx": 110, "layers": [ "model.layers.22.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0008030064404010717, "dbits": 18874368 } ] }, { "idx": 111, "layers": [ "model.layers.22.self_attn.k_proj", "model.layers.22.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0005808778107166263, "dbits": 6291456 } ] }, { "idx": 112, "layers": [ "model.layers.22.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0022941900417208644, "dbits": 18874368 } ] }, { "idx": 113, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w1", "model.layers.22.block_sparse_moe.experts.1.w1", "model.layers.22.block_sparse_moe.experts.2.w1", "model.layers.22.block_sparse_moe.experts.3.w1", "model.layers.22.block_sparse_moe.experts.4.w1", "model.layers.22.block_sparse_moe.experts.5.w1", "model.layers.22.block_sparse_moe.experts.6.w1", "model.layers.22.block_sparse_moe.experts.7.w1", "model.layers.22.block_sparse_moe.experts.8.w1", "model.layers.22.block_sparse_moe.experts.9.w1", "model.layers.22.block_sparse_moe.experts.10.w1", "model.layers.22.block_sparse_moe.experts.11.w1", "model.layers.22.block_sparse_moe.experts.12.w1", "model.layers.22.block_sparse_moe.experts.13.w1", "model.layers.22.block_sparse_moe.experts.14.w1", "model.layers.22.block_sparse_moe.experts.15.w1", "model.layers.22.block_sparse_moe.experts.16.w1", "model.layers.22.block_sparse_moe.experts.17.w1", "model.layers.22.block_sparse_moe.experts.18.w1", "model.layers.22.block_sparse_moe.experts.19.w1", "model.layers.22.block_sparse_moe.experts.20.w1", "model.layers.22.block_sparse_moe.experts.21.w1", "model.layers.22.block_sparse_moe.experts.22.w1", "model.layers.22.block_sparse_moe.experts.23.w1", "model.layers.22.block_sparse_moe.experts.24.w1", "model.layers.22.block_sparse_moe.experts.25.w1", "model.layers.22.block_sparse_moe.experts.26.w1", "model.layers.22.block_sparse_moe.experts.27.w1", "model.layers.22.block_sparse_moe.experts.28.w1", "model.layers.22.block_sparse_moe.experts.29.w1", "model.layers.22.block_sparse_moe.experts.30.w1", "model.layers.22.block_sparse_moe.experts.31.w1", "model.layers.22.block_sparse_moe.experts.32.w1", "model.layers.22.block_sparse_moe.experts.33.w1", "model.layers.22.block_sparse_moe.experts.34.w1", "model.layers.22.block_sparse_moe.experts.35.w1", "model.layers.22.block_sparse_moe.experts.36.w1", "model.layers.22.block_sparse_moe.experts.37.w1", "model.layers.22.block_sparse_moe.experts.38.w1", "model.layers.22.block_sparse_moe.experts.39.w1", "model.layers.22.block_sparse_moe.experts.40.w1", "model.layers.22.block_sparse_moe.experts.41.w1", "model.layers.22.block_sparse_moe.experts.42.w1", "model.layers.22.block_sparse_moe.experts.43.w1", "model.layers.22.block_sparse_moe.experts.44.w1", "model.layers.22.block_sparse_moe.experts.45.w1", "model.layers.22.block_sparse_moe.experts.46.w1", "model.layers.22.block_sparse_moe.experts.47.w1", "model.layers.22.block_sparse_moe.experts.48.w1", "model.layers.22.block_sparse_moe.experts.49.w1", "model.layers.22.block_sparse_moe.experts.50.w1", "model.layers.22.block_sparse_moe.experts.51.w1", "model.layers.22.block_sparse_moe.experts.52.w1", "model.layers.22.block_sparse_moe.experts.53.w1", "model.layers.22.block_sparse_moe.experts.54.w1", "model.layers.22.block_sparse_moe.experts.55.w1", "model.layers.22.block_sparse_moe.experts.56.w1", "model.layers.22.block_sparse_moe.experts.57.w1", "model.layers.22.block_sparse_moe.experts.58.w1", "model.layers.22.block_sparse_moe.experts.59.w1", "model.layers.22.block_sparse_moe.experts.60.w1", "model.layers.22.block_sparse_moe.experts.61.w1", "model.layers.22.block_sparse_moe.experts.62.w1", "model.layers.22.block_sparse_moe.experts.63.w1", "model.layers.22.block_sparse_moe.experts.64.w1", "model.layers.22.block_sparse_moe.experts.65.w1", "model.layers.22.block_sparse_moe.experts.66.w1", "model.layers.22.block_sparse_moe.experts.67.w1", "model.layers.22.block_sparse_moe.experts.68.w1", "model.layers.22.block_sparse_moe.experts.69.w1", "model.layers.22.block_sparse_moe.experts.70.w1", "model.layers.22.block_sparse_moe.experts.71.w1", "model.layers.22.block_sparse_moe.experts.72.w1", "model.layers.22.block_sparse_moe.experts.73.w1", "model.layers.22.block_sparse_moe.experts.74.w1", "model.layers.22.block_sparse_moe.experts.75.w1", "model.layers.22.block_sparse_moe.experts.76.w1", "model.layers.22.block_sparse_moe.experts.77.w1", "model.layers.22.block_sparse_moe.experts.78.w1", "model.layers.22.block_sparse_moe.experts.79.w1", "model.layers.22.block_sparse_moe.experts.80.w1", "model.layers.22.block_sparse_moe.experts.81.w1", "model.layers.22.block_sparse_moe.experts.82.w1", "model.layers.22.block_sparse_moe.experts.83.w1", "model.layers.22.block_sparse_moe.experts.84.w1", "model.layers.22.block_sparse_moe.experts.85.w1", "model.layers.22.block_sparse_moe.experts.86.w1", "model.layers.22.block_sparse_moe.experts.87.w1", "model.layers.22.block_sparse_moe.experts.88.w1", "model.layers.22.block_sparse_moe.experts.89.w1", "model.layers.22.block_sparse_moe.experts.90.w1", "model.layers.22.block_sparse_moe.experts.91.w1", "model.layers.22.block_sparse_moe.experts.92.w1", "model.layers.22.block_sparse_moe.experts.93.w1", "model.layers.22.block_sparse_moe.experts.94.w1", "model.layers.22.block_sparse_moe.experts.95.w1", "model.layers.22.block_sparse_moe.experts.96.w1", "model.layers.22.block_sparse_moe.experts.97.w1", "model.layers.22.block_sparse_moe.experts.98.w1", "model.layers.22.block_sparse_moe.experts.99.w1", "model.layers.22.block_sparse_moe.experts.100.w1", "model.layers.22.block_sparse_moe.experts.101.w1", "model.layers.22.block_sparse_moe.experts.102.w1", "model.layers.22.block_sparse_moe.experts.103.w1", "model.layers.22.block_sparse_moe.experts.104.w1", "model.layers.22.block_sparse_moe.experts.105.w1", "model.layers.22.block_sparse_moe.experts.106.w1", "model.layers.22.block_sparse_moe.experts.107.w1", "model.layers.22.block_sparse_moe.experts.108.w1", "model.layers.22.block_sparse_moe.experts.109.w1", "model.layers.22.block_sparse_moe.experts.110.w1", "model.layers.22.block_sparse_moe.experts.111.w1", "model.layers.22.block_sparse_moe.experts.112.w1", "model.layers.22.block_sparse_moe.experts.113.w1", "model.layers.22.block_sparse_moe.experts.114.w1", "model.layers.22.block_sparse_moe.experts.115.w1", "model.layers.22.block_sparse_moe.experts.116.w1", "model.layers.22.block_sparse_moe.experts.117.w1", "model.layers.22.block_sparse_moe.experts.118.w1", "model.layers.22.block_sparse_moe.experts.119.w1", "model.layers.22.block_sparse_moe.experts.120.w1", "model.layers.22.block_sparse_moe.experts.121.w1", "model.layers.22.block_sparse_moe.experts.122.w1", "model.layers.22.block_sparse_moe.experts.123.w1", "model.layers.22.block_sparse_moe.experts.124.w1", "model.layers.22.block_sparse_moe.experts.125.w1", "model.layers.22.block_sparse_moe.experts.126.w1", "model.layers.22.block_sparse_moe.experts.127.w1", "model.layers.22.block_sparse_moe.experts.128.w1", "model.layers.22.block_sparse_moe.experts.129.w1", "model.layers.22.block_sparse_moe.experts.130.w1", "model.layers.22.block_sparse_moe.experts.131.w1", "model.layers.22.block_sparse_moe.experts.132.w1", "model.layers.22.block_sparse_moe.experts.133.w1", "model.layers.22.block_sparse_moe.experts.134.w1", "model.layers.22.block_sparse_moe.experts.135.w1", "model.layers.22.block_sparse_moe.experts.136.w1", "model.layers.22.block_sparse_moe.experts.137.w1", "model.layers.22.block_sparse_moe.experts.138.w1", "model.layers.22.block_sparse_moe.experts.139.w1", "model.layers.22.block_sparse_moe.experts.140.w1", "model.layers.22.block_sparse_moe.experts.141.w1", "model.layers.22.block_sparse_moe.experts.142.w1", "model.layers.22.block_sparse_moe.experts.143.w1", "model.layers.22.block_sparse_moe.experts.144.w1", "model.layers.22.block_sparse_moe.experts.145.w1", "model.layers.22.block_sparse_moe.experts.146.w1", "model.layers.22.block_sparse_moe.experts.147.w1", "model.layers.22.block_sparse_moe.experts.148.w1", "model.layers.22.block_sparse_moe.experts.149.w1", "model.layers.22.block_sparse_moe.experts.150.w1", "model.layers.22.block_sparse_moe.experts.151.w1", "model.layers.22.block_sparse_moe.experts.152.w1", "model.layers.22.block_sparse_moe.experts.153.w1", "model.layers.22.block_sparse_moe.experts.154.w1", "model.layers.22.block_sparse_moe.experts.155.w1", "model.layers.22.block_sparse_moe.experts.156.w1", "model.layers.22.block_sparse_moe.experts.157.w1", "model.layers.22.block_sparse_moe.experts.158.w1", "model.layers.22.block_sparse_moe.experts.159.w1", "model.layers.22.block_sparse_moe.experts.160.w1", "model.layers.22.block_sparse_moe.experts.161.w1", "model.layers.22.block_sparse_moe.experts.162.w1", "model.layers.22.block_sparse_moe.experts.163.w1", "model.layers.22.block_sparse_moe.experts.164.w1", "model.layers.22.block_sparse_moe.experts.165.w1", "model.layers.22.block_sparse_moe.experts.166.w1", "model.layers.22.block_sparse_moe.experts.167.w1", "model.layers.22.block_sparse_moe.experts.168.w1", "model.layers.22.block_sparse_moe.experts.169.w1", "model.layers.22.block_sparse_moe.experts.170.w1", "model.layers.22.block_sparse_moe.experts.171.w1", "model.layers.22.block_sparse_moe.experts.172.w1", "model.layers.22.block_sparse_moe.experts.173.w1", "model.layers.22.block_sparse_moe.experts.174.w1", "model.layers.22.block_sparse_moe.experts.175.w1", "model.layers.22.block_sparse_moe.experts.176.w1", "model.layers.22.block_sparse_moe.experts.177.w1", "model.layers.22.block_sparse_moe.experts.178.w1", "model.layers.22.block_sparse_moe.experts.179.w1", "model.layers.22.block_sparse_moe.experts.180.w1", "model.layers.22.block_sparse_moe.experts.181.w1", "model.layers.22.block_sparse_moe.experts.182.w1", "model.layers.22.block_sparse_moe.experts.183.w1", "model.layers.22.block_sparse_moe.experts.184.w1", "model.layers.22.block_sparse_moe.experts.185.w1", "model.layers.22.block_sparse_moe.experts.186.w1", "model.layers.22.block_sparse_moe.experts.187.w1", "model.layers.22.block_sparse_moe.experts.188.w1", "model.layers.22.block_sparse_moe.experts.189.w1", "model.layers.22.block_sparse_moe.experts.190.w1", "model.layers.22.block_sparse_moe.experts.191.w1", "model.layers.22.block_sparse_moe.experts.192.w1", "model.layers.22.block_sparse_moe.experts.193.w1", "model.layers.22.block_sparse_moe.experts.194.w1", "model.layers.22.block_sparse_moe.experts.195.w1", "model.layers.22.block_sparse_moe.experts.196.w1", "model.layers.22.block_sparse_moe.experts.197.w1", "model.layers.22.block_sparse_moe.experts.198.w1", "model.layers.22.block_sparse_moe.experts.199.w1", "model.layers.22.block_sparse_moe.experts.200.w1", "model.layers.22.block_sparse_moe.experts.201.w1", "model.layers.22.block_sparse_moe.experts.202.w1", "model.layers.22.block_sparse_moe.experts.203.w1", "model.layers.22.block_sparse_moe.experts.204.w1", "model.layers.22.block_sparse_moe.experts.205.w1", "model.layers.22.block_sparse_moe.experts.206.w1", "model.layers.22.block_sparse_moe.experts.207.w1", "model.layers.22.block_sparse_moe.experts.208.w1", "model.layers.22.block_sparse_moe.experts.209.w1", "model.layers.22.block_sparse_moe.experts.210.w1", "model.layers.22.block_sparse_moe.experts.211.w1", "model.layers.22.block_sparse_moe.experts.212.w1", "model.layers.22.block_sparse_moe.experts.213.w1", "model.layers.22.block_sparse_moe.experts.214.w1", "model.layers.22.block_sparse_moe.experts.215.w1", "model.layers.22.block_sparse_moe.experts.216.w1", "model.layers.22.block_sparse_moe.experts.217.w1", "model.layers.22.block_sparse_moe.experts.218.w1", "model.layers.22.block_sparse_moe.experts.219.w1", "model.layers.22.block_sparse_moe.experts.220.w1", "model.layers.22.block_sparse_moe.experts.221.w1", "model.layers.22.block_sparse_moe.experts.222.w1", "model.layers.22.block_sparse_moe.experts.223.w1", "model.layers.22.block_sparse_moe.experts.224.w1", "model.layers.22.block_sparse_moe.experts.225.w1", "model.layers.22.block_sparse_moe.experts.226.w1", "model.layers.22.block_sparse_moe.experts.227.w1", "model.layers.22.block_sparse_moe.experts.228.w1", "model.layers.22.block_sparse_moe.experts.229.w1", "model.layers.22.block_sparse_moe.experts.230.w1", "model.layers.22.block_sparse_moe.experts.231.w1", "model.layers.22.block_sparse_moe.experts.232.w1", "model.layers.22.block_sparse_moe.experts.233.w1", "model.layers.22.block_sparse_moe.experts.234.w1", "model.layers.22.block_sparse_moe.experts.235.w1", "model.layers.22.block_sparse_moe.experts.236.w1", "model.layers.22.block_sparse_moe.experts.237.w1", "model.layers.22.block_sparse_moe.experts.238.w1", "model.layers.22.block_sparse_moe.experts.239.w1", "model.layers.22.block_sparse_moe.experts.240.w1", "model.layers.22.block_sparse_moe.experts.241.w1", "model.layers.22.block_sparse_moe.experts.242.w1", "model.layers.22.block_sparse_moe.experts.243.w1", "model.layers.22.block_sparse_moe.experts.244.w1", "model.layers.22.block_sparse_moe.experts.245.w1", "model.layers.22.block_sparse_moe.experts.246.w1", "model.layers.22.block_sparse_moe.experts.247.w1", "model.layers.22.block_sparse_moe.experts.248.w1", "model.layers.22.block_sparse_moe.experts.249.w1", "model.layers.22.block_sparse_moe.experts.250.w1", "model.layers.22.block_sparse_moe.experts.251.w1", "model.layers.22.block_sparse_moe.experts.252.w1", "model.layers.22.block_sparse_moe.experts.253.w1", "model.layers.22.block_sparse_moe.experts.254.w1", "model.layers.22.block_sparse_moe.experts.255.w1", "model.layers.22.block_sparse_moe.experts.0.w3", "model.layers.22.block_sparse_moe.experts.1.w3", "model.layers.22.block_sparse_moe.experts.2.w3", "model.layers.22.block_sparse_moe.experts.3.w3", "model.layers.22.block_sparse_moe.experts.4.w3", "model.layers.22.block_sparse_moe.experts.5.w3", "model.layers.22.block_sparse_moe.experts.6.w3", "model.layers.22.block_sparse_moe.experts.7.w3", "model.layers.22.block_sparse_moe.experts.8.w3", "model.layers.22.block_sparse_moe.experts.9.w3", "model.layers.22.block_sparse_moe.experts.10.w3", "model.layers.22.block_sparse_moe.experts.11.w3", "model.layers.22.block_sparse_moe.experts.12.w3", "model.layers.22.block_sparse_moe.experts.13.w3", "model.layers.22.block_sparse_moe.experts.14.w3", "model.layers.22.block_sparse_moe.experts.15.w3", "model.layers.22.block_sparse_moe.experts.16.w3", "model.layers.22.block_sparse_moe.experts.17.w3", "model.layers.22.block_sparse_moe.experts.18.w3", "model.layers.22.block_sparse_moe.experts.19.w3", "model.layers.22.block_sparse_moe.experts.20.w3", "model.layers.22.block_sparse_moe.experts.21.w3", "model.layers.22.block_sparse_moe.experts.22.w3", "model.layers.22.block_sparse_moe.experts.23.w3", "model.layers.22.block_sparse_moe.experts.24.w3", "model.layers.22.block_sparse_moe.experts.25.w3", "model.layers.22.block_sparse_moe.experts.26.w3", "model.layers.22.block_sparse_moe.experts.27.w3", "model.layers.22.block_sparse_moe.experts.28.w3", "model.layers.22.block_sparse_moe.experts.29.w3", "model.layers.22.block_sparse_moe.experts.30.w3", "model.layers.22.block_sparse_moe.experts.31.w3", "model.layers.22.block_sparse_moe.experts.32.w3", "model.layers.22.block_sparse_moe.experts.33.w3", "model.layers.22.block_sparse_moe.experts.34.w3", "model.layers.22.block_sparse_moe.experts.35.w3", "model.layers.22.block_sparse_moe.experts.36.w3", "model.layers.22.block_sparse_moe.experts.37.w3", "model.layers.22.block_sparse_moe.experts.38.w3", "model.layers.22.block_sparse_moe.experts.39.w3", "model.layers.22.block_sparse_moe.experts.40.w3", "model.layers.22.block_sparse_moe.experts.41.w3", "model.layers.22.block_sparse_moe.experts.42.w3", "model.layers.22.block_sparse_moe.experts.43.w3", "model.layers.22.block_sparse_moe.experts.44.w3", "model.layers.22.block_sparse_moe.experts.45.w3", "model.layers.22.block_sparse_moe.experts.46.w3", "model.layers.22.block_sparse_moe.experts.47.w3", "model.layers.22.block_sparse_moe.experts.48.w3", "model.layers.22.block_sparse_moe.experts.49.w3", "model.layers.22.block_sparse_moe.experts.50.w3", "model.layers.22.block_sparse_moe.experts.51.w3", "model.layers.22.block_sparse_moe.experts.52.w3", "model.layers.22.block_sparse_moe.experts.53.w3", "model.layers.22.block_sparse_moe.experts.54.w3", "model.layers.22.block_sparse_moe.experts.55.w3", "model.layers.22.block_sparse_moe.experts.56.w3", "model.layers.22.block_sparse_moe.experts.57.w3", "model.layers.22.block_sparse_moe.experts.58.w3", "model.layers.22.block_sparse_moe.experts.59.w3", "model.layers.22.block_sparse_moe.experts.60.w3", "model.layers.22.block_sparse_moe.experts.61.w3", "model.layers.22.block_sparse_moe.experts.62.w3", "model.layers.22.block_sparse_moe.experts.63.w3", "model.layers.22.block_sparse_moe.experts.64.w3", "model.layers.22.block_sparse_moe.experts.65.w3", "model.layers.22.block_sparse_moe.experts.66.w3", "model.layers.22.block_sparse_moe.experts.67.w3", "model.layers.22.block_sparse_moe.experts.68.w3", "model.layers.22.block_sparse_moe.experts.69.w3", "model.layers.22.block_sparse_moe.experts.70.w3", "model.layers.22.block_sparse_moe.experts.71.w3", "model.layers.22.block_sparse_moe.experts.72.w3", "model.layers.22.block_sparse_moe.experts.73.w3", "model.layers.22.block_sparse_moe.experts.74.w3", "model.layers.22.block_sparse_moe.experts.75.w3", "model.layers.22.block_sparse_moe.experts.76.w3", "model.layers.22.block_sparse_moe.experts.77.w3", "model.layers.22.block_sparse_moe.experts.78.w3", "model.layers.22.block_sparse_moe.experts.79.w3", "model.layers.22.block_sparse_moe.experts.80.w3", "model.layers.22.block_sparse_moe.experts.81.w3", "model.layers.22.block_sparse_moe.experts.82.w3", "model.layers.22.block_sparse_moe.experts.83.w3", "model.layers.22.block_sparse_moe.experts.84.w3", "model.layers.22.block_sparse_moe.experts.85.w3", "model.layers.22.block_sparse_moe.experts.86.w3", "model.layers.22.block_sparse_moe.experts.87.w3", "model.layers.22.block_sparse_moe.experts.88.w3", "model.layers.22.block_sparse_moe.experts.89.w3", "model.layers.22.block_sparse_moe.experts.90.w3", "model.layers.22.block_sparse_moe.experts.91.w3", "model.layers.22.block_sparse_moe.experts.92.w3", "model.layers.22.block_sparse_moe.experts.93.w3", "model.layers.22.block_sparse_moe.experts.94.w3", "model.layers.22.block_sparse_moe.experts.95.w3", "model.layers.22.block_sparse_moe.experts.96.w3", "model.layers.22.block_sparse_moe.experts.97.w3", "model.layers.22.block_sparse_moe.experts.98.w3", "model.layers.22.block_sparse_moe.experts.99.w3", "model.layers.22.block_sparse_moe.experts.100.w3", "model.layers.22.block_sparse_moe.experts.101.w3", "model.layers.22.block_sparse_moe.experts.102.w3", "model.layers.22.block_sparse_moe.experts.103.w3", "model.layers.22.block_sparse_moe.experts.104.w3", "model.layers.22.block_sparse_moe.experts.105.w3", "model.layers.22.block_sparse_moe.experts.106.w3", "model.layers.22.block_sparse_moe.experts.107.w3", "model.layers.22.block_sparse_moe.experts.108.w3", "model.layers.22.block_sparse_moe.experts.109.w3", "model.layers.22.block_sparse_moe.experts.110.w3", "model.layers.22.block_sparse_moe.experts.111.w3", "model.layers.22.block_sparse_moe.experts.112.w3", "model.layers.22.block_sparse_moe.experts.113.w3", "model.layers.22.block_sparse_moe.experts.114.w3", "model.layers.22.block_sparse_moe.experts.115.w3", "model.layers.22.block_sparse_moe.experts.116.w3", "model.layers.22.block_sparse_moe.experts.117.w3", "model.layers.22.block_sparse_moe.experts.118.w3", "model.layers.22.block_sparse_moe.experts.119.w3", "model.layers.22.block_sparse_moe.experts.120.w3", "model.layers.22.block_sparse_moe.experts.121.w3", "model.layers.22.block_sparse_moe.experts.122.w3", "model.layers.22.block_sparse_moe.experts.123.w3", "model.layers.22.block_sparse_moe.experts.124.w3", "model.layers.22.block_sparse_moe.experts.125.w3", "model.layers.22.block_sparse_moe.experts.126.w3", "model.layers.22.block_sparse_moe.experts.127.w3", "model.layers.22.block_sparse_moe.experts.128.w3", "model.layers.22.block_sparse_moe.experts.129.w3", "model.layers.22.block_sparse_moe.experts.130.w3", "model.layers.22.block_sparse_moe.experts.131.w3", "model.layers.22.block_sparse_moe.experts.132.w3", "model.layers.22.block_sparse_moe.experts.133.w3", "model.layers.22.block_sparse_moe.experts.134.w3", "model.layers.22.block_sparse_moe.experts.135.w3", "model.layers.22.block_sparse_moe.experts.136.w3", "model.layers.22.block_sparse_moe.experts.137.w3", "model.layers.22.block_sparse_moe.experts.138.w3", "model.layers.22.block_sparse_moe.experts.139.w3", "model.layers.22.block_sparse_moe.experts.140.w3", "model.layers.22.block_sparse_moe.experts.141.w3", "model.layers.22.block_sparse_moe.experts.142.w3", "model.layers.22.block_sparse_moe.experts.143.w3", "model.layers.22.block_sparse_moe.experts.144.w3", "model.layers.22.block_sparse_moe.experts.145.w3", "model.layers.22.block_sparse_moe.experts.146.w3", "model.layers.22.block_sparse_moe.experts.147.w3", "model.layers.22.block_sparse_moe.experts.148.w3", "model.layers.22.block_sparse_moe.experts.149.w3", "model.layers.22.block_sparse_moe.experts.150.w3", "model.layers.22.block_sparse_moe.experts.151.w3", "model.layers.22.block_sparse_moe.experts.152.w3", "model.layers.22.block_sparse_moe.experts.153.w3", "model.layers.22.block_sparse_moe.experts.154.w3", "model.layers.22.block_sparse_moe.experts.155.w3", "model.layers.22.block_sparse_moe.experts.156.w3", "model.layers.22.block_sparse_moe.experts.157.w3", "model.layers.22.block_sparse_moe.experts.158.w3", "model.layers.22.block_sparse_moe.experts.159.w3", "model.layers.22.block_sparse_moe.experts.160.w3", "model.layers.22.block_sparse_moe.experts.161.w3", "model.layers.22.block_sparse_moe.experts.162.w3", "model.layers.22.block_sparse_moe.experts.163.w3", "model.layers.22.block_sparse_moe.experts.164.w3", "model.layers.22.block_sparse_moe.experts.165.w3", "model.layers.22.block_sparse_moe.experts.166.w3", "model.layers.22.block_sparse_moe.experts.167.w3", "model.layers.22.block_sparse_moe.experts.168.w3", "model.layers.22.block_sparse_moe.experts.169.w3", "model.layers.22.block_sparse_moe.experts.170.w3", "model.layers.22.block_sparse_moe.experts.171.w3", "model.layers.22.block_sparse_moe.experts.172.w3", "model.layers.22.block_sparse_moe.experts.173.w3", "model.layers.22.block_sparse_moe.experts.174.w3", "model.layers.22.block_sparse_moe.experts.175.w3", "model.layers.22.block_sparse_moe.experts.176.w3", "model.layers.22.block_sparse_moe.experts.177.w3", "model.layers.22.block_sparse_moe.experts.178.w3", "model.layers.22.block_sparse_moe.experts.179.w3", "model.layers.22.block_sparse_moe.experts.180.w3", "model.layers.22.block_sparse_moe.experts.181.w3", "model.layers.22.block_sparse_moe.experts.182.w3", "model.layers.22.block_sparse_moe.experts.183.w3", "model.layers.22.block_sparse_moe.experts.184.w3", "model.layers.22.block_sparse_moe.experts.185.w3", "model.layers.22.block_sparse_moe.experts.186.w3", "model.layers.22.block_sparse_moe.experts.187.w3", "model.layers.22.block_sparse_moe.experts.188.w3", "model.layers.22.block_sparse_moe.experts.189.w3", "model.layers.22.block_sparse_moe.experts.190.w3", "model.layers.22.block_sparse_moe.experts.191.w3", "model.layers.22.block_sparse_moe.experts.192.w3", "model.layers.22.block_sparse_moe.experts.193.w3", "model.layers.22.block_sparse_moe.experts.194.w3", "model.layers.22.block_sparse_moe.experts.195.w3", "model.layers.22.block_sparse_moe.experts.196.w3", "model.layers.22.block_sparse_moe.experts.197.w3", "model.layers.22.block_sparse_moe.experts.198.w3", "model.layers.22.block_sparse_moe.experts.199.w3", "model.layers.22.block_sparse_moe.experts.200.w3", "model.layers.22.block_sparse_moe.experts.201.w3", "model.layers.22.block_sparse_moe.experts.202.w3", "model.layers.22.block_sparse_moe.experts.203.w3", "model.layers.22.block_sparse_moe.experts.204.w3", "model.layers.22.block_sparse_moe.experts.205.w3", "model.layers.22.block_sparse_moe.experts.206.w3", "model.layers.22.block_sparse_moe.experts.207.w3", "model.layers.22.block_sparse_moe.experts.208.w3", "model.layers.22.block_sparse_moe.experts.209.w3", "model.layers.22.block_sparse_moe.experts.210.w3", "model.layers.22.block_sparse_moe.experts.211.w3", "model.layers.22.block_sparse_moe.experts.212.w3", "model.layers.22.block_sparse_moe.experts.213.w3", "model.layers.22.block_sparse_moe.experts.214.w3", "model.layers.22.block_sparse_moe.experts.215.w3", "model.layers.22.block_sparse_moe.experts.216.w3", "model.layers.22.block_sparse_moe.experts.217.w3", "model.layers.22.block_sparse_moe.experts.218.w3", "model.layers.22.block_sparse_moe.experts.219.w3", "model.layers.22.block_sparse_moe.experts.220.w3", "model.layers.22.block_sparse_moe.experts.221.w3", "model.layers.22.block_sparse_moe.experts.222.w3", "model.layers.22.block_sparse_moe.experts.223.w3", "model.layers.22.block_sparse_moe.experts.224.w3", "model.layers.22.block_sparse_moe.experts.225.w3", "model.layers.22.block_sparse_moe.experts.226.w3", "model.layers.22.block_sparse_moe.experts.227.w3", "model.layers.22.block_sparse_moe.experts.228.w3", "model.layers.22.block_sparse_moe.experts.229.w3", "model.layers.22.block_sparse_moe.experts.230.w3", "model.layers.22.block_sparse_moe.experts.231.w3", "model.layers.22.block_sparse_moe.experts.232.w3", "model.layers.22.block_sparse_moe.experts.233.w3", "model.layers.22.block_sparse_moe.experts.234.w3", "model.layers.22.block_sparse_moe.experts.235.w3", "model.layers.22.block_sparse_moe.experts.236.w3", "model.layers.22.block_sparse_moe.experts.237.w3", "model.layers.22.block_sparse_moe.experts.238.w3", "model.layers.22.block_sparse_moe.experts.239.w3", "model.layers.22.block_sparse_moe.experts.240.w3", "model.layers.22.block_sparse_moe.experts.241.w3", "model.layers.22.block_sparse_moe.experts.242.w3", "model.layers.22.block_sparse_moe.experts.243.w3", "model.layers.22.block_sparse_moe.experts.244.w3", "model.layers.22.block_sparse_moe.experts.245.w3", "model.layers.22.block_sparse_moe.experts.246.w3", "model.layers.22.block_sparse_moe.experts.247.w3", "model.layers.22.block_sparse_moe.experts.248.w3", "model.layers.22.block_sparse_moe.experts.249.w3", "model.layers.22.block_sparse_moe.experts.250.w3", "model.layers.22.block_sparse_moe.experts.251.w3", "model.layers.22.block_sparse_moe.experts.252.w3", "model.layers.22.block_sparse_moe.experts.253.w3", "model.layers.22.block_sparse_moe.experts.254.w3", "model.layers.22.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00013569351285695752, "dbits": 2415919104 } ] }, { "idx": 114, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w2", "model.layers.22.block_sparse_moe.experts.1.w2", "model.layers.22.block_sparse_moe.experts.2.w2", "model.layers.22.block_sparse_moe.experts.3.w2", "model.layers.22.block_sparse_moe.experts.4.w2", "model.layers.22.block_sparse_moe.experts.5.w2", "model.layers.22.block_sparse_moe.experts.6.w2", "model.layers.22.block_sparse_moe.experts.7.w2", "model.layers.22.block_sparse_moe.experts.8.w2", "model.layers.22.block_sparse_moe.experts.9.w2", "model.layers.22.block_sparse_moe.experts.10.w2", "model.layers.22.block_sparse_moe.experts.11.w2", "model.layers.22.block_sparse_moe.experts.12.w2", "model.layers.22.block_sparse_moe.experts.13.w2", "model.layers.22.block_sparse_moe.experts.14.w2", "model.layers.22.block_sparse_moe.experts.15.w2", "model.layers.22.block_sparse_moe.experts.16.w2", "model.layers.22.block_sparse_moe.experts.17.w2", "model.layers.22.block_sparse_moe.experts.18.w2", "model.layers.22.block_sparse_moe.experts.19.w2", "model.layers.22.block_sparse_moe.experts.20.w2", "model.layers.22.block_sparse_moe.experts.21.w2", "model.layers.22.block_sparse_moe.experts.22.w2", "model.layers.22.block_sparse_moe.experts.23.w2", "model.layers.22.block_sparse_moe.experts.24.w2", "model.layers.22.block_sparse_moe.experts.25.w2", "model.layers.22.block_sparse_moe.experts.26.w2", "model.layers.22.block_sparse_moe.experts.27.w2", "model.layers.22.block_sparse_moe.experts.28.w2", "model.layers.22.block_sparse_moe.experts.29.w2", "model.layers.22.block_sparse_moe.experts.30.w2", "model.layers.22.block_sparse_moe.experts.31.w2", "model.layers.22.block_sparse_moe.experts.32.w2", "model.layers.22.block_sparse_moe.experts.33.w2", "model.layers.22.block_sparse_moe.experts.34.w2", "model.layers.22.block_sparse_moe.experts.35.w2", "model.layers.22.block_sparse_moe.experts.36.w2", "model.layers.22.block_sparse_moe.experts.37.w2", "model.layers.22.block_sparse_moe.experts.38.w2", "model.layers.22.block_sparse_moe.experts.39.w2", "model.layers.22.block_sparse_moe.experts.40.w2", "model.layers.22.block_sparse_moe.experts.41.w2", "model.layers.22.block_sparse_moe.experts.42.w2", "model.layers.22.block_sparse_moe.experts.43.w2", "model.layers.22.block_sparse_moe.experts.44.w2", "model.layers.22.block_sparse_moe.experts.45.w2", "model.layers.22.block_sparse_moe.experts.46.w2", "model.layers.22.block_sparse_moe.experts.47.w2", "model.layers.22.block_sparse_moe.experts.48.w2", "model.layers.22.block_sparse_moe.experts.49.w2", "model.layers.22.block_sparse_moe.experts.50.w2", "model.layers.22.block_sparse_moe.experts.51.w2", "model.layers.22.block_sparse_moe.experts.52.w2", "model.layers.22.block_sparse_moe.experts.53.w2", "model.layers.22.block_sparse_moe.experts.54.w2", "model.layers.22.block_sparse_moe.experts.55.w2", "model.layers.22.block_sparse_moe.experts.56.w2", "model.layers.22.block_sparse_moe.experts.57.w2", "model.layers.22.block_sparse_moe.experts.58.w2", "model.layers.22.block_sparse_moe.experts.59.w2", "model.layers.22.block_sparse_moe.experts.60.w2", "model.layers.22.block_sparse_moe.experts.61.w2", "model.layers.22.block_sparse_moe.experts.62.w2", "model.layers.22.block_sparse_moe.experts.63.w2", "model.layers.22.block_sparse_moe.experts.64.w2", "model.layers.22.block_sparse_moe.experts.65.w2", "model.layers.22.block_sparse_moe.experts.66.w2", "model.layers.22.block_sparse_moe.experts.67.w2", "model.layers.22.block_sparse_moe.experts.68.w2", "model.layers.22.block_sparse_moe.experts.69.w2", "model.layers.22.block_sparse_moe.experts.70.w2", "model.layers.22.block_sparse_moe.experts.71.w2", "model.layers.22.block_sparse_moe.experts.72.w2", "model.layers.22.block_sparse_moe.experts.73.w2", "model.layers.22.block_sparse_moe.experts.74.w2", "model.layers.22.block_sparse_moe.experts.75.w2", "model.layers.22.block_sparse_moe.experts.76.w2", "model.layers.22.block_sparse_moe.experts.77.w2", "model.layers.22.block_sparse_moe.experts.78.w2", "model.layers.22.block_sparse_moe.experts.79.w2", "model.layers.22.block_sparse_moe.experts.80.w2", "model.layers.22.block_sparse_moe.experts.81.w2", "model.layers.22.block_sparse_moe.experts.82.w2", "model.layers.22.block_sparse_moe.experts.83.w2", "model.layers.22.block_sparse_moe.experts.84.w2", "model.layers.22.block_sparse_moe.experts.85.w2", "model.layers.22.block_sparse_moe.experts.86.w2", "model.layers.22.block_sparse_moe.experts.87.w2", "model.layers.22.block_sparse_moe.experts.88.w2", "model.layers.22.block_sparse_moe.experts.89.w2", "model.layers.22.block_sparse_moe.experts.90.w2", "model.layers.22.block_sparse_moe.experts.91.w2", "model.layers.22.block_sparse_moe.experts.92.w2", "model.layers.22.block_sparse_moe.experts.93.w2", "model.layers.22.block_sparse_moe.experts.94.w2", "model.layers.22.block_sparse_moe.experts.95.w2", "model.layers.22.block_sparse_moe.experts.96.w2", "model.layers.22.block_sparse_moe.experts.97.w2", "model.layers.22.block_sparse_moe.experts.98.w2", "model.layers.22.block_sparse_moe.experts.99.w2", "model.layers.22.block_sparse_moe.experts.100.w2", "model.layers.22.block_sparse_moe.experts.101.w2", "model.layers.22.block_sparse_moe.experts.102.w2", "model.layers.22.block_sparse_moe.experts.103.w2", "model.layers.22.block_sparse_moe.experts.104.w2", "model.layers.22.block_sparse_moe.experts.105.w2", "model.layers.22.block_sparse_moe.experts.106.w2", "model.layers.22.block_sparse_moe.experts.107.w2", "model.layers.22.block_sparse_moe.experts.108.w2", "model.layers.22.block_sparse_moe.experts.109.w2", "model.layers.22.block_sparse_moe.experts.110.w2", "model.layers.22.block_sparse_moe.experts.111.w2", "model.layers.22.block_sparse_moe.experts.112.w2", "model.layers.22.block_sparse_moe.experts.113.w2", "model.layers.22.block_sparse_moe.experts.114.w2", "model.layers.22.block_sparse_moe.experts.115.w2", "model.layers.22.block_sparse_moe.experts.116.w2", "model.layers.22.block_sparse_moe.experts.117.w2", "model.layers.22.block_sparse_moe.experts.118.w2", "model.layers.22.block_sparse_moe.experts.119.w2", "model.layers.22.block_sparse_moe.experts.120.w2", "model.layers.22.block_sparse_moe.experts.121.w2", "model.layers.22.block_sparse_moe.experts.122.w2", "model.layers.22.block_sparse_moe.experts.123.w2", "model.layers.22.block_sparse_moe.experts.124.w2", "model.layers.22.block_sparse_moe.experts.125.w2", "model.layers.22.block_sparse_moe.experts.126.w2", "model.layers.22.block_sparse_moe.experts.127.w2", "model.layers.22.block_sparse_moe.experts.128.w2", "model.layers.22.block_sparse_moe.experts.129.w2", "model.layers.22.block_sparse_moe.experts.130.w2", "model.layers.22.block_sparse_moe.experts.131.w2", "model.layers.22.block_sparse_moe.experts.132.w2", "model.layers.22.block_sparse_moe.experts.133.w2", "model.layers.22.block_sparse_moe.experts.134.w2", "model.layers.22.block_sparse_moe.experts.135.w2", "model.layers.22.block_sparse_moe.experts.136.w2", "model.layers.22.block_sparse_moe.experts.137.w2", "model.layers.22.block_sparse_moe.experts.138.w2", "model.layers.22.block_sparse_moe.experts.139.w2", "model.layers.22.block_sparse_moe.experts.140.w2", "model.layers.22.block_sparse_moe.experts.141.w2", "model.layers.22.block_sparse_moe.experts.142.w2", "model.layers.22.block_sparse_moe.experts.143.w2", "model.layers.22.block_sparse_moe.experts.144.w2", "model.layers.22.block_sparse_moe.experts.145.w2", "model.layers.22.block_sparse_moe.experts.146.w2", "model.layers.22.block_sparse_moe.experts.147.w2", "model.layers.22.block_sparse_moe.experts.148.w2", "model.layers.22.block_sparse_moe.experts.149.w2", "model.layers.22.block_sparse_moe.experts.150.w2", "model.layers.22.block_sparse_moe.experts.151.w2", "model.layers.22.block_sparse_moe.experts.152.w2", "model.layers.22.block_sparse_moe.experts.153.w2", "model.layers.22.block_sparse_moe.experts.154.w2", "model.layers.22.block_sparse_moe.experts.155.w2", "model.layers.22.block_sparse_moe.experts.156.w2", "model.layers.22.block_sparse_moe.experts.157.w2", "model.layers.22.block_sparse_moe.experts.158.w2", "model.layers.22.block_sparse_moe.experts.159.w2", "model.layers.22.block_sparse_moe.experts.160.w2", "model.layers.22.block_sparse_moe.experts.161.w2", "model.layers.22.block_sparse_moe.experts.162.w2", "model.layers.22.block_sparse_moe.experts.163.w2", "model.layers.22.block_sparse_moe.experts.164.w2", "model.layers.22.block_sparse_moe.experts.165.w2", "model.layers.22.block_sparse_moe.experts.166.w2", "model.layers.22.block_sparse_moe.experts.167.w2", "model.layers.22.block_sparse_moe.experts.168.w2", "model.layers.22.block_sparse_moe.experts.169.w2", "model.layers.22.block_sparse_moe.experts.170.w2", "model.layers.22.block_sparse_moe.experts.171.w2", "model.layers.22.block_sparse_moe.experts.172.w2", "model.layers.22.block_sparse_moe.experts.173.w2", "model.layers.22.block_sparse_moe.experts.174.w2", "model.layers.22.block_sparse_moe.experts.175.w2", "model.layers.22.block_sparse_moe.experts.176.w2", "model.layers.22.block_sparse_moe.experts.177.w2", "model.layers.22.block_sparse_moe.experts.178.w2", "model.layers.22.block_sparse_moe.experts.179.w2", "model.layers.22.block_sparse_moe.experts.180.w2", "model.layers.22.block_sparse_moe.experts.181.w2", "model.layers.22.block_sparse_moe.experts.182.w2", "model.layers.22.block_sparse_moe.experts.183.w2", "model.layers.22.block_sparse_moe.experts.184.w2", "model.layers.22.block_sparse_moe.experts.185.w2", "model.layers.22.block_sparse_moe.experts.186.w2", "model.layers.22.block_sparse_moe.experts.187.w2", "model.layers.22.block_sparse_moe.experts.188.w2", "model.layers.22.block_sparse_moe.experts.189.w2", "model.layers.22.block_sparse_moe.experts.190.w2", "model.layers.22.block_sparse_moe.experts.191.w2", "model.layers.22.block_sparse_moe.experts.192.w2", "model.layers.22.block_sparse_moe.experts.193.w2", "model.layers.22.block_sparse_moe.experts.194.w2", "model.layers.22.block_sparse_moe.experts.195.w2", "model.layers.22.block_sparse_moe.experts.196.w2", "model.layers.22.block_sparse_moe.experts.197.w2", "model.layers.22.block_sparse_moe.experts.198.w2", "model.layers.22.block_sparse_moe.experts.199.w2", "model.layers.22.block_sparse_moe.experts.200.w2", "model.layers.22.block_sparse_moe.experts.201.w2", "model.layers.22.block_sparse_moe.experts.202.w2", "model.layers.22.block_sparse_moe.experts.203.w2", "model.layers.22.block_sparse_moe.experts.204.w2", "model.layers.22.block_sparse_moe.experts.205.w2", "model.layers.22.block_sparse_moe.experts.206.w2", "model.layers.22.block_sparse_moe.experts.207.w2", "model.layers.22.block_sparse_moe.experts.208.w2", "model.layers.22.block_sparse_moe.experts.209.w2", "model.layers.22.block_sparse_moe.experts.210.w2", "model.layers.22.block_sparse_moe.experts.211.w2", "model.layers.22.block_sparse_moe.experts.212.w2", "model.layers.22.block_sparse_moe.experts.213.w2", "model.layers.22.block_sparse_moe.experts.214.w2", "model.layers.22.block_sparse_moe.experts.215.w2", "model.layers.22.block_sparse_moe.experts.216.w2", "model.layers.22.block_sparse_moe.experts.217.w2", "model.layers.22.block_sparse_moe.experts.218.w2", "model.layers.22.block_sparse_moe.experts.219.w2", "model.layers.22.block_sparse_moe.experts.220.w2", "model.layers.22.block_sparse_moe.experts.221.w2", "model.layers.22.block_sparse_moe.experts.222.w2", "model.layers.22.block_sparse_moe.experts.223.w2", "model.layers.22.block_sparse_moe.experts.224.w2", "model.layers.22.block_sparse_moe.experts.225.w2", "model.layers.22.block_sparse_moe.experts.226.w2", "model.layers.22.block_sparse_moe.experts.227.w2", "model.layers.22.block_sparse_moe.experts.228.w2", "model.layers.22.block_sparse_moe.experts.229.w2", "model.layers.22.block_sparse_moe.experts.230.w2", "model.layers.22.block_sparse_moe.experts.231.w2", "model.layers.22.block_sparse_moe.experts.232.w2", "model.layers.22.block_sparse_moe.experts.233.w2", "model.layers.22.block_sparse_moe.experts.234.w2", "model.layers.22.block_sparse_moe.experts.235.w2", "model.layers.22.block_sparse_moe.experts.236.w2", "model.layers.22.block_sparse_moe.experts.237.w2", "model.layers.22.block_sparse_moe.experts.238.w2", "model.layers.22.block_sparse_moe.experts.239.w2", "model.layers.22.block_sparse_moe.experts.240.w2", "model.layers.22.block_sparse_moe.experts.241.w2", "model.layers.22.block_sparse_moe.experts.242.w2", "model.layers.22.block_sparse_moe.experts.243.w2", "model.layers.22.block_sparse_moe.experts.244.w2", "model.layers.22.block_sparse_moe.experts.245.w2", "model.layers.22.block_sparse_moe.experts.246.w2", "model.layers.22.block_sparse_moe.experts.247.w2", "model.layers.22.block_sparse_moe.experts.248.w2", "model.layers.22.block_sparse_moe.experts.249.w2", "model.layers.22.block_sparse_moe.experts.250.w2", "model.layers.22.block_sparse_moe.experts.251.w2", "model.layers.22.block_sparse_moe.experts.252.w2", "model.layers.22.block_sparse_moe.experts.253.w2", "model.layers.22.block_sparse_moe.experts.254.w2", "model.layers.22.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -5.132947117091058e-05, "dbits": 1207959552 } ] }, { "idx": 115, "layers": [ "model.layers.23.self_attn.q_proj" ], "candidates": [ { "dkld": -4.078708589076718e-05, "dbits": 18874368 } ] }, { "idx": 116, "layers": [ "model.layers.23.self_attn.k_proj", "model.layers.23.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0015734381973743355, "dbits": 6291456 } ] }, { "idx": 117, "layers": [ "model.layers.23.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0003855802118778312, "dbits": 18874368 } ] }, { "idx": 118, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w1", "model.layers.23.block_sparse_moe.experts.1.w1", "model.layers.23.block_sparse_moe.experts.2.w1", "model.layers.23.block_sparse_moe.experts.3.w1", "model.layers.23.block_sparse_moe.experts.4.w1", "model.layers.23.block_sparse_moe.experts.5.w1", "model.layers.23.block_sparse_moe.experts.6.w1", "model.layers.23.block_sparse_moe.experts.7.w1", "model.layers.23.block_sparse_moe.experts.8.w1", "model.layers.23.block_sparse_moe.experts.9.w1", "model.layers.23.block_sparse_moe.experts.10.w1", "model.layers.23.block_sparse_moe.experts.11.w1", "model.layers.23.block_sparse_moe.experts.12.w1", "model.layers.23.block_sparse_moe.experts.13.w1", "model.layers.23.block_sparse_moe.experts.14.w1", "model.layers.23.block_sparse_moe.experts.15.w1", "model.layers.23.block_sparse_moe.experts.16.w1", "model.layers.23.block_sparse_moe.experts.17.w1", "model.layers.23.block_sparse_moe.experts.18.w1", "model.layers.23.block_sparse_moe.experts.19.w1", "model.layers.23.block_sparse_moe.experts.20.w1", "model.layers.23.block_sparse_moe.experts.21.w1", "model.layers.23.block_sparse_moe.experts.22.w1", "model.layers.23.block_sparse_moe.experts.23.w1", "model.layers.23.block_sparse_moe.experts.24.w1", "model.layers.23.block_sparse_moe.experts.25.w1", "model.layers.23.block_sparse_moe.experts.26.w1", "model.layers.23.block_sparse_moe.experts.27.w1", "model.layers.23.block_sparse_moe.experts.28.w1", "model.layers.23.block_sparse_moe.experts.29.w1", "model.layers.23.block_sparse_moe.experts.30.w1", "model.layers.23.block_sparse_moe.experts.31.w1", "model.layers.23.block_sparse_moe.experts.32.w1", "model.layers.23.block_sparse_moe.experts.33.w1", "model.layers.23.block_sparse_moe.experts.34.w1", "model.layers.23.block_sparse_moe.experts.35.w1", "model.layers.23.block_sparse_moe.experts.36.w1", "model.layers.23.block_sparse_moe.experts.37.w1", "model.layers.23.block_sparse_moe.experts.38.w1", "model.layers.23.block_sparse_moe.experts.39.w1", "model.layers.23.block_sparse_moe.experts.40.w1", "model.layers.23.block_sparse_moe.experts.41.w1", "model.layers.23.block_sparse_moe.experts.42.w1", "model.layers.23.block_sparse_moe.experts.43.w1", "model.layers.23.block_sparse_moe.experts.44.w1", "model.layers.23.block_sparse_moe.experts.45.w1", "model.layers.23.block_sparse_moe.experts.46.w1", "model.layers.23.block_sparse_moe.experts.47.w1", "model.layers.23.block_sparse_moe.experts.48.w1", "model.layers.23.block_sparse_moe.experts.49.w1", "model.layers.23.block_sparse_moe.experts.50.w1", "model.layers.23.block_sparse_moe.experts.51.w1", "model.layers.23.block_sparse_moe.experts.52.w1", "model.layers.23.block_sparse_moe.experts.53.w1", "model.layers.23.block_sparse_moe.experts.54.w1", "model.layers.23.block_sparse_moe.experts.55.w1", "model.layers.23.block_sparse_moe.experts.56.w1", "model.layers.23.block_sparse_moe.experts.57.w1", "model.layers.23.block_sparse_moe.experts.58.w1", "model.layers.23.block_sparse_moe.experts.59.w1", "model.layers.23.block_sparse_moe.experts.60.w1", "model.layers.23.block_sparse_moe.experts.61.w1", "model.layers.23.block_sparse_moe.experts.62.w1", "model.layers.23.block_sparse_moe.experts.63.w1", "model.layers.23.block_sparse_moe.experts.64.w1", "model.layers.23.block_sparse_moe.experts.65.w1", "model.layers.23.block_sparse_moe.experts.66.w1", "model.layers.23.block_sparse_moe.experts.67.w1", "model.layers.23.block_sparse_moe.experts.68.w1", "model.layers.23.block_sparse_moe.experts.69.w1", "model.layers.23.block_sparse_moe.experts.70.w1", "model.layers.23.block_sparse_moe.experts.71.w1", "model.layers.23.block_sparse_moe.experts.72.w1", "model.layers.23.block_sparse_moe.experts.73.w1", "model.layers.23.block_sparse_moe.experts.74.w1", "model.layers.23.block_sparse_moe.experts.75.w1", "model.layers.23.block_sparse_moe.experts.76.w1", "model.layers.23.block_sparse_moe.experts.77.w1", "model.layers.23.block_sparse_moe.experts.78.w1", "model.layers.23.block_sparse_moe.experts.79.w1", "model.layers.23.block_sparse_moe.experts.80.w1", "model.layers.23.block_sparse_moe.experts.81.w1", "model.layers.23.block_sparse_moe.experts.82.w1", "model.layers.23.block_sparse_moe.experts.83.w1", "model.layers.23.block_sparse_moe.experts.84.w1", "model.layers.23.block_sparse_moe.experts.85.w1", "model.layers.23.block_sparse_moe.experts.86.w1", "model.layers.23.block_sparse_moe.experts.87.w1", "model.layers.23.block_sparse_moe.experts.88.w1", "model.layers.23.block_sparse_moe.experts.89.w1", "model.layers.23.block_sparse_moe.experts.90.w1", "model.layers.23.block_sparse_moe.experts.91.w1", "model.layers.23.block_sparse_moe.experts.92.w1", "model.layers.23.block_sparse_moe.experts.93.w1", "model.layers.23.block_sparse_moe.experts.94.w1", "model.layers.23.block_sparse_moe.experts.95.w1", "model.layers.23.block_sparse_moe.experts.96.w1", "model.layers.23.block_sparse_moe.experts.97.w1", "model.layers.23.block_sparse_moe.experts.98.w1", "model.layers.23.block_sparse_moe.experts.99.w1", "model.layers.23.block_sparse_moe.experts.100.w1", "model.layers.23.block_sparse_moe.experts.101.w1", "model.layers.23.block_sparse_moe.experts.102.w1", "model.layers.23.block_sparse_moe.experts.103.w1", "model.layers.23.block_sparse_moe.experts.104.w1", "model.layers.23.block_sparse_moe.experts.105.w1", "model.layers.23.block_sparse_moe.experts.106.w1", "model.layers.23.block_sparse_moe.experts.107.w1", "model.layers.23.block_sparse_moe.experts.108.w1", "model.layers.23.block_sparse_moe.experts.109.w1", "model.layers.23.block_sparse_moe.experts.110.w1", "model.layers.23.block_sparse_moe.experts.111.w1", "model.layers.23.block_sparse_moe.experts.112.w1", "model.layers.23.block_sparse_moe.experts.113.w1", "model.layers.23.block_sparse_moe.experts.114.w1", "model.layers.23.block_sparse_moe.experts.115.w1", "model.layers.23.block_sparse_moe.experts.116.w1", "model.layers.23.block_sparse_moe.experts.117.w1", "model.layers.23.block_sparse_moe.experts.118.w1", "model.layers.23.block_sparse_moe.experts.119.w1", "model.layers.23.block_sparse_moe.experts.120.w1", "model.layers.23.block_sparse_moe.experts.121.w1", "model.layers.23.block_sparse_moe.experts.122.w1", "model.layers.23.block_sparse_moe.experts.123.w1", "model.layers.23.block_sparse_moe.experts.124.w1", "model.layers.23.block_sparse_moe.experts.125.w1", "model.layers.23.block_sparse_moe.experts.126.w1", "model.layers.23.block_sparse_moe.experts.127.w1", "model.layers.23.block_sparse_moe.experts.128.w1", "model.layers.23.block_sparse_moe.experts.129.w1", "model.layers.23.block_sparse_moe.experts.130.w1", "model.layers.23.block_sparse_moe.experts.131.w1", "model.layers.23.block_sparse_moe.experts.132.w1", "model.layers.23.block_sparse_moe.experts.133.w1", "model.layers.23.block_sparse_moe.experts.134.w1", "model.layers.23.block_sparse_moe.experts.135.w1", "model.layers.23.block_sparse_moe.experts.136.w1", "model.layers.23.block_sparse_moe.experts.137.w1", "model.layers.23.block_sparse_moe.experts.138.w1", "model.layers.23.block_sparse_moe.experts.139.w1", "model.layers.23.block_sparse_moe.experts.140.w1", "model.layers.23.block_sparse_moe.experts.141.w1", "model.layers.23.block_sparse_moe.experts.142.w1", "model.layers.23.block_sparse_moe.experts.143.w1", "model.layers.23.block_sparse_moe.experts.144.w1", "model.layers.23.block_sparse_moe.experts.145.w1", "model.layers.23.block_sparse_moe.experts.146.w1", "model.layers.23.block_sparse_moe.experts.147.w1", "model.layers.23.block_sparse_moe.experts.148.w1", "model.layers.23.block_sparse_moe.experts.149.w1", "model.layers.23.block_sparse_moe.experts.150.w1", "model.layers.23.block_sparse_moe.experts.151.w1", "model.layers.23.block_sparse_moe.experts.152.w1", "model.layers.23.block_sparse_moe.experts.153.w1", "model.layers.23.block_sparse_moe.experts.154.w1", "model.layers.23.block_sparse_moe.experts.155.w1", "model.layers.23.block_sparse_moe.experts.156.w1", "model.layers.23.block_sparse_moe.experts.157.w1", "model.layers.23.block_sparse_moe.experts.158.w1", "model.layers.23.block_sparse_moe.experts.159.w1", "model.layers.23.block_sparse_moe.experts.160.w1", "model.layers.23.block_sparse_moe.experts.161.w1", "model.layers.23.block_sparse_moe.experts.162.w1", "model.layers.23.block_sparse_moe.experts.163.w1", "model.layers.23.block_sparse_moe.experts.164.w1", "model.layers.23.block_sparse_moe.experts.165.w1", "model.layers.23.block_sparse_moe.experts.166.w1", "model.layers.23.block_sparse_moe.experts.167.w1", "model.layers.23.block_sparse_moe.experts.168.w1", "model.layers.23.block_sparse_moe.experts.169.w1", "model.layers.23.block_sparse_moe.experts.170.w1", "model.layers.23.block_sparse_moe.experts.171.w1", "model.layers.23.block_sparse_moe.experts.172.w1", "model.layers.23.block_sparse_moe.experts.173.w1", "model.layers.23.block_sparse_moe.experts.174.w1", "model.layers.23.block_sparse_moe.experts.175.w1", "model.layers.23.block_sparse_moe.experts.176.w1", "model.layers.23.block_sparse_moe.experts.177.w1", "model.layers.23.block_sparse_moe.experts.178.w1", "model.layers.23.block_sparse_moe.experts.179.w1", "model.layers.23.block_sparse_moe.experts.180.w1", "model.layers.23.block_sparse_moe.experts.181.w1", "model.layers.23.block_sparse_moe.experts.182.w1", "model.layers.23.block_sparse_moe.experts.183.w1", "model.layers.23.block_sparse_moe.experts.184.w1", "model.layers.23.block_sparse_moe.experts.185.w1", "model.layers.23.block_sparse_moe.experts.186.w1", "model.layers.23.block_sparse_moe.experts.187.w1", "model.layers.23.block_sparse_moe.experts.188.w1", "model.layers.23.block_sparse_moe.experts.189.w1", "model.layers.23.block_sparse_moe.experts.190.w1", "model.layers.23.block_sparse_moe.experts.191.w1", "model.layers.23.block_sparse_moe.experts.192.w1", "model.layers.23.block_sparse_moe.experts.193.w1", "model.layers.23.block_sparse_moe.experts.194.w1", "model.layers.23.block_sparse_moe.experts.195.w1", "model.layers.23.block_sparse_moe.experts.196.w1", "model.layers.23.block_sparse_moe.experts.197.w1", "model.layers.23.block_sparse_moe.experts.198.w1", "model.layers.23.block_sparse_moe.experts.199.w1", "model.layers.23.block_sparse_moe.experts.200.w1", "model.layers.23.block_sparse_moe.experts.201.w1", "model.layers.23.block_sparse_moe.experts.202.w1", "model.layers.23.block_sparse_moe.experts.203.w1", "model.layers.23.block_sparse_moe.experts.204.w1", "model.layers.23.block_sparse_moe.experts.205.w1", "model.layers.23.block_sparse_moe.experts.206.w1", "model.layers.23.block_sparse_moe.experts.207.w1", "model.layers.23.block_sparse_moe.experts.208.w1", "model.layers.23.block_sparse_moe.experts.209.w1", "model.layers.23.block_sparse_moe.experts.210.w1", "model.layers.23.block_sparse_moe.experts.211.w1", "model.layers.23.block_sparse_moe.experts.212.w1", "model.layers.23.block_sparse_moe.experts.213.w1", "model.layers.23.block_sparse_moe.experts.214.w1", "model.layers.23.block_sparse_moe.experts.215.w1", "model.layers.23.block_sparse_moe.experts.216.w1", "model.layers.23.block_sparse_moe.experts.217.w1", "model.layers.23.block_sparse_moe.experts.218.w1", "model.layers.23.block_sparse_moe.experts.219.w1", "model.layers.23.block_sparse_moe.experts.220.w1", "model.layers.23.block_sparse_moe.experts.221.w1", "model.layers.23.block_sparse_moe.experts.222.w1", "model.layers.23.block_sparse_moe.experts.223.w1", "model.layers.23.block_sparse_moe.experts.224.w1", "model.layers.23.block_sparse_moe.experts.225.w1", "model.layers.23.block_sparse_moe.experts.226.w1", "model.layers.23.block_sparse_moe.experts.227.w1", "model.layers.23.block_sparse_moe.experts.228.w1", "model.layers.23.block_sparse_moe.experts.229.w1", "model.layers.23.block_sparse_moe.experts.230.w1", "model.layers.23.block_sparse_moe.experts.231.w1", "model.layers.23.block_sparse_moe.experts.232.w1", "model.layers.23.block_sparse_moe.experts.233.w1", "model.layers.23.block_sparse_moe.experts.234.w1", "model.layers.23.block_sparse_moe.experts.235.w1", "model.layers.23.block_sparse_moe.experts.236.w1", "model.layers.23.block_sparse_moe.experts.237.w1", "model.layers.23.block_sparse_moe.experts.238.w1", "model.layers.23.block_sparse_moe.experts.239.w1", "model.layers.23.block_sparse_moe.experts.240.w1", "model.layers.23.block_sparse_moe.experts.241.w1", "model.layers.23.block_sparse_moe.experts.242.w1", "model.layers.23.block_sparse_moe.experts.243.w1", "model.layers.23.block_sparse_moe.experts.244.w1", "model.layers.23.block_sparse_moe.experts.245.w1", "model.layers.23.block_sparse_moe.experts.246.w1", "model.layers.23.block_sparse_moe.experts.247.w1", "model.layers.23.block_sparse_moe.experts.248.w1", "model.layers.23.block_sparse_moe.experts.249.w1", "model.layers.23.block_sparse_moe.experts.250.w1", "model.layers.23.block_sparse_moe.experts.251.w1", "model.layers.23.block_sparse_moe.experts.252.w1", "model.layers.23.block_sparse_moe.experts.253.w1", "model.layers.23.block_sparse_moe.experts.254.w1", "model.layers.23.block_sparse_moe.experts.255.w1", "model.layers.23.block_sparse_moe.experts.0.w3", "model.layers.23.block_sparse_moe.experts.1.w3", "model.layers.23.block_sparse_moe.experts.2.w3", "model.layers.23.block_sparse_moe.experts.3.w3", "model.layers.23.block_sparse_moe.experts.4.w3", "model.layers.23.block_sparse_moe.experts.5.w3", "model.layers.23.block_sparse_moe.experts.6.w3", "model.layers.23.block_sparse_moe.experts.7.w3", "model.layers.23.block_sparse_moe.experts.8.w3", "model.layers.23.block_sparse_moe.experts.9.w3", "model.layers.23.block_sparse_moe.experts.10.w3", "model.layers.23.block_sparse_moe.experts.11.w3", "model.layers.23.block_sparse_moe.experts.12.w3", "model.layers.23.block_sparse_moe.experts.13.w3", "model.layers.23.block_sparse_moe.experts.14.w3", "model.layers.23.block_sparse_moe.experts.15.w3", "model.layers.23.block_sparse_moe.experts.16.w3", "model.layers.23.block_sparse_moe.experts.17.w3", "model.layers.23.block_sparse_moe.experts.18.w3", "model.layers.23.block_sparse_moe.experts.19.w3", "model.layers.23.block_sparse_moe.experts.20.w3", "model.layers.23.block_sparse_moe.experts.21.w3", "model.layers.23.block_sparse_moe.experts.22.w3", "model.layers.23.block_sparse_moe.experts.23.w3", "model.layers.23.block_sparse_moe.experts.24.w3", "model.layers.23.block_sparse_moe.experts.25.w3", "model.layers.23.block_sparse_moe.experts.26.w3", "model.layers.23.block_sparse_moe.experts.27.w3", "model.layers.23.block_sparse_moe.experts.28.w3", "model.layers.23.block_sparse_moe.experts.29.w3", "model.layers.23.block_sparse_moe.experts.30.w3", "model.layers.23.block_sparse_moe.experts.31.w3", "model.layers.23.block_sparse_moe.experts.32.w3", "model.layers.23.block_sparse_moe.experts.33.w3", "model.layers.23.block_sparse_moe.experts.34.w3", "model.layers.23.block_sparse_moe.experts.35.w3", "model.layers.23.block_sparse_moe.experts.36.w3", "model.layers.23.block_sparse_moe.experts.37.w3", "model.layers.23.block_sparse_moe.experts.38.w3", "model.layers.23.block_sparse_moe.experts.39.w3", "model.layers.23.block_sparse_moe.experts.40.w3", "model.layers.23.block_sparse_moe.experts.41.w3", "model.layers.23.block_sparse_moe.experts.42.w3", "model.layers.23.block_sparse_moe.experts.43.w3", "model.layers.23.block_sparse_moe.experts.44.w3", "model.layers.23.block_sparse_moe.experts.45.w3", "model.layers.23.block_sparse_moe.experts.46.w3", "model.layers.23.block_sparse_moe.experts.47.w3", "model.layers.23.block_sparse_moe.experts.48.w3", "model.layers.23.block_sparse_moe.experts.49.w3", "model.layers.23.block_sparse_moe.experts.50.w3", "model.layers.23.block_sparse_moe.experts.51.w3", "model.layers.23.block_sparse_moe.experts.52.w3", "model.layers.23.block_sparse_moe.experts.53.w3", "model.layers.23.block_sparse_moe.experts.54.w3", "model.layers.23.block_sparse_moe.experts.55.w3", "model.layers.23.block_sparse_moe.experts.56.w3", "model.layers.23.block_sparse_moe.experts.57.w3", "model.layers.23.block_sparse_moe.experts.58.w3", "model.layers.23.block_sparse_moe.experts.59.w3", "model.layers.23.block_sparse_moe.experts.60.w3", "model.layers.23.block_sparse_moe.experts.61.w3", "model.layers.23.block_sparse_moe.experts.62.w3", "model.layers.23.block_sparse_moe.experts.63.w3", "model.layers.23.block_sparse_moe.experts.64.w3", "model.layers.23.block_sparse_moe.experts.65.w3", "model.layers.23.block_sparse_moe.experts.66.w3", "model.layers.23.block_sparse_moe.experts.67.w3", "model.layers.23.block_sparse_moe.experts.68.w3", "model.layers.23.block_sparse_moe.experts.69.w3", "model.layers.23.block_sparse_moe.experts.70.w3", "model.layers.23.block_sparse_moe.experts.71.w3", "model.layers.23.block_sparse_moe.experts.72.w3", "model.layers.23.block_sparse_moe.experts.73.w3", "model.layers.23.block_sparse_moe.experts.74.w3", "model.layers.23.block_sparse_moe.experts.75.w3", "model.layers.23.block_sparse_moe.experts.76.w3", "model.layers.23.block_sparse_moe.experts.77.w3", "model.layers.23.block_sparse_moe.experts.78.w3", "model.layers.23.block_sparse_moe.experts.79.w3", "model.layers.23.block_sparse_moe.experts.80.w3", "model.layers.23.block_sparse_moe.experts.81.w3", "model.layers.23.block_sparse_moe.experts.82.w3", "model.layers.23.block_sparse_moe.experts.83.w3", "model.layers.23.block_sparse_moe.experts.84.w3", "model.layers.23.block_sparse_moe.experts.85.w3", "model.layers.23.block_sparse_moe.experts.86.w3", "model.layers.23.block_sparse_moe.experts.87.w3", "model.layers.23.block_sparse_moe.experts.88.w3", "model.layers.23.block_sparse_moe.experts.89.w3", "model.layers.23.block_sparse_moe.experts.90.w3", "model.layers.23.block_sparse_moe.experts.91.w3", "model.layers.23.block_sparse_moe.experts.92.w3", "model.layers.23.block_sparse_moe.experts.93.w3", "model.layers.23.block_sparse_moe.experts.94.w3", "model.layers.23.block_sparse_moe.experts.95.w3", "model.layers.23.block_sparse_moe.experts.96.w3", "model.layers.23.block_sparse_moe.experts.97.w3", "model.layers.23.block_sparse_moe.experts.98.w3", "model.layers.23.block_sparse_moe.experts.99.w3", "model.layers.23.block_sparse_moe.experts.100.w3", "model.layers.23.block_sparse_moe.experts.101.w3", "model.layers.23.block_sparse_moe.experts.102.w3", "model.layers.23.block_sparse_moe.experts.103.w3", "model.layers.23.block_sparse_moe.experts.104.w3", "model.layers.23.block_sparse_moe.experts.105.w3", "model.layers.23.block_sparse_moe.experts.106.w3", "model.layers.23.block_sparse_moe.experts.107.w3", "model.layers.23.block_sparse_moe.experts.108.w3", "model.layers.23.block_sparse_moe.experts.109.w3", "model.layers.23.block_sparse_moe.experts.110.w3", "model.layers.23.block_sparse_moe.experts.111.w3", "model.layers.23.block_sparse_moe.experts.112.w3", "model.layers.23.block_sparse_moe.experts.113.w3", "model.layers.23.block_sparse_moe.experts.114.w3", "model.layers.23.block_sparse_moe.experts.115.w3", "model.layers.23.block_sparse_moe.experts.116.w3", "model.layers.23.block_sparse_moe.experts.117.w3", "model.layers.23.block_sparse_moe.experts.118.w3", "model.layers.23.block_sparse_moe.experts.119.w3", "model.layers.23.block_sparse_moe.experts.120.w3", "model.layers.23.block_sparse_moe.experts.121.w3", "model.layers.23.block_sparse_moe.experts.122.w3", "model.layers.23.block_sparse_moe.experts.123.w3", "model.layers.23.block_sparse_moe.experts.124.w3", "model.layers.23.block_sparse_moe.experts.125.w3", "model.layers.23.block_sparse_moe.experts.126.w3", "model.layers.23.block_sparse_moe.experts.127.w3", "model.layers.23.block_sparse_moe.experts.128.w3", "model.layers.23.block_sparse_moe.experts.129.w3", "model.layers.23.block_sparse_moe.experts.130.w3", "model.layers.23.block_sparse_moe.experts.131.w3", "model.layers.23.block_sparse_moe.experts.132.w3", "model.layers.23.block_sparse_moe.experts.133.w3", "model.layers.23.block_sparse_moe.experts.134.w3", "model.layers.23.block_sparse_moe.experts.135.w3", "model.layers.23.block_sparse_moe.experts.136.w3", "model.layers.23.block_sparse_moe.experts.137.w3", "model.layers.23.block_sparse_moe.experts.138.w3", "model.layers.23.block_sparse_moe.experts.139.w3", "model.layers.23.block_sparse_moe.experts.140.w3", "model.layers.23.block_sparse_moe.experts.141.w3", "model.layers.23.block_sparse_moe.experts.142.w3", "model.layers.23.block_sparse_moe.experts.143.w3", "model.layers.23.block_sparse_moe.experts.144.w3", "model.layers.23.block_sparse_moe.experts.145.w3", "model.layers.23.block_sparse_moe.experts.146.w3", "model.layers.23.block_sparse_moe.experts.147.w3", "model.layers.23.block_sparse_moe.experts.148.w3", "model.layers.23.block_sparse_moe.experts.149.w3", "model.layers.23.block_sparse_moe.experts.150.w3", "model.layers.23.block_sparse_moe.experts.151.w3", "model.layers.23.block_sparse_moe.experts.152.w3", "model.layers.23.block_sparse_moe.experts.153.w3", "model.layers.23.block_sparse_moe.experts.154.w3", "model.layers.23.block_sparse_moe.experts.155.w3", "model.layers.23.block_sparse_moe.experts.156.w3", "model.layers.23.block_sparse_moe.experts.157.w3", "model.layers.23.block_sparse_moe.experts.158.w3", "model.layers.23.block_sparse_moe.experts.159.w3", "model.layers.23.block_sparse_moe.experts.160.w3", "model.layers.23.block_sparse_moe.experts.161.w3", "model.layers.23.block_sparse_moe.experts.162.w3", "model.layers.23.block_sparse_moe.experts.163.w3", "model.layers.23.block_sparse_moe.experts.164.w3", "model.layers.23.block_sparse_moe.experts.165.w3", "model.layers.23.block_sparse_moe.experts.166.w3", "model.layers.23.block_sparse_moe.experts.167.w3", "model.layers.23.block_sparse_moe.experts.168.w3", "model.layers.23.block_sparse_moe.experts.169.w3", "model.layers.23.block_sparse_moe.experts.170.w3", "model.layers.23.block_sparse_moe.experts.171.w3", "model.layers.23.block_sparse_moe.experts.172.w3", "model.layers.23.block_sparse_moe.experts.173.w3", "model.layers.23.block_sparse_moe.experts.174.w3", "model.layers.23.block_sparse_moe.experts.175.w3", "model.layers.23.block_sparse_moe.experts.176.w3", "model.layers.23.block_sparse_moe.experts.177.w3", "model.layers.23.block_sparse_moe.experts.178.w3", "model.layers.23.block_sparse_moe.experts.179.w3", "model.layers.23.block_sparse_moe.experts.180.w3", "model.layers.23.block_sparse_moe.experts.181.w3", "model.layers.23.block_sparse_moe.experts.182.w3", "model.layers.23.block_sparse_moe.experts.183.w3", "model.layers.23.block_sparse_moe.experts.184.w3", "model.layers.23.block_sparse_moe.experts.185.w3", "model.layers.23.block_sparse_moe.experts.186.w3", "model.layers.23.block_sparse_moe.experts.187.w3", "model.layers.23.block_sparse_moe.experts.188.w3", "model.layers.23.block_sparse_moe.experts.189.w3", "model.layers.23.block_sparse_moe.experts.190.w3", "model.layers.23.block_sparse_moe.experts.191.w3", "model.layers.23.block_sparse_moe.experts.192.w3", "model.layers.23.block_sparse_moe.experts.193.w3", "model.layers.23.block_sparse_moe.experts.194.w3", "model.layers.23.block_sparse_moe.experts.195.w3", "model.layers.23.block_sparse_moe.experts.196.w3", "model.layers.23.block_sparse_moe.experts.197.w3", "model.layers.23.block_sparse_moe.experts.198.w3", "model.layers.23.block_sparse_moe.experts.199.w3", "model.layers.23.block_sparse_moe.experts.200.w3", "model.layers.23.block_sparse_moe.experts.201.w3", "model.layers.23.block_sparse_moe.experts.202.w3", "model.layers.23.block_sparse_moe.experts.203.w3", "model.layers.23.block_sparse_moe.experts.204.w3", "model.layers.23.block_sparse_moe.experts.205.w3", "model.layers.23.block_sparse_moe.experts.206.w3", "model.layers.23.block_sparse_moe.experts.207.w3", "model.layers.23.block_sparse_moe.experts.208.w3", "model.layers.23.block_sparse_moe.experts.209.w3", "model.layers.23.block_sparse_moe.experts.210.w3", "model.layers.23.block_sparse_moe.experts.211.w3", "model.layers.23.block_sparse_moe.experts.212.w3", "model.layers.23.block_sparse_moe.experts.213.w3", "model.layers.23.block_sparse_moe.experts.214.w3", "model.layers.23.block_sparse_moe.experts.215.w3", "model.layers.23.block_sparse_moe.experts.216.w3", "model.layers.23.block_sparse_moe.experts.217.w3", "model.layers.23.block_sparse_moe.experts.218.w3", "model.layers.23.block_sparse_moe.experts.219.w3", "model.layers.23.block_sparse_moe.experts.220.w3", "model.layers.23.block_sparse_moe.experts.221.w3", "model.layers.23.block_sparse_moe.experts.222.w3", "model.layers.23.block_sparse_moe.experts.223.w3", "model.layers.23.block_sparse_moe.experts.224.w3", "model.layers.23.block_sparse_moe.experts.225.w3", "model.layers.23.block_sparse_moe.experts.226.w3", "model.layers.23.block_sparse_moe.experts.227.w3", "model.layers.23.block_sparse_moe.experts.228.w3", "model.layers.23.block_sparse_moe.experts.229.w3", "model.layers.23.block_sparse_moe.experts.230.w3", "model.layers.23.block_sparse_moe.experts.231.w3", "model.layers.23.block_sparse_moe.experts.232.w3", "model.layers.23.block_sparse_moe.experts.233.w3", "model.layers.23.block_sparse_moe.experts.234.w3", "model.layers.23.block_sparse_moe.experts.235.w3", "model.layers.23.block_sparse_moe.experts.236.w3", "model.layers.23.block_sparse_moe.experts.237.w3", "model.layers.23.block_sparse_moe.experts.238.w3", "model.layers.23.block_sparse_moe.experts.239.w3", "model.layers.23.block_sparse_moe.experts.240.w3", "model.layers.23.block_sparse_moe.experts.241.w3", "model.layers.23.block_sparse_moe.experts.242.w3", "model.layers.23.block_sparse_moe.experts.243.w3", "model.layers.23.block_sparse_moe.experts.244.w3", "model.layers.23.block_sparse_moe.experts.245.w3", "model.layers.23.block_sparse_moe.experts.246.w3", "model.layers.23.block_sparse_moe.experts.247.w3", "model.layers.23.block_sparse_moe.experts.248.w3", "model.layers.23.block_sparse_moe.experts.249.w3", "model.layers.23.block_sparse_moe.experts.250.w3", "model.layers.23.block_sparse_moe.experts.251.w3", "model.layers.23.block_sparse_moe.experts.252.w3", "model.layers.23.block_sparse_moe.experts.253.w3", "model.layers.23.block_sparse_moe.experts.254.w3", "model.layers.23.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00044386163353919705, "dbits": 2415919104 } ] }, { "idx": 119, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w2", "model.layers.23.block_sparse_moe.experts.1.w2", "model.layers.23.block_sparse_moe.experts.2.w2", "model.layers.23.block_sparse_moe.experts.3.w2", "model.layers.23.block_sparse_moe.experts.4.w2", "model.layers.23.block_sparse_moe.experts.5.w2", "model.layers.23.block_sparse_moe.experts.6.w2", "model.layers.23.block_sparse_moe.experts.7.w2", "model.layers.23.block_sparse_moe.experts.8.w2", "model.layers.23.block_sparse_moe.experts.9.w2", "model.layers.23.block_sparse_moe.experts.10.w2", "model.layers.23.block_sparse_moe.experts.11.w2", "model.layers.23.block_sparse_moe.experts.12.w2", "model.layers.23.block_sparse_moe.experts.13.w2", "model.layers.23.block_sparse_moe.experts.14.w2", "model.layers.23.block_sparse_moe.experts.15.w2", "model.layers.23.block_sparse_moe.experts.16.w2", "model.layers.23.block_sparse_moe.experts.17.w2", "model.layers.23.block_sparse_moe.experts.18.w2", "model.layers.23.block_sparse_moe.experts.19.w2", "model.layers.23.block_sparse_moe.experts.20.w2", "model.layers.23.block_sparse_moe.experts.21.w2", "model.layers.23.block_sparse_moe.experts.22.w2", "model.layers.23.block_sparse_moe.experts.23.w2", "model.layers.23.block_sparse_moe.experts.24.w2", "model.layers.23.block_sparse_moe.experts.25.w2", "model.layers.23.block_sparse_moe.experts.26.w2", "model.layers.23.block_sparse_moe.experts.27.w2", "model.layers.23.block_sparse_moe.experts.28.w2", "model.layers.23.block_sparse_moe.experts.29.w2", "model.layers.23.block_sparse_moe.experts.30.w2", "model.layers.23.block_sparse_moe.experts.31.w2", "model.layers.23.block_sparse_moe.experts.32.w2", "model.layers.23.block_sparse_moe.experts.33.w2", "model.layers.23.block_sparse_moe.experts.34.w2", "model.layers.23.block_sparse_moe.experts.35.w2", "model.layers.23.block_sparse_moe.experts.36.w2", "model.layers.23.block_sparse_moe.experts.37.w2", "model.layers.23.block_sparse_moe.experts.38.w2", "model.layers.23.block_sparse_moe.experts.39.w2", "model.layers.23.block_sparse_moe.experts.40.w2", "model.layers.23.block_sparse_moe.experts.41.w2", "model.layers.23.block_sparse_moe.experts.42.w2", "model.layers.23.block_sparse_moe.experts.43.w2", "model.layers.23.block_sparse_moe.experts.44.w2", "model.layers.23.block_sparse_moe.experts.45.w2", "model.layers.23.block_sparse_moe.experts.46.w2", "model.layers.23.block_sparse_moe.experts.47.w2", "model.layers.23.block_sparse_moe.experts.48.w2", "model.layers.23.block_sparse_moe.experts.49.w2", "model.layers.23.block_sparse_moe.experts.50.w2", "model.layers.23.block_sparse_moe.experts.51.w2", "model.layers.23.block_sparse_moe.experts.52.w2", "model.layers.23.block_sparse_moe.experts.53.w2", "model.layers.23.block_sparse_moe.experts.54.w2", "model.layers.23.block_sparse_moe.experts.55.w2", "model.layers.23.block_sparse_moe.experts.56.w2", "model.layers.23.block_sparse_moe.experts.57.w2", "model.layers.23.block_sparse_moe.experts.58.w2", "model.layers.23.block_sparse_moe.experts.59.w2", "model.layers.23.block_sparse_moe.experts.60.w2", "model.layers.23.block_sparse_moe.experts.61.w2", "model.layers.23.block_sparse_moe.experts.62.w2", "model.layers.23.block_sparse_moe.experts.63.w2", "model.layers.23.block_sparse_moe.experts.64.w2", "model.layers.23.block_sparse_moe.experts.65.w2", "model.layers.23.block_sparse_moe.experts.66.w2", "model.layers.23.block_sparse_moe.experts.67.w2", "model.layers.23.block_sparse_moe.experts.68.w2", "model.layers.23.block_sparse_moe.experts.69.w2", "model.layers.23.block_sparse_moe.experts.70.w2", "model.layers.23.block_sparse_moe.experts.71.w2", "model.layers.23.block_sparse_moe.experts.72.w2", "model.layers.23.block_sparse_moe.experts.73.w2", "model.layers.23.block_sparse_moe.experts.74.w2", "model.layers.23.block_sparse_moe.experts.75.w2", "model.layers.23.block_sparse_moe.experts.76.w2", "model.layers.23.block_sparse_moe.experts.77.w2", "model.layers.23.block_sparse_moe.experts.78.w2", "model.layers.23.block_sparse_moe.experts.79.w2", "model.layers.23.block_sparse_moe.experts.80.w2", "model.layers.23.block_sparse_moe.experts.81.w2", "model.layers.23.block_sparse_moe.experts.82.w2", "model.layers.23.block_sparse_moe.experts.83.w2", "model.layers.23.block_sparse_moe.experts.84.w2", "model.layers.23.block_sparse_moe.experts.85.w2", "model.layers.23.block_sparse_moe.experts.86.w2", "model.layers.23.block_sparse_moe.experts.87.w2", "model.layers.23.block_sparse_moe.experts.88.w2", "model.layers.23.block_sparse_moe.experts.89.w2", "model.layers.23.block_sparse_moe.experts.90.w2", "model.layers.23.block_sparse_moe.experts.91.w2", "model.layers.23.block_sparse_moe.experts.92.w2", "model.layers.23.block_sparse_moe.experts.93.w2", "model.layers.23.block_sparse_moe.experts.94.w2", "model.layers.23.block_sparse_moe.experts.95.w2", "model.layers.23.block_sparse_moe.experts.96.w2", "model.layers.23.block_sparse_moe.experts.97.w2", "model.layers.23.block_sparse_moe.experts.98.w2", "model.layers.23.block_sparse_moe.experts.99.w2", "model.layers.23.block_sparse_moe.experts.100.w2", "model.layers.23.block_sparse_moe.experts.101.w2", "model.layers.23.block_sparse_moe.experts.102.w2", "model.layers.23.block_sparse_moe.experts.103.w2", "model.layers.23.block_sparse_moe.experts.104.w2", "model.layers.23.block_sparse_moe.experts.105.w2", "model.layers.23.block_sparse_moe.experts.106.w2", "model.layers.23.block_sparse_moe.experts.107.w2", "model.layers.23.block_sparse_moe.experts.108.w2", "model.layers.23.block_sparse_moe.experts.109.w2", "model.layers.23.block_sparse_moe.experts.110.w2", "model.layers.23.block_sparse_moe.experts.111.w2", "model.layers.23.block_sparse_moe.experts.112.w2", "model.layers.23.block_sparse_moe.experts.113.w2", "model.layers.23.block_sparse_moe.experts.114.w2", "model.layers.23.block_sparse_moe.experts.115.w2", "model.layers.23.block_sparse_moe.experts.116.w2", "model.layers.23.block_sparse_moe.experts.117.w2", "model.layers.23.block_sparse_moe.experts.118.w2", "model.layers.23.block_sparse_moe.experts.119.w2", "model.layers.23.block_sparse_moe.experts.120.w2", "model.layers.23.block_sparse_moe.experts.121.w2", "model.layers.23.block_sparse_moe.experts.122.w2", "model.layers.23.block_sparse_moe.experts.123.w2", "model.layers.23.block_sparse_moe.experts.124.w2", "model.layers.23.block_sparse_moe.experts.125.w2", "model.layers.23.block_sparse_moe.experts.126.w2", "model.layers.23.block_sparse_moe.experts.127.w2", "model.layers.23.block_sparse_moe.experts.128.w2", "model.layers.23.block_sparse_moe.experts.129.w2", "model.layers.23.block_sparse_moe.experts.130.w2", "model.layers.23.block_sparse_moe.experts.131.w2", "model.layers.23.block_sparse_moe.experts.132.w2", "model.layers.23.block_sparse_moe.experts.133.w2", "model.layers.23.block_sparse_moe.experts.134.w2", "model.layers.23.block_sparse_moe.experts.135.w2", "model.layers.23.block_sparse_moe.experts.136.w2", "model.layers.23.block_sparse_moe.experts.137.w2", "model.layers.23.block_sparse_moe.experts.138.w2", "model.layers.23.block_sparse_moe.experts.139.w2", "model.layers.23.block_sparse_moe.experts.140.w2", "model.layers.23.block_sparse_moe.experts.141.w2", "model.layers.23.block_sparse_moe.experts.142.w2", "model.layers.23.block_sparse_moe.experts.143.w2", "model.layers.23.block_sparse_moe.experts.144.w2", "model.layers.23.block_sparse_moe.experts.145.w2", "model.layers.23.block_sparse_moe.experts.146.w2", "model.layers.23.block_sparse_moe.experts.147.w2", "model.layers.23.block_sparse_moe.experts.148.w2", "model.layers.23.block_sparse_moe.experts.149.w2", "model.layers.23.block_sparse_moe.experts.150.w2", "model.layers.23.block_sparse_moe.experts.151.w2", "model.layers.23.block_sparse_moe.experts.152.w2", "model.layers.23.block_sparse_moe.experts.153.w2", "model.layers.23.block_sparse_moe.experts.154.w2", "model.layers.23.block_sparse_moe.experts.155.w2", "model.layers.23.block_sparse_moe.experts.156.w2", "model.layers.23.block_sparse_moe.experts.157.w2", "model.layers.23.block_sparse_moe.experts.158.w2", "model.layers.23.block_sparse_moe.experts.159.w2", "model.layers.23.block_sparse_moe.experts.160.w2", "model.layers.23.block_sparse_moe.experts.161.w2", "model.layers.23.block_sparse_moe.experts.162.w2", "model.layers.23.block_sparse_moe.experts.163.w2", "model.layers.23.block_sparse_moe.experts.164.w2", "model.layers.23.block_sparse_moe.experts.165.w2", "model.layers.23.block_sparse_moe.experts.166.w2", "model.layers.23.block_sparse_moe.experts.167.w2", "model.layers.23.block_sparse_moe.experts.168.w2", "model.layers.23.block_sparse_moe.experts.169.w2", "model.layers.23.block_sparse_moe.experts.170.w2", "model.layers.23.block_sparse_moe.experts.171.w2", "model.layers.23.block_sparse_moe.experts.172.w2", "model.layers.23.block_sparse_moe.experts.173.w2", "model.layers.23.block_sparse_moe.experts.174.w2", "model.layers.23.block_sparse_moe.experts.175.w2", "model.layers.23.block_sparse_moe.experts.176.w2", "model.layers.23.block_sparse_moe.experts.177.w2", "model.layers.23.block_sparse_moe.experts.178.w2", "model.layers.23.block_sparse_moe.experts.179.w2", "model.layers.23.block_sparse_moe.experts.180.w2", "model.layers.23.block_sparse_moe.experts.181.w2", "model.layers.23.block_sparse_moe.experts.182.w2", "model.layers.23.block_sparse_moe.experts.183.w2", "model.layers.23.block_sparse_moe.experts.184.w2", "model.layers.23.block_sparse_moe.experts.185.w2", "model.layers.23.block_sparse_moe.experts.186.w2", "model.layers.23.block_sparse_moe.experts.187.w2", "model.layers.23.block_sparse_moe.experts.188.w2", "model.layers.23.block_sparse_moe.experts.189.w2", "model.layers.23.block_sparse_moe.experts.190.w2", "model.layers.23.block_sparse_moe.experts.191.w2", "model.layers.23.block_sparse_moe.experts.192.w2", "model.layers.23.block_sparse_moe.experts.193.w2", "model.layers.23.block_sparse_moe.experts.194.w2", "model.layers.23.block_sparse_moe.experts.195.w2", "model.layers.23.block_sparse_moe.experts.196.w2", "model.layers.23.block_sparse_moe.experts.197.w2", "model.layers.23.block_sparse_moe.experts.198.w2", "model.layers.23.block_sparse_moe.experts.199.w2", "model.layers.23.block_sparse_moe.experts.200.w2", "model.layers.23.block_sparse_moe.experts.201.w2", "model.layers.23.block_sparse_moe.experts.202.w2", "model.layers.23.block_sparse_moe.experts.203.w2", "model.layers.23.block_sparse_moe.experts.204.w2", "model.layers.23.block_sparse_moe.experts.205.w2", "model.layers.23.block_sparse_moe.experts.206.w2", "model.layers.23.block_sparse_moe.experts.207.w2", "model.layers.23.block_sparse_moe.experts.208.w2", "model.layers.23.block_sparse_moe.experts.209.w2", "model.layers.23.block_sparse_moe.experts.210.w2", "model.layers.23.block_sparse_moe.experts.211.w2", "model.layers.23.block_sparse_moe.experts.212.w2", "model.layers.23.block_sparse_moe.experts.213.w2", "model.layers.23.block_sparse_moe.experts.214.w2", "model.layers.23.block_sparse_moe.experts.215.w2", "model.layers.23.block_sparse_moe.experts.216.w2", "model.layers.23.block_sparse_moe.experts.217.w2", "model.layers.23.block_sparse_moe.experts.218.w2", "model.layers.23.block_sparse_moe.experts.219.w2", "model.layers.23.block_sparse_moe.experts.220.w2", "model.layers.23.block_sparse_moe.experts.221.w2", "model.layers.23.block_sparse_moe.experts.222.w2", "model.layers.23.block_sparse_moe.experts.223.w2", "model.layers.23.block_sparse_moe.experts.224.w2", "model.layers.23.block_sparse_moe.experts.225.w2", "model.layers.23.block_sparse_moe.experts.226.w2", "model.layers.23.block_sparse_moe.experts.227.w2", "model.layers.23.block_sparse_moe.experts.228.w2", "model.layers.23.block_sparse_moe.experts.229.w2", "model.layers.23.block_sparse_moe.experts.230.w2", "model.layers.23.block_sparse_moe.experts.231.w2", "model.layers.23.block_sparse_moe.experts.232.w2", "model.layers.23.block_sparse_moe.experts.233.w2", "model.layers.23.block_sparse_moe.experts.234.w2", "model.layers.23.block_sparse_moe.experts.235.w2", "model.layers.23.block_sparse_moe.experts.236.w2", "model.layers.23.block_sparse_moe.experts.237.w2", "model.layers.23.block_sparse_moe.experts.238.w2", "model.layers.23.block_sparse_moe.experts.239.w2", "model.layers.23.block_sparse_moe.experts.240.w2", "model.layers.23.block_sparse_moe.experts.241.w2", "model.layers.23.block_sparse_moe.experts.242.w2", "model.layers.23.block_sparse_moe.experts.243.w2", "model.layers.23.block_sparse_moe.experts.244.w2", "model.layers.23.block_sparse_moe.experts.245.w2", "model.layers.23.block_sparse_moe.experts.246.w2", "model.layers.23.block_sparse_moe.experts.247.w2", "model.layers.23.block_sparse_moe.experts.248.w2", "model.layers.23.block_sparse_moe.experts.249.w2", "model.layers.23.block_sparse_moe.experts.250.w2", "model.layers.23.block_sparse_moe.experts.251.w2", "model.layers.23.block_sparse_moe.experts.252.w2", "model.layers.23.block_sparse_moe.experts.253.w2", "model.layers.23.block_sparse_moe.experts.254.w2", "model.layers.23.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0010278370231389944, "dbits": 1207959552 } ] }, { "idx": 120, "layers": [ "model.layers.24.self_attn.q_proj" ], "candidates": [ { "dkld": 5.2835978567603026e-05, "dbits": 18874368 } ] }, { "idx": 121, "layers": [ "model.layers.24.self_attn.k_proj", "model.layers.24.self_attn.v_proj" ], "candidates": [ { "dkld": -0.005269835703074932, "dbits": 6291456 } ] }, { "idx": 122, "layers": [ "model.layers.24.self_attn.o_proj" ], "candidates": [ { "dkld": 0.002439771592617032, "dbits": 18874368 } ] }, { "idx": 123, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w1", "model.layers.24.block_sparse_moe.experts.1.w1", "model.layers.24.block_sparse_moe.experts.2.w1", "model.layers.24.block_sparse_moe.experts.3.w1", "model.layers.24.block_sparse_moe.experts.4.w1", "model.layers.24.block_sparse_moe.experts.5.w1", "model.layers.24.block_sparse_moe.experts.6.w1", "model.layers.24.block_sparse_moe.experts.7.w1", "model.layers.24.block_sparse_moe.experts.8.w1", "model.layers.24.block_sparse_moe.experts.9.w1", "model.layers.24.block_sparse_moe.experts.10.w1", "model.layers.24.block_sparse_moe.experts.11.w1", "model.layers.24.block_sparse_moe.experts.12.w1", "model.layers.24.block_sparse_moe.experts.13.w1", "model.layers.24.block_sparse_moe.experts.14.w1", "model.layers.24.block_sparse_moe.experts.15.w1", "model.layers.24.block_sparse_moe.experts.16.w1", "model.layers.24.block_sparse_moe.experts.17.w1", "model.layers.24.block_sparse_moe.experts.18.w1", "model.layers.24.block_sparse_moe.experts.19.w1", "model.layers.24.block_sparse_moe.experts.20.w1", "model.layers.24.block_sparse_moe.experts.21.w1", "model.layers.24.block_sparse_moe.experts.22.w1", "model.layers.24.block_sparse_moe.experts.23.w1", "model.layers.24.block_sparse_moe.experts.24.w1", "model.layers.24.block_sparse_moe.experts.25.w1", "model.layers.24.block_sparse_moe.experts.26.w1", "model.layers.24.block_sparse_moe.experts.27.w1", "model.layers.24.block_sparse_moe.experts.28.w1", "model.layers.24.block_sparse_moe.experts.29.w1", "model.layers.24.block_sparse_moe.experts.30.w1", "model.layers.24.block_sparse_moe.experts.31.w1", "model.layers.24.block_sparse_moe.experts.32.w1", "model.layers.24.block_sparse_moe.experts.33.w1", "model.layers.24.block_sparse_moe.experts.34.w1", "model.layers.24.block_sparse_moe.experts.35.w1", "model.layers.24.block_sparse_moe.experts.36.w1", "model.layers.24.block_sparse_moe.experts.37.w1", "model.layers.24.block_sparse_moe.experts.38.w1", "model.layers.24.block_sparse_moe.experts.39.w1", "model.layers.24.block_sparse_moe.experts.40.w1", "model.layers.24.block_sparse_moe.experts.41.w1", "model.layers.24.block_sparse_moe.experts.42.w1", "model.layers.24.block_sparse_moe.experts.43.w1", "model.layers.24.block_sparse_moe.experts.44.w1", "model.layers.24.block_sparse_moe.experts.45.w1", "model.layers.24.block_sparse_moe.experts.46.w1", "model.layers.24.block_sparse_moe.experts.47.w1", "model.layers.24.block_sparse_moe.experts.48.w1", "model.layers.24.block_sparse_moe.experts.49.w1", "model.layers.24.block_sparse_moe.experts.50.w1", "model.layers.24.block_sparse_moe.experts.51.w1", "model.layers.24.block_sparse_moe.experts.52.w1", "model.layers.24.block_sparse_moe.experts.53.w1", "model.layers.24.block_sparse_moe.experts.54.w1", "model.layers.24.block_sparse_moe.experts.55.w1", "model.layers.24.block_sparse_moe.experts.56.w1", "model.layers.24.block_sparse_moe.experts.57.w1", "model.layers.24.block_sparse_moe.experts.58.w1", "model.layers.24.block_sparse_moe.experts.59.w1", "model.layers.24.block_sparse_moe.experts.60.w1", "model.layers.24.block_sparse_moe.experts.61.w1", "model.layers.24.block_sparse_moe.experts.62.w1", "model.layers.24.block_sparse_moe.experts.63.w1", "model.layers.24.block_sparse_moe.experts.64.w1", "model.layers.24.block_sparse_moe.experts.65.w1", "model.layers.24.block_sparse_moe.experts.66.w1", "model.layers.24.block_sparse_moe.experts.67.w1", "model.layers.24.block_sparse_moe.experts.68.w1", "model.layers.24.block_sparse_moe.experts.69.w1", "model.layers.24.block_sparse_moe.experts.70.w1", "model.layers.24.block_sparse_moe.experts.71.w1", "model.layers.24.block_sparse_moe.experts.72.w1", "model.layers.24.block_sparse_moe.experts.73.w1", "model.layers.24.block_sparse_moe.experts.74.w1", "model.layers.24.block_sparse_moe.experts.75.w1", "model.layers.24.block_sparse_moe.experts.76.w1", "model.layers.24.block_sparse_moe.experts.77.w1", "model.layers.24.block_sparse_moe.experts.78.w1", "model.layers.24.block_sparse_moe.experts.79.w1", "model.layers.24.block_sparse_moe.experts.80.w1", "model.layers.24.block_sparse_moe.experts.81.w1", "model.layers.24.block_sparse_moe.experts.82.w1", "model.layers.24.block_sparse_moe.experts.83.w1", "model.layers.24.block_sparse_moe.experts.84.w1", "model.layers.24.block_sparse_moe.experts.85.w1", "model.layers.24.block_sparse_moe.experts.86.w1", "model.layers.24.block_sparse_moe.experts.87.w1", "model.layers.24.block_sparse_moe.experts.88.w1", "model.layers.24.block_sparse_moe.experts.89.w1", "model.layers.24.block_sparse_moe.experts.90.w1", "model.layers.24.block_sparse_moe.experts.91.w1", "model.layers.24.block_sparse_moe.experts.92.w1", "model.layers.24.block_sparse_moe.experts.93.w1", "model.layers.24.block_sparse_moe.experts.94.w1", "model.layers.24.block_sparse_moe.experts.95.w1", "model.layers.24.block_sparse_moe.experts.96.w1", "model.layers.24.block_sparse_moe.experts.97.w1", "model.layers.24.block_sparse_moe.experts.98.w1", "model.layers.24.block_sparse_moe.experts.99.w1", "model.layers.24.block_sparse_moe.experts.100.w1", "model.layers.24.block_sparse_moe.experts.101.w1", "model.layers.24.block_sparse_moe.experts.102.w1", "model.layers.24.block_sparse_moe.experts.103.w1", "model.layers.24.block_sparse_moe.experts.104.w1", "model.layers.24.block_sparse_moe.experts.105.w1", "model.layers.24.block_sparse_moe.experts.106.w1", "model.layers.24.block_sparse_moe.experts.107.w1", "model.layers.24.block_sparse_moe.experts.108.w1", "model.layers.24.block_sparse_moe.experts.109.w1", "model.layers.24.block_sparse_moe.experts.110.w1", "model.layers.24.block_sparse_moe.experts.111.w1", "model.layers.24.block_sparse_moe.experts.112.w1", "model.layers.24.block_sparse_moe.experts.113.w1", "model.layers.24.block_sparse_moe.experts.114.w1", "model.layers.24.block_sparse_moe.experts.115.w1", "model.layers.24.block_sparse_moe.experts.116.w1", "model.layers.24.block_sparse_moe.experts.117.w1", "model.layers.24.block_sparse_moe.experts.118.w1", "model.layers.24.block_sparse_moe.experts.119.w1", "model.layers.24.block_sparse_moe.experts.120.w1", "model.layers.24.block_sparse_moe.experts.121.w1", "model.layers.24.block_sparse_moe.experts.122.w1", "model.layers.24.block_sparse_moe.experts.123.w1", "model.layers.24.block_sparse_moe.experts.124.w1", "model.layers.24.block_sparse_moe.experts.125.w1", "model.layers.24.block_sparse_moe.experts.126.w1", "model.layers.24.block_sparse_moe.experts.127.w1", "model.layers.24.block_sparse_moe.experts.128.w1", "model.layers.24.block_sparse_moe.experts.129.w1", "model.layers.24.block_sparse_moe.experts.130.w1", "model.layers.24.block_sparse_moe.experts.131.w1", "model.layers.24.block_sparse_moe.experts.132.w1", "model.layers.24.block_sparse_moe.experts.133.w1", "model.layers.24.block_sparse_moe.experts.134.w1", "model.layers.24.block_sparse_moe.experts.135.w1", "model.layers.24.block_sparse_moe.experts.136.w1", "model.layers.24.block_sparse_moe.experts.137.w1", "model.layers.24.block_sparse_moe.experts.138.w1", "model.layers.24.block_sparse_moe.experts.139.w1", "model.layers.24.block_sparse_moe.experts.140.w1", "model.layers.24.block_sparse_moe.experts.141.w1", "model.layers.24.block_sparse_moe.experts.142.w1", "model.layers.24.block_sparse_moe.experts.143.w1", "model.layers.24.block_sparse_moe.experts.144.w1", "model.layers.24.block_sparse_moe.experts.145.w1", "model.layers.24.block_sparse_moe.experts.146.w1", "model.layers.24.block_sparse_moe.experts.147.w1", "model.layers.24.block_sparse_moe.experts.148.w1", "model.layers.24.block_sparse_moe.experts.149.w1", "model.layers.24.block_sparse_moe.experts.150.w1", "model.layers.24.block_sparse_moe.experts.151.w1", "model.layers.24.block_sparse_moe.experts.152.w1", "model.layers.24.block_sparse_moe.experts.153.w1", "model.layers.24.block_sparse_moe.experts.154.w1", "model.layers.24.block_sparse_moe.experts.155.w1", "model.layers.24.block_sparse_moe.experts.156.w1", "model.layers.24.block_sparse_moe.experts.157.w1", "model.layers.24.block_sparse_moe.experts.158.w1", "model.layers.24.block_sparse_moe.experts.159.w1", "model.layers.24.block_sparse_moe.experts.160.w1", "model.layers.24.block_sparse_moe.experts.161.w1", "model.layers.24.block_sparse_moe.experts.162.w1", "model.layers.24.block_sparse_moe.experts.163.w1", "model.layers.24.block_sparse_moe.experts.164.w1", "model.layers.24.block_sparse_moe.experts.165.w1", "model.layers.24.block_sparse_moe.experts.166.w1", "model.layers.24.block_sparse_moe.experts.167.w1", "model.layers.24.block_sparse_moe.experts.168.w1", "model.layers.24.block_sparse_moe.experts.169.w1", "model.layers.24.block_sparse_moe.experts.170.w1", "model.layers.24.block_sparse_moe.experts.171.w1", "model.layers.24.block_sparse_moe.experts.172.w1", "model.layers.24.block_sparse_moe.experts.173.w1", "model.layers.24.block_sparse_moe.experts.174.w1", "model.layers.24.block_sparse_moe.experts.175.w1", "model.layers.24.block_sparse_moe.experts.176.w1", "model.layers.24.block_sparse_moe.experts.177.w1", "model.layers.24.block_sparse_moe.experts.178.w1", "model.layers.24.block_sparse_moe.experts.179.w1", "model.layers.24.block_sparse_moe.experts.180.w1", "model.layers.24.block_sparse_moe.experts.181.w1", "model.layers.24.block_sparse_moe.experts.182.w1", "model.layers.24.block_sparse_moe.experts.183.w1", "model.layers.24.block_sparse_moe.experts.184.w1", "model.layers.24.block_sparse_moe.experts.185.w1", "model.layers.24.block_sparse_moe.experts.186.w1", "model.layers.24.block_sparse_moe.experts.187.w1", "model.layers.24.block_sparse_moe.experts.188.w1", "model.layers.24.block_sparse_moe.experts.189.w1", "model.layers.24.block_sparse_moe.experts.190.w1", "model.layers.24.block_sparse_moe.experts.191.w1", "model.layers.24.block_sparse_moe.experts.192.w1", "model.layers.24.block_sparse_moe.experts.193.w1", "model.layers.24.block_sparse_moe.experts.194.w1", "model.layers.24.block_sparse_moe.experts.195.w1", "model.layers.24.block_sparse_moe.experts.196.w1", "model.layers.24.block_sparse_moe.experts.197.w1", "model.layers.24.block_sparse_moe.experts.198.w1", "model.layers.24.block_sparse_moe.experts.199.w1", "model.layers.24.block_sparse_moe.experts.200.w1", "model.layers.24.block_sparse_moe.experts.201.w1", "model.layers.24.block_sparse_moe.experts.202.w1", "model.layers.24.block_sparse_moe.experts.203.w1", "model.layers.24.block_sparse_moe.experts.204.w1", "model.layers.24.block_sparse_moe.experts.205.w1", "model.layers.24.block_sparse_moe.experts.206.w1", "model.layers.24.block_sparse_moe.experts.207.w1", "model.layers.24.block_sparse_moe.experts.208.w1", "model.layers.24.block_sparse_moe.experts.209.w1", "model.layers.24.block_sparse_moe.experts.210.w1", "model.layers.24.block_sparse_moe.experts.211.w1", "model.layers.24.block_sparse_moe.experts.212.w1", "model.layers.24.block_sparse_moe.experts.213.w1", "model.layers.24.block_sparse_moe.experts.214.w1", "model.layers.24.block_sparse_moe.experts.215.w1", "model.layers.24.block_sparse_moe.experts.216.w1", "model.layers.24.block_sparse_moe.experts.217.w1", "model.layers.24.block_sparse_moe.experts.218.w1", "model.layers.24.block_sparse_moe.experts.219.w1", "model.layers.24.block_sparse_moe.experts.220.w1", "model.layers.24.block_sparse_moe.experts.221.w1", "model.layers.24.block_sparse_moe.experts.222.w1", "model.layers.24.block_sparse_moe.experts.223.w1", "model.layers.24.block_sparse_moe.experts.224.w1", "model.layers.24.block_sparse_moe.experts.225.w1", "model.layers.24.block_sparse_moe.experts.226.w1", "model.layers.24.block_sparse_moe.experts.227.w1", "model.layers.24.block_sparse_moe.experts.228.w1", "model.layers.24.block_sparse_moe.experts.229.w1", "model.layers.24.block_sparse_moe.experts.230.w1", "model.layers.24.block_sparse_moe.experts.231.w1", "model.layers.24.block_sparse_moe.experts.232.w1", "model.layers.24.block_sparse_moe.experts.233.w1", "model.layers.24.block_sparse_moe.experts.234.w1", "model.layers.24.block_sparse_moe.experts.235.w1", "model.layers.24.block_sparse_moe.experts.236.w1", "model.layers.24.block_sparse_moe.experts.237.w1", "model.layers.24.block_sparse_moe.experts.238.w1", "model.layers.24.block_sparse_moe.experts.239.w1", "model.layers.24.block_sparse_moe.experts.240.w1", "model.layers.24.block_sparse_moe.experts.241.w1", "model.layers.24.block_sparse_moe.experts.242.w1", "model.layers.24.block_sparse_moe.experts.243.w1", "model.layers.24.block_sparse_moe.experts.244.w1", "model.layers.24.block_sparse_moe.experts.245.w1", "model.layers.24.block_sparse_moe.experts.246.w1", "model.layers.24.block_sparse_moe.experts.247.w1", "model.layers.24.block_sparse_moe.experts.248.w1", "model.layers.24.block_sparse_moe.experts.249.w1", "model.layers.24.block_sparse_moe.experts.250.w1", "model.layers.24.block_sparse_moe.experts.251.w1", "model.layers.24.block_sparse_moe.experts.252.w1", "model.layers.24.block_sparse_moe.experts.253.w1", "model.layers.24.block_sparse_moe.experts.254.w1", "model.layers.24.block_sparse_moe.experts.255.w1", "model.layers.24.block_sparse_moe.experts.0.w3", "model.layers.24.block_sparse_moe.experts.1.w3", "model.layers.24.block_sparse_moe.experts.2.w3", "model.layers.24.block_sparse_moe.experts.3.w3", "model.layers.24.block_sparse_moe.experts.4.w3", "model.layers.24.block_sparse_moe.experts.5.w3", "model.layers.24.block_sparse_moe.experts.6.w3", "model.layers.24.block_sparse_moe.experts.7.w3", "model.layers.24.block_sparse_moe.experts.8.w3", "model.layers.24.block_sparse_moe.experts.9.w3", "model.layers.24.block_sparse_moe.experts.10.w3", "model.layers.24.block_sparse_moe.experts.11.w3", "model.layers.24.block_sparse_moe.experts.12.w3", "model.layers.24.block_sparse_moe.experts.13.w3", "model.layers.24.block_sparse_moe.experts.14.w3", "model.layers.24.block_sparse_moe.experts.15.w3", "model.layers.24.block_sparse_moe.experts.16.w3", "model.layers.24.block_sparse_moe.experts.17.w3", "model.layers.24.block_sparse_moe.experts.18.w3", "model.layers.24.block_sparse_moe.experts.19.w3", "model.layers.24.block_sparse_moe.experts.20.w3", "model.layers.24.block_sparse_moe.experts.21.w3", "model.layers.24.block_sparse_moe.experts.22.w3", "model.layers.24.block_sparse_moe.experts.23.w3", "model.layers.24.block_sparse_moe.experts.24.w3", "model.layers.24.block_sparse_moe.experts.25.w3", "model.layers.24.block_sparse_moe.experts.26.w3", "model.layers.24.block_sparse_moe.experts.27.w3", "model.layers.24.block_sparse_moe.experts.28.w3", "model.layers.24.block_sparse_moe.experts.29.w3", "model.layers.24.block_sparse_moe.experts.30.w3", "model.layers.24.block_sparse_moe.experts.31.w3", "model.layers.24.block_sparse_moe.experts.32.w3", "model.layers.24.block_sparse_moe.experts.33.w3", "model.layers.24.block_sparse_moe.experts.34.w3", "model.layers.24.block_sparse_moe.experts.35.w3", "model.layers.24.block_sparse_moe.experts.36.w3", "model.layers.24.block_sparse_moe.experts.37.w3", "model.layers.24.block_sparse_moe.experts.38.w3", "model.layers.24.block_sparse_moe.experts.39.w3", "model.layers.24.block_sparse_moe.experts.40.w3", "model.layers.24.block_sparse_moe.experts.41.w3", "model.layers.24.block_sparse_moe.experts.42.w3", "model.layers.24.block_sparse_moe.experts.43.w3", "model.layers.24.block_sparse_moe.experts.44.w3", "model.layers.24.block_sparse_moe.experts.45.w3", "model.layers.24.block_sparse_moe.experts.46.w3", "model.layers.24.block_sparse_moe.experts.47.w3", "model.layers.24.block_sparse_moe.experts.48.w3", "model.layers.24.block_sparse_moe.experts.49.w3", "model.layers.24.block_sparse_moe.experts.50.w3", "model.layers.24.block_sparse_moe.experts.51.w3", "model.layers.24.block_sparse_moe.experts.52.w3", "model.layers.24.block_sparse_moe.experts.53.w3", "model.layers.24.block_sparse_moe.experts.54.w3", "model.layers.24.block_sparse_moe.experts.55.w3", "model.layers.24.block_sparse_moe.experts.56.w3", "model.layers.24.block_sparse_moe.experts.57.w3", "model.layers.24.block_sparse_moe.experts.58.w3", "model.layers.24.block_sparse_moe.experts.59.w3", "model.layers.24.block_sparse_moe.experts.60.w3", "model.layers.24.block_sparse_moe.experts.61.w3", "model.layers.24.block_sparse_moe.experts.62.w3", "model.layers.24.block_sparse_moe.experts.63.w3", "model.layers.24.block_sparse_moe.experts.64.w3", "model.layers.24.block_sparse_moe.experts.65.w3", "model.layers.24.block_sparse_moe.experts.66.w3", "model.layers.24.block_sparse_moe.experts.67.w3", "model.layers.24.block_sparse_moe.experts.68.w3", "model.layers.24.block_sparse_moe.experts.69.w3", "model.layers.24.block_sparse_moe.experts.70.w3", "model.layers.24.block_sparse_moe.experts.71.w3", "model.layers.24.block_sparse_moe.experts.72.w3", "model.layers.24.block_sparse_moe.experts.73.w3", "model.layers.24.block_sparse_moe.experts.74.w3", "model.layers.24.block_sparse_moe.experts.75.w3", "model.layers.24.block_sparse_moe.experts.76.w3", "model.layers.24.block_sparse_moe.experts.77.w3", "model.layers.24.block_sparse_moe.experts.78.w3", "model.layers.24.block_sparse_moe.experts.79.w3", "model.layers.24.block_sparse_moe.experts.80.w3", "model.layers.24.block_sparse_moe.experts.81.w3", "model.layers.24.block_sparse_moe.experts.82.w3", "model.layers.24.block_sparse_moe.experts.83.w3", "model.layers.24.block_sparse_moe.experts.84.w3", "model.layers.24.block_sparse_moe.experts.85.w3", "model.layers.24.block_sparse_moe.experts.86.w3", "model.layers.24.block_sparse_moe.experts.87.w3", "model.layers.24.block_sparse_moe.experts.88.w3", "model.layers.24.block_sparse_moe.experts.89.w3", "model.layers.24.block_sparse_moe.experts.90.w3", "model.layers.24.block_sparse_moe.experts.91.w3", "model.layers.24.block_sparse_moe.experts.92.w3", "model.layers.24.block_sparse_moe.experts.93.w3", "model.layers.24.block_sparse_moe.experts.94.w3", "model.layers.24.block_sparse_moe.experts.95.w3", "model.layers.24.block_sparse_moe.experts.96.w3", "model.layers.24.block_sparse_moe.experts.97.w3", "model.layers.24.block_sparse_moe.experts.98.w3", "model.layers.24.block_sparse_moe.experts.99.w3", "model.layers.24.block_sparse_moe.experts.100.w3", "model.layers.24.block_sparse_moe.experts.101.w3", "model.layers.24.block_sparse_moe.experts.102.w3", "model.layers.24.block_sparse_moe.experts.103.w3", "model.layers.24.block_sparse_moe.experts.104.w3", "model.layers.24.block_sparse_moe.experts.105.w3", "model.layers.24.block_sparse_moe.experts.106.w3", "model.layers.24.block_sparse_moe.experts.107.w3", "model.layers.24.block_sparse_moe.experts.108.w3", "model.layers.24.block_sparse_moe.experts.109.w3", "model.layers.24.block_sparse_moe.experts.110.w3", "model.layers.24.block_sparse_moe.experts.111.w3", "model.layers.24.block_sparse_moe.experts.112.w3", "model.layers.24.block_sparse_moe.experts.113.w3", "model.layers.24.block_sparse_moe.experts.114.w3", "model.layers.24.block_sparse_moe.experts.115.w3", "model.layers.24.block_sparse_moe.experts.116.w3", "model.layers.24.block_sparse_moe.experts.117.w3", "model.layers.24.block_sparse_moe.experts.118.w3", "model.layers.24.block_sparse_moe.experts.119.w3", "model.layers.24.block_sparse_moe.experts.120.w3", "model.layers.24.block_sparse_moe.experts.121.w3", "model.layers.24.block_sparse_moe.experts.122.w3", "model.layers.24.block_sparse_moe.experts.123.w3", "model.layers.24.block_sparse_moe.experts.124.w3", "model.layers.24.block_sparse_moe.experts.125.w3", "model.layers.24.block_sparse_moe.experts.126.w3", "model.layers.24.block_sparse_moe.experts.127.w3", "model.layers.24.block_sparse_moe.experts.128.w3", "model.layers.24.block_sparse_moe.experts.129.w3", "model.layers.24.block_sparse_moe.experts.130.w3", "model.layers.24.block_sparse_moe.experts.131.w3", "model.layers.24.block_sparse_moe.experts.132.w3", "model.layers.24.block_sparse_moe.experts.133.w3", "model.layers.24.block_sparse_moe.experts.134.w3", "model.layers.24.block_sparse_moe.experts.135.w3", "model.layers.24.block_sparse_moe.experts.136.w3", "model.layers.24.block_sparse_moe.experts.137.w3", "model.layers.24.block_sparse_moe.experts.138.w3", "model.layers.24.block_sparse_moe.experts.139.w3", "model.layers.24.block_sparse_moe.experts.140.w3", "model.layers.24.block_sparse_moe.experts.141.w3", "model.layers.24.block_sparse_moe.experts.142.w3", "model.layers.24.block_sparse_moe.experts.143.w3", "model.layers.24.block_sparse_moe.experts.144.w3", "model.layers.24.block_sparse_moe.experts.145.w3", "model.layers.24.block_sparse_moe.experts.146.w3", "model.layers.24.block_sparse_moe.experts.147.w3", "model.layers.24.block_sparse_moe.experts.148.w3", "model.layers.24.block_sparse_moe.experts.149.w3", "model.layers.24.block_sparse_moe.experts.150.w3", "model.layers.24.block_sparse_moe.experts.151.w3", "model.layers.24.block_sparse_moe.experts.152.w3", "model.layers.24.block_sparse_moe.experts.153.w3", "model.layers.24.block_sparse_moe.experts.154.w3", "model.layers.24.block_sparse_moe.experts.155.w3", "model.layers.24.block_sparse_moe.experts.156.w3", "model.layers.24.block_sparse_moe.experts.157.w3", "model.layers.24.block_sparse_moe.experts.158.w3", "model.layers.24.block_sparse_moe.experts.159.w3", "model.layers.24.block_sparse_moe.experts.160.w3", "model.layers.24.block_sparse_moe.experts.161.w3", "model.layers.24.block_sparse_moe.experts.162.w3", "model.layers.24.block_sparse_moe.experts.163.w3", "model.layers.24.block_sparse_moe.experts.164.w3", "model.layers.24.block_sparse_moe.experts.165.w3", "model.layers.24.block_sparse_moe.experts.166.w3", "model.layers.24.block_sparse_moe.experts.167.w3", "model.layers.24.block_sparse_moe.experts.168.w3", "model.layers.24.block_sparse_moe.experts.169.w3", "model.layers.24.block_sparse_moe.experts.170.w3", "model.layers.24.block_sparse_moe.experts.171.w3", "model.layers.24.block_sparse_moe.experts.172.w3", "model.layers.24.block_sparse_moe.experts.173.w3", "model.layers.24.block_sparse_moe.experts.174.w3", "model.layers.24.block_sparse_moe.experts.175.w3", "model.layers.24.block_sparse_moe.experts.176.w3", "model.layers.24.block_sparse_moe.experts.177.w3", "model.layers.24.block_sparse_moe.experts.178.w3", "model.layers.24.block_sparse_moe.experts.179.w3", "model.layers.24.block_sparse_moe.experts.180.w3", "model.layers.24.block_sparse_moe.experts.181.w3", "model.layers.24.block_sparse_moe.experts.182.w3", "model.layers.24.block_sparse_moe.experts.183.w3", "model.layers.24.block_sparse_moe.experts.184.w3", "model.layers.24.block_sparse_moe.experts.185.w3", "model.layers.24.block_sparse_moe.experts.186.w3", "model.layers.24.block_sparse_moe.experts.187.w3", "model.layers.24.block_sparse_moe.experts.188.w3", "model.layers.24.block_sparse_moe.experts.189.w3", "model.layers.24.block_sparse_moe.experts.190.w3", "model.layers.24.block_sparse_moe.experts.191.w3", "model.layers.24.block_sparse_moe.experts.192.w3", "model.layers.24.block_sparse_moe.experts.193.w3", "model.layers.24.block_sparse_moe.experts.194.w3", "model.layers.24.block_sparse_moe.experts.195.w3", "model.layers.24.block_sparse_moe.experts.196.w3", "model.layers.24.block_sparse_moe.experts.197.w3", "model.layers.24.block_sparse_moe.experts.198.w3", "model.layers.24.block_sparse_moe.experts.199.w3", "model.layers.24.block_sparse_moe.experts.200.w3", "model.layers.24.block_sparse_moe.experts.201.w3", "model.layers.24.block_sparse_moe.experts.202.w3", "model.layers.24.block_sparse_moe.experts.203.w3", "model.layers.24.block_sparse_moe.experts.204.w3", "model.layers.24.block_sparse_moe.experts.205.w3", "model.layers.24.block_sparse_moe.experts.206.w3", "model.layers.24.block_sparse_moe.experts.207.w3", "model.layers.24.block_sparse_moe.experts.208.w3", "model.layers.24.block_sparse_moe.experts.209.w3", "model.layers.24.block_sparse_moe.experts.210.w3", "model.layers.24.block_sparse_moe.experts.211.w3", "model.layers.24.block_sparse_moe.experts.212.w3", "model.layers.24.block_sparse_moe.experts.213.w3", "model.layers.24.block_sparse_moe.experts.214.w3", "model.layers.24.block_sparse_moe.experts.215.w3", "model.layers.24.block_sparse_moe.experts.216.w3", "model.layers.24.block_sparse_moe.experts.217.w3", "model.layers.24.block_sparse_moe.experts.218.w3", "model.layers.24.block_sparse_moe.experts.219.w3", "model.layers.24.block_sparse_moe.experts.220.w3", "model.layers.24.block_sparse_moe.experts.221.w3", "model.layers.24.block_sparse_moe.experts.222.w3", "model.layers.24.block_sparse_moe.experts.223.w3", "model.layers.24.block_sparse_moe.experts.224.w3", "model.layers.24.block_sparse_moe.experts.225.w3", "model.layers.24.block_sparse_moe.experts.226.w3", "model.layers.24.block_sparse_moe.experts.227.w3", "model.layers.24.block_sparse_moe.experts.228.w3", "model.layers.24.block_sparse_moe.experts.229.w3", "model.layers.24.block_sparse_moe.experts.230.w3", "model.layers.24.block_sparse_moe.experts.231.w3", "model.layers.24.block_sparse_moe.experts.232.w3", "model.layers.24.block_sparse_moe.experts.233.w3", "model.layers.24.block_sparse_moe.experts.234.w3", "model.layers.24.block_sparse_moe.experts.235.w3", "model.layers.24.block_sparse_moe.experts.236.w3", "model.layers.24.block_sparse_moe.experts.237.w3", "model.layers.24.block_sparse_moe.experts.238.w3", "model.layers.24.block_sparse_moe.experts.239.w3", "model.layers.24.block_sparse_moe.experts.240.w3", "model.layers.24.block_sparse_moe.experts.241.w3", "model.layers.24.block_sparse_moe.experts.242.w3", "model.layers.24.block_sparse_moe.experts.243.w3", "model.layers.24.block_sparse_moe.experts.244.w3", "model.layers.24.block_sparse_moe.experts.245.w3", "model.layers.24.block_sparse_moe.experts.246.w3", "model.layers.24.block_sparse_moe.experts.247.w3", "model.layers.24.block_sparse_moe.experts.248.w3", "model.layers.24.block_sparse_moe.experts.249.w3", "model.layers.24.block_sparse_moe.experts.250.w3", "model.layers.24.block_sparse_moe.experts.251.w3", "model.layers.24.block_sparse_moe.experts.252.w3", "model.layers.24.block_sparse_moe.experts.253.w3", "model.layers.24.block_sparse_moe.experts.254.w3", "model.layers.24.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 3.889016807079315e-05, "dbits": 2415919104 } ] }, { "idx": 124, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w2", "model.layers.24.block_sparse_moe.experts.1.w2", "model.layers.24.block_sparse_moe.experts.2.w2", "model.layers.24.block_sparse_moe.experts.3.w2", "model.layers.24.block_sparse_moe.experts.4.w2", "model.layers.24.block_sparse_moe.experts.5.w2", "model.layers.24.block_sparse_moe.experts.6.w2", "model.layers.24.block_sparse_moe.experts.7.w2", "model.layers.24.block_sparse_moe.experts.8.w2", "model.layers.24.block_sparse_moe.experts.9.w2", "model.layers.24.block_sparse_moe.experts.10.w2", "model.layers.24.block_sparse_moe.experts.11.w2", "model.layers.24.block_sparse_moe.experts.12.w2", "model.layers.24.block_sparse_moe.experts.13.w2", "model.layers.24.block_sparse_moe.experts.14.w2", "model.layers.24.block_sparse_moe.experts.15.w2", "model.layers.24.block_sparse_moe.experts.16.w2", "model.layers.24.block_sparse_moe.experts.17.w2", "model.layers.24.block_sparse_moe.experts.18.w2", "model.layers.24.block_sparse_moe.experts.19.w2", "model.layers.24.block_sparse_moe.experts.20.w2", "model.layers.24.block_sparse_moe.experts.21.w2", "model.layers.24.block_sparse_moe.experts.22.w2", "model.layers.24.block_sparse_moe.experts.23.w2", "model.layers.24.block_sparse_moe.experts.24.w2", "model.layers.24.block_sparse_moe.experts.25.w2", "model.layers.24.block_sparse_moe.experts.26.w2", "model.layers.24.block_sparse_moe.experts.27.w2", "model.layers.24.block_sparse_moe.experts.28.w2", "model.layers.24.block_sparse_moe.experts.29.w2", "model.layers.24.block_sparse_moe.experts.30.w2", "model.layers.24.block_sparse_moe.experts.31.w2", "model.layers.24.block_sparse_moe.experts.32.w2", "model.layers.24.block_sparse_moe.experts.33.w2", "model.layers.24.block_sparse_moe.experts.34.w2", "model.layers.24.block_sparse_moe.experts.35.w2", "model.layers.24.block_sparse_moe.experts.36.w2", "model.layers.24.block_sparse_moe.experts.37.w2", "model.layers.24.block_sparse_moe.experts.38.w2", "model.layers.24.block_sparse_moe.experts.39.w2", "model.layers.24.block_sparse_moe.experts.40.w2", "model.layers.24.block_sparse_moe.experts.41.w2", "model.layers.24.block_sparse_moe.experts.42.w2", "model.layers.24.block_sparse_moe.experts.43.w2", "model.layers.24.block_sparse_moe.experts.44.w2", "model.layers.24.block_sparse_moe.experts.45.w2", "model.layers.24.block_sparse_moe.experts.46.w2", "model.layers.24.block_sparse_moe.experts.47.w2", "model.layers.24.block_sparse_moe.experts.48.w2", "model.layers.24.block_sparse_moe.experts.49.w2", "model.layers.24.block_sparse_moe.experts.50.w2", "model.layers.24.block_sparse_moe.experts.51.w2", "model.layers.24.block_sparse_moe.experts.52.w2", "model.layers.24.block_sparse_moe.experts.53.w2", "model.layers.24.block_sparse_moe.experts.54.w2", "model.layers.24.block_sparse_moe.experts.55.w2", "model.layers.24.block_sparse_moe.experts.56.w2", "model.layers.24.block_sparse_moe.experts.57.w2", "model.layers.24.block_sparse_moe.experts.58.w2", "model.layers.24.block_sparse_moe.experts.59.w2", "model.layers.24.block_sparse_moe.experts.60.w2", "model.layers.24.block_sparse_moe.experts.61.w2", "model.layers.24.block_sparse_moe.experts.62.w2", "model.layers.24.block_sparse_moe.experts.63.w2", "model.layers.24.block_sparse_moe.experts.64.w2", "model.layers.24.block_sparse_moe.experts.65.w2", "model.layers.24.block_sparse_moe.experts.66.w2", "model.layers.24.block_sparse_moe.experts.67.w2", "model.layers.24.block_sparse_moe.experts.68.w2", "model.layers.24.block_sparse_moe.experts.69.w2", "model.layers.24.block_sparse_moe.experts.70.w2", "model.layers.24.block_sparse_moe.experts.71.w2", "model.layers.24.block_sparse_moe.experts.72.w2", "model.layers.24.block_sparse_moe.experts.73.w2", "model.layers.24.block_sparse_moe.experts.74.w2", "model.layers.24.block_sparse_moe.experts.75.w2", "model.layers.24.block_sparse_moe.experts.76.w2", "model.layers.24.block_sparse_moe.experts.77.w2", "model.layers.24.block_sparse_moe.experts.78.w2", "model.layers.24.block_sparse_moe.experts.79.w2", "model.layers.24.block_sparse_moe.experts.80.w2", "model.layers.24.block_sparse_moe.experts.81.w2", "model.layers.24.block_sparse_moe.experts.82.w2", "model.layers.24.block_sparse_moe.experts.83.w2", "model.layers.24.block_sparse_moe.experts.84.w2", "model.layers.24.block_sparse_moe.experts.85.w2", "model.layers.24.block_sparse_moe.experts.86.w2", "model.layers.24.block_sparse_moe.experts.87.w2", "model.layers.24.block_sparse_moe.experts.88.w2", "model.layers.24.block_sparse_moe.experts.89.w2", "model.layers.24.block_sparse_moe.experts.90.w2", "model.layers.24.block_sparse_moe.experts.91.w2", "model.layers.24.block_sparse_moe.experts.92.w2", "model.layers.24.block_sparse_moe.experts.93.w2", "model.layers.24.block_sparse_moe.experts.94.w2", "model.layers.24.block_sparse_moe.experts.95.w2", "model.layers.24.block_sparse_moe.experts.96.w2", "model.layers.24.block_sparse_moe.experts.97.w2", "model.layers.24.block_sparse_moe.experts.98.w2", "model.layers.24.block_sparse_moe.experts.99.w2", "model.layers.24.block_sparse_moe.experts.100.w2", "model.layers.24.block_sparse_moe.experts.101.w2", "model.layers.24.block_sparse_moe.experts.102.w2", "model.layers.24.block_sparse_moe.experts.103.w2", "model.layers.24.block_sparse_moe.experts.104.w2", "model.layers.24.block_sparse_moe.experts.105.w2", "model.layers.24.block_sparse_moe.experts.106.w2", "model.layers.24.block_sparse_moe.experts.107.w2", "model.layers.24.block_sparse_moe.experts.108.w2", "model.layers.24.block_sparse_moe.experts.109.w2", "model.layers.24.block_sparse_moe.experts.110.w2", "model.layers.24.block_sparse_moe.experts.111.w2", "model.layers.24.block_sparse_moe.experts.112.w2", "model.layers.24.block_sparse_moe.experts.113.w2", "model.layers.24.block_sparse_moe.experts.114.w2", "model.layers.24.block_sparse_moe.experts.115.w2", "model.layers.24.block_sparse_moe.experts.116.w2", "model.layers.24.block_sparse_moe.experts.117.w2", "model.layers.24.block_sparse_moe.experts.118.w2", "model.layers.24.block_sparse_moe.experts.119.w2", "model.layers.24.block_sparse_moe.experts.120.w2", "model.layers.24.block_sparse_moe.experts.121.w2", "model.layers.24.block_sparse_moe.experts.122.w2", "model.layers.24.block_sparse_moe.experts.123.w2", "model.layers.24.block_sparse_moe.experts.124.w2", "model.layers.24.block_sparse_moe.experts.125.w2", "model.layers.24.block_sparse_moe.experts.126.w2", "model.layers.24.block_sparse_moe.experts.127.w2", "model.layers.24.block_sparse_moe.experts.128.w2", "model.layers.24.block_sparse_moe.experts.129.w2", "model.layers.24.block_sparse_moe.experts.130.w2", "model.layers.24.block_sparse_moe.experts.131.w2", "model.layers.24.block_sparse_moe.experts.132.w2", "model.layers.24.block_sparse_moe.experts.133.w2", "model.layers.24.block_sparse_moe.experts.134.w2", "model.layers.24.block_sparse_moe.experts.135.w2", "model.layers.24.block_sparse_moe.experts.136.w2", "model.layers.24.block_sparse_moe.experts.137.w2", "model.layers.24.block_sparse_moe.experts.138.w2", "model.layers.24.block_sparse_moe.experts.139.w2", "model.layers.24.block_sparse_moe.experts.140.w2", "model.layers.24.block_sparse_moe.experts.141.w2", "model.layers.24.block_sparse_moe.experts.142.w2", "model.layers.24.block_sparse_moe.experts.143.w2", "model.layers.24.block_sparse_moe.experts.144.w2", "model.layers.24.block_sparse_moe.experts.145.w2", "model.layers.24.block_sparse_moe.experts.146.w2", "model.layers.24.block_sparse_moe.experts.147.w2", "model.layers.24.block_sparse_moe.experts.148.w2", "model.layers.24.block_sparse_moe.experts.149.w2", "model.layers.24.block_sparse_moe.experts.150.w2", "model.layers.24.block_sparse_moe.experts.151.w2", "model.layers.24.block_sparse_moe.experts.152.w2", "model.layers.24.block_sparse_moe.experts.153.w2", "model.layers.24.block_sparse_moe.experts.154.w2", "model.layers.24.block_sparse_moe.experts.155.w2", "model.layers.24.block_sparse_moe.experts.156.w2", "model.layers.24.block_sparse_moe.experts.157.w2", "model.layers.24.block_sparse_moe.experts.158.w2", "model.layers.24.block_sparse_moe.experts.159.w2", "model.layers.24.block_sparse_moe.experts.160.w2", "model.layers.24.block_sparse_moe.experts.161.w2", "model.layers.24.block_sparse_moe.experts.162.w2", "model.layers.24.block_sparse_moe.experts.163.w2", "model.layers.24.block_sparse_moe.experts.164.w2", "model.layers.24.block_sparse_moe.experts.165.w2", "model.layers.24.block_sparse_moe.experts.166.w2", "model.layers.24.block_sparse_moe.experts.167.w2", "model.layers.24.block_sparse_moe.experts.168.w2", "model.layers.24.block_sparse_moe.experts.169.w2", "model.layers.24.block_sparse_moe.experts.170.w2", "model.layers.24.block_sparse_moe.experts.171.w2", "model.layers.24.block_sparse_moe.experts.172.w2", "model.layers.24.block_sparse_moe.experts.173.w2", "model.layers.24.block_sparse_moe.experts.174.w2", "model.layers.24.block_sparse_moe.experts.175.w2", "model.layers.24.block_sparse_moe.experts.176.w2", "model.layers.24.block_sparse_moe.experts.177.w2", "model.layers.24.block_sparse_moe.experts.178.w2", "model.layers.24.block_sparse_moe.experts.179.w2", "model.layers.24.block_sparse_moe.experts.180.w2", "model.layers.24.block_sparse_moe.experts.181.w2", "model.layers.24.block_sparse_moe.experts.182.w2", "model.layers.24.block_sparse_moe.experts.183.w2", "model.layers.24.block_sparse_moe.experts.184.w2", "model.layers.24.block_sparse_moe.experts.185.w2", "model.layers.24.block_sparse_moe.experts.186.w2", "model.layers.24.block_sparse_moe.experts.187.w2", "model.layers.24.block_sparse_moe.experts.188.w2", "model.layers.24.block_sparse_moe.experts.189.w2", "model.layers.24.block_sparse_moe.experts.190.w2", "model.layers.24.block_sparse_moe.experts.191.w2", "model.layers.24.block_sparse_moe.experts.192.w2", "model.layers.24.block_sparse_moe.experts.193.w2", "model.layers.24.block_sparse_moe.experts.194.w2", "model.layers.24.block_sparse_moe.experts.195.w2", "model.layers.24.block_sparse_moe.experts.196.w2", "model.layers.24.block_sparse_moe.experts.197.w2", "model.layers.24.block_sparse_moe.experts.198.w2", "model.layers.24.block_sparse_moe.experts.199.w2", "model.layers.24.block_sparse_moe.experts.200.w2", "model.layers.24.block_sparse_moe.experts.201.w2", "model.layers.24.block_sparse_moe.experts.202.w2", "model.layers.24.block_sparse_moe.experts.203.w2", "model.layers.24.block_sparse_moe.experts.204.w2", "model.layers.24.block_sparse_moe.experts.205.w2", "model.layers.24.block_sparse_moe.experts.206.w2", "model.layers.24.block_sparse_moe.experts.207.w2", "model.layers.24.block_sparse_moe.experts.208.w2", "model.layers.24.block_sparse_moe.experts.209.w2", "model.layers.24.block_sparse_moe.experts.210.w2", "model.layers.24.block_sparse_moe.experts.211.w2", "model.layers.24.block_sparse_moe.experts.212.w2", "model.layers.24.block_sparse_moe.experts.213.w2", "model.layers.24.block_sparse_moe.experts.214.w2", "model.layers.24.block_sparse_moe.experts.215.w2", "model.layers.24.block_sparse_moe.experts.216.w2", "model.layers.24.block_sparse_moe.experts.217.w2", "model.layers.24.block_sparse_moe.experts.218.w2", "model.layers.24.block_sparse_moe.experts.219.w2", "model.layers.24.block_sparse_moe.experts.220.w2", "model.layers.24.block_sparse_moe.experts.221.w2", "model.layers.24.block_sparse_moe.experts.222.w2", "model.layers.24.block_sparse_moe.experts.223.w2", "model.layers.24.block_sparse_moe.experts.224.w2", "model.layers.24.block_sparse_moe.experts.225.w2", "model.layers.24.block_sparse_moe.experts.226.w2", "model.layers.24.block_sparse_moe.experts.227.w2", "model.layers.24.block_sparse_moe.experts.228.w2", "model.layers.24.block_sparse_moe.experts.229.w2", "model.layers.24.block_sparse_moe.experts.230.w2", "model.layers.24.block_sparse_moe.experts.231.w2", "model.layers.24.block_sparse_moe.experts.232.w2", "model.layers.24.block_sparse_moe.experts.233.w2", "model.layers.24.block_sparse_moe.experts.234.w2", "model.layers.24.block_sparse_moe.experts.235.w2", "model.layers.24.block_sparse_moe.experts.236.w2", "model.layers.24.block_sparse_moe.experts.237.w2", "model.layers.24.block_sparse_moe.experts.238.w2", "model.layers.24.block_sparse_moe.experts.239.w2", "model.layers.24.block_sparse_moe.experts.240.w2", "model.layers.24.block_sparse_moe.experts.241.w2", "model.layers.24.block_sparse_moe.experts.242.w2", "model.layers.24.block_sparse_moe.experts.243.w2", "model.layers.24.block_sparse_moe.experts.244.w2", "model.layers.24.block_sparse_moe.experts.245.w2", "model.layers.24.block_sparse_moe.experts.246.w2", "model.layers.24.block_sparse_moe.experts.247.w2", "model.layers.24.block_sparse_moe.experts.248.w2", "model.layers.24.block_sparse_moe.experts.249.w2", "model.layers.24.block_sparse_moe.experts.250.w2", "model.layers.24.block_sparse_moe.experts.251.w2", "model.layers.24.block_sparse_moe.experts.252.w2", "model.layers.24.block_sparse_moe.experts.253.w2", "model.layers.24.block_sparse_moe.experts.254.w2", "model.layers.24.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006881378591060694, "dbits": 1207959552 } ] }, { "idx": 125, "layers": [ "model.layers.25.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0010002341121435193, "dbits": 18874368 } ] }, { "idx": 126, "layers": [ "model.layers.25.self_attn.k_proj", "model.layers.25.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0018701000139117185, "dbits": 6291456 } ] }, { "idx": 127, "layers": [ "model.layers.25.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0008108513429760933, "dbits": 18874368 } ] }, { "idx": 128, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w1", "model.layers.25.block_sparse_moe.experts.1.w1", "model.layers.25.block_sparse_moe.experts.2.w1", "model.layers.25.block_sparse_moe.experts.3.w1", "model.layers.25.block_sparse_moe.experts.4.w1", "model.layers.25.block_sparse_moe.experts.5.w1", "model.layers.25.block_sparse_moe.experts.6.w1", "model.layers.25.block_sparse_moe.experts.7.w1", "model.layers.25.block_sparse_moe.experts.8.w1", "model.layers.25.block_sparse_moe.experts.9.w1", "model.layers.25.block_sparse_moe.experts.10.w1", "model.layers.25.block_sparse_moe.experts.11.w1", "model.layers.25.block_sparse_moe.experts.12.w1", "model.layers.25.block_sparse_moe.experts.13.w1", "model.layers.25.block_sparse_moe.experts.14.w1", "model.layers.25.block_sparse_moe.experts.15.w1", "model.layers.25.block_sparse_moe.experts.16.w1", "model.layers.25.block_sparse_moe.experts.17.w1", "model.layers.25.block_sparse_moe.experts.18.w1", "model.layers.25.block_sparse_moe.experts.19.w1", "model.layers.25.block_sparse_moe.experts.20.w1", "model.layers.25.block_sparse_moe.experts.21.w1", "model.layers.25.block_sparse_moe.experts.22.w1", "model.layers.25.block_sparse_moe.experts.23.w1", "model.layers.25.block_sparse_moe.experts.24.w1", "model.layers.25.block_sparse_moe.experts.25.w1", "model.layers.25.block_sparse_moe.experts.26.w1", "model.layers.25.block_sparse_moe.experts.27.w1", "model.layers.25.block_sparse_moe.experts.28.w1", "model.layers.25.block_sparse_moe.experts.29.w1", "model.layers.25.block_sparse_moe.experts.30.w1", "model.layers.25.block_sparse_moe.experts.31.w1", "model.layers.25.block_sparse_moe.experts.32.w1", "model.layers.25.block_sparse_moe.experts.33.w1", "model.layers.25.block_sparse_moe.experts.34.w1", "model.layers.25.block_sparse_moe.experts.35.w1", "model.layers.25.block_sparse_moe.experts.36.w1", "model.layers.25.block_sparse_moe.experts.37.w1", "model.layers.25.block_sparse_moe.experts.38.w1", "model.layers.25.block_sparse_moe.experts.39.w1", "model.layers.25.block_sparse_moe.experts.40.w1", "model.layers.25.block_sparse_moe.experts.41.w1", "model.layers.25.block_sparse_moe.experts.42.w1", "model.layers.25.block_sparse_moe.experts.43.w1", "model.layers.25.block_sparse_moe.experts.44.w1", "model.layers.25.block_sparse_moe.experts.45.w1", "model.layers.25.block_sparse_moe.experts.46.w1", "model.layers.25.block_sparse_moe.experts.47.w1", "model.layers.25.block_sparse_moe.experts.48.w1", "model.layers.25.block_sparse_moe.experts.49.w1", "model.layers.25.block_sparse_moe.experts.50.w1", "model.layers.25.block_sparse_moe.experts.51.w1", "model.layers.25.block_sparse_moe.experts.52.w1", "model.layers.25.block_sparse_moe.experts.53.w1", "model.layers.25.block_sparse_moe.experts.54.w1", "model.layers.25.block_sparse_moe.experts.55.w1", "model.layers.25.block_sparse_moe.experts.56.w1", "model.layers.25.block_sparse_moe.experts.57.w1", "model.layers.25.block_sparse_moe.experts.58.w1", "model.layers.25.block_sparse_moe.experts.59.w1", "model.layers.25.block_sparse_moe.experts.60.w1", "model.layers.25.block_sparse_moe.experts.61.w1", "model.layers.25.block_sparse_moe.experts.62.w1", "model.layers.25.block_sparse_moe.experts.63.w1", "model.layers.25.block_sparse_moe.experts.64.w1", "model.layers.25.block_sparse_moe.experts.65.w1", "model.layers.25.block_sparse_moe.experts.66.w1", "model.layers.25.block_sparse_moe.experts.67.w1", "model.layers.25.block_sparse_moe.experts.68.w1", "model.layers.25.block_sparse_moe.experts.69.w1", "model.layers.25.block_sparse_moe.experts.70.w1", "model.layers.25.block_sparse_moe.experts.71.w1", "model.layers.25.block_sparse_moe.experts.72.w1", "model.layers.25.block_sparse_moe.experts.73.w1", "model.layers.25.block_sparse_moe.experts.74.w1", "model.layers.25.block_sparse_moe.experts.75.w1", "model.layers.25.block_sparse_moe.experts.76.w1", "model.layers.25.block_sparse_moe.experts.77.w1", "model.layers.25.block_sparse_moe.experts.78.w1", "model.layers.25.block_sparse_moe.experts.79.w1", "model.layers.25.block_sparse_moe.experts.80.w1", "model.layers.25.block_sparse_moe.experts.81.w1", "model.layers.25.block_sparse_moe.experts.82.w1", "model.layers.25.block_sparse_moe.experts.83.w1", "model.layers.25.block_sparse_moe.experts.84.w1", "model.layers.25.block_sparse_moe.experts.85.w1", "model.layers.25.block_sparse_moe.experts.86.w1", "model.layers.25.block_sparse_moe.experts.87.w1", "model.layers.25.block_sparse_moe.experts.88.w1", "model.layers.25.block_sparse_moe.experts.89.w1", "model.layers.25.block_sparse_moe.experts.90.w1", "model.layers.25.block_sparse_moe.experts.91.w1", "model.layers.25.block_sparse_moe.experts.92.w1", "model.layers.25.block_sparse_moe.experts.93.w1", "model.layers.25.block_sparse_moe.experts.94.w1", "model.layers.25.block_sparse_moe.experts.95.w1", "model.layers.25.block_sparse_moe.experts.96.w1", "model.layers.25.block_sparse_moe.experts.97.w1", "model.layers.25.block_sparse_moe.experts.98.w1", "model.layers.25.block_sparse_moe.experts.99.w1", "model.layers.25.block_sparse_moe.experts.100.w1", "model.layers.25.block_sparse_moe.experts.101.w1", "model.layers.25.block_sparse_moe.experts.102.w1", "model.layers.25.block_sparse_moe.experts.103.w1", "model.layers.25.block_sparse_moe.experts.104.w1", "model.layers.25.block_sparse_moe.experts.105.w1", "model.layers.25.block_sparse_moe.experts.106.w1", "model.layers.25.block_sparse_moe.experts.107.w1", "model.layers.25.block_sparse_moe.experts.108.w1", "model.layers.25.block_sparse_moe.experts.109.w1", "model.layers.25.block_sparse_moe.experts.110.w1", "model.layers.25.block_sparse_moe.experts.111.w1", "model.layers.25.block_sparse_moe.experts.112.w1", "model.layers.25.block_sparse_moe.experts.113.w1", "model.layers.25.block_sparse_moe.experts.114.w1", "model.layers.25.block_sparse_moe.experts.115.w1", "model.layers.25.block_sparse_moe.experts.116.w1", "model.layers.25.block_sparse_moe.experts.117.w1", "model.layers.25.block_sparse_moe.experts.118.w1", "model.layers.25.block_sparse_moe.experts.119.w1", "model.layers.25.block_sparse_moe.experts.120.w1", "model.layers.25.block_sparse_moe.experts.121.w1", "model.layers.25.block_sparse_moe.experts.122.w1", "model.layers.25.block_sparse_moe.experts.123.w1", "model.layers.25.block_sparse_moe.experts.124.w1", "model.layers.25.block_sparse_moe.experts.125.w1", "model.layers.25.block_sparse_moe.experts.126.w1", "model.layers.25.block_sparse_moe.experts.127.w1", "model.layers.25.block_sparse_moe.experts.128.w1", "model.layers.25.block_sparse_moe.experts.129.w1", "model.layers.25.block_sparse_moe.experts.130.w1", "model.layers.25.block_sparse_moe.experts.131.w1", "model.layers.25.block_sparse_moe.experts.132.w1", "model.layers.25.block_sparse_moe.experts.133.w1", "model.layers.25.block_sparse_moe.experts.134.w1", "model.layers.25.block_sparse_moe.experts.135.w1", "model.layers.25.block_sparse_moe.experts.136.w1", "model.layers.25.block_sparse_moe.experts.137.w1", "model.layers.25.block_sparse_moe.experts.138.w1", "model.layers.25.block_sparse_moe.experts.139.w1", "model.layers.25.block_sparse_moe.experts.140.w1", "model.layers.25.block_sparse_moe.experts.141.w1", "model.layers.25.block_sparse_moe.experts.142.w1", "model.layers.25.block_sparse_moe.experts.143.w1", "model.layers.25.block_sparse_moe.experts.144.w1", "model.layers.25.block_sparse_moe.experts.145.w1", "model.layers.25.block_sparse_moe.experts.146.w1", "model.layers.25.block_sparse_moe.experts.147.w1", "model.layers.25.block_sparse_moe.experts.148.w1", "model.layers.25.block_sparse_moe.experts.149.w1", "model.layers.25.block_sparse_moe.experts.150.w1", "model.layers.25.block_sparse_moe.experts.151.w1", "model.layers.25.block_sparse_moe.experts.152.w1", "model.layers.25.block_sparse_moe.experts.153.w1", "model.layers.25.block_sparse_moe.experts.154.w1", "model.layers.25.block_sparse_moe.experts.155.w1", "model.layers.25.block_sparse_moe.experts.156.w1", "model.layers.25.block_sparse_moe.experts.157.w1", "model.layers.25.block_sparse_moe.experts.158.w1", "model.layers.25.block_sparse_moe.experts.159.w1", "model.layers.25.block_sparse_moe.experts.160.w1", "model.layers.25.block_sparse_moe.experts.161.w1", "model.layers.25.block_sparse_moe.experts.162.w1", "model.layers.25.block_sparse_moe.experts.163.w1", "model.layers.25.block_sparse_moe.experts.164.w1", "model.layers.25.block_sparse_moe.experts.165.w1", "model.layers.25.block_sparse_moe.experts.166.w1", "model.layers.25.block_sparse_moe.experts.167.w1", "model.layers.25.block_sparse_moe.experts.168.w1", "model.layers.25.block_sparse_moe.experts.169.w1", "model.layers.25.block_sparse_moe.experts.170.w1", "model.layers.25.block_sparse_moe.experts.171.w1", "model.layers.25.block_sparse_moe.experts.172.w1", "model.layers.25.block_sparse_moe.experts.173.w1", "model.layers.25.block_sparse_moe.experts.174.w1", "model.layers.25.block_sparse_moe.experts.175.w1", "model.layers.25.block_sparse_moe.experts.176.w1", "model.layers.25.block_sparse_moe.experts.177.w1", "model.layers.25.block_sparse_moe.experts.178.w1", "model.layers.25.block_sparse_moe.experts.179.w1", "model.layers.25.block_sparse_moe.experts.180.w1", "model.layers.25.block_sparse_moe.experts.181.w1", "model.layers.25.block_sparse_moe.experts.182.w1", "model.layers.25.block_sparse_moe.experts.183.w1", "model.layers.25.block_sparse_moe.experts.184.w1", "model.layers.25.block_sparse_moe.experts.185.w1", "model.layers.25.block_sparse_moe.experts.186.w1", "model.layers.25.block_sparse_moe.experts.187.w1", "model.layers.25.block_sparse_moe.experts.188.w1", "model.layers.25.block_sparse_moe.experts.189.w1", "model.layers.25.block_sparse_moe.experts.190.w1", "model.layers.25.block_sparse_moe.experts.191.w1", "model.layers.25.block_sparse_moe.experts.192.w1", "model.layers.25.block_sparse_moe.experts.193.w1", "model.layers.25.block_sparse_moe.experts.194.w1", "model.layers.25.block_sparse_moe.experts.195.w1", "model.layers.25.block_sparse_moe.experts.196.w1", "model.layers.25.block_sparse_moe.experts.197.w1", "model.layers.25.block_sparse_moe.experts.198.w1", "model.layers.25.block_sparse_moe.experts.199.w1", "model.layers.25.block_sparse_moe.experts.200.w1", "model.layers.25.block_sparse_moe.experts.201.w1", "model.layers.25.block_sparse_moe.experts.202.w1", "model.layers.25.block_sparse_moe.experts.203.w1", "model.layers.25.block_sparse_moe.experts.204.w1", "model.layers.25.block_sparse_moe.experts.205.w1", "model.layers.25.block_sparse_moe.experts.206.w1", "model.layers.25.block_sparse_moe.experts.207.w1", "model.layers.25.block_sparse_moe.experts.208.w1", "model.layers.25.block_sparse_moe.experts.209.w1", "model.layers.25.block_sparse_moe.experts.210.w1", "model.layers.25.block_sparse_moe.experts.211.w1", "model.layers.25.block_sparse_moe.experts.212.w1", "model.layers.25.block_sparse_moe.experts.213.w1", "model.layers.25.block_sparse_moe.experts.214.w1", "model.layers.25.block_sparse_moe.experts.215.w1", "model.layers.25.block_sparse_moe.experts.216.w1", "model.layers.25.block_sparse_moe.experts.217.w1", "model.layers.25.block_sparse_moe.experts.218.w1", "model.layers.25.block_sparse_moe.experts.219.w1", "model.layers.25.block_sparse_moe.experts.220.w1", "model.layers.25.block_sparse_moe.experts.221.w1", "model.layers.25.block_sparse_moe.experts.222.w1", "model.layers.25.block_sparse_moe.experts.223.w1", "model.layers.25.block_sparse_moe.experts.224.w1", "model.layers.25.block_sparse_moe.experts.225.w1", "model.layers.25.block_sparse_moe.experts.226.w1", "model.layers.25.block_sparse_moe.experts.227.w1", "model.layers.25.block_sparse_moe.experts.228.w1", "model.layers.25.block_sparse_moe.experts.229.w1", "model.layers.25.block_sparse_moe.experts.230.w1", "model.layers.25.block_sparse_moe.experts.231.w1", "model.layers.25.block_sparse_moe.experts.232.w1", "model.layers.25.block_sparse_moe.experts.233.w1", "model.layers.25.block_sparse_moe.experts.234.w1", "model.layers.25.block_sparse_moe.experts.235.w1", "model.layers.25.block_sparse_moe.experts.236.w1", "model.layers.25.block_sparse_moe.experts.237.w1", "model.layers.25.block_sparse_moe.experts.238.w1", "model.layers.25.block_sparse_moe.experts.239.w1", "model.layers.25.block_sparse_moe.experts.240.w1", "model.layers.25.block_sparse_moe.experts.241.w1", "model.layers.25.block_sparse_moe.experts.242.w1", "model.layers.25.block_sparse_moe.experts.243.w1", "model.layers.25.block_sparse_moe.experts.244.w1", "model.layers.25.block_sparse_moe.experts.245.w1", "model.layers.25.block_sparse_moe.experts.246.w1", "model.layers.25.block_sparse_moe.experts.247.w1", "model.layers.25.block_sparse_moe.experts.248.w1", "model.layers.25.block_sparse_moe.experts.249.w1", "model.layers.25.block_sparse_moe.experts.250.w1", "model.layers.25.block_sparse_moe.experts.251.w1", "model.layers.25.block_sparse_moe.experts.252.w1", "model.layers.25.block_sparse_moe.experts.253.w1", "model.layers.25.block_sparse_moe.experts.254.w1", "model.layers.25.block_sparse_moe.experts.255.w1", "model.layers.25.block_sparse_moe.experts.0.w3", "model.layers.25.block_sparse_moe.experts.1.w3", "model.layers.25.block_sparse_moe.experts.2.w3", "model.layers.25.block_sparse_moe.experts.3.w3", "model.layers.25.block_sparse_moe.experts.4.w3", "model.layers.25.block_sparse_moe.experts.5.w3", "model.layers.25.block_sparse_moe.experts.6.w3", "model.layers.25.block_sparse_moe.experts.7.w3", "model.layers.25.block_sparse_moe.experts.8.w3", "model.layers.25.block_sparse_moe.experts.9.w3", "model.layers.25.block_sparse_moe.experts.10.w3", "model.layers.25.block_sparse_moe.experts.11.w3", "model.layers.25.block_sparse_moe.experts.12.w3", "model.layers.25.block_sparse_moe.experts.13.w3", "model.layers.25.block_sparse_moe.experts.14.w3", "model.layers.25.block_sparse_moe.experts.15.w3", "model.layers.25.block_sparse_moe.experts.16.w3", "model.layers.25.block_sparse_moe.experts.17.w3", "model.layers.25.block_sparse_moe.experts.18.w3", "model.layers.25.block_sparse_moe.experts.19.w3", "model.layers.25.block_sparse_moe.experts.20.w3", "model.layers.25.block_sparse_moe.experts.21.w3", "model.layers.25.block_sparse_moe.experts.22.w3", "model.layers.25.block_sparse_moe.experts.23.w3", "model.layers.25.block_sparse_moe.experts.24.w3", "model.layers.25.block_sparse_moe.experts.25.w3", "model.layers.25.block_sparse_moe.experts.26.w3", "model.layers.25.block_sparse_moe.experts.27.w3", "model.layers.25.block_sparse_moe.experts.28.w3", "model.layers.25.block_sparse_moe.experts.29.w3", "model.layers.25.block_sparse_moe.experts.30.w3", "model.layers.25.block_sparse_moe.experts.31.w3", "model.layers.25.block_sparse_moe.experts.32.w3", "model.layers.25.block_sparse_moe.experts.33.w3", "model.layers.25.block_sparse_moe.experts.34.w3", "model.layers.25.block_sparse_moe.experts.35.w3", "model.layers.25.block_sparse_moe.experts.36.w3", "model.layers.25.block_sparse_moe.experts.37.w3", "model.layers.25.block_sparse_moe.experts.38.w3", "model.layers.25.block_sparse_moe.experts.39.w3", "model.layers.25.block_sparse_moe.experts.40.w3", "model.layers.25.block_sparse_moe.experts.41.w3", "model.layers.25.block_sparse_moe.experts.42.w3", "model.layers.25.block_sparse_moe.experts.43.w3", "model.layers.25.block_sparse_moe.experts.44.w3", "model.layers.25.block_sparse_moe.experts.45.w3", "model.layers.25.block_sparse_moe.experts.46.w3", "model.layers.25.block_sparse_moe.experts.47.w3", "model.layers.25.block_sparse_moe.experts.48.w3", "model.layers.25.block_sparse_moe.experts.49.w3", "model.layers.25.block_sparse_moe.experts.50.w3", "model.layers.25.block_sparse_moe.experts.51.w3", "model.layers.25.block_sparse_moe.experts.52.w3", "model.layers.25.block_sparse_moe.experts.53.w3", "model.layers.25.block_sparse_moe.experts.54.w3", "model.layers.25.block_sparse_moe.experts.55.w3", "model.layers.25.block_sparse_moe.experts.56.w3", "model.layers.25.block_sparse_moe.experts.57.w3", "model.layers.25.block_sparse_moe.experts.58.w3", "model.layers.25.block_sparse_moe.experts.59.w3", "model.layers.25.block_sparse_moe.experts.60.w3", "model.layers.25.block_sparse_moe.experts.61.w3", "model.layers.25.block_sparse_moe.experts.62.w3", "model.layers.25.block_sparse_moe.experts.63.w3", "model.layers.25.block_sparse_moe.experts.64.w3", "model.layers.25.block_sparse_moe.experts.65.w3", "model.layers.25.block_sparse_moe.experts.66.w3", "model.layers.25.block_sparse_moe.experts.67.w3", "model.layers.25.block_sparse_moe.experts.68.w3", "model.layers.25.block_sparse_moe.experts.69.w3", "model.layers.25.block_sparse_moe.experts.70.w3", "model.layers.25.block_sparse_moe.experts.71.w3", "model.layers.25.block_sparse_moe.experts.72.w3", "model.layers.25.block_sparse_moe.experts.73.w3", "model.layers.25.block_sparse_moe.experts.74.w3", "model.layers.25.block_sparse_moe.experts.75.w3", "model.layers.25.block_sparse_moe.experts.76.w3", "model.layers.25.block_sparse_moe.experts.77.w3", "model.layers.25.block_sparse_moe.experts.78.w3", "model.layers.25.block_sparse_moe.experts.79.w3", "model.layers.25.block_sparse_moe.experts.80.w3", "model.layers.25.block_sparse_moe.experts.81.w3", "model.layers.25.block_sparse_moe.experts.82.w3", "model.layers.25.block_sparse_moe.experts.83.w3", "model.layers.25.block_sparse_moe.experts.84.w3", "model.layers.25.block_sparse_moe.experts.85.w3", "model.layers.25.block_sparse_moe.experts.86.w3", "model.layers.25.block_sparse_moe.experts.87.w3", "model.layers.25.block_sparse_moe.experts.88.w3", "model.layers.25.block_sparse_moe.experts.89.w3", "model.layers.25.block_sparse_moe.experts.90.w3", "model.layers.25.block_sparse_moe.experts.91.w3", "model.layers.25.block_sparse_moe.experts.92.w3", "model.layers.25.block_sparse_moe.experts.93.w3", "model.layers.25.block_sparse_moe.experts.94.w3", "model.layers.25.block_sparse_moe.experts.95.w3", "model.layers.25.block_sparse_moe.experts.96.w3", "model.layers.25.block_sparse_moe.experts.97.w3", "model.layers.25.block_sparse_moe.experts.98.w3", "model.layers.25.block_sparse_moe.experts.99.w3", "model.layers.25.block_sparse_moe.experts.100.w3", "model.layers.25.block_sparse_moe.experts.101.w3", "model.layers.25.block_sparse_moe.experts.102.w3", "model.layers.25.block_sparse_moe.experts.103.w3", "model.layers.25.block_sparse_moe.experts.104.w3", "model.layers.25.block_sparse_moe.experts.105.w3", "model.layers.25.block_sparse_moe.experts.106.w3", "model.layers.25.block_sparse_moe.experts.107.w3", "model.layers.25.block_sparse_moe.experts.108.w3", "model.layers.25.block_sparse_moe.experts.109.w3", "model.layers.25.block_sparse_moe.experts.110.w3", "model.layers.25.block_sparse_moe.experts.111.w3", "model.layers.25.block_sparse_moe.experts.112.w3", "model.layers.25.block_sparse_moe.experts.113.w3", "model.layers.25.block_sparse_moe.experts.114.w3", "model.layers.25.block_sparse_moe.experts.115.w3", "model.layers.25.block_sparse_moe.experts.116.w3", "model.layers.25.block_sparse_moe.experts.117.w3", "model.layers.25.block_sparse_moe.experts.118.w3", "model.layers.25.block_sparse_moe.experts.119.w3", "model.layers.25.block_sparse_moe.experts.120.w3", "model.layers.25.block_sparse_moe.experts.121.w3", "model.layers.25.block_sparse_moe.experts.122.w3", "model.layers.25.block_sparse_moe.experts.123.w3", "model.layers.25.block_sparse_moe.experts.124.w3", "model.layers.25.block_sparse_moe.experts.125.w3", "model.layers.25.block_sparse_moe.experts.126.w3", "model.layers.25.block_sparse_moe.experts.127.w3", "model.layers.25.block_sparse_moe.experts.128.w3", "model.layers.25.block_sparse_moe.experts.129.w3", "model.layers.25.block_sparse_moe.experts.130.w3", "model.layers.25.block_sparse_moe.experts.131.w3", "model.layers.25.block_sparse_moe.experts.132.w3", "model.layers.25.block_sparse_moe.experts.133.w3", "model.layers.25.block_sparse_moe.experts.134.w3", "model.layers.25.block_sparse_moe.experts.135.w3", "model.layers.25.block_sparse_moe.experts.136.w3", "model.layers.25.block_sparse_moe.experts.137.w3", "model.layers.25.block_sparse_moe.experts.138.w3", "model.layers.25.block_sparse_moe.experts.139.w3", "model.layers.25.block_sparse_moe.experts.140.w3", "model.layers.25.block_sparse_moe.experts.141.w3", "model.layers.25.block_sparse_moe.experts.142.w3", "model.layers.25.block_sparse_moe.experts.143.w3", "model.layers.25.block_sparse_moe.experts.144.w3", "model.layers.25.block_sparse_moe.experts.145.w3", "model.layers.25.block_sparse_moe.experts.146.w3", "model.layers.25.block_sparse_moe.experts.147.w3", "model.layers.25.block_sparse_moe.experts.148.w3", "model.layers.25.block_sparse_moe.experts.149.w3", "model.layers.25.block_sparse_moe.experts.150.w3", "model.layers.25.block_sparse_moe.experts.151.w3", "model.layers.25.block_sparse_moe.experts.152.w3", "model.layers.25.block_sparse_moe.experts.153.w3", "model.layers.25.block_sparse_moe.experts.154.w3", "model.layers.25.block_sparse_moe.experts.155.w3", "model.layers.25.block_sparse_moe.experts.156.w3", "model.layers.25.block_sparse_moe.experts.157.w3", "model.layers.25.block_sparse_moe.experts.158.w3", "model.layers.25.block_sparse_moe.experts.159.w3", "model.layers.25.block_sparse_moe.experts.160.w3", "model.layers.25.block_sparse_moe.experts.161.w3", "model.layers.25.block_sparse_moe.experts.162.w3", "model.layers.25.block_sparse_moe.experts.163.w3", "model.layers.25.block_sparse_moe.experts.164.w3", "model.layers.25.block_sparse_moe.experts.165.w3", "model.layers.25.block_sparse_moe.experts.166.w3", "model.layers.25.block_sparse_moe.experts.167.w3", "model.layers.25.block_sparse_moe.experts.168.w3", "model.layers.25.block_sparse_moe.experts.169.w3", "model.layers.25.block_sparse_moe.experts.170.w3", "model.layers.25.block_sparse_moe.experts.171.w3", "model.layers.25.block_sparse_moe.experts.172.w3", "model.layers.25.block_sparse_moe.experts.173.w3", "model.layers.25.block_sparse_moe.experts.174.w3", "model.layers.25.block_sparse_moe.experts.175.w3", "model.layers.25.block_sparse_moe.experts.176.w3", "model.layers.25.block_sparse_moe.experts.177.w3", "model.layers.25.block_sparse_moe.experts.178.w3", "model.layers.25.block_sparse_moe.experts.179.w3", "model.layers.25.block_sparse_moe.experts.180.w3", "model.layers.25.block_sparse_moe.experts.181.w3", "model.layers.25.block_sparse_moe.experts.182.w3", "model.layers.25.block_sparse_moe.experts.183.w3", "model.layers.25.block_sparse_moe.experts.184.w3", "model.layers.25.block_sparse_moe.experts.185.w3", "model.layers.25.block_sparse_moe.experts.186.w3", "model.layers.25.block_sparse_moe.experts.187.w3", "model.layers.25.block_sparse_moe.experts.188.w3", "model.layers.25.block_sparse_moe.experts.189.w3", "model.layers.25.block_sparse_moe.experts.190.w3", "model.layers.25.block_sparse_moe.experts.191.w3", "model.layers.25.block_sparse_moe.experts.192.w3", "model.layers.25.block_sparse_moe.experts.193.w3", "model.layers.25.block_sparse_moe.experts.194.w3", "model.layers.25.block_sparse_moe.experts.195.w3", "model.layers.25.block_sparse_moe.experts.196.w3", "model.layers.25.block_sparse_moe.experts.197.w3", "model.layers.25.block_sparse_moe.experts.198.w3", "model.layers.25.block_sparse_moe.experts.199.w3", "model.layers.25.block_sparse_moe.experts.200.w3", "model.layers.25.block_sparse_moe.experts.201.w3", "model.layers.25.block_sparse_moe.experts.202.w3", "model.layers.25.block_sparse_moe.experts.203.w3", "model.layers.25.block_sparse_moe.experts.204.w3", "model.layers.25.block_sparse_moe.experts.205.w3", "model.layers.25.block_sparse_moe.experts.206.w3", "model.layers.25.block_sparse_moe.experts.207.w3", "model.layers.25.block_sparse_moe.experts.208.w3", "model.layers.25.block_sparse_moe.experts.209.w3", "model.layers.25.block_sparse_moe.experts.210.w3", "model.layers.25.block_sparse_moe.experts.211.w3", "model.layers.25.block_sparse_moe.experts.212.w3", "model.layers.25.block_sparse_moe.experts.213.w3", "model.layers.25.block_sparse_moe.experts.214.w3", "model.layers.25.block_sparse_moe.experts.215.w3", "model.layers.25.block_sparse_moe.experts.216.w3", "model.layers.25.block_sparse_moe.experts.217.w3", "model.layers.25.block_sparse_moe.experts.218.w3", "model.layers.25.block_sparse_moe.experts.219.w3", "model.layers.25.block_sparse_moe.experts.220.w3", "model.layers.25.block_sparse_moe.experts.221.w3", "model.layers.25.block_sparse_moe.experts.222.w3", "model.layers.25.block_sparse_moe.experts.223.w3", "model.layers.25.block_sparse_moe.experts.224.w3", "model.layers.25.block_sparse_moe.experts.225.w3", "model.layers.25.block_sparse_moe.experts.226.w3", "model.layers.25.block_sparse_moe.experts.227.w3", "model.layers.25.block_sparse_moe.experts.228.w3", "model.layers.25.block_sparse_moe.experts.229.w3", "model.layers.25.block_sparse_moe.experts.230.w3", "model.layers.25.block_sparse_moe.experts.231.w3", "model.layers.25.block_sparse_moe.experts.232.w3", "model.layers.25.block_sparse_moe.experts.233.w3", "model.layers.25.block_sparse_moe.experts.234.w3", "model.layers.25.block_sparse_moe.experts.235.w3", "model.layers.25.block_sparse_moe.experts.236.w3", "model.layers.25.block_sparse_moe.experts.237.w3", "model.layers.25.block_sparse_moe.experts.238.w3", "model.layers.25.block_sparse_moe.experts.239.w3", "model.layers.25.block_sparse_moe.experts.240.w3", "model.layers.25.block_sparse_moe.experts.241.w3", "model.layers.25.block_sparse_moe.experts.242.w3", "model.layers.25.block_sparse_moe.experts.243.w3", "model.layers.25.block_sparse_moe.experts.244.w3", "model.layers.25.block_sparse_moe.experts.245.w3", "model.layers.25.block_sparse_moe.experts.246.w3", "model.layers.25.block_sparse_moe.experts.247.w3", "model.layers.25.block_sparse_moe.experts.248.w3", "model.layers.25.block_sparse_moe.experts.249.w3", "model.layers.25.block_sparse_moe.experts.250.w3", "model.layers.25.block_sparse_moe.experts.251.w3", "model.layers.25.block_sparse_moe.experts.252.w3", "model.layers.25.block_sparse_moe.experts.253.w3", "model.layers.25.block_sparse_moe.experts.254.w3", "model.layers.25.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00029545985162257593, "dbits": 2415919104 } ] }, { "idx": 129, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w2", "model.layers.25.block_sparse_moe.experts.1.w2", "model.layers.25.block_sparse_moe.experts.2.w2", "model.layers.25.block_sparse_moe.experts.3.w2", "model.layers.25.block_sparse_moe.experts.4.w2", "model.layers.25.block_sparse_moe.experts.5.w2", "model.layers.25.block_sparse_moe.experts.6.w2", "model.layers.25.block_sparse_moe.experts.7.w2", "model.layers.25.block_sparse_moe.experts.8.w2", "model.layers.25.block_sparse_moe.experts.9.w2", "model.layers.25.block_sparse_moe.experts.10.w2", "model.layers.25.block_sparse_moe.experts.11.w2", "model.layers.25.block_sparse_moe.experts.12.w2", "model.layers.25.block_sparse_moe.experts.13.w2", "model.layers.25.block_sparse_moe.experts.14.w2", "model.layers.25.block_sparse_moe.experts.15.w2", "model.layers.25.block_sparse_moe.experts.16.w2", "model.layers.25.block_sparse_moe.experts.17.w2", "model.layers.25.block_sparse_moe.experts.18.w2", "model.layers.25.block_sparse_moe.experts.19.w2", "model.layers.25.block_sparse_moe.experts.20.w2", "model.layers.25.block_sparse_moe.experts.21.w2", "model.layers.25.block_sparse_moe.experts.22.w2", "model.layers.25.block_sparse_moe.experts.23.w2", "model.layers.25.block_sparse_moe.experts.24.w2", "model.layers.25.block_sparse_moe.experts.25.w2", "model.layers.25.block_sparse_moe.experts.26.w2", "model.layers.25.block_sparse_moe.experts.27.w2", "model.layers.25.block_sparse_moe.experts.28.w2", "model.layers.25.block_sparse_moe.experts.29.w2", "model.layers.25.block_sparse_moe.experts.30.w2", "model.layers.25.block_sparse_moe.experts.31.w2", "model.layers.25.block_sparse_moe.experts.32.w2", "model.layers.25.block_sparse_moe.experts.33.w2", "model.layers.25.block_sparse_moe.experts.34.w2", "model.layers.25.block_sparse_moe.experts.35.w2", "model.layers.25.block_sparse_moe.experts.36.w2", "model.layers.25.block_sparse_moe.experts.37.w2", "model.layers.25.block_sparse_moe.experts.38.w2", "model.layers.25.block_sparse_moe.experts.39.w2", "model.layers.25.block_sparse_moe.experts.40.w2", "model.layers.25.block_sparse_moe.experts.41.w2", "model.layers.25.block_sparse_moe.experts.42.w2", "model.layers.25.block_sparse_moe.experts.43.w2", "model.layers.25.block_sparse_moe.experts.44.w2", "model.layers.25.block_sparse_moe.experts.45.w2", "model.layers.25.block_sparse_moe.experts.46.w2", "model.layers.25.block_sparse_moe.experts.47.w2", "model.layers.25.block_sparse_moe.experts.48.w2", "model.layers.25.block_sparse_moe.experts.49.w2", "model.layers.25.block_sparse_moe.experts.50.w2", "model.layers.25.block_sparse_moe.experts.51.w2", "model.layers.25.block_sparse_moe.experts.52.w2", "model.layers.25.block_sparse_moe.experts.53.w2", "model.layers.25.block_sparse_moe.experts.54.w2", "model.layers.25.block_sparse_moe.experts.55.w2", "model.layers.25.block_sparse_moe.experts.56.w2", "model.layers.25.block_sparse_moe.experts.57.w2", "model.layers.25.block_sparse_moe.experts.58.w2", "model.layers.25.block_sparse_moe.experts.59.w2", "model.layers.25.block_sparse_moe.experts.60.w2", "model.layers.25.block_sparse_moe.experts.61.w2", "model.layers.25.block_sparse_moe.experts.62.w2", "model.layers.25.block_sparse_moe.experts.63.w2", "model.layers.25.block_sparse_moe.experts.64.w2", "model.layers.25.block_sparse_moe.experts.65.w2", "model.layers.25.block_sparse_moe.experts.66.w2", "model.layers.25.block_sparse_moe.experts.67.w2", "model.layers.25.block_sparse_moe.experts.68.w2", "model.layers.25.block_sparse_moe.experts.69.w2", "model.layers.25.block_sparse_moe.experts.70.w2", "model.layers.25.block_sparse_moe.experts.71.w2", "model.layers.25.block_sparse_moe.experts.72.w2", "model.layers.25.block_sparse_moe.experts.73.w2", "model.layers.25.block_sparse_moe.experts.74.w2", "model.layers.25.block_sparse_moe.experts.75.w2", "model.layers.25.block_sparse_moe.experts.76.w2", "model.layers.25.block_sparse_moe.experts.77.w2", "model.layers.25.block_sparse_moe.experts.78.w2", "model.layers.25.block_sparse_moe.experts.79.w2", "model.layers.25.block_sparse_moe.experts.80.w2", "model.layers.25.block_sparse_moe.experts.81.w2", "model.layers.25.block_sparse_moe.experts.82.w2", "model.layers.25.block_sparse_moe.experts.83.w2", "model.layers.25.block_sparse_moe.experts.84.w2", "model.layers.25.block_sparse_moe.experts.85.w2", "model.layers.25.block_sparse_moe.experts.86.w2", "model.layers.25.block_sparse_moe.experts.87.w2", "model.layers.25.block_sparse_moe.experts.88.w2", "model.layers.25.block_sparse_moe.experts.89.w2", "model.layers.25.block_sparse_moe.experts.90.w2", "model.layers.25.block_sparse_moe.experts.91.w2", "model.layers.25.block_sparse_moe.experts.92.w2", "model.layers.25.block_sparse_moe.experts.93.w2", "model.layers.25.block_sparse_moe.experts.94.w2", "model.layers.25.block_sparse_moe.experts.95.w2", "model.layers.25.block_sparse_moe.experts.96.w2", "model.layers.25.block_sparse_moe.experts.97.w2", "model.layers.25.block_sparse_moe.experts.98.w2", "model.layers.25.block_sparse_moe.experts.99.w2", "model.layers.25.block_sparse_moe.experts.100.w2", "model.layers.25.block_sparse_moe.experts.101.w2", "model.layers.25.block_sparse_moe.experts.102.w2", "model.layers.25.block_sparse_moe.experts.103.w2", "model.layers.25.block_sparse_moe.experts.104.w2", "model.layers.25.block_sparse_moe.experts.105.w2", "model.layers.25.block_sparse_moe.experts.106.w2", "model.layers.25.block_sparse_moe.experts.107.w2", "model.layers.25.block_sparse_moe.experts.108.w2", "model.layers.25.block_sparse_moe.experts.109.w2", "model.layers.25.block_sparse_moe.experts.110.w2", "model.layers.25.block_sparse_moe.experts.111.w2", "model.layers.25.block_sparse_moe.experts.112.w2", "model.layers.25.block_sparse_moe.experts.113.w2", "model.layers.25.block_sparse_moe.experts.114.w2", "model.layers.25.block_sparse_moe.experts.115.w2", "model.layers.25.block_sparse_moe.experts.116.w2", "model.layers.25.block_sparse_moe.experts.117.w2", "model.layers.25.block_sparse_moe.experts.118.w2", "model.layers.25.block_sparse_moe.experts.119.w2", "model.layers.25.block_sparse_moe.experts.120.w2", "model.layers.25.block_sparse_moe.experts.121.w2", "model.layers.25.block_sparse_moe.experts.122.w2", "model.layers.25.block_sparse_moe.experts.123.w2", "model.layers.25.block_sparse_moe.experts.124.w2", "model.layers.25.block_sparse_moe.experts.125.w2", "model.layers.25.block_sparse_moe.experts.126.w2", "model.layers.25.block_sparse_moe.experts.127.w2", "model.layers.25.block_sparse_moe.experts.128.w2", "model.layers.25.block_sparse_moe.experts.129.w2", "model.layers.25.block_sparse_moe.experts.130.w2", "model.layers.25.block_sparse_moe.experts.131.w2", "model.layers.25.block_sparse_moe.experts.132.w2", "model.layers.25.block_sparse_moe.experts.133.w2", "model.layers.25.block_sparse_moe.experts.134.w2", "model.layers.25.block_sparse_moe.experts.135.w2", "model.layers.25.block_sparse_moe.experts.136.w2", "model.layers.25.block_sparse_moe.experts.137.w2", "model.layers.25.block_sparse_moe.experts.138.w2", "model.layers.25.block_sparse_moe.experts.139.w2", "model.layers.25.block_sparse_moe.experts.140.w2", "model.layers.25.block_sparse_moe.experts.141.w2", "model.layers.25.block_sparse_moe.experts.142.w2", "model.layers.25.block_sparse_moe.experts.143.w2", "model.layers.25.block_sparse_moe.experts.144.w2", "model.layers.25.block_sparse_moe.experts.145.w2", "model.layers.25.block_sparse_moe.experts.146.w2", "model.layers.25.block_sparse_moe.experts.147.w2", "model.layers.25.block_sparse_moe.experts.148.w2", "model.layers.25.block_sparse_moe.experts.149.w2", "model.layers.25.block_sparse_moe.experts.150.w2", "model.layers.25.block_sparse_moe.experts.151.w2", "model.layers.25.block_sparse_moe.experts.152.w2", "model.layers.25.block_sparse_moe.experts.153.w2", "model.layers.25.block_sparse_moe.experts.154.w2", "model.layers.25.block_sparse_moe.experts.155.w2", "model.layers.25.block_sparse_moe.experts.156.w2", "model.layers.25.block_sparse_moe.experts.157.w2", "model.layers.25.block_sparse_moe.experts.158.w2", "model.layers.25.block_sparse_moe.experts.159.w2", "model.layers.25.block_sparse_moe.experts.160.w2", "model.layers.25.block_sparse_moe.experts.161.w2", "model.layers.25.block_sparse_moe.experts.162.w2", "model.layers.25.block_sparse_moe.experts.163.w2", "model.layers.25.block_sparse_moe.experts.164.w2", "model.layers.25.block_sparse_moe.experts.165.w2", "model.layers.25.block_sparse_moe.experts.166.w2", "model.layers.25.block_sparse_moe.experts.167.w2", "model.layers.25.block_sparse_moe.experts.168.w2", "model.layers.25.block_sparse_moe.experts.169.w2", "model.layers.25.block_sparse_moe.experts.170.w2", "model.layers.25.block_sparse_moe.experts.171.w2", "model.layers.25.block_sparse_moe.experts.172.w2", "model.layers.25.block_sparse_moe.experts.173.w2", "model.layers.25.block_sparse_moe.experts.174.w2", "model.layers.25.block_sparse_moe.experts.175.w2", "model.layers.25.block_sparse_moe.experts.176.w2", "model.layers.25.block_sparse_moe.experts.177.w2", "model.layers.25.block_sparse_moe.experts.178.w2", "model.layers.25.block_sparse_moe.experts.179.w2", "model.layers.25.block_sparse_moe.experts.180.w2", "model.layers.25.block_sparse_moe.experts.181.w2", "model.layers.25.block_sparse_moe.experts.182.w2", "model.layers.25.block_sparse_moe.experts.183.w2", "model.layers.25.block_sparse_moe.experts.184.w2", "model.layers.25.block_sparse_moe.experts.185.w2", "model.layers.25.block_sparse_moe.experts.186.w2", "model.layers.25.block_sparse_moe.experts.187.w2", "model.layers.25.block_sparse_moe.experts.188.w2", "model.layers.25.block_sparse_moe.experts.189.w2", "model.layers.25.block_sparse_moe.experts.190.w2", "model.layers.25.block_sparse_moe.experts.191.w2", "model.layers.25.block_sparse_moe.experts.192.w2", "model.layers.25.block_sparse_moe.experts.193.w2", "model.layers.25.block_sparse_moe.experts.194.w2", "model.layers.25.block_sparse_moe.experts.195.w2", "model.layers.25.block_sparse_moe.experts.196.w2", "model.layers.25.block_sparse_moe.experts.197.w2", "model.layers.25.block_sparse_moe.experts.198.w2", "model.layers.25.block_sparse_moe.experts.199.w2", "model.layers.25.block_sparse_moe.experts.200.w2", "model.layers.25.block_sparse_moe.experts.201.w2", "model.layers.25.block_sparse_moe.experts.202.w2", "model.layers.25.block_sparse_moe.experts.203.w2", "model.layers.25.block_sparse_moe.experts.204.w2", "model.layers.25.block_sparse_moe.experts.205.w2", "model.layers.25.block_sparse_moe.experts.206.w2", "model.layers.25.block_sparse_moe.experts.207.w2", "model.layers.25.block_sparse_moe.experts.208.w2", "model.layers.25.block_sparse_moe.experts.209.w2", "model.layers.25.block_sparse_moe.experts.210.w2", "model.layers.25.block_sparse_moe.experts.211.w2", "model.layers.25.block_sparse_moe.experts.212.w2", "model.layers.25.block_sparse_moe.experts.213.w2", "model.layers.25.block_sparse_moe.experts.214.w2", "model.layers.25.block_sparse_moe.experts.215.w2", "model.layers.25.block_sparse_moe.experts.216.w2", "model.layers.25.block_sparse_moe.experts.217.w2", "model.layers.25.block_sparse_moe.experts.218.w2", "model.layers.25.block_sparse_moe.experts.219.w2", "model.layers.25.block_sparse_moe.experts.220.w2", "model.layers.25.block_sparse_moe.experts.221.w2", "model.layers.25.block_sparse_moe.experts.222.w2", "model.layers.25.block_sparse_moe.experts.223.w2", "model.layers.25.block_sparse_moe.experts.224.w2", "model.layers.25.block_sparse_moe.experts.225.w2", "model.layers.25.block_sparse_moe.experts.226.w2", "model.layers.25.block_sparse_moe.experts.227.w2", "model.layers.25.block_sparse_moe.experts.228.w2", "model.layers.25.block_sparse_moe.experts.229.w2", "model.layers.25.block_sparse_moe.experts.230.w2", "model.layers.25.block_sparse_moe.experts.231.w2", "model.layers.25.block_sparse_moe.experts.232.w2", "model.layers.25.block_sparse_moe.experts.233.w2", "model.layers.25.block_sparse_moe.experts.234.w2", "model.layers.25.block_sparse_moe.experts.235.w2", "model.layers.25.block_sparse_moe.experts.236.w2", "model.layers.25.block_sparse_moe.experts.237.w2", "model.layers.25.block_sparse_moe.experts.238.w2", "model.layers.25.block_sparse_moe.experts.239.w2", "model.layers.25.block_sparse_moe.experts.240.w2", "model.layers.25.block_sparse_moe.experts.241.w2", "model.layers.25.block_sparse_moe.experts.242.w2", "model.layers.25.block_sparse_moe.experts.243.w2", "model.layers.25.block_sparse_moe.experts.244.w2", "model.layers.25.block_sparse_moe.experts.245.w2", "model.layers.25.block_sparse_moe.experts.246.w2", "model.layers.25.block_sparse_moe.experts.247.w2", "model.layers.25.block_sparse_moe.experts.248.w2", "model.layers.25.block_sparse_moe.experts.249.w2", "model.layers.25.block_sparse_moe.experts.250.w2", "model.layers.25.block_sparse_moe.experts.251.w2", "model.layers.25.block_sparse_moe.experts.252.w2", "model.layers.25.block_sparse_moe.experts.253.w2", "model.layers.25.block_sparse_moe.experts.254.w2", "model.layers.25.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00015877168625592908, "dbits": 1207959552 } ] }, { "idx": 130, "layers": [ "model.layers.26.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00044184569269418994, "dbits": 18874368 } ] }, { "idx": 131, "layers": [ "model.layers.26.self_attn.k_proj", "model.layers.26.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0028978344053030014, "dbits": 6291456 } ] }, { "idx": 132, "layers": [ "model.layers.26.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0009921921417117147, "dbits": 18874368 } ] }, { "idx": 133, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w1", "model.layers.26.block_sparse_moe.experts.1.w1", "model.layers.26.block_sparse_moe.experts.2.w1", "model.layers.26.block_sparse_moe.experts.3.w1", "model.layers.26.block_sparse_moe.experts.4.w1", "model.layers.26.block_sparse_moe.experts.5.w1", "model.layers.26.block_sparse_moe.experts.6.w1", "model.layers.26.block_sparse_moe.experts.7.w1", "model.layers.26.block_sparse_moe.experts.8.w1", "model.layers.26.block_sparse_moe.experts.9.w1", "model.layers.26.block_sparse_moe.experts.10.w1", "model.layers.26.block_sparse_moe.experts.11.w1", "model.layers.26.block_sparse_moe.experts.12.w1", "model.layers.26.block_sparse_moe.experts.13.w1", "model.layers.26.block_sparse_moe.experts.14.w1", "model.layers.26.block_sparse_moe.experts.15.w1", "model.layers.26.block_sparse_moe.experts.16.w1", "model.layers.26.block_sparse_moe.experts.17.w1", "model.layers.26.block_sparse_moe.experts.18.w1", "model.layers.26.block_sparse_moe.experts.19.w1", "model.layers.26.block_sparse_moe.experts.20.w1", "model.layers.26.block_sparse_moe.experts.21.w1", "model.layers.26.block_sparse_moe.experts.22.w1", "model.layers.26.block_sparse_moe.experts.23.w1", "model.layers.26.block_sparse_moe.experts.24.w1", "model.layers.26.block_sparse_moe.experts.25.w1", "model.layers.26.block_sparse_moe.experts.26.w1", "model.layers.26.block_sparse_moe.experts.27.w1", "model.layers.26.block_sparse_moe.experts.28.w1", "model.layers.26.block_sparse_moe.experts.29.w1", "model.layers.26.block_sparse_moe.experts.30.w1", "model.layers.26.block_sparse_moe.experts.31.w1", "model.layers.26.block_sparse_moe.experts.32.w1", "model.layers.26.block_sparse_moe.experts.33.w1", "model.layers.26.block_sparse_moe.experts.34.w1", "model.layers.26.block_sparse_moe.experts.35.w1", "model.layers.26.block_sparse_moe.experts.36.w1", "model.layers.26.block_sparse_moe.experts.37.w1", "model.layers.26.block_sparse_moe.experts.38.w1", "model.layers.26.block_sparse_moe.experts.39.w1", "model.layers.26.block_sparse_moe.experts.40.w1", "model.layers.26.block_sparse_moe.experts.41.w1", "model.layers.26.block_sparse_moe.experts.42.w1", "model.layers.26.block_sparse_moe.experts.43.w1", "model.layers.26.block_sparse_moe.experts.44.w1", "model.layers.26.block_sparse_moe.experts.45.w1", "model.layers.26.block_sparse_moe.experts.46.w1", "model.layers.26.block_sparse_moe.experts.47.w1", "model.layers.26.block_sparse_moe.experts.48.w1", "model.layers.26.block_sparse_moe.experts.49.w1", "model.layers.26.block_sparse_moe.experts.50.w1", "model.layers.26.block_sparse_moe.experts.51.w1", "model.layers.26.block_sparse_moe.experts.52.w1", "model.layers.26.block_sparse_moe.experts.53.w1", "model.layers.26.block_sparse_moe.experts.54.w1", "model.layers.26.block_sparse_moe.experts.55.w1", "model.layers.26.block_sparse_moe.experts.56.w1", "model.layers.26.block_sparse_moe.experts.57.w1", "model.layers.26.block_sparse_moe.experts.58.w1", "model.layers.26.block_sparse_moe.experts.59.w1", "model.layers.26.block_sparse_moe.experts.60.w1", "model.layers.26.block_sparse_moe.experts.61.w1", "model.layers.26.block_sparse_moe.experts.62.w1", "model.layers.26.block_sparse_moe.experts.63.w1", "model.layers.26.block_sparse_moe.experts.64.w1", "model.layers.26.block_sparse_moe.experts.65.w1", "model.layers.26.block_sparse_moe.experts.66.w1", "model.layers.26.block_sparse_moe.experts.67.w1", "model.layers.26.block_sparse_moe.experts.68.w1", "model.layers.26.block_sparse_moe.experts.69.w1", "model.layers.26.block_sparse_moe.experts.70.w1", "model.layers.26.block_sparse_moe.experts.71.w1", "model.layers.26.block_sparse_moe.experts.72.w1", "model.layers.26.block_sparse_moe.experts.73.w1", "model.layers.26.block_sparse_moe.experts.74.w1", "model.layers.26.block_sparse_moe.experts.75.w1", "model.layers.26.block_sparse_moe.experts.76.w1", "model.layers.26.block_sparse_moe.experts.77.w1", "model.layers.26.block_sparse_moe.experts.78.w1", "model.layers.26.block_sparse_moe.experts.79.w1", "model.layers.26.block_sparse_moe.experts.80.w1", "model.layers.26.block_sparse_moe.experts.81.w1", "model.layers.26.block_sparse_moe.experts.82.w1", "model.layers.26.block_sparse_moe.experts.83.w1", "model.layers.26.block_sparse_moe.experts.84.w1", "model.layers.26.block_sparse_moe.experts.85.w1", "model.layers.26.block_sparse_moe.experts.86.w1", "model.layers.26.block_sparse_moe.experts.87.w1", "model.layers.26.block_sparse_moe.experts.88.w1", "model.layers.26.block_sparse_moe.experts.89.w1", "model.layers.26.block_sparse_moe.experts.90.w1", "model.layers.26.block_sparse_moe.experts.91.w1", "model.layers.26.block_sparse_moe.experts.92.w1", "model.layers.26.block_sparse_moe.experts.93.w1", "model.layers.26.block_sparse_moe.experts.94.w1", "model.layers.26.block_sparse_moe.experts.95.w1", "model.layers.26.block_sparse_moe.experts.96.w1", "model.layers.26.block_sparse_moe.experts.97.w1", "model.layers.26.block_sparse_moe.experts.98.w1", "model.layers.26.block_sparse_moe.experts.99.w1", "model.layers.26.block_sparse_moe.experts.100.w1", "model.layers.26.block_sparse_moe.experts.101.w1", "model.layers.26.block_sparse_moe.experts.102.w1", "model.layers.26.block_sparse_moe.experts.103.w1", "model.layers.26.block_sparse_moe.experts.104.w1", "model.layers.26.block_sparse_moe.experts.105.w1", "model.layers.26.block_sparse_moe.experts.106.w1", "model.layers.26.block_sparse_moe.experts.107.w1", "model.layers.26.block_sparse_moe.experts.108.w1", "model.layers.26.block_sparse_moe.experts.109.w1", "model.layers.26.block_sparse_moe.experts.110.w1", "model.layers.26.block_sparse_moe.experts.111.w1", "model.layers.26.block_sparse_moe.experts.112.w1", "model.layers.26.block_sparse_moe.experts.113.w1", "model.layers.26.block_sparse_moe.experts.114.w1", "model.layers.26.block_sparse_moe.experts.115.w1", "model.layers.26.block_sparse_moe.experts.116.w1", "model.layers.26.block_sparse_moe.experts.117.w1", "model.layers.26.block_sparse_moe.experts.118.w1", "model.layers.26.block_sparse_moe.experts.119.w1", "model.layers.26.block_sparse_moe.experts.120.w1", "model.layers.26.block_sparse_moe.experts.121.w1", "model.layers.26.block_sparse_moe.experts.122.w1", "model.layers.26.block_sparse_moe.experts.123.w1", "model.layers.26.block_sparse_moe.experts.124.w1", "model.layers.26.block_sparse_moe.experts.125.w1", "model.layers.26.block_sparse_moe.experts.126.w1", "model.layers.26.block_sparse_moe.experts.127.w1", "model.layers.26.block_sparse_moe.experts.128.w1", "model.layers.26.block_sparse_moe.experts.129.w1", "model.layers.26.block_sparse_moe.experts.130.w1", "model.layers.26.block_sparse_moe.experts.131.w1", "model.layers.26.block_sparse_moe.experts.132.w1", "model.layers.26.block_sparse_moe.experts.133.w1", "model.layers.26.block_sparse_moe.experts.134.w1", "model.layers.26.block_sparse_moe.experts.135.w1", "model.layers.26.block_sparse_moe.experts.136.w1", "model.layers.26.block_sparse_moe.experts.137.w1", "model.layers.26.block_sparse_moe.experts.138.w1", "model.layers.26.block_sparse_moe.experts.139.w1", "model.layers.26.block_sparse_moe.experts.140.w1", "model.layers.26.block_sparse_moe.experts.141.w1", "model.layers.26.block_sparse_moe.experts.142.w1", "model.layers.26.block_sparse_moe.experts.143.w1", "model.layers.26.block_sparse_moe.experts.144.w1", "model.layers.26.block_sparse_moe.experts.145.w1", "model.layers.26.block_sparse_moe.experts.146.w1", "model.layers.26.block_sparse_moe.experts.147.w1", "model.layers.26.block_sparse_moe.experts.148.w1", "model.layers.26.block_sparse_moe.experts.149.w1", "model.layers.26.block_sparse_moe.experts.150.w1", "model.layers.26.block_sparse_moe.experts.151.w1", "model.layers.26.block_sparse_moe.experts.152.w1", "model.layers.26.block_sparse_moe.experts.153.w1", "model.layers.26.block_sparse_moe.experts.154.w1", "model.layers.26.block_sparse_moe.experts.155.w1", "model.layers.26.block_sparse_moe.experts.156.w1", "model.layers.26.block_sparse_moe.experts.157.w1", "model.layers.26.block_sparse_moe.experts.158.w1", "model.layers.26.block_sparse_moe.experts.159.w1", "model.layers.26.block_sparse_moe.experts.160.w1", "model.layers.26.block_sparse_moe.experts.161.w1", "model.layers.26.block_sparse_moe.experts.162.w1", "model.layers.26.block_sparse_moe.experts.163.w1", "model.layers.26.block_sparse_moe.experts.164.w1", "model.layers.26.block_sparse_moe.experts.165.w1", "model.layers.26.block_sparse_moe.experts.166.w1", "model.layers.26.block_sparse_moe.experts.167.w1", "model.layers.26.block_sparse_moe.experts.168.w1", "model.layers.26.block_sparse_moe.experts.169.w1", "model.layers.26.block_sparse_moe.experts.170.w1", "model.layers.26.block_sparse_moe.experts.171.w1", "model.layers.26.block_sparse_moe.experts.172.w1", "model.layers.26.block_sparse_moe.experts.173.w1", "model.layers.26.block_sparse_moe.experts.174.w1", "model.layers.26.block_sparse_moe.experts.175.w1", "model.layers.26.block_sparse_moe.experts.176.w1", "model.layers.26.block_sparse_moe.experts.177.w1", "model.layers.26.block_sparse_moe.experts.178.w1", "model.layers.26.block_sparse_moe.experts.179.w1", "model.layers.26.block_sparse_moe.experts.180.w1", "model.layers.26.block_sparse_moe.experts.181.w1", "model.layers.26.block_sparse_moe.experts.182.w1", "model.layers.26.block_sparse_moe.experts.183.w1", "model.layers.26.block_sparse_moe.experts.184.w1", "model.layers.26.block_sparse_moe.experts.185.w1", "model.layers.26.block_sparse_moe.experts.186.w1", "model.layers.26.block_sparse_moe.experts.187.w1", "model.layers.26.block_sparse_moe.experts.188.w1", "model.layers.26.block_sparse_moe.experts.189.w1", "model.layers.26.block_sparse_moe.experts.190.w1", "model.layers.26.block_sparse_moe.experts.191.w1", "model.layers.26.block_sparse_moe.experts.192.w1", "model.layers.26.block_sparse_moe.experts.193.w1", "model.layers.26.block_sparse_moe.experts.194.w1", "model.layers.26.block_sparse_moe.experts.195.w1", "model.layers.26.block_sparse_moe.experts.196.w1", "model.layers.26.block_sparse_moe.experts.197.w1", "model.layers.26.block_sparse_moe.experts.198.w1", "model.layers.26.block_sparse_moe.experts.199.w1", "model.layers.26.block_sparse_moe.experts.200.w1", "model.layers.26.block_sparse_moe.experts.201.w1", "model.layers.26.block_sparse_moe.experts.202.w1", "model.layers.26.block_sparse_moe.experts.203.w1", "model.layers.26.block_sparse_moe.experts.204.w1", "model.layers.26.block_sparse_moe.experts.205.w1", "model.layers.26.block_sparse_moe.experts.206.w1", "model.layers.26.block_sparse_moe.experts.207.w1", "model.layers.26.block_sparse_moe.experts.208.w1", "model.layers.26.block_sparse_moe.experts.209.w1", "model.layers.26.block_sparse_moe.experts.210.w1", "model.layers.26.block_sparse_moe.experts.211.w1", "model.layers.26.block_sparse_moe.experts.212.w1", "model.layers.26.block_sparse_moe.experts.213.w1", "model.layers.26.block_sparse_moe.experts.214.w1", "model.layers.26.block_sparse_moe.experts.215.w1", "model.layers.26.block_sparse_moe.experts.216.w1", "model.layers.26.block_sparse_moe.experts.217.w1", "model.layers.26.block_sparse_moe.experts.218.w1", "model.layers.26.block_sparse_moe.experts.219.w1", "model.layers.26.block_sparse_moe.experts.220.w1", "model.layers.26.block_sparse_moe.experts.221.w1", "model.layers.26.block_sparse_moe.experts.222.w1", "model.layers.26.block_sparse_moe.experts.223.w1", "model.layers.26.block_sparse_moe.experts.224.w1", "model.layers.26.block_sparse_moe.experts.225.w1", "model.layers.26.block_sparse_moe.experts.226.w1", "model.layers.26.block_sparse_moe.experts.227.w1", "model.layers.26.block_sparse_moe.experts.228.w1", "model.layers.26.block_sparse_moe.experts.229.w1", "model.layers.26.block_sparse_moe.experts.230.w1", "model.layers.26.block_sparse_moe.experts.231.w1", "model.layers.26.block_sparse_moe.experts.232.w1", "model.layers.26.block_sparse_moe.experts.233.w1", "model.layers.26.block_sparse_moe.experts.234.w1", "model.layers.26.block_sparse_moe.experts.235.w1", "model.layers.26.block_sparse_moe.experts.236.w1", "model.layers.26.block_sparse_moe.experts.237.w1", "model.layers.26.block_sparse_moe.experts.238.w1", "model.layers.26.block_sparse_moe.experts.239.w1", "model.layers.26.block_sparse_moe.experts.240.w1", "model.layers.26.block_sparse_moe.experts.241.w1", "model.layers.26.block_sparse_moe.experts.242.w1", "model.layers.26.block_sparse_moe.experts.243.w1", "model.layers.26.block_sparse_moe.experts.244.w1", "model.layers.26.block_sparse_moe.experts.245.w1", "model.layers.26.block_sparse_moe.experts.246.w1", "model.layers.26.block_sparse_moe.experts.247.w1", "model.layers.26.block_sparse_moe.experts.248.w1", "model.layers.26.block_sparse_moe.experts.249.w1", "model.layers.26.block_sparse_moe.experts.250.w1", "model.layers.26.block_sparse_moe.experts.251.w1", "model.layers.26.block_sparse_moe.experts.252.w1", "model.layers.26.block_sparse_moe.experts.253.w1", "model.layers.26.block_sparse_moe.experts.254.w1", "model.layers.26.block_sparse_moe.experts.255.w1", "model.layers.26.block_sparse_moe.experts.0.w3", "model.layers.26.block_sparse_moe.experts.1.w3", "model.layers.26.block_sparse_moe.experts.2.w3", "model.layers.26.block_sparse_moe.experts.3.w3", "model.layers.26.block_sparse_moe.experts.4.w3", "model.layers.26.block_sparse_moe.experts.5.w3", "model.layers.26.block_sparse_moe.experts.6.w3", "model.layers.26.block_sparse_moe.experts.7.w3", "model.layers.26.block_sparse_moe.experts.8.w3", "model.layers.26.block_sparse_moe.experts.9.w3", "model.layers.26.block_sparse_moe.experts.10.w3", "model.layers.26.block_sparse_moe.experts.11.w3", "model.layers.26.block_sparse_moe.experts.12.w3", "model.layers.26.block_sparse_moe.experts.13.w3", "model.layers.26.block_sparse_moe.experts.14.w3", "model.layers.26.block_sparse_moe.experts.15.w3", "model.layers.26.block_sparse_moe.experts.16.w3", "model.layers.26.block_sparse_moe.experts.17.w3", "model.layers.26.block_sparse_moe.experts.18.w3", "model.layers.26.block_sparse_moe.experts.19.w3", "model.layers.26.block_sparse_moe.experts.20.w3", "model.layers.26.block_sparse_moe.experts.21.w3", "model.layers.26.block_sparse_moe.experts.22.w3", "model.layers.26.block_sparse_moe.experts.23.w3", "model.layers.26.block_sparse_moe.experts.24.w3", "model.layers.26.block_sparse_moe.experts.25.w3", "model.layers.26.block_sparse_moe.experts.26.w3", "model.layers.26.block_sparse_moe.experts.27.w3", "model.layers.26.block_sparse_moe.experts.28.w3", "model.layers.26.block_sparse_moe.experts.29.w3", "model.layers.26.block_sparse_moe.experts.30.w3", "model.layers.26.block_sparse_moe.experts.31.w3", "model.layers.26.block_sparse_moe.experts.32.w3", "model.layers.26.block_sparse_moe.experts.33.w3", "model.layers.26.block_sparse_moe.experts.34.w3", "model.layers.26.block_sparse_moe.experts.35.w3", "model.layers.26.block_sparse_moe.experts.36.w3", "model.layers.26.block_sparse_moe.experts.37.w3", "model.layers.26.block_sparse_moe.experts.38.w3", "model.layers.26.block_sparse_moe.experts.39.w3", "model.layers.26.block_sparse_moe.experts.40.w3", "model.layers.26.block_sparse_moe.experts.41.w3", "model.layers.26.block_sparse_moe.experts.42.w3", "model.layers.26.block_sparse_moe.experts.43.w3", "model.layers.26.block_sparse_moe.experts.44.w3", "model.layers.26.block_sparse_moe.experts.45.w3", "model.layers.26.block_sparse_moe.experts.46.w3", "model.layers.26.block_sparse_moe.experts.47.w3", "model.layers.26.block_sparse_moe.experts.48.w3", "model.layers.26.block_sparse_moe.experts.49.w3", "model.layers.26.block_sparse_moe.experts.50.w3", "model.layers.26.block_sparse_moe.experts.51.w3", "model.layers.26.block_sparse_moe.experts.52.w3", "model.layers.26.block_sparse_moe.experts.53.w3", "model.layers.26.block_sparse_moe.experts.54.w3", "model.layers.26.block_sparse_moe.experts.55.w3", "model.layers.26.block_sparse_moe.experts.56.w3", "model.layers.26.block_sparse_moe.experts.57.w3", "model.layers.26.block_sparse_moe.experts.58.w3", "model.layers.26.block_sparse_moe.experts.59.w3", "model.layers.26.block_sparse_moe.experts.60.w3", "model.layers.26.block_sparse_moe.experts.61.w3", "model.layers.26.block_sparse_moe.experts.62.w3", "model.layers.26.block_sparse_moe.experts.63.w3", "model.layers.26.block_sparse_moe.experts.64.w3", "model.layers.26.block_sparse_moe.experts.65.w3", "model.layers.26.block_sparse_moe.experts.66.w3", "model.layers.26.block_sparse_moe.experts.67.w3", "model.layers.26.block_sparse_moe.experts.68.w3", "model.layers.26.block_sparse_moe.experts.69.w3", "model.layers.26.block_sparse_moe.experts.70.w3", "model.layers.26.block_sparse_moe.experts.71.w3", "model.layers.26.block_sparse_moe.experts.72.w3", "model.layers.26.block_sparse_moe.experts.73.w3", "model.layers.26.block_sparse_moe.experts.74.w3", "model.layers.26.block_sparse_moe.experts.75.w3", "model.layers.26.block_sparse_moe.experts.76.w3", "model.layers.26.block_sparse_moe.experts.77.w3", "model.layers.26.block_sparse_moe.experts.78.w3", "model.layers.26.block_sparse_moe.experts.79.w3", "model.layers.26.block_sparse_moe.experts.80.w3", "model.layers.26.block_sparse_moe.experts.81.w3", "model.layers.26.block_sparse_moe.experts.82.w3", "model.layers.26.block_sparse_moe.experts.83.w3", "model.layers.26.block_sparse_moe.experts.84.w3", "model.layers.26.block_sparse_moe.experts.85.w3", "model.layers.26.block_sparse_moe.experts.86.w3", "model.layers.26.block_sparse_moe.experts.87.w3", "model.layers.26.block_sparse_moe.experts.88.w3", "model.layers.26.block_sparse_moe.experts.89.w3", "model.layers.26.block_sparse_moe.experts.90.w3", "model.layers.26.block_sparse_moe.experts.91.w3", "model.layers.26.block_sparse_moe.experts.92.w3", "model.layers.26.block_sparse_moe.experts.93.w3", "model.layers.26.block_sparse_moe.experts.94.w3", "model.layers.26.block_sparse_moe.experts.95.w3", "model.layers.26.block_sparse_moe.experts.96.w3", "model.layers.26.block_sparse_moe.experts.97.w3", "model.layers.26.block_sparse_moe.experts.98.w3", "model.layers.26.block_sparse_moe.experts.99.w3", "model.layers.26.block_sparse_moe.experts.100.w3", "model.layers.26.block_sparse_moe.experts.101.w3", "model.layers.26.block_sparse_moe.experts.102.w3", "model.layers.26.block_sparse_moe.experts.103.w3", "model.layers.26.block_sparse_moe.experts.104.w3", "model.layers.26.block_sparse_moe.experts.105.w3", "model.layers.26.block_sparse_moe.experts.106.w3", "model.layers.26.block_sparse_moe.experts.107.w3", "model.layers.26.block_sparse_moe.experts.108.w3", "model.layers.26.block_sparse_moe.experts.109.w3", "model.layers.26.block_sparse_moe.experts.110.w3", "model.layers.26.block_sparse_moe.experts.111.w3", "model.layers.26.block_sparse_moe.experts.112.w3", "model.layers.26.block_sparse_moe.experts.113.w3", "model.layers.26.block_sparse_moe.experts.114.w3", "model.layers.26.block_sparse_moe.experts.115.w3", "model.layers.26.block_sparse_moe.experts.116.w3", "model.layers.26.block_sparse_moe.experts.117.w3", "model.layers.26.block_sparse_moe.experts.118.w3", "model.layers.26.block_sparse_moe.experts.119.w3", "model.layers.26.block_sparse_moe.experts.120.w3", "model.layers.26.block_sparse_moe.experts.121.w3", "model.layers.26.block_sparse_moe.experts.122.w3", "model.layers.26.block_sparse_moe.experts.123.w3", "model.layers.26.block_sparse_moe.experts.124.w3", "model.layers.26.block_sparse_moe.experts.125.w3", "model.layers.26.block_sparse_moe.experts.126.w3", "model.layers.26.block_sparse_moe.experts.127.w3", "model.layers.26.block_sparse_moe.experts.128.w3", "model.layers.26.block_sparse_moe.experts.129.w3", "model.layers.26.block_sparse_moe.experts.130.w3", "model.layers.26.block_sparse_moe.experts.131.w3", "model.layers.26.block_sparse_moe.experts.132.w3", "model.layers.26.block_sparse_moe.experts.133.w3", "model.layers.26.block_sparse_moe.experts.134.w3", "model.layers.26.block_sparse_moe.experts.135.w3", "model.layers.26.block_sparse_moe.experts.136.w3", "model.layers.26.block_sparse_moe.experts.137.w3", "model.layers.26.block_sparse_moe.experts.138.w3", "model.layers.26.block_sparse_moe.experts.139.w3", "model.layers.26.block_sparse_moe.experts.140.w3", "model.layers.26.block_sparse_moe.experts.141.w3", "model.layers.26.block_sparse_moe.experts.142.w3", "model.layers.26.block_sparse_moe.experts.143.w3", "model.layers.26.block_sparse_moe.experts.144.w3", "model.layers.26.block_sparse_moe.experts.145.w3", "model.layers.26.block_sparse_moe.experts.146.w3", "model.layers.26.block_sparse_moe.experts.147.w3", "model.layers.26.block_sparse_moe.experts.148.w3", "model.layers.26.block_sparse_moe.experts.149.w3", "model.layers.26.block_sparse_moe.experts.150.w3", "model.layers.26.block_sparse_moe.experts.151.w3", "model.layers.26.block_sparse_moe.experts.152.w3", "model.layers.26.block_sparse_moe.experts.153.w3", "model.layers.26.block_sparse_moe.experts.154.w3", "model.layers.26.block_sparse_moe.experts.155.w3", "model.layers.26.block_sparse_moe.experts.156.w3", "model.layers.26.block_sparse_moe.experts.157.w3", "model.layers.26.block_sparse_moe.experts.158.w3", "model.layers.26.block_sparse_moe.experts.159.w3", "model.layers.26.block_sparse_moe.experts.160.w3", "model.layers.26.block_sparse_moe.experts.161.w3", "model.layers.26.block_sparse_moe.experts.162.w3", "model.layers.26.block_sparse_moe.experts.163.w3", "model.layers.26.block_sparse_moe.experts.164.w3", "model.layers.26.block_sparse_moe.experts.165.w3", "model.layers.26.block_sparse_moe.experts.166.w3", "model.layers.26.block_sparse_moe.experts.167.w3", "model.layers.26.block_sparse_moe.experts.168.w3", "model.layers.26.block_sparse_moe.experts.169.w3", "model.layers.26.block_sparse_moe.experts.170.w3", "model.layers.26.block_sparse_moe.experts.171.w3", "model.layers.26.block_sparse_moe.experts.172.w3", "model.layers.26.block_sparse_moe.experts.173.w3", "model.layers.26.block_sparse_moe.experts.174.w3", "model.layers.26.block_sparse_moe.experts.175.w3", "model.layers.26.block_sparse_moe.experts.176.w3", "model.layers.26.block_sparse_moe.experts.177.w3", "model.layers.26.block_sparse_moe.experts.178.w3", "model.layers.26.block_sparse_moe.experts.179.w3", "model.layers.26.block_sparse_moe.experts.180.w3", "model.layers.26.block_sparse_moe.experts.181.w3", "model.layers.26.block_sparse_moe.experts.182.w3", "model.layers.26.block_sparse_moe.experts.183.w3", "model.layers.26.block_sparse_moe.experts.184.w3", "model.layers.26.block_sparse_moe.experts.185.w3", "model.layers.26.block_sparse_moe.experts.186.w3", "model.layers.26.block_sparse_moe.experts.187.w3", "model.layers.26.block_sparse_moe.experts.188.w3", "model.layers.26.block_sparse_moe.experts.189.w3", "model.layers.26.block_sparse_moe.experts.190.w3", "model.layers.26.block_sparse_moe.experts.191.w3", "model.layers.26.block_sparse_moe.experts.192.w3", "model.layers.26.block_sparse_moe.experts.193.w3", "model.layers.26.block_sparse_moe.experts.194.w3", "model.layers.26.block_sparse_moe.experts.195.w3", "model.layers.26.block_sparse_moe.experts.196.w3", "model.layers.26.block_sparse_moe.experts.197.w3", "model.layers.26.block_sparse_moe.experts.198.w3", "model.layers.26.block_sparse_moe.experts.199.w3", "model.layers.26.block_sparse_moe.experts.200.w3", "model.layers.26.block_sparse_moe.experts.201.w3", "model.layers.26.block_sparse_moe.experts.202.w3", "model.layers.26.block_sparse_moe.experts.203.w3", "model.layers.26.block_sparse_moe.experts.204.w3", "model.layers.26.block_sparse_moe.experts.205.w3", "model.layers.26.block_sparse_moe.experts.206.w3", "model.layers.26.block_sparse_moe.experts.207.w3", "model.layers.26.block_sparse_moe.experts.208.w3", "model.layers.26.block_sparse_moe.experts.209.w3", "model.layers.26.block_sparse_moe.experts.210.w3", "model.layers.26.block_sparse_moe.experts.211.w3", "model.layers.26.block_sparse_moe.experts.212.w3", "model.layers.26.block_sparse_moe.experts.213.w3", "model.layers.26.block_sparse_moe.experts.214.w3", "model.layers.26.block_sparse_moe.experts.215.w3", "model.layers.26.block_sparse_moe.experts.216.w3", "model.layers.26.block_sparse_moe.experts.217.w3", "model.layers.26.block_sparse_moe.experts.218.w3", "model.layers.26.block_sparse_moe.experts.219.w3", "model.layers.26.block_sparse_moe.experts.220.w3", "model.layers.26.block_sparse_moe.experts.221.w3", "model.layers.26.block_sparse_moe.experts.222.w3", "model.layers.26.block_sparse_moe.experts.223.w3", "model.layers.26.block_sparse_moe.experts.224.w3", "model.layers.26.block_sparse_moe.experts.225.w3", "model.layers.26.block_sparse_moe.experts.226.w3", "model.layers.26.block_sparse_moe.experts.227.w3", "model.layers.26.block_sparse_moe.experts.228.w3", "model.layers.26.block_sparse_moe.experts.229.w3", "model.layers.26.block_sparse_moe.experts.230.w3", "model.layers.26.block_sparse_moe.experts.231.w3", "model.layers.26.block_sparse_moe.experts.232.w3", "model.layers.26.block_sparse_moe.experts.233.w3", "model.layers.26.block_sparse_moe.experts.234.w3", "model.layers.26.block_sparse_moe.experts.235.w3", "model.layers.26.block_sparse_moe.experts.236.w3", "model.layers.26.block_sparse_moe.experts.237.w3", "model.layers.26.block_sparse_moe.experts.238.w3", "model.layers.26.block_sparse_moe.experts.239.w3", "model.layers.26.block_sparse_moe.experts.240.w3", "model.layers.26.block_sparse_moe.experts.241.w3", "model.layers.26.block_sparse_moe.experts.242.w3", "model.layers.26.block_sparse_moe.experts.243.w3", "model.layers.26.block_sparse_moe.experts.244.w3", "model.layers.26.block_sparse_moe.experts.245.w3", "model.layers.26.block_sparse_moe.experts.246.w3", "model.layers.26.block_sparse_moe.experts.247.w3", "model.layers.26.block_sparse_moe.experts.248.w3", "model.layers.26.block_sparse_moe.experts.249.w3", "model.layers.26.block_sparse_moe.experts.250.w3", "model.layers.26.block_sparse_moe.experts.251.w3", "model.layers.26.block_sparse_moe.experts.252.w3", "model.layers.26.block_sparse_moe.experts.253.w3", "model.layers.26.block_sparse_moe.experts.254.w3", "model.layers.26.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -4.429817199706754e-05, "dbits": 2415919104 } ] }, { "idx": 134, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w2", "model.layers.26.block_sparse_moe.experts.1.w2", "model.layers.26.block_sparse_moe.experts.2.w2", "model.layers.26.block_sparse_moe.experts.3.w2", "model.layers.26.block_sparse_moe.experts.4.w2", "model.layers.26.block_sparse_moe.experts.5.w2", "model.layers.26.block_sparse_moe.experts.6.w2", "model.layers.26.block_sparse_moe.experts.7.w2", "model.layers.26.block_sparse_moe.experts.8.w2", "model.layers.26.block_sparse_moe.experts.9.w2", "model.layers.26.block_sparse_moe.experts.10.w2", "model.layers.26.block_sparse_moe.experts.11.w2", "model.layers.26.block_sparse_moe.experts.12.w2", "model.layers.26.block_sparse_moe.experts.13.w2", "model.layers.26.block_sparse_moe.experts.14.w2", "model.layers.26.block_sparse_moe.experts.15.w2", "model.layers.26.block_sparse_moe.experts.16.w2", "model.layers.26.block_sparse_moe.experts.17.w2", "model.layers.26.block_sparse_moe.experts.18.w2", "model.layers.26.block_sparse_moe.experts.19.w2", "model.layers.26.block_sparse_moe.experts.20.w2", "model.layers.26.block_sparse_moe.experts.21.w2", "model.layers.26.block_sparse_moe.experts.22.w2", "model.layers.26.block_sparse_moe.experts.23.w2", "model.layers.26.block_sparse_moe.experts.24.w2", "model.layers.26.block_sparse_moe.experts.25.w2", "model.layers.26.block_sparse_moe.experts.26.w2", "model.layers.26.block_sparse_moe.experts.27.w2", "model.layers.26.block_sparse_moe.experts.28.w2", "model.layers.26.block_sparse_moe.experts.29.w2", "model.layers.26.block_sparse_moe.experts.30.w2", "model.layers.26.block_sparse_moe.experts.31.w2", "model.layers.26.block_sparse_moe.experts.32.w2", "model.layers.26.block_sparse_moe.experts.33.w2", "model.layers.26.block_sparse_moe.experts.34.w2", "model.layers.26.block_sparse_moe.experts.35.w2", "model.layers.26.block_sparse_moe.experts.36.w2", "model.layers.26.block_sparse_moe.experts.37.w2", "model.layers.26.block_sparse_moe.experts.38.w2", "model.layers.26.block_sparse_moe.experts.39.w2", "model.layers.26.block_sparse_moe.experts.40.w2", "model.layers.26.block_sparse_moe.experts.41.w2", "model.layers.26.block_sparse_moe.experts.42.w2", "model.layers.26.block_sparse_moe.experts.43.w2", "model.layers.26.block_sparse_moe.experts.44.w2", "model.layers.26.block_sparse_moe.experts.45.w2", "model.layers.26.block_sparse_moe.experts.46.w2", "model.layers.26.block_sparse_moe.experts.47.w2", "model.layers.26.block_sparse_moe.experts.48.w2", "model.layers.26.block_sparse_moe.experts.49.w2", "model.layers.26.block_sparse_moe.experts.50.w2", "model.layers.26.block_sparse_moe.experts.51.w2", "model.layers.26.block_sparse_moe.experts.52.w2", "model.layers.26.block_sparse_moe.experts.53.w2", "model.layers.26.block_sparse_moe.experts.54.w2", "model.layers.26.block_sparse_moe.experts.55.w2", "model.layers.26.block_sparse_moe.experts.56.w2", "model.layers.26.block_sparse_moe.experts.57.w2", "model.layers.26.block_sparse_moe.experts.58.w2", "model.layers.26.block_sparse_moe.experts.59.w2", "model.layers.26.block_sparse_moe.experts.60.w2", "model.layers.26.block_sparse_moe.experts.61.w2", "model.layers.26.block_sparse_moe.experts.62.w2", "model.layers.26.block_sparse_moe.experts.63.w2", "model.layers.26.block_sparse_moe.experts.64.w2", "model.layers.26.block_sparse_moe.experts.65.w2", "model.layers.26.block_sparse_moe.experts.66.w2", "model.layers.26.block_sparse_moe.experts.67.w2", "model.layers.26.block_sparse_moe.experts.68.w2", "model.layers.26.block_sparse_moe.experts.69.w2", "model.layers.26.block_sparse_moe.experts.70.w2", "model.layers.26.block_sparse_moe.experts.71.w2", "model.layers.26.block_sparse_moe.experts.72.w2", "model.layers.26.block_sparse_moe.experts.73.w2", "model.layers.26.block_sparse_moe.experts.74.w2", "model.layers.26.block_sparse_moe.experts.75.w2", "model.layers.26.block_sparse_moe.experts.76.w2", "model.layers.26.block_sparse_moe.experts.77.w2", "model.layers.26.block_sparse_moe.experts.78.w2", "model.layers.26.block_sparse_moe.experts.79.w2", "model.layers.26.block_sparse_moe.experts.80.w2", "model.layers.26.block_sparse_moe.experts.81.w2", "model.layers.26.block_sparse_moe.experts.82.w2", "model.layers.26.block_sparse_moe.experts.83.w2", "model.layers.26.block_sparse_moe.experts.84.w2", "model.layers.26.block_sparse_moe.experts.85.w2", "model.layers.26.block_sparse_moe.experts.86.w2", "model.layers.26.block_sparse_moe.experts.87.w2", "model.layers.26.block_sparse_moe.experts.88.w2", "model.layers.26.block_sparse_moe.experts.89.w2", "model.layers.26.block_sparse_moe.experts.90.w2", "model.layers.26.block_sparse_moe.experts.91.w2", "model.layers.26.block_sparse_moe.experts.92.w2", "model.layers.26.block_sparse_moe.experts.93.w2", "model.layers.26.block_sparse_moe.experts.94.w2", "model.layers.26.block_sparse_moe.experts.95.w2", "model.layers.26.block_sparse_moe.experts.96.w2", "model.layers.26.block_sparse_moe.experts.97.w2", "model.layers.26.block_sparse_moe.experts.98.w2", "model.layers.26.block_sparse_moe.experts.99.w2", "model.layers.26.block_sparse_moe.experts.100.w2", "model.layers.26.block_sparse_moe.experts.101.w2", "model.layers.26.block_sparse_moe.experts.102.w2", "model.layers.26.block_sparse_moe.experts.103.w2", "model.layers.26.block_sparse_moe.experts.104.w2", "model.layers.26.block_sparse_moe.experts.105.w2", "model.layers.26.block_sparse_moe.experts.106.w2", "model.layers.26.block_sparse_moe.experts.107.w2", "model.layers.26.block_sparse_moe.experts.108.w2", "model.layers.26.block_sparse_moe.experts.109.w2", "model.layers.26.block_sparse_moe.experts.110.w2", "model.layers.26.block_sparse_moe.experts.111.w2", "model.layers.26.block_sparse_moe.experts.112.w2", "model.layers.26.block_sparse_moe.experts.113.w2", "model.layers.26.block_sparse_moe.experts.114.w2", "model.layers.26.block_sparse_moe.experts.115.w2", "model.layers.26.block_sparse_moe.experts.116.w2", "model.layers.26.block_sparse_moe.experts.117.w2", "model.layers.26.block_sparse_moe.experts.118.w2", "model.layers.26.block_sparse_moe.experts.119.w2", "model.layers.26.block_sparse_moe.experts.120.w2", "model.layers.26.block_sparse_moe.experts.121.w2", "model.layers.26.block_sparse_moe.experts.122.w2", "model.layers.26.block_sparse_moe.experts.123.w2", "model.layers.26.block_sparse_moe.experts.124.w2", "model.layers.26.block_sparse_moe.experts.125.w2", "model.layers.26.block_sparse_moe.experts.126.w2", "model.layers.26.block_sparse_moe.experts.127.w2", "model.layers.26.block_sparse_moe.experts.128.w2", "model.layers.26.block_sparse_moe.experts.129.w2", "model.layers.26.block_sparse_moe.experts.130.w2", "model.layers.26.block_sparse_moe.experts.131.w2", "model.layers.26.block_sparse_moe.experts.132.w2", "model.layers.26.block_sparse_moe.experts.133.w2", "model.layers.26.block_sparse_moe.experts.134.w2", "model.layers.26.block_sparse_moe.experts.135.w2", "model.layers.26.block_sparse_moe.experts.136.w2", "model.layers.26.block_sparse_moe.experts.137.w2", "model.layers.26.block_sparse_moe.experts.138.w2", "model.layers.26.block_sparse_moe.experts.139.w2", "model.layers.26.block_sparse_moe.experts.140.w2", "model.layers.26.block_sparse_moe.experts.141.w2", "model.layers.26.block_sparse_moe.experts.142.w2", "model.layers.26.block_sparse_moe.experts.143.w2", "model.layers.26.block_sparse_moe.experts.144.w2", "model.layers.26.block_sparse_moe.experts.145.w2", "model.layers.26.block_sparse_moe.experts.146.w2", "model.layers.26.block_sparse_moe.experts.147.w2", "model.layers.26.block_sparse_moe.experts.148.w2", "model.layers.26.block_sparse_moe.experts.149.w2", "model.layers.26.block_sparse_moe.experts.150.w2", "model.layers.26.block_sparse_moe.experts.151.w2", "model.layers.26.block_sparse_moe.experts.152.w2", "model.layers.26.block_sparse_moe.experts.153.w2", "model.layers.26.block_sparse_moe.experts.154.w2", "model.layers.26.block_sparse_moe.experts.155.w2", "model.layers.26.block_sparse_moe.experts.156.w2", "model.layers.26.block_sparse_moe.experts.157.w2", "model.layers.26.block_sparse_moe.experts.158.w2", "model.layers.26.block_sparse_moe.experts.159.w2", "model.layers.26.block_sparse_moe.experts.160.w2", "model.layers.26.block_sparse_moe.experts.161.w2", "model.layers.26.block_sparse_moe.experts.162.w2", "model.layers.26.block_sparse_moe.experts.163.w2", "model.layers.26.block_sparse_moe.experts.164.w2", "model.layers.26.block_sparse_moe.experts.165.w2", "model.layers.26.block_sparse_moe.experts.166.w2", "model.layers.26.block_sparse_moe.experts.167.w2", "model.layers.26.block_sparse_moe.experts.168.w2", "model.layers.26.block_sparse_moe.experts.169.w2", "model.layers.26.block_sparse_moe.experts.170.w2", "model.layers.26.block_sparse_moe.experts.171.w2", "model.layers.26.block_sparse_moe.experts.172.w2", "model.layers.26.block_sparse_moe.experts.173.w2", "model.layers.26.block_sparse_moe.experts.174.w2", "model.layers.26.block_sparse_moe.experts.175.w2", "model.layers.26.block_sparse_moe.experts.176.w2", "model.layers.26.block_sparse_moe.experts.177.w2", "model.layers.26.block_sparse_moe.experts.178.w2", "model.layers.26.block_sparse_moe.experts.179.w2", "model.layers.26.block_sparse_moe.experts.180.w2", "model.layers.26.block_sparse_moe.experts.181.w2", "model.layers.26.block_sparse_moe.experts.182.w2", "model.layers.26.block_sparse_moe.experts.183.w2", "model.layers.26.block_sparse_moe.experts.184.w2", "model.layers.26.block_sparse_moe.experts.185.w2", "model.layers.26.block_sparse_moe.experts.186.w2", "model.layers.26.block_sparse_moe.experts.187.w2", "model.layers.26.block_sparse_moe.experts.188.w2", "model.layers.26.block_sparse_moe.experts.189.w2", "model.layers.26.block_sparse_moe.experts.190.w2", "model.layers.26.block_sparse_moe.experts.191.w2", "model.layers.26.block_sparse_moe.experts.192.w2", "model.layers.26.block_sparse_moe.experts.193.w2", "model.layers.26.block_sparse_moe.experts.194.w2", "model.layers.26.block_sparse_moe.experts.195.w2", "model.layers.26.block_sparse_moe.experts.196.w2", "model.layers.26.block_sparse_moe.experts.197.w2", "model.layers.26.block_sparse_moe.experts.198.w2", "model.layers.26.block_sparse_moe.experts.199.w2", "model.layers.26.block_sparse_moe.experts.200.w2", "model.layers.26.block_sparse_moe.experts.201.w2", "model.layers.26.block_sparse_moe.experts.202.w2", "model.layers.26.block_sparse_moe.experts.203.w2", "model.layers.26.block_sparse_moe.experts.204.w2", "model.layers.26.block_sparse_moe.experts.205.w2", "model.layers.26.block_sparse_moe.experts.206.w2", "model.layers.26.block_sparse_moe.experts.207.w2", "model.layers.26.block_sparse_moe.experts.208.w2", "model.layers.26.block_sparse_moe.experts.209.w2", "model.layers.26.block_sparse_moe.experts.210.w2", "model.layers.26.block_sparse_moe.experts.211.w2", "model.layers.26.block_sparse_moe.experts.212.w2", "model.layers.26.block_sparse_moe.experts.213.w2", "model.layers.26.block_sparse_moe.experts.214.w2", "model.layers.26.block_sparse_moe.experts.215.w2", "model.layers.26.block_sparse_moe.experts.216.w2", "model.layers.26.block_sparse_moe.experts.217.w2", "model.layers.26.block_sparse_moe.experts.218.w2", "model.layers.26.block_sparse_moe.experts.219.w2", "model.layers.26.block_sparse_moe.experts.220.w2", "model.layers.26.block_sparse_moe.experts.221.w2", "model.layers.26.block_sparse_moe.experts.222.w2", "model.layers.26.block_sparse_moe.experts.223.w2", "model.layers.26.block_sparse_moe.experts.224.w2", "model.layers.26.block_sparse_moe.experts.225.w2", "model.layers.26.block_sparse_moe.experts.226.w2", "model.layers.26.block_sparse_moe.experts.227.w2", "model.layers.26.block_sparse_moe.experts.228.w2", "model.layers.26.block_sparse_moe.experts.229.w2", "model.layers.26.block_sparse_moe.experts.230.w2", "model.layers.26.block_sparse_moe.experts.231.w2", "model.layers.26.block_sparse_moe.experts.232.w2", "model.layers.26.block_sparse_moe.experts.233.w2", "model.layers.26.block_sparse_moe.experts.234.w2", "model.layers.26.block_sparse_moe.experts.235.w2", "model.layers.26.block_sparse_moe.experts.236.w2", "model.layers.26.block_sparse_moe.experts.237.w2", "model.layers.26.block_sparse_moe.experts.238.w2", "model.layers.26.block_sparse_moe.experts.239.w2", "model.layers.26.block_sparse_moe.experts.240.w2", "model.layers.26.block_sparse_moe.experts.241.w2", "model.layers.26.block_sparse_moe.experts.242.w2", "model.layers.26.block_sparse_moe.experts.243.w2", "model.layers.26.block_sparse_moe.experts.244.w2", "model.layers.26.block_sparse_moe.experts.245.w2", "model.layers.26.block_sparse_moe.experts.246.w2", "model.layers.26.block_sparse_moe.experts.247.w2", "model.layers.26.block_sparse_moe.experts.248.w2", "model.layers.26.block_sparse_moe.experts.249.w2", "model.layers.26.block_sparse_moe.experts.250.w2", "model.layers.26.block_sparse_moe.experts.251.w2", "model.layers.26.block_sparse_moe.experts.252.w2", "model.layers.26.block_sparse_moe.experts.253.w2", "model.layers.26.block_sparse_moe.experts.254.w2", "model.layers.26.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006061995401978465, "dbits": 1207959552 } ] }, { "idx": 135, "layers": [ "model.layers.27.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0002179259434342412, "dbits": 18874368 } ] }, { "idx": 136, "layers": [ "model.layers.27.self_attn.k_proj", "model.layers.27.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00200659800320864, "dbits": 6291456 } ] }, { "idx": 137, "layers": [ "model.layers.27.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00024208538234234134, "dbits": 18874368 } ] }, { "idx": 138, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w1", "model.layers.27.block_sparse_moe.experts.1.w1", "model.layers.27.block_sparse_moe.experts.2.w1", "model.layers.27.block_sparse_moe.experts.3.w1", "model.layers.27.block_sparse_moe.experts.4.w1", "model.layers.27.block_sparse_moe.experts.5.w1", "model.layers.27.block_sparse_moe.experts.6.w1", "model.layers.27.block_sparse_moe.experts.7.w1", "model.layers.27.block_sparse_moe.experts.8.w1", "model.layers.27.block_sparse_moe.experts.9.w1", "model.layers.27.block_sparse_moe.experts.10.w1", "model.layers.27.block_sparse_moe.experts.11.w1", "model.layers.27.block_sparse_moe.experts.12.w1", "model.layers.27.block_sparse_moe.experts.13.w1", "model.layers.27.block_sparse_moe.experts.14.w1", "model.layers.27.block_sparse_moe.experts.15.w1", "model.layers.27.block_sparse_moe.experts.16.w1", "model.layers.27.block_sparse_moe.experts.17.w1", "model.layers.27.block_sparse_moe.experts.18.w1", "model.layers.27.block_sparse_moe.experts.19.w1", "model.layers.27.block_sparse_moe.experts.20.w1", "model.layers.27.block_sparse_moe.experts.21.w1", "model.layers.27.block_sparse_moe.experts.22.w1", "model.layers.27.block_sparse_moe.experts.23.w1", "model.layers.27.block_sparse_moe.experts.24.w1", "model.layers.27.block_sparse_moe.experts.25.w1", "model.layers.27.block_sparse_moe.experts.26.w1", "model.layers.27.block_sparse_moe.experts.27.w1", "model.layers.27.block_sparse_moe.experts.28.w1", "model.layers.27.block_sparse_moe.experts.29.w1", "model.layers.27.block_sparse_moe.experts.30.w1", "model.layers.27.block_sparse_moe.experts.31.w1", "model.layers.27.block_sparse_moe.experts.32.w1", "model.layers.27.block_sparse_moe.experts.33.w1", "model.layers.27.block_sparse_moe.experts.34.w1", "model.layers.27.block_sparse_moe.experts.35.w1", "model.layers.27.block_sparse_moe.experts.36.w1", "model.layers.27.block_sparse_moe.experts.37.w1", "model.layers.27.block_sparse_moe.experts.38.w1", "model.layers.27.block_sparse_moe.experts.39.w1", "model.layers.27.block_sparse_moe.experts.40.w1", "model.layers.27.block_sparse_moe.experts.41.w1", "model.layers.27.block_sparse_moe.experts.42.w1", "model.layers.27.block_sparse_moe.experts.43.w1", "model.layers.27.block_sparse_moe.experts.44.w1", "model.layers.27.block_sparse_moe.experts.45.w1", "model.layers.27.block_sparse_moe.experts.46.w1", "model.layers.27.block_sparse_moe.experts.47.w1", "model.layers.27.block_sparse_moe.experts.48.w1", "model.layers.27.block_sparse_moe.experts.49.w1", "model.layers.27.block_sparse_moe.experts.50.w1", "model.layers.27.block_sparse_moe.experts.51.w1", "model.layers.27.block_sparse_moe.experts.52.w1", "model.layers.27.block_sparse_moe.experts.53.w1", "model.layers.27.block_sparse_moe.experts.54.w1", "model.layers.27.block_sparse_moe.experts.55.w1", "model.layers.27.block_sparse_moe.experts.56.w1", "model.layers.27.block_sparse_moe.experts.57.w1", "model.layers.27.block_sparse_moe.experts.58.w1", "model.layers.27.block_sparse_moe.experts.59.w1", "model.layers.27.block_sparse_moe.experts.60.w1", "model.layers.27.block_sparse_moe.experts.61.w1", "model.layers.27.block_sparse_moe.experts.62.w1", "model.layers.27.block_sparse_moe.experts.63.w1", "model.layers.27.block_sparse_moe.experts.64.w1", "model.layers.27.block_sparse_moe.experts.65.w1", "model.layers.27.block_sparse_moe.experts.66.w1", "model.layers.27.block_sparse_moe.experts.67.w1", "model.layers.27.block_sparse_moe.experts.68.w1", "model.layers.27.block_sparse_moe.experts.69.w1", "model.layers.27.block_sparse_moe.experts.70.w1", "model.layers.27.block_sparse_moe.experts.71.w1", "model.layers.27.block_sparse_moe.experts.72.w1", "model.layers.27.block_sparse_moe.experts.73.w1", "model.layers.27.block_sparse_moe.experts.74.w1", "model.layers.27.block_sparse_moe.experts.75.w1", "model.layers.27.block_sparse_moe.experts.76.w1", "model.layers.27.block_sparse_moe.experts.77.w1", "model.layers.27.block_sparse_moe.experts.78.w1", "model.layers.27.block_sparse_moe.experts.79.w1", "model.layers.27.block_sparse_moe.experts.80.w1", "model.layers.27.block_sparse_moe.experts.81.w1", "model.layers.27.block_sparse_moe.experts.82.w1", "model.layers.27.block_sparse_moe.experts.83.w1", "model.layers.27.block_sparse_moe.experts.84.w1", "model.layers.27.block_sparse_moe.experts.85.w1", "model.layers.27.block_sparse_moe.experts.86.w1", "model.layers.27.block_sparse_moe.experts.87.w1", "model.layers.27.block_sparse_moe.experts.88.w1", "model.layers.27.block_sparse_moe.experts.89.w1", "model.layers.27.block_sparse_moe.experts.90.w1", "model.layers.27.block_sparse_moe.experts.91.w1", "model.layers.27.block_sparse_moe.experts.92.w1", "model.layers.27.block_sparse_moe.experts.93.w1", "model.layers.27.block_sparse_moe.experts.94.w1", "model.layers.27.block_sparse_moe.experts.95.w1", "model.layers.27.block_sparse_moe.experts.96.w1", "model.layers.27.block_sparse_moe.experts.97.w1", "model.layers.27.block_sparse_moe.experts.98.w1", "model.layers.27.block_sparse_moe.experts.99.w1", "model.layers.27.block_sparse_moe.experts.100.w1", "model.layers.27.block_sparse_moe.experts.101.w1", "model.layers.27.block_sparse_moe.experts.102.w1", "model.layers.27.block_sparse_moe.experts.103.w1", "model.layers.27.block_sparse_moe.experts.104.w1", "model.layers.27.block_sparse_moe.experts.105.w1", "model.layers.27.block_sparse_moe.experts.106.w1", "model.layers.27.block_sparse_moe.experts.107.w1", "model.layers.27.block_sparse_moe.experts.108.w1", "model.layers.27.block_sparse_moe.experts.109.w1", "model.layers.27.block_sparse_moe.experts.110.w1", "model.layers.27.block_sparse_moe.experts.111.w1", "model.layers.27.block_sparse_moe.experts.112.w1", "model.layers.27.block_sparse_moe.experts.113.w1", "model.layers.27.block_sparse_moe.experts.114.w1", "model.layers.27.block_sparse_moe.experts.115.w1", "model.layers.27.block_sparse_moe.experts.116.w1", "model.layers.27.block_sparse_moe.experts.117.w1", "model.layers.27.block_sparse_moe.experts.118.w1", "model.layers.27.block_sparse_moe.experts.119.w1", "model.layers.27.block_sparse_moe.experts.120.w1", "model.layers.27.block_sparse_moe.experts.121.w1", "model.layers.27.block_sparse_moe.experts.122.w1", "model.layers.27.block_sparse_moe.experts.123.w1", "model.layers.27.block_sparse_moe.experts.124.w1", "model.layers.27.block_sparse_moe.experts.125.w1", "model.layers.27.block_sparse_moe.experts.126.w1", "model.layers.27.block_sparse_moe.experts.127.w1", "model.layers.27.block_sparse_moe.experts.128.w1", "model.layers.27.block_sparse_moe.experts.129.w1", "model.layers.27.block_sparse_moe.experts.130.w1", "model.layers.27.block_sparse_moe.experts.131.w1", "model.layers.27.block_sparse_moe.experts.132.w1", "model.layers.27.block_sparse_moe.experts.133.w1", "model.layers.27.block_sparse_moe.experts.134.w1", "model.layers.27.block_sparse_moe.experts.135.w1", "model.layers.27.block_sparse_moe.experts.136.w1", "model.layers.27.block_sparse_moe.experts.137.w1", "model.layers.27.block_sparse_moe.experts.138.w1", "model.layers.27.block_sparse_moe.experts.139.w1", "model.layers.27.block_sparse_moe.experts.140.w1", "model.layers.27.block_sparse_moe.experts.141.w1", "model.layers.27.block_sparse_moe.experts.142.w1", "model.layers.27.block_sparse_moe.experts.143.w1", "model.layers.27.block_sparse_moe.experts.144.w1", "model.layers.27.block_sparse_moe.experts.145.w1", "model.layers.27.block_sparse_moe.experts.146.w1", "model.layers.27.block_sparse_moe.experts.147.w1", "model.layers.27.block_sparse_moe.experts.148.w1", "model.layers.27.block_sparse_moe.experts.149.w1", "model.layers.27.block_sparse_moe.experts.150.w1", "model.layers.27.block_sparse_moe.experts.151.w1", "model.layers.27.block_sparse_moe.experts.152.w1", "model.layers.27.block_sparse_moe.experts.153.w1", "model.layers.27.block_sparse_moe.experts.154.w1", "model.layers.27.block_sparse_moe.experts.155.w1", "model.layers.27.block_sparse_moe.experts.156.w1", "model.layers.27.block_sparse_moe.experts.157.w1", "model.layers.27.block_sparse_moe.experts.158.w1", "model.layers.27.block_sparse_moe.experts.159.w1", "model.layers.27.block_sparse_moe.experts.160.w1", "model.layers.27.block_sparse_moe.experts.161.w1", "model.layers.27.block_sparse_moe.experts.162.w1", "model.layers.27.block_sparse_moe.experts.163.w1", "model.layers.27.block_sparse_moe.experts.164.w1", "model.layers.27.block_sparse_moe.experts.165.w1", "model.layers.27.block_sparse_moe.experts.166.w1", "model.layers.27.block_sparse_moe.experts.167.w1", "model.layers.27.block_sparse_moe.experts.168.w1", "model.layers.27.block_sparse_moe.experts.169.w1", "model.layers.27.block_sparse_moe.experts.170.w1", "model.layers.27.block_sparse_moe.experts.171.w1", "model.layers.27.block_sparse_moe.experts.172.w1", "model.layers.27.block_sparse_moe.experts.173.w1", "model.layers.27.block_sparse_moe.experts.174.w1", "model.layers.27.block_sparse_moe.experts.175.w1", "model.layers.27.block_sparse_moe.experts.176.w1", "model.layers.27.block_sparse_moe.experts.177.w1", "model.layers.27.block_sparse_moe.experts.178.w1", "model.layers.27.block_sparse_moe.experts.179.w1", "model.layers.27.block_sparse_moe.experts.180.w1", "model.layers.27.block_sparse_moe.experts.181.w1", "model.layers.27.block_sparse_moe.experts.182.w1", "model.layers.27.block_sparse_moe.experts.183.w1", "model.layers.27.block_sparse_moe.experts.184.w1", "model.layers.27.block_sparse_moe.experts.185.w1", "model.layers.27.block_sparse_moe.experts.186.w1", "model.layers.27.block_sparse_moe.experts.187.w1", "model.layers.27.block_sparse_moe.experts.188.w1", "model.layers.27.block_sparse_moe.experts.189.w1", "model.layers.27.block_sparse_moe.experts.190.w1", "model.layers.27.block_sparse_moe.experts.191.w1", "model.layers.27.block_sparse_moe.experts.192.w1", "model.layers.27.block_sparse_moe.experts.193.w1", "model.layers.27.block_sparse_moe.experts.194.w1", "model.layers.27.block_sparse_moe.experts.195.w1", "model.layers.27.block_sparse_moe.experts.196.w1", "model.layers.27.block_sparse_moe.experts.197.w1", "model.layers.27.block_sparse_moe.experts.198.w1", "model.layers.27.block_sparse_moe.experts.199.w1", "model.layers.27.block_sparse_moe.experts.200.w1", "model.layers.27.block_sparse_moe.experts.201.w1", "model.layers.27.block_sparse_moe.experts.202.w1", "model.layers.27.block_sparse_moe.experts.203.w1", "model.layers.27.block_sparse_moe.experts.204.w1", "model.layers.27.block_sparse_moe.experts.205.w1", "model.layers.27.block_sparse_moe.experts.206.w1", "model.layers.27.block_sparse_moe.experts.207.w1", "model.layers.27.block_sparse_moe.experts.208.w1", "model.layers.27.block_sparse_moe.experts.209.w1", "model.layers.27.block_sparse_moe.experts.210.w1", "model.layers.27.block_sparse_moe.experts.211.w1", "model.layers.27.block_sparse_moe.experts.212.w1", "model.layers.27.block_sparse_moe.experts.213.w1", "model.layers.27.block_sparse_moe.experts.214.w1", "model.layers.27.block_sparse_moe.experts.215.w1", "model.layers.27.block_sparse_moe.experts.216.w1", "model.layers.27.block_sparse_moe.experts.217.w1", "model.layers.27.block_sparse_moe.experts.218.w1", "model.layers.27.block_sparse_moe.experts.219.w1", "model.layers.27.block_sparse_moe.experts.220.w1", "model.layers.27.block_sparse_moe.experts.221.w1", "model.layers.27.block_sparse_moe.experts.222.w1", "model.layers.27.block_sparse_moe.experts.223.w1", "model.layers.27.block_sparse_moe.experts.224.w1", "model.layers.27.block_sparse_moe.experts.225.w1", "model.layers.27.block_sparse_moe.experts.226.w1", "model.layers.27.block_sparse_moe.experts.227.w1", "model.layers.27.block_sparse_moe.experts.228.w1", "model.layers.27.block_sparse_moe.experts.229.w1", "model.layers.27.block_sparse_moe.experts.230.w1", "model.layers.27.block_sparse_moe.experts.231.w1", "model.layers.27.block_sparse_moe.experts.232.w1", "model.layers.27.block_sparse_moe.experts.233.w1", "model.layers.27.block_sparse_moe.experts.234.w1", "model.layers.27.block_sparse_moe.experts.235.w1", "model.layers.27.block_sparse_moe.experts.236.w1", "model.layers.27.block_sparse_moe.experts.237.w1", "model.layers.27.block_sparse_moe.experts.238.w1", "model.layers.27.block_sparse_moe.experts.239.w1", "model.layers.27.block_sparse_moe.experts.240.w1", "model.layers.27.block_sparse_moe.experts.241.w1", "model.layers.27.block_sparse_moe.experts.242.w1", "model.layers.27.block_sparse_moe.experts.243.w1", "model.layers.27.block_sparse_moe.experts.244.w1", "model.layers.27.block_sparse_moe.experts.245.w1", "model.layers.27.block_sparse_moe.experts.246.w1", "model.layers.27.block_sparse_moe.experts.247.w1", "model.layers.27.block_sparse_moe.experts.248.w1", "model.layers.27.block_sparse_moe.experts.249.w1", "model.layers.27.block_sparse_moe.experts.250.w1", "model.layers.27.block_sparse_moe.experts.251.w1", "model.layers.27.block_sparse_moe.experts.252.w1", "model.layers.27.block_sparse_moe.experts.253.w1", "model.layers.27.block_sparse_moe.experts.254.w1", "model.layers.27.block_sparse_moe.experts.255.w1", "model.layers.27.block_sparse_moe.experts.0.w3", "model.layers.27.block_sparse_moe.experts.1.w3", "model.layers.27.block_sparse_moe.experts.2.w3", "model.layers.27.block_sparse_moe.experts.3.w3", "model.layers.27.block_sparse_moe.experts.4.w3", "model.layers.27.block_sparse_moe.experts.5.w3", "model.layers.27.block_sparse_moe.experts.6.w3", "model.layers.27.block_sparse_moe.experts.7.w3", "model.layers.27.block_sparse_moe.experts.8.w3", "model.layers.27.block_sparse_moe.experts.9.w3", "model.layers.27.block_sparse_moe.experts.10.w3", "model.layers.27.block_sparse_moe.experts.11.w3", "model.layers.27.block_sparse_moe.experts.12.w3", "model.layers.27.block_sparse_moe.experts.13.w3", "model.layers.27.block_sparse_moe.experts.14.w3", "model.layers.27.block_sparse_moe.experts.15.w3", "model.layers.27.block_sparse_moe.experts.16.w3", "model.layers.27.block_sparse_moe.experts.17.w3", "model.layers.27.block_sparse_moe.experts.18.w3", "model.layers.27.block_sparse_moe.experts.19.w3", "model.layers.27.block_sparse_moe.experts.20.w3", "model.layers.27.block_sparse_moe.experts.21.w3", "model.layers.27.block_sparse_moe.experts.22.w3", "model.layers.27.block_sparse_moe.experts.23.w3", "model.layers.27.block_sparse_moe.experts.24.w3", "model.layers.27.block_sparse_moe.experts.25.w3", "model.layers.27.block_sparse_moe.experts.26.w3", "model.layers.27.block_sparse_moe.experts.27.w3", "model.layers.27.block_sparse_moe.experts.28.w3", "model.layers.27.block_sparse_moe.experts.29.w3", "model.layers.27.block_sparse_moe.experts.30.w3", "model.layers.27.block_sparse_moe.experts.31.w3", "model.layers.27.block_sparse_moe.experts.32.w3", "model.layers.27.block_sparse_moe.experts.33.w3", "model.layers.27.block_sparse_moe.experts.34.w3", "model.layers.27.block_sparse_moe.experts.35.w3", "model.layers.27.block_sparse_moe.experts.36.w3", "model.layers.27.block_sparse_moe.experts.37.w3", "model.layers.27.block_sparse_moe.experts.38.w3", "model.layers.27.block_sparse_moe.experts.39.w3", "model.layers.27.block_sparse_moe.experts.40.w3", "model.layers.27.block_sparse_moe.experts.41.w3", "model.layers.27.block_sparse_moe.experts.42.w3", "model.layers.27.block_sparse_moe.experts.43.w3", "model.layers.27.block_sparse_moe.experts.44.w3", "model.layers.27.block_sparse_moe.experts.45.w3", "model.layers.27.block_sparse_moe.experts.46.w3", "model.layers.27.block_sparse_moe.experts.47.w3", "model.layers.27.block_sparse_moe.experts.48.w3", "model.layers.27.block_sparse_moe.experts.49.w3", "model.layers.27.block_sparse_moe.experts.50.w3", "model.layers.27.block_sparse_moe.experts.51.w3", "model.layers.27.block_sparse_moe.experts.52.w3", "model.layers.27.block_sparse_moe.experts.53.w3", "model.layers.27.block_sparse_moe.experts.54.w3", "model.layers.27.block_sparse_moe.experts.55.w3", "model.layers.27.block_sparse_moe.experts.56.w3", "model.layers.27.block_sparse_moe.experts.57.w3", "model.layers.27.block_sparse_moe.experts.58.w3", "model.layers.27.block_sparse_moe.experts.59.w3", "model.layers.27.block_sparse_moe.experts.60.w3", "model.layers.27.block_sparse_moe.experts.61.w3", "model.layers.27.block_sparse_moe.experts.62.w3", "model.layers.27.block_sparse_moe.experts.63.w3", "model.layers.27.block_sparse_moe.experts.64.w3", "model.layers.27.block_sparse_moe.experts.65.w3", "model.layers.27.block_sparse_moe.experts.66.w3", "model.layers.27.block_sparse_moe.experts.67.w3", "model.layers.27.block_sparse_moe.experts.68.w3", "model.layers.27.block_sparse_moe.experts.69.w3", "model.layers.27.block_sparse_moe.experts.70.w3", "model.layers.27.block_sparse_moe.experts.71.w3", "model.layers.27.block_sparse_moe.experts.72.w3", "model.layers.27.block_sparse_moe.experts.73.w3", "model.layers.27.block_sparse_moe.experts.74.w3", "model.layers.27.block_sparse_moe.experts.75.w3", "model.layers.27.block_sparse_moe.experts.76.w3", "model.layers.27.block_sparse_moe.experts.77.w3", "model.layers.27.block_sparse_moe.experts.78.w3", "model.layers.27.block_sparse_moe.experts.79.w3", "model.layers.27.block_sparse_moe.experts.80.w3", "model.layers.27.block_sparse_moe.experts.81.w3", "model.layers.27.block_sparse_moe.experts.82.w3", "model.layers.27.block_sparse_moe.experts.83.w3", "model.layers.27.block_sparse_moe.experts.84.w3", "model.layers.27.block_sparse_moe.experts.85.w3", "model.layers.27.block_sparse_moe.experts.86.w3", "model.layers.27.block_sparse_moe.experts.87.w3", "model.layers.27.block_sparse_moe.experts.88.w3", "model.layers.27.block_sparse_moe.experts.89.w3", "model.layers.27.block_sparse_moe.experts.90.w3", "model.layers.27.block_sparse_moe.experts.91.w3", "model.layers.27.block_sparse_moe.experts.92.w3", "model.layers.27.block_sparse_moe.experts.93.w3", "model.layers.27.block_sparse_moe.experts.94.w3", "model.layers.27.block_sparse_moe.experts.95.w3", "model.layers.27.block_sparse_moe.experts.96.w3", "model.layers.27.block_sparse_moe.experts.97.w3", "model.layers.27.block_sparse_moe.experts.98.w3", "model.layers.27.block_sparse_moe.experts.99.w3", "model.layers.27.block_sparse_moe.experts.100.w3", "model.layers.27.block_sparse_moe.experts.101.w3", "model.layers.27.block_sparse_moe.experts.102.w3", "model.layers.27.block_sparse_moe.experts.103.w3", "model.layers.27.block_sparse_moe.experts.104.w3", "model.layers.27.block_sparse_moe.experts.105.w3", "model.layers.27.block_sparse_moe.experts.106.w3", "model.layers.27.block_sparse_moe.experts.107.w3", "model.layers.27.block_sparse_moe.experts.108.w3", "model.layers.27.block_sparse_moe.experts.109.w3", "model.layers.27.block_sparse_moe.experts.110.w3", "model.layers.27.block_sparse_moe.experts.111.w3", "model.layers.27.block_sparse_moe.experts.112.w3", "model.layers.27.block_sparse_moe.experts.113.w3", "model.layers.27.block_sparse_moe.experts.114.w3", "model.layers.27.block_sparse_moe.experts.115.w3", "model.layers.27.block_sparse_moe.experts.116.w3", "model.layers.27.block_sparse_moe.experts.117.w3", "model.layers.27.block_sparse_moe.experts.118.w3", "model.layers.27.block_sparse_moe.experts.119.w3", "model.layers.27.block_sparse_moe.experts.120.w3", "model.layers.27.block_sparse_moe.experts.121.w3", "model.layers.27.block_sparse_moe.experts.122.w3", "model.layers.27.block_sparse_moe.experts.123.w3", "model.layers.27.block_sparse_moe.experts.124.w3", "model.layers.27.block_sparse_moe.experts.125.w3", "model.layers.27.block_sparse_moe.experts.126.w3", "model.layers.27.block_sparse_moe.experts.127.w3", "model.layers.27.block_sparse_moe.experts.128.w3", "model.layers.27.block_sparse_moe.experts.129.w3", "model.layers.27.block_sparse_moe.experts.130.w3", "model.layers.27.block_sparse_moe.experts.131.w3", "model.layers.27.block_sparse_moe.experts.132.w3", "model.layers.27.block_sparse_moe.experts.133.w3", "model.layers.27.block_sparse_moe.experts.134.w3", "model.layers.27.block_sparse_moe.experts.135.w3", "model.layers.27.block_sparse_moe.experts.136.w3", "model.layers.27.block_sparse_moe.experts.137.w3", "model.layers.27.block_sparse_moe.experts.138.w3", "model.layers.27.block_sparse_moe.experts.139.w3", "model.layers.27.block_sparse_moe.experts.140.w3", "model.layers.27.block_sparse_moe.experts.141.w3", "model.layers.27.block_sparse_moe.experts.142.w3", "model.layers.27.block_sparse_moe.experts.143.w3", "model.layers.27.block_sparse_moe.experts.144.w3", "model.layers.27.block_sparse_moe.experts.145.w3", "model.layers.27.block_sparse_moe.experts.146.w3", "model.layers.27.block_sparse_moe.experts.147.w3", "model.layers.27.block_sparse_moe.experts.148.w3", "model.layers.27.block_sparse_moe.experts.149.w3", "model.layers.27.block_sparse_moe.experts.150.w3", "model.layers.27.block_sparse_moe.experts.151.w3", "model.layers.27.block_sparse_moe.experts.152.w3", "model.layers.27.block_sparse_moe.experts.153.w3", "model.layers.27.block_sparse_moe.experts.154.w3", "model.layers.27.block_sparse_moe.experts.155.w3", "model.layers.27.block_sparse_moe.experts.156.w3", "model.layers.27.block_sparse_moe.experts.157.w3", "model.layers.27.block_sparse_moe.experts.158.w3", "model.layers.27.block_sparse_moe.experts.159.w3", "model.layers.27.block_sparse_moe.experts.160.w3", "model.layers.27.block_sparse_moe.experts.161.w3", "model.layers.27.block_sparse_moe.experts.162.w3", "model.layers.27.block_sparse_moe.experts.163.w3", "model.layers.27.block_sparse_moe.experts.164.w3", "model.layers.27.block_sparse_moe.experts.165.w3", "model.layers.27.block_sparse_moe.experts.166.w3", "model.layers.27.block_sparse_moe.experts.167.w3", "model.layers.27.block_sparse_moe.experts.168.w3", "model.layers.27.block_sparse_moe.experts.169.w3", "model.layers.27.block_sparse_moe.experts.170.w3", "model.layers.27.block_sparse_moe.experts.171.w3", "model.layers.27.block_sparse_moe.experts.172.w3", "model.layers.27.block_sparse_moe.experts.173.w3", "model.layers.27.block_sparse_moe.experts.174.w3", "model.layers.27.block_sparse_moe.experts.175.w3", "model.layers.27.block_sparse_moe.experts.176.w3", "model.layers.27.block_sparse_moe.experts.177.w3", "model.layers.27.block_sparse_moe.experts.178.w3", "model.layers.27.block_sparse_moe.experts.179.w3", "model.layers.27.block_sparse_moe.experts.180.w3", "model.layers.27.block_sparse_moe.experts.181.w3", "model.layers.27.block_sparse_moe.experts.182.w3", "model.layers.27.block_sparse_moe.experts.183.w3", "model.layers.27.block_sparse_moe.experts.184.w3", "model.layers.27.block_sparse_moe.experts.185.w3", "model.layers.27.block_sparse_moe.experts.186.w3", "model.layers.27.block_sparse_moe.experts.187.w3", "model.layers.27.block_sparse_moe.experts.188.w3", "model.layers.27.block_sparse_moe.experts.189.w3", "model.layers.27.block_sparse_moe.experts.190.w3", "model.layers.27.block_sparse_moe.experts.191.w3", "model.layers.27.block_sparse_moe.experts.192.w3", "model.layers.27.block_sparse_moe.experts.193.w3", "model.layers.27.block_sparse_moe.experts.194.w3", "model.layers.27.block_sparse_moe.experts.195.w3", "model.layers.27.block_sparse_moe.experts.196.w3", "model.layers.27.block_sparse_moe.experts.197.w3", "model.layers.27.block_sparse_moe.experts.198.w3", "model.layers.27.block_sparse_moe.experts.199.w3", "model.layers.27.block_sparse_moe.experts.200.w3", "model.layers.27.block_sparse_moe.experts.201.w3", "model.layers.27.block_sparse_moe.experts.202.w3", "model.layers.27.block_sparse_moe.experts.203.w3", "model.layers.27.block_sparse_moe.experts.204.w3", "model.layers.27.block_sparse_moe.experts.205.w3", "model.layers.27.block_sparse_moe.experts.206.w3", "model.layers.27.block_sparse_moe.experts.207.w3", "model.layers.27.block_sparse_moe.experts.208.w3", "model.layers.27.block_sparse_moe.experts.209.w3", "model.layers.27.block_sparse_moe.experts.210.w3", "model.layers.27.block_sparse_moe.experts.211.w3", "model.layers.27.block_sparse_moe.experts.212.w3", "model.layers.27.block_sparse_moe.experts.213.w3", "model.layers.27.block_sparse_moe.experts.214.w3", "model.layers.27.block_sparse_moe.experts.215.w3", "model.layers.27.block_sparse_moe.experts.216.w3", "model.layers.27.block_sparse_moe.experts.217.w3", "model.layers.27.block_sparse_moe.experts.218.w3", "model.layers.27.block_sparse_moe.experts.219.w3", "model.layers.27.block_sparse_moe.experts.220.w3", "model.layers.27.block_sparse_moe.experts.221.w3", "model.layers.27.block_sparse_moe.experts.222.w3", "model.layers.27.block_sparse_moe.experts.223.w3", "model.layers.27.block_sparse_moe.experts.224.w3", "model.layers.27.block_sparse_moe.experts.225.w3", "model.layers.27.block_sparse_moe.experts.226.w3", "model.layers.27.block_sparse_moe.experts.227.w3", "model.layers.27.block_sparse_moe.experts.228.w3", "model.layers.27.block_sparse_moe.experts.229.w3", "model.layers.27.block_sparse_moe.experts.230.w3", "model.layers.27.block_sparse_moe.experts.231.w3", "model.layers.27.block_sparse_moe.experts.232.w3", "model.layers.27.block_sparse_moe.experts.233.w3", "model.layers.27.block_sparse_moe.experts.234.w3", "model.layers.27.block_sparse_moe.experts.235.w3", "model.layers.27.block_sparse_moe.experts.236.w3", "model.layers.27.block_sparse_moe.experts.237.w3", "model.layers.27.block_sparse_moe.experts.238.w3", "model.layers.27.block_sparse_moe.experts.239.w3", "model.layers.27.block_sparse_moe.experts.240.w3", "model.layers.27.block_sparse_moe.experts.241.w3", "model.layers.27.block_sparse_moe.experts.242.w3", "model.layers.27.block_sparse_moe.experts.243.w3", "model.layers.27.block_sparse_moe.experts.244.w3", "model.layers.27.block_sparse_moe.experts.245.w3", "model.layers.27.block_sparse_moe.experts.246.w3", "model.layers.27.block_sparse_moe.experts.247.w3", "model.layers.27.block_sparse_moe.experts.248.w3", "model.layers.27.block_sparse_moe.experts.249.w3", "model.layers.27.block_sparse_moe.experts.250.w3", "model.layers.27.block_sparse_moe.experts.251.w3", "model.layers.27.block_sparse_moe.experts.252.w3", "model.layers.27.block_sparse_moe.experts.253.w3", "model.layers.27.block_sparse_moe.experts.254.w3", "model.layers.27.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0004989353939890834, "dbits": 2415919104 } ] }, { "idx": 139, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w2", "model.layers.27.block_sparse_moe.experts.1.w2", "model.layers.27.block_sparse_moe.experts.2.w2", "model.layers.27.block_sparse_moe.experts.3.w2", "model.layers.27.block_sparse_moe.experts.4.w2", "model.layers.27.block_sparse_moe.experts.5.w2", "model.layers.27.block_sparse_moe.experts.6.w2", "model.layers.27.block_sparse_moe.experts.7.w2", "model.layers.27.block_sparse_moe.experts.8.w2", "model.layers.27.block_sparse_moe.experts.9.w2", "model.layers.27.block_sparse_moe.experts.10.w2", "model.layers.27.block_sparse_moe.experts.11.w2", "model.layers.27.block_sparse_moe.experts.12.w2", "model.layers.27.block_sparse_moe.experts.13.w2", "model.layers.27.block_sparse_moe.experts.14.w2", "model.layers.27.block_sparse_moe.experts.15.w2", "model.layers.27.block_sparse_moe.experts.16.w2", "model.layers.27.block_sparse_moe.experts.17.w2", "model.layers.27.block_sparse_moe.experts.18.w2", "model.layers.27.block_sparse_moe.experts.19.w2", "model.layers.27.block_sparse_moe.experts.20.w2", "model.layers.27.block_sparse_moe.experts.21.w2", "model.layers.27.block_sparse_moe.experts.22.w2", "model.layers.27.block_sparse_moe.experts.23.w2", "model.layers.27.block_sparse_moe.experts.24.w2", "model.layers.27.block_sparse_moe.experts.25.w2", "model.layers.27.block_sparse_moe.experts.26.w2", "model.layers.27.block_sparse_moe.experts.27.w2", "model.layers.27.block_sparse_moe.experts.28.w2", "model.layers.27.block_sparse_moe.experts.29.w2", "model.layers.27.block_sparse_moe.experts.30.w2", "model.layers.27.block_sparse_moe.experts.31.w2", "model.layers.27.block_sparse_moe.experts.32.w2", "model.layers.27.block_sparse_moe.experts.33.w2", "model.layers.27.block_sparse_moe.experts.34.w2", "model.layers.27.block_sparse_moe.experts.35.w2", "model.layers.27.block_sparse_moe.experts.36.w2", "model.layers.27.block_sparse_moe.experts.37.w2", "model.layers.27.block_sparse_moe.experts.38.w2", "model.layers.27.block_sparse_moe.experts.39.w2", "model.layers.27.block_sparse_moe.experts.40.w2", "model.layers.27.block_sparse_moe.experts.41.w2", "model.layers.27.block_sparse_moe.experts.42.w2", "model.layers.27.block_sparse_moe.experts.43.w2", "model.layers.27.block_sparse_moe.experts.44.w2", "model.layers.27.block_sparse_moe.experts.45.w2", "model.layers.27.block_sparse_moe.experts.46.w2", "model.layers.27.block_sparse_moe.experts.47.w2", "model.layers.27.block_sparse_moe.experts.48.w2", "model.layers.27.block_sparse_moe.experts.49.w2", "model.layers.27.block_sparse_moe.experts.50.w2", "model.layers.27.block_sparse_moe.experts.51.w2", "model.layers.27.block_sparse_moe.experts.52.w2", "model.layers.27.block_sparse_moe.experts.53.w2", "model.layers.27.block_sparse_moe.experts.54.w2", "model.layers.27.block_sparse_moe.experts.55.w2", "model.layers.27.block_sparse_moe.experts.56.w2", "model.layers.27.block_sparse_moe.experts.57.w2", "model.layers.27.block_sparse_moe.experts.58.w2", "model.layers.27.block_sparse_moe.experts.59.w2", "model.layers.27.block_sparse_moe.experts.60.w2", "model.layers.27.block_sparse_moe.experts.61.w2", "model.layers.27.block_sparse_moe.experts.62.w2", "model.layers.27.block_sparse_moe.experts.63.w2", "model.layers.27.block_sparse_moe.experts.64.w2", "model.layers.27.block_sparse_moe.experts.65.w2", "model.layers.27.block_sparse_moe.experts.66.w2", "model.layers.27.block_sparse_moe.experts.67.w2", "model.layers.27.block_sparse_moe.experts.68.w2", "model.layers.27.block_sparse_moe.experts.69.w2", "model.layers.27.block_sparse_moe.experts.70.w2", "model.layers.27.block_sparse_moe.experts.71.w2", "model.layers.27.block_sparse_moe.experts.72.w2", "model.layers.27.block_sparse_moe.experts.73.w2", "model.layers.27.block_sparse_moe.experts.74.w2", "model.layers.27.block_sparse_moe.experts.75.w2", "model.layers.27.block_sparse_moe.experts.76.w2", "model.layers.27.block_sparse_moe.experts.77.w2", "model.layers.27.block_sparse_moe.experts.78.w2", "model.layers.27.block_sparse_moe.experts.79.w2", "model.layers.27.block_sparse_moe.experts.80.w2", "model.layers.27.block_sparse_moe.experts.81.w2", "model.layers.27.block_sparse_moe.experts.82.w2", "model.layers.27.block_sparse_moe.experts.83.w2", "model.layers.27.block_sparse_moe.experts.84.w2", "model.layers.27.block_sparse_moe.experts.85.w2", "model.layers.27.block_sparse_moe.experts.86.w2", "model.layers.27.block_sparse_moe.experts.87.w2", "model.layers.27.block_sparse_moe.experts.88.w2", "model.layers.27.block_sparse_moe.experts.89.w2", "model.layers.27.block_sparse_moe.experts.90.w2", "model.layers.27.block_sparse_moe.experts.91.w2", "model.layers.27.block_sparse_moe.experts.92.w2", "model.layers.27.block_sparse_moe.experts.93.w2", "model.layers.27.block_sparse_moe.experts.94.w2", "model.layers.27.block_sparse_moe.experts.95.w2", "model.layers.27.block_sparse_moe.experts.96.w2", "model.layers.27.block_sparse_moe.experts.97.w2", "model.layers.27.block_sparse_moe.experts.98.w2", "model.layers.27.block_sparse_moe.experts.99.w2", "model.layers.27.block_sparse_moe.experts.100.w2", "model.layers.27.block_sparse_moe.experts.101.w2", "model.layers.27.block_sparse_moe.experts.102.w2", "model.layers.27.block_sparse_moe.experts.103.w2", "model.layers.27.block_sparse_moe.experts.104.w2", "model.layers.27.block_sparse_moe.experts.105.w2", "model.layers.27.block_sparse_moe.experts.106.w2", "model.layers.27.block_sparse_moe.experts.107.w2", "model.layers.27.block_sparse_moe.experts.108.w2", "model.layers.27.block_sparse_moe.experts.109.w2", "model.layers.27.block_sparse_moe.experts.110.w2", "model.layers.27.block_sparse_moe.experts.111.w2", "model.layers.27.block_sparse_moe.experts.112.w2", "model.layers.27.block_sparse_moe.experts.113.w2", "model.layers.27.block_sparse_moe.experts.114.w2", "model.layers.27.block_sparse_moe.experts.115.w2", "model.layers.27.block_sparse_moe.experts.116.w2", "model.layers.27.block_sparse_moe.experts.117.w2", "model.layers.27.block_sparse_moe.experts.118.w2", "model.layers.27.block_sparse_moe.experts.119.w2", "model.layers.27.block_sparse_moe.experts.120.w2", "model.layers.27.block_sparse_moe.experts.121.w2", "model.layers.27.block_sparse_moe.experts.122.w2", "model.layers.27.block_sparse_moe.experts.123.w2", "model.layers.27.block_sparse_moe.experts.124.w2", "model.layers.27.block_sparse_moe.experts.125.w2", "model.layers.27.block_sparse_moe.experts.126.w2", "model.layers.27.block_sparse_moe.experts.127.w2", "model.layers.27.block_sparse_moe.experts.128.w2", "model.layers.27.block_sparse_moe.experts.129.w2", "model.layers.27.block_sparse_moe.experts.130.w2", "model.layers.27.block_sparse_moe.experts.131.w2", "model.layers.27.block_sparse_moe.experts.132.w2", "model.layers.27.block_sparse_moe.experts.133.w2", "model.layers.27.block_sparse_moe.experts.134.w2", "model.layers.27.block_sparse_moe.experts.135.w2", "model.layers.27.block_sparse_moe.experts.136.w2", "model.layers.27.block_sparse_moe.experts.137.w2", "model.layers.27.block_sparse_moe.experts.138.w2", "model.layers.27.block_sparse_moe.experts.139.w2", "model.layers.27.block_sparse_moe.experts.140.w2", "model.layers.27.block_sparse_moe.experts.141.w2", "model.layers.27.block_sparse_moe.experts.142.w2", "model.layers.27.block_sparse_moe.experts.143.w2", "model.layers.27.block_sparse_moe.experts.144.w2", "model.layers.27.block_sparse_moe.experts.145.w2", "model.layers.27.block_sparse_moe.experts.146.w2", "model.layers.27.block_sparse_moe.experts.147.w2", "model.layers.27.block_sparse_moe.experts.148.w2", "model.layers.27.block_sparse_moe.experts.149.w2", "model.layers.27.block_sparse_moe.experts.150.w2", "model.layers.27.block_sparse_moe.experts.151.w2", "model.layers.27.block_sparse_moe.experts.152.w2", "model.layers.27.block_sparse_moe.experts.153.w2", "model.layers.27.block_sparse_moe.experts.154.w2", "model.layers.27.block_sparse_moe.experts.155.w2", "model.layers.27.block_sparse_moe.experts.156.w2", "model.layers.27.block_sparse_moe.experts.157.w2", "model.layers.27.block_sparse_moe.experts.158.w2", "model.layers.27.block_sparse_moe.experts.159.w2", "model.layers.27.block_sparse_moe.experts.160.w2", "model.layers.27.block_sparse_moe.experts.161.w2", "model.layers.27.block_sparse_moe.experts.162.w2", "model.layers.27.block_sparse_moe.experts.163.w2", "model.layers.27.block_sparse_moe.experts.164.w2", "model.layers.27.block_sparse_moe.experts.165.w2", "model.layers.27.block_sparse_moe.experts.166.w2", "model.layers.27.block_sparse_moe.experts.167.w2", "model.layers.27.block_sparse_moe.experts.168.w2", "model.layers.27.block_sparse_moe.experts.169.w2", "model.layers.27.block_sparse_moe.experts.170.w2", "model.layers.27.block_sparse_moe.experts.171.w2", "model.layers.27.block_sparse_moe.experts.172.w2", "model.layers.27.block_sparse_moe.experts.173.w2", "model.layers.27.block_sparse_moe.experts.174.w2", "model.layers.27.block_sparse_moe.experts.175.w2", "model.layers.27.block_sparse_moe.experts.176.w2", "model.layers.27.block_sparse_moe.experts.177.w2", "model.layers.27.block_sparse_moe.experts.178.w2", "model.layers.27.block_sparse_moe.experts.179.w2", "model.layers.27.block_sparse_moe.experts.180.w2", "model.layers.27.block_sparse_moe.experts.181.w2", "model.layers.27.block_sparse_moe.experts.182.w2", "model.layers.27.block_sparse_moe.experts.183.w2", "model.layers.27.block_sparse_moe.experts.184.w2", "model.layers.27.block_sparse_moe.experts.185.w2", "model.layers.27.block_sparse_moe.experts.186.w2", "model.layers.27.block_sparse_moe.experts.187.w2", "model.layers.27.block_sparse_moe.experts.188.w2", "model.layers.27.block_sparse_moe.experts.189.w2", "model.layers.27.block_sparse_moe.experts.190.w2", "model.layers.27.block_sparse_moe.experts.191.w2", "model.layers.27.block_sparse_moe.experts.192.w2", "model.layers.27.block_sparse_moe.experts.193.w2", "model.layers.27.block_sparse_moe.experts.194.w2", "model.layers.27.block_sparse_moe.experts.195.w2", "model.layers.27.block_sparse_moe.experts.196.w2", "model.layers.27.block_sparse_moe.experts.197.w2", "model.layers.27.block_sparse_moe.experts.198.w2", "model.layers.27.block_sparse_moe.experts.199.w2", "model.layers.27.block_sparse_moe.experts.200.w2", "model.layers.27.block_sparse_moe.experts.201.w2", "model.layers.27.block_sparse_moe.experts.202.w2", "model.layers.27.block_sparse_moe.experts.203.w2", "model.layers.27.block_sparse_moe.experts.204.w2", "model.layers.27.block_sparse_moe.experts.205.w2", "model.layers.27.block_sparse_moe.experts.206.w2", "model.layers.27.block_sparse_moe.experts.207.w2", "model.layers.27.block_sparse_moe.experts.208.w2", "model.layers.27.block_sparse_moe.experts.209.w2", "model.layers.27.block_sparse_moe.experts.210.w2", "model.layers.27.block_sparse_moe.experts.211.w2", "model.layers.27.block_sparse_moe.experts.212.w2", "model.layers.27.block_sparse_moe.experts.213.w2", "model.layers.27.block_sparse_moe.experts.214.w2", "model.layers.27.block_sparse_moe.experts.215.w2", "model.layers.27.block_sparse_moe.experts.216.w2", "model.layers.27.block_sparse_moe.experts.217.w2", "model.layers.27.block_sparse_moe.experts.218.w2", "model.layers.27.block_sparse_moe.experts.219.w2", "model.layers.27.block_sparse_moe.experts.220.w2", "model.layers.27.block_sparse_moe.experts.221.w2", "model.layers.27.block_sparse_moe.experts.222.w2", "model.layers.27.block_sparse_moe.experts.223.w2", "model.layers.27.block_sparse_moe.experts.224.w2", "model.layers.27.block_sparse_moe.experts.225.w2", "model.layers.27.block_sparse_moe.experts.226.w2", "model.layers.27.block_sparse_moe.experts.227.w2", "model.layers.27.block_sparse_moe.experts.228.w2", "model.layers.27.block_sparse_moe.experts.229.w2", "model.layers.27.block_sparse_moe.experts.230.w2", "model.layers.27.block_sparse_moe.experts.231.w2", "model.layers.27.block_sparse_moe.experts.232.w2", "model.layers.27.block_sparse_moe.experts.233.w2", "model.layers.27.block_sparse_moe.experts.234.w2", "model.layers.27.block_sparse_moe.experts.235.w2", "model.layers.27.block_sparse_moe.experts.236.w2", "model.layers.27.block_sparse_moe.experts.237.w2", "model.layers.27.block_sparse_moe.experts.238.w2", "model.layers.27.block_sparse_moe.experts.239.w2", "model.layers.27.block_sparse_moe.experts.240.w2", "model.layers.27.block_sparse_moe.experts.241.w2", "model.layers.27.block_sparse_moe.experts.242.w2", "model.layers.27.block_sparse_moe.experts.243.w2", "model.layers.27.block_sparse_moe.experts.244.w2", "model.layers.27.block_sparse_moe.experts.245.w2", "model.layers.27.block_sparse_moe.experts.246.w2", "model.layers.27.block_sparse_moe.experts.247.w2", "model.layers.27.block_sparse_moe.experts.248.w2", "model.layers.27.block_sparse_moe.experts.249.w2", "model.layers.27.block_sparse_moe.experts.250.w2", "model.layers.27.block_sparse_moe.experts.251.w2", "model.layers.27.block_sparse_moe.experts.252.w2", "model.layers.27.block_sparse_moe.experts.253.w2", "model.layers.27.block_sparse_moe.experts.254.w2", "model.layers.27.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00022635310888290128, "dbits": 1207959552 } ] }, { "idx": 140, "layers": [ "model.layers.28.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0009518638253211975, "dbits": 18874368 } ] }, { "idx": 141, "layers": [ "model.layers.28.self_attn.k_proj", "model.layers.28.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0027869217097759164, "dbits": 6291456 } ] }, { "idx": 142, "layers": [ "model.layers.28.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001312506757676593, "dbits": 18874368 } ] }, { "idx": 143, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w1", "model.layers.28.block_sparse_moe.experts.1.w1", "model.layers.28.block_sparse_moe.experts.2.w1", "model.layers.28.block_sparse_moe.experts.3.w1", "model.layers.28.block_sparse_moe.experts.4.w1", "model.layers.28.block_sparse_moe.experts.5.w1", "model.layers.28.block_sparse_moe.experts.6.w1", "model.layers.28.block_sparse_moe.experts.7.w1", "model.layers.28.block_sparse_moe.experts.8.w1", "model.layers.28.block_sparse_moe.experts.9.w1", "model.layers.28.block_sparse_moe.experts.10.w1", "model.layers.28.block_sparse_moe.experts.11.w1", "model.layers.28.block_sparse_moe.experts.12.w1", "model.layers.28.block_sparse_moe.experts.13.w1", "model.layers.28.block_sparse_moe.experts.14.w1", "model.layers.28.block_sparse_moe.experts.15.w1", "model.layers.28.block_sparse_moe.experts.16.w1", "model.layers.28.block_sparse_moe.experts.17.w1", "model.layers.28.block_sparse_moe.experts.18.w1", "model.layers.28.block_sparse_moe.experts.19.w1", "model.layers.28.block_sparse_moe.experts.20.w1", "model.layers.28.block_sparse_moe.experts.21.w1", "model.layers.28.block_sparse_moe.experts.22.w1", "model.layers.28.block_sparse_moe.experts.23.w1", "model.layers.28.block_sparse_moe.experts.24.w1", "model.layers.28.block_sparse_moe.experts.25.w1", "model.layers.28.block_sparse_moe.experts.26.w1", "model.layers.28.block_sparse_moe.experts.27.w1", "model.layers.28.block_sparse_moe.experts.28.w1", "model.layers.28.block_sparse_moe.experts.29.w1", "model.layers.28.block_sparse_moe.experts.30.w1", "model.layers.28.block_sparse_moe.experts.31.w1", "model.layers.28.block_sparse_moe.experts.32.w1", "model.layers.28.block_sparse_moe.experts.33.w1", "model.layers.28.block_sparse_moe.experts.34.w1", "model.layers.28.block_sparse_moe.experts.35.w1", "model.layers.28.block_sparse_moe.experts.36.w1", "model.layers.28.block_sparse_moe.experts.37.w1", "model.layers.28.block_sparse_moe.experts.38.w1", "model.layers.28.block_sparse_moe.experts.39.w1", "model.layers.28.block_sparse_moe.experts.40.w1", "model.layers.28.block_sparse_moe.experts.41.w1", "model.layers.28.block_sparse_moe.experts.42.w1", "model.layers.28.block_sparse_moe.experts.43.w1", "model.layers.28.block_sparse_moe.experts.44.w1", "model.layers.28.block_sparse_moe.experts.45.w1", "model.layers.28.block_sparse_moe.experts.46.w1", "model.layers.28.block_sparse_moe.experts.47.w1", "model.layers.28.block_sparse_moe.experts.48.w1", "model.layers.28.block_sparse_moe.experts.49.w1", "model.layers.28.block_sparse_moe.experts.50.w1", "model.layers.28.block_sparse_moe.experts.51.w1", "model.layers.28.block_sparse_moe.experts.52.w1", "model.layers.28.block_sparse_moe.experts.53.w1", "model.layers.28.block_sparse_moe.experts.54.w1", "model.layers.28.block_sparse_moe.experts.55.w1", "model.layers.28.block_sparse_moe.experts.56.w1", "model.layers.28.block_sparse_moe.experts.57.w1", "model.layers.28.block_sparse_moe.experts.58.w1", "model.layers.28.block_sparse_moe.experts.59.w1", "model.layers.28.block_sparse_moe.experts.60.w1", "model.layers.28.block_sparse_moe.experts.61.w1", "model.layers.28.block_sparse_moe.experts.62.w1", "model.layers.28.block_sparse_moe.experts.63.w1", "model.layers.28.block_sparse_moe.experts.64.w1", "model.layers.28.block_sparse_moe.experts.65.w1", "model.layers.28.block_sparse_moe.experts.66.w1", "model.layers.28.block_sparse_moe.experts.67.w1", "model.layers.28.block_sparse_moe.experts.68.w1", "model.layers.28.block_sparse_moe.experts.69.w1", "model.layers.28.block_sparse_moe.experts.70.w1", "model.layers.28.block_sparse_moe.experts.71.w1", "model.layers.28.block_sparse_moe.experts.72.w1", "model.layers.28.block_sparse_moe.experts.73.w1", "model.layers.28.block_sparse_moe.experts.74.w1", "model.layers.28.block_sparse_moe.experts.75.w1", "model.layers.28.block_sparse_moe.experts.76.w1", "model.layers.28.block_sparse_moe.experts.77.w1", "model.layers.28.block_sparse_moe.experts.78.w1", "model.layers.28.block_sparse_moe.experts.79.w1", "model.layers.28.block_sparse_moe.experts.80.w1", "model.layers.28.block_sparse_moe.experts.81.w1", "model.layers.28.block_sparse_moe.experts.82.w1", "model.layers.28.block_sparse_moe.experts.83.w1", "model.layers.28.block_sparse_moe.experts.84.w1", "model.layers.28.block_sparse_moe.experts.85.w1", "model.layers.28.block_sparse_moe.experts.86.w1", "model.layers.28.block_sparse_moe.experts.87.w1", "model.layers.28.block_sparse_moe.experts.88.w1", "model.layers.28.block_sparse_moe.experts.89.w1", "model.layers.28.block_sparse_moe.experts.90.w1", "model.layers.28.block_sparse_moe.experts.91.w1", "model.layers.28.block_sparse_moe.experts.92.w1", "model.layers.28.block_sparse_moe.experts.93.w1", "model.layers.28.block_sparse_moe.experts.94.w1", "model.layers.28.block_sparse_moe.experts.95.w1", "model.layers.28.block_sparse_moe.experts.96.w1", "model.layers.28.block_sparse_moe.experts.97.w1", "model.layers.28.block_sparse_moe.experts.98.w1", "model.layers.28.block_sparse_moe.experts.99.w1", "model.layers.28.block_sparse_moe.experts.100.w1", "model.layers.28.block_sparse_moe.experts.101.w1", "model.layers.28.block_sparse_moe.experts.102.w1", "model.layers.28.block_sparse_moe.experts.103.w1", "model.layers.28.block_sparse_moe.experts.104.w1", "model.layers.28.block_sparse_moe.experts.105.w1", "model.layers.28.block_sparse_moe.experts.106.w1", "model.layers.28.block_sparse_moe.experts.107.w1", "model.layers.28.block_sparse_moe.experts.108.w1", "model.layers.28.block_sparse_moe.experts.109.w1", "model.layers.28.block_sparse_moe.experts.110.w1", "model.layers.28.block_sparse_moe.experts.111.w1", "model.layers.28.block_sparse_moe.experts.112.w1", "model.layers.28.block_sparse_moe.experts.113.w1", "model.layers.28.block_sparse_moe.experts.114.w1", "model.layers.28.block_sparse_moe.experts.115.w1", "model.layers.28.block_sparse_moe.experts.116.w1", "model.layers.28.block_sparse_moe.experts.117.w1", "model.layers.28.block_sparse_moe.experts.118.w1", "model.layers.28.block_sparse_moe.experts.119.w1", "model.layers.28.block_sparse_moe.experts.120.w1", "model.layers.28.block_sparse_moe.experts.121.w1", "model.layers.28.block_sparse_moe.experts.122.w1", "model.layers.28.block_sparse_moe.experts.123.w1", "model.layers.28.block_sparse_moe.experts.124.w1", "model.layers.28.block_sparse_moe.experts.125.w1", "model.layers.28.block_sparse_moe.experts.126.w1", "model.layers.28.block_sparse_moe.experts.127.w1", "model.layers.28.block_sparse_moe.experts.128.w1", "model.layers.28.block_sparse_moe.experts.129.w1", "model.layers.28.block_sparse_moe.experts.130.w1", "model.layers.28.block_sparse_moe.experts.131.w1", "model.layers.28.block_sparse_moe.experts.132.w1", "model.layers.28.block_sparse_moe.experts.133.w1", "model.layers.28.block_sparse_moe.experts.134.w1", "model.layers.28.block_sparse_moe.experts.135.w1", "model.layers.28.block_sparse_moe.experts.136.w1", "model.layers.28.block_sparse_moe.experts.137.w1", "model.layers.28.block_sparse_moe.experts.138.w1", "model.layers.28.block_sparse_moe.experts.139.w1", "model.layers.28.block_sparse_moe.experts.140.w1", "model.layers.28.block_sparse_moe.experts.141.w1", "model.layers.28.block_sparse_moe.experts.142.w1", "model.layers.28.block_sparse_moe.experts.143.w1", "model.layers.28.block_sparse_moe.experts.144.w1", "model.layers.28.block_sparse_moe.experts.145.w1", "model.layers.28.block_sparse_moe.experts.146.w1", "model.layers.28.block_sparse_moe.experts.147.w1", "model.layers.28.block_sparse_moe.experts.148.w1", "model.layers.28.block_sparse_moe.experts.149.w1", "model.layers.28.block_sparse_moe.experts.150.w1", "model.layers.28.block_sparse_moe.experts.151.w1", "model.layers.28.block_sparse_moe.experts.152.w1", "model.layers.28.block_sparse_moe.experts.153.w1", "model.layers.28.block_sparse_moe.experts.154.w1", "model.layers.28.block_sparse_moe.experts.155.w1", "model.layers.28.block_sparse_moe.experts.156.w1", "model.layers.28.block_sparse_moe.experts.157.w1", "model.layers.28.block_sparse_moe.experts.158.w1", "model.layers.28.block_sparse_moe.experts.159.w1", "model.layers.28.block_sparse_moe.experts.160.w1", "model.layers.28.block_sparse_moe.experts.161.w1", "model.layers.28.block_sparse_moe.experts.162.w1", "model.layers.28.block_sparse_moe.experts.163.w1", "model.layers.28.block_sparse_moe.experts.164.w1", "model.layers.28.block_sparse_moe.experts.165.w1", "model.layers.28.block_sparse_moe.experts.166.w1", "model.layers.28.block_sparse_moe.experts.167.w1", "model.layers.28.block_sparse_moe.experts.168.w1", "model.layers.28.block_sparse_moe.experts.169.w1", "model.layers.28.block_sparse_moe.experts.170.w1", "model.layers.28.block_sparse_moe.experts.171.w1", "model.layers.28.block_sparse_moe.experts.172.w1", "model.layers.28.block_sparse_moe.experts.173.w1", "model.layers.28.block_sparse_moe.experts.174.w1", "model.layers.28.block_sparse_moe.experts.175.w1", "model.layers.28.block_sparse_moe.experts.176.w1", "model.layers.28.block_sparse_moe.experts.177.w1", "model.layers.28.block_sparse_moe.experts.178.w1", "model.layers.28.block_sparse_moe.experts.179.w1", "model.layers.28.block_sparse_moe.experts.180.w1", "model.layers.28.block_sparse_moe.experts.181.w1", "model.layers.28.block_sparse_moe.experts.182.w1", "model.layers.28.block_sparse_moe.experts.183.w1", "model.layers.28.block_sparse_moe.experts.184.w1", "model.layers.28.block_sparse_moe.experts.185.w1", "model.layers.28.block_sparse_moe.experts.186.w1", "model.layers.28.block_sparse_moe.experts.187.w1", "model.layers.28.block_sparse_moe.experts.188.w1", "model.layers.28.block_sparse_moe.experts.189.w1", "model.layers.28.block_sparse_moe.experts.190.w1", "model.layers.28.block_sparse_moe.experts.191.w1", "model.layers.28.block_sparse_moe.experts.192.w1", "model.layers.28.block_sparse_moe.experts.193.w1", "model.layers.28.block_sparse_moe.experts.194.w1", "model.layers.28.block_sparse_moe.experts.195.w1", "model.layers.28.block_sparse_moe.experts.196.w1", "model.layers.28.block_sparse_moe.experts.197.w1", "model.layers.28.block_sparse_moe.experts.198.w1", "model.layers.28.block_sparse_moe.experts.199.w1", "model.layers.28.block_sparse_moe.experts.200.w1", "model.layers.28.block_sparse_moe.experts.201.w1", "model.layers.28.block_sparse_moe.experts.202.w1", "model.layers.28.block_sparse_moe.experts.203.w1", "model.layers.28.block_sparse_moe.experts.204.w1", "model.layers.28.block_sparse_moe.experts.205.w1", "model.layers.28.block_sparse_moe.experts.206.w1", "model.layers.28.block_sparse_moe.experts.207.w1", "model.layers.28.block_sparse_moe.experts.208.w1", "model.layers.28.block_sparse_moe.experts.209.w1", "model.layers.28.block_sparse_moe.experts.210.w1", "model.layers.28.block_sparse_moe.experts.211.w1", "model.layers.28.block_sparse_moe.experts.212.w1", "model.layers.28.block_sparse_moe.experts.213.w1", "model.layers.28.block_sparse_moe.experts.214.w1", "model.layers.28.block_sparse_moe.experts.215.w1", "model.layers.28.block_sparse_moe.experts.216.w1", "model.layers.28.block_sparse_moe.experts.217.w1", "model.layers.28.block_sparse_moe.experts.218.w1", "model.layers.28.block_sparse_moe.experts.219.w1", "model.layers.28.block_sparse_moe.experts.220.w1", "model.layers.28.block_sparse_moe.experts.221.w1", "model.layers.28.block_sparse_moe.experts.222.w1", "model.layers.28.block_sparse_moe.experts.223.w1", "model.layers.28.block_sparse_moe.experts.224.w1", "model.layers.28.block_sparse_moe.experts.225.w1", "model.layers.28.block_sparse_moe.experts.226.w1", "model.layers.28.block_sparse_moe.experts.227.w1", "model.layers.28.block_sparse_moe.experts.228.w1", "model.layers.28.block_sparse_moe.experts.229.w1", "model.layers.28.block_sparse_moe.experts.230.w1", "model.layers.28.block_sparse_moe.experts.231.w1", "model.layers.28.block_sparse_moe.experts.232.w1", "model.layers.28.block_sparse_moe.experts.233.w1", "model.layers.28.block_sparse_moe.experts.234.w1", "model.layers.28.block_sparse_moe.experts.235.w1", "model.layers.28.block_sparse_moe.experts.236.w1", "model.layers.28.block_sparse_moe.experts.237.w1", "model.layers.28.block_sparse_moe.experts.238.w1", "model.layers.28.block_sparse_moe.experts.239.w1", "model.layers.28.block_sparse_moe.experts.240.w1", "model.layers.28.block_sparse_moe.experts.241.w1", "model.layers.28.block_sparse_moe.experts.242.w1", "model.layers.28.block_sparse_moe.experts.243.w1", "model.layers.28.block_sparse_moe.experts.244.w1", "model.layers.28.block_sparse_moe.experts.245.w1", "model.layers.28.block_sparse_moe.experts.246.w1", "model.layers.28.block_sparse_moe.experts.247.w1", "model.layers.28.block_sparse_moe.experts.248.w1", "model.layers.28.block_sparse_moe.experts.249.w1", "model.layers.28.block_sparse_moe.experts.250.w1", "model.layers.28.block_sparse_moe.experts.251.w1", "model.layers.28.block_sparse_moe.experts.252.w1", "model.layers.28.block_sparse_moe.experts.253.w1", "model.layers.28.block_sparse_moe.experts.254.w1", "model.layers.28.block_sparse_moe.experts.255.w1", "model.layers.28.block_sparse_moe.experts.0.w3", "model.layers.28.block_sparse_moe.experts.1.w3", "model.layers.28.block_sparse_moe.experts.2.w3", "model.layers.28.block_sparse_moe.experts.3.w3", "model.layers.28.block_sparse_moe.experts.4.w3", "model.layers.28.block_sparse_moe.experts.5.w3", "model.layers.28.block_sparse_moe.experts.6.w3", "model.layers.28.block_sparse_moe.experts.7.w3", "model.layers.28.block_sparse_moe.experts.8.w3", "model.layers.28.block_sparse_moe.experts.9.w3", "model.layers.28.block_sparse_moe.experts.10.w3", "model.layers.28.block_sparse_moe.experts.11.w3", "model.layers.28.block_sparse_moe.experts.12.w3", "model.layers.28.block_sparse_moe.experts.13.w3", "model.layers.28.block_sparse_moe.experts.14.w3", "model.layers.28.block_sparse_moe.experts.15.w3", "model.layers.28.block_sparse_moe.experts.16.w3", "model.layers.28.block_sparse_moe.experts.17.w3", "model.layers.28.block_sparse_moe.experts.18.w3", "model.layers.28.block_sparse_moe.experts.19.w3", "model.layers.28.block_sparse_moe.experts.20.w3", "model.layers.28.block_sparse_moe.experts.21.w3", "model.layers.28.block_sparse_moe.experts.22.w3", "model.layers.28.block_sparse_moe.experts.23.w3", "model.layers.28.block_sparse_moe.experts.24.w3", "model.layers.28.block_sparse_moe.experts.25.w3", "model.layers.28.block_sparse_moe.experts.26.w3", "model.layers.28.block_sparse_moe.experts.27.w3", "model.layers.28.block_sparse_moe.experts.28.w3", "model.layers.28.block_sparse_moe.experts.29.w3", "model.layers.28.block_sparse_moe.experts.30.w3", "model.layers.28.block_sparse_moe.experts.31.w3", "model.layers.28.block_sparse_moe.experts.32.w3", "model.layers.28.block_sparse_moe.experts.33.w3", "model.layers.28.block_sparse_moe.experts.34.w3", "model.layers.28.block_sparse_moe.experts.35.w3", "model.layers.28.block_sparse_moe.experts.36.w3", "model.layers.28.block_sparse_moe.experts.37.w3", "model.layers.28.block_sparse_moe.experts.38.w3", "model.layers.28.block_sparse_moe.experts.39.w3", "model.layers.28.block_sparse_moe.experts.40.w3", "model.layers.28.block_sparse_moe.experts.41.w3", "model.layers.28.block_sparse_moe.experts.42.w3", "model.layers.28.block_sparse_moe.experts.43.w3", "model.layers.28.block_sparse_moe.experts.44.w3", "model.layers.28.block_sparse_moe.experts.45.w3", "model.layers.28.block_sparse_moe.experts.46.w3", "model.layers.28.block_sparse_moe.experts.47.w3", "model.layers.28.block_sparse_moe.experts.48.w3", "model.layers.28.block_sparse_moe.experts.49.w3", "model.layers.28.block_sparse_moe.experts.50.w3", "model.layers.28.block_sparse_moe.experts.51.w3", "model.layers.28.block_sparse_moe.experts.52.w3", "model.layers.28.block_sparse_moe.experts.53.w3", "model.layers.28.block_sparse_moe.experts.54.w3", "model.layers.28.block_sparse_moe.experts.55.w3", "model.layers.28.block_sparse_moe.experts.56.w3", "model.layers.28.block_sparse_moe.experts.57.w3", "model.layers.28.block_sparse_moe.experts.58.w3", "model.layers.28.block_sparse_moe.experts.59.w3", "model.layers.28.block_sparse_moe.experts.60.w3", "model.layers.28.block_sparse_moe.experts.61.w3", "model.layers.28.block_sparse_moe.experts.62.w3", "model.layers.28.block_sparse_moe.experts.63.w3", "model.layers.28.block_sparse_moe.experts.64.w3", "model.layers.28.block_sparse_moe.experts.65.w3", "model.layers.28.block_sparse_moe.experts.66.w3", "model.layers.28.block_sparse_moe.experts.67.w3", "model.layers.28.block_sparse_moe.experts.68.w3", "model.layers.28.block_sparse_moe.experts.69.w3", "model.layers.28.block_sparse_moe.experts.70.w3", "model.layers.28.block_sparse_moe.experts.71.w3", "model.layers.28.block_sparse_moe.experts.72.w3", "model.layers.28.block_sparse_moe.experts.73.w3", "model.layers.28.block_sparse_moe.experts.74.w3", "model.layers.28.block_sparse_moe.experts.75.w3", "model.layers.28.block_sparse_moe.experts.76.w3", "model.layers.28.block_sparse_moe.experts.77.w3", "model.layers.28.block_sparse_moe.experts.78.w3", "model.layers.28.block_sparse_moe.experts.79.w3", "model.layers.28.block_sparse_moe.experts.80.w3", "model.layers.28.block_sparse_moe.experts.81.w3", "model.layers.28.block_sparse_moe.experts.82.w3", "model.layers.28.block_sparse_moe.experts.83.w3", "model.layers.28.block_sparse_moe.experts.84.w3", "model.layers.28.block_sparse_moe.experts.85.w3", "model.layers.28.block_sparse_moe.experts.86.w3", "model.layers.28.block_sparse_moe.experts.87.w3", "model.layers.28.block_sparse_moe.experts.88.w3", "model.layers.28.block_sparse_moe.experts.89.w3", "model.layers.28.block_sparse_moe.experts.90.w3", "model.layers.28.block_sparse_moe.experts.91.w3", "model.layers.28.block_sparse_moe.experts.92.w3", "model.layers.28.block_sparse_moe.experts.93.w3", "model.layers.28.block_sparse_moe.experts.94.w3", "model.layers.28.block_sparse_moe.experts.95.w3", "model.layers.28.block_sparse_moe.experts.96.w3", "model.layers.28.block_sparse_moe.experts.97.w3", "model.layers.28.block_sparse_moe.experts.98.w3", "model.layers.28.block_sparse_moe.experts.99.w3", "model.layers.28.block_sparse_moe.experts.100.w3", "model.layers.28.block_sparse_moe.experts.101.w3", "model.layers.28.block_sparse_moe.experts.102.w3", "model.layers.28.block_sparse_moe.experts.103.w3", "model.layers.28.block_sparse_moe.experts.104.w3", "model.layers.28.block_sparse_moe.experts.105.w3", "model.layers.28.block_sparse_moe.experts.106.w3", "model.layers.28.block_sparse_moe.experts.107.w3", "model.layers.28.block_sparse_moe.experts.108.w3", "model.layers.28.block_sparse_moe.experts.109.w3", "model.layers.28.block_sparse_moe.experts.110.w3", "model.layers.28.block_sparse_moe.experts.111.w3", "model.layers.28.block_sparse_moe.experts.112.w3", "model.layers.28.block_sparse_moe.experts.113.w3", "model.layers.28.block_sparse_moe.experts.114.w3", "model.layers.28.block_sparse_moe.experts.115.w3", "model.layers.28.block_sparse_moe.experts.116.w3", "model.layers.28.block_sparse_moe.experts.117.w3", "model.layers.28.block_sparse_moe.experts.118.w3", "model.layers.28.block_sparse_moe.experts.119.w3", "model.layers.28.block_sparse_moe.experts.120.w3", "model.layers.28.block_sparse_moe.experts.121.w3", "model.layers.28.block_sparse_moe.experts.122.w3", "model.layers.28.block_sparse_moe.experts.123.w3", "model.layers.28.block_sparse_moe.experts.124.w3", "model.layers.28.block_sparse_moe.experts.125.w3", "model.layers.28.block_sparse_moe.experts.126.w3", "model.layers.28.block_sparse_moe.experts.127.w3", "model.layers.28.block_sparse_moe.experts.128.w3", "model.layers.28.block_sparse_moe.experts.129.w3", "model.layers.28.block_sparse_moe.experts.130.w3", "model.layers.28.block_sparse_moe.experts.131.w3", "model.layers.28.block_sparse_moe.experts.132.w3", "model.layers.28.block_sparse_moe.experts.133.w3", "model.layers.28.block_sparse_moe.experts.134.w3", "model.layers.28.block_sparse_moe.experts.135.w3", "model.layers.28.block_sparse_moe.experts.136.w3", "model.layers.28.block_sparse_moe.experts.137.w3", "model.layers.28.block_sparse_moe.experts.138.w3", "model.layers.28.block_sparse_moe.experts.139.w3", "model.layers.28.block_sparse_moe.experts.140.w3", "model.layers.28.block_sparse_moe.experts.141.w3", "model.layers.28.block_sparse_moe.experts.142.w3", "model.layers.28.block_sparse_moe.experts.143.w3", "model.layers.28.block_sparse_moe.experts.144.w3", "model.layers.28.block_sparse_moe.experts.145.w3", "model.layers.28.block_sparse_moe.experts.146.w3", "model.layers.28.block_sparse_moe.experts.147.w3", "model.layers.28.block_sparse_moe.experts.148.w3", "model.layers.28.block_sparse_moe.experts.149.w3", "model.layers.28.block_sparse_moe.experts.150.w3", "model.layers.28.block_sparse_moe.experts.151.w3", "model.layers.28.block_sparse_moe.experts.152.w3", "model.layers.28.block_sparse_moe.experts.153.w3", "model.layers.28.block_sparse_moe.experts.154.w3", "model.layers.28.block_sparse_moe.experts.155.w3", "model.layers.28.block_sparse_moe.experts.156.w3", "model.layers.28.block_sparse_moe.experts.157.w3", "model.layers.28.block_sparse_moe.experts.158.w3", "model.layers.28.block_sparse_moe.experts.159.w3", "model.layers.28.block_sparse_moe.experts.160.w3", "model.layers.28.block_sparse_moe.experts.161.w3", "model.layers.28.block_sparse_moe.experts.162.w3", "model.layers.28.block_sparse_moe.experts.163.w3", "model.layers.28.block_sparse_moe.experts.164.w3", "model.layers.28.block_sparse_moe.experts.165.w3", "model.layers.28.block_sparse_moe.experts.166.w3", "model.layers.28.block_sparse_moe.experts.167.w3", "model.layers.28.block_sparse_moe.experts.168.w3", "model.layers.28.block_sparse_moe.experts.169.w3", "model.layers.28.block_sparse_moe.experts.170.w3", "model.layers.28.block_sparse_moe.experts.171.w3", "model.layers.28.block_sparse_moe.experts.172.w3", "model.layers.28.block_sparse_moe.experts.173.w3", "model.layers.28.block_sparse_moe.experts.174.w3", "model.layers.28.block_sparse_moe.experts.175.w3", "model.layers.28.block_sparse_moe.experts.176.w3", "model.layers.28.block_sparse_moe.experts.177.w3", "model.layers.28.block_sparse_moe.experts.178.w3", "model.layers.28.block_sparse_moe.experts.179.w3", "model.layers.28.block_sparse_moe.experts.180.w3", "model.layers.28.block_sparse_moe.experts.181.w3", "model.layers.28.block_sparse_moe.experts.182.w3", "model.layers.28.block_sparse_moe.experts.183.w3", "model.layers.28.block_sparse_moe.experts.184.w3", "model.layers.28.block_sparse_moe.experts.185.w3", "model.layers.28.block_sparse_moe.experts.186.w3", "model.layers.28.block_sparse_moe.experts.187.w3", "model.layers.28.block_sparse_moe.experts.188.w3", "model.layers.28.block_sparse_moe.experts.189.w3", "model.layers.28.block_sparse_moe.experts.190.w3", "model.layers.28.block_sparse_moe.experts.191.w3", "model.layers.28.block_sparse_moe.experts.192.w3", "model.layers.28.block_sparse_moe.experts.193.w3", "model.layers.28.block_sparse_moe.experts.194.w3", "model.layers.28.block_sparse_moe.experts.195.w3", "model.layers.28.block_sparse_moe.experts.196.w3", "model.layers.28.block_sparse_moe.experts.197.w3", "model.layers.28.block_sparse_moe.experts.198.w3", "model.layers.28.block_sparse_moe.experts.199.w3", "model.layers.28.block_sparse_moe.experts.200.w3", "model.layers.28.block_sparse_moe.experts.201.w3", "model.layers.28.block_sparse_moe.experts.202.w3", "model.layers.28.block_sparse_moe.experts.203.w3", "model.layers.28.block_sparse_moe.experts.204.w3", "model.layers.28.block_sparse_moe.experts.205.w3", "model.layers.28.block_sparse_moe.experts.206.w3", "model.layers.28.block_sparse_moe.experts.207.w3", "model.layers.28.block_sparse_moe.experts.208.w3", "model.layers.28.block_sparse_moe.experts.209.w3", "model.layers.28.block_sparse_moe.experts.210.w3", "model.layers.28.block_sparse_moe.experts.211.w3", "model.layers.28.block_sparse_moe.experts.212.w3", "model.layers.28.block_sparse_moe.experts.213.w3", "model.layers.28.block_sparse_moe.experts.214.w3", "model.layers.28.block_sparse_moe.experts.215.w3", "model.layers.28.block_sparse_moe.experts.216.w3", "model.layers.28.block_sparse_moe.experts.217.w3", "model.layers.28.block_sparse_moe.experts.218.w3", "model.layers.28.block_sparse_moe.experts.219.w3", "model.layers.28.block_sparse_moe.experts.220.w3", "model.layers.28.block_sparse_moe.experts.221.w3", "model.layers.28.block_sparse_moe.experts.222.w3", "model.layers.28.block_sparse_moe.experts.223.w3", "model.layers.28.block_sparse_moe.experts.224.w3", "model.layers.28.block_sparse_moe.experts.225.w3", "model.layers.28.block_sparse_moe.experts.226.w3", "model.layers.28.block_sparse_moe.experts.227.w3", "model.layers.28.block_sparse_moe.experts.228.w3", "model.layers.28.block_sparse_moe.experts.229.w3", "model.layers.28.block_sparse_moe.experts.230.w3", "model.layers.28.block_sparse_moe.experts.231.w3", "model.layers.28.block_sparse_moe.experts.232.w3", "model.layers.28.block_sparse_moe.experts.233.w3", "model.layers.28.block_sparse_moe.experts.234.w3", "model.layers.28.block_sparse_moe.experts.235.w3", "model.layers.28.block_sparse_moe.experts.236.w3", "model.layers.28.block_sparse_moe.experts.237.w3", "model.layers.28.block_sparse_moe.experts.238.w3", "model.layers.28.block_sparse_moe.experts.239.w3", "model.layers.28.block_sparse_moe.experts.240.w3", "model.layers.28.block_sparse_moe.experts.241.w3", "model.layers.28.block_sparse_moe.experts.242.w3", "model.layers.28.block_sparse_moe.experts.243.w3", "model.layers.28.block_sparse_moe.experts.244.w3", "model.layers.28.block_sparse_moe.experts.245.w3", "model.layers.28.block_sparse_moe.experts.246.w3", "model.layers.28.block_sparse_moe.experts.247.w3", "model.layers.28.block_sparse_moe.experts.248.w3", "model.layers.28.block_sparse_moe.experts.249.w3", "model.layers.28.block_sparse_moe.experts.250.w3", "model.layers.28.block_sparse_moe.experts.251.w3", "model.layers.28.block_sparse_moe.experts.252.w3", "model.layers.28.block_sparse_moe.experts.253.w3", "model.layers.28.block_sparse_moe.experts.254.w3", "model.layers.28.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00037889759987592975, "dbits": 2415919104 } ] }, { "idx": 144, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w2", "model.layers.28.block_sparse_moe.experts.1.w2", "model.layers.28.block_sparse_moe.experts.2.w2", "model.layers.28.block_sparse_moe.experts.3.w2", "model.layers.28.block_sparse_moe.experts.4.w2", "model.layers.28.block_sparse_moe.experts.5.w2", "model.layers.28.block_sparse_moe.experts.6.w2", "model.layers.28.block_sparse_moe.experts.7.w2", "model.layers.28.block_sparse_moe.experts.8.w2", "model.layers.28.block_sparse_moe.experts.9.w2", "model.layers.28.block_sparse_moe.experts.10.w2", "model.layers.28.block_sparse_moe.experts.11.w2", "model.layers.28.block_sparse_moe.experts.12.w2", "model.layers.28.block_sparse_moe.experts.13.w2", "model.layers.28.block_sparse_moe.experts.14.w2", "model.layers.28.block_sparse_moe.experts.15.w2", "model.layers.28.block_sparse_moe.experts.16.w2", "model.layers.28.block_sparse_moe.experts.17.w2", "model.layers.28.block_sparse_moe.experts.18.w2", "model.layers.28.block_sparse_moe.experts.19.w2", "model.layers.28.block_sparse_moe.experts.20.w2", "model.layers.28.block_sparse_moe.experts.21.w2", "model.layers.28.block_sparse_moe.experts.22.w2", "model.layers.28.block_sparse_moe.experts.23.w2", "model.layers.28.block_sparse_moe.experts.24.w2", "model.layers.28.block_sparse_moe.experts.25.w2", "model.layers.28.block_sparse_moe.experts.26.w2", "model.layers.28.block_sparse_moe.experts.27.w2", "model.layers.28.block_sparse_moe.experts.28.w2", "model.layers.28.block_sparse_moe.experts.29.w2", "model.layers.28.block_sparse_moe.experts.30.w2", "model.layers.28.block_sparse_moe.experts.31.w2", "model.layers.28.block_sparse_moe.experts.32.w2", "model.layers.28.block_sparse_moe.experts.33.w2", "model.layers.28.block_sparse_moe.experts.34.w2", "model.layers.28.block_sparse_moe.experts.35.w2", "model.layers.28.block_sparse_moe.experts.36.w2", "model.layers.28.block_sparse_moe.experts.37.w2", "model.layers.28.block_sparse_moe.experts.38.w2", "model.layers.28.block_sparse_moe.experts.39.w2", "model.layers.28.block_sparse_moe.experts.40.w2", "model.layers.28.block_sparse_moe.experts.41.w2", "model.layers.28.block_sparse_moe.experts.42.w2", "model.layers.28.block_sparse_moe.experts.43.w2", "model.layers.28.block_sparse_moe.experts.44.w2", "model.layers.28.block_sparse_moe.experts.45.w2", "model.layers.28.block_sparse_moe.experts.46.w2", "model.layers.28.block_sparse_moe.experts.47.w2", "model.layers.28.block_sparse_moe.experts.48.w2", "model.layers.28.block_sparse_moe.experts.49.w2", "model.layers.28.block_sparse_moe.experts.50.w2", "model.layers.28.block_sparse_moe.experts.51.w2", "model.layers.28.block_sparse_moe.experts.52.w2", "model.layers.28.block_sparse_moe.experts.53.w2", "model.layers.28.block_sparse_moe.experts.54.w2", "model.layers.28.block_sparse_moe.experts.55.w2", "model.layers.28.block_sparse_moe.experts.56.w2", "model.layers.28.block_sparse_moe.experts.57.w2", "model.layers.28.block_sparse_moe.experts.58.w2", "model.layers.28.block_sparse_moe.experts.59.w2", "model.layers.28.block_sparse_moe.experts.60.w2", "model.layers.28.block_sparse_moe.experts.61.w2", "model.layers.28.block_sparse_moe.experts.62.w2", "model.layers.28.block_sparse_moe.experts.63.w2", "model.layers.28.block_sparse_moe.experts.64.w2", "model.layers.28.block_sparse_moe.experts.65.w2", "model.layers.28.block_sparse_moe.experts.66.w2", "model.layers.28.block_sparse_moe.experts.67.w2", "model.layers.28.block_sparse_moe.experts.68.w2", "model.layers.28.block_sparse_moe.experts.69.w2", "model.layers.28.block_sparse_moe.experts.70.w2", "model.layers.28.block_sparse_moe.experts.71.w2", "model.layers.28.block_sparse_moe.experts.72.w2", "model.layers.28.block_sparse_moe.experts.73.w2", "model.layers.28.block_sparse_moe.experts.74.w2", "model.layers.28.block_sparse_moe.experts.75.w2", "model.layers.28.block_sparse_moe.experts.76.w2", "model.layers.28.block_sparse_moe.experts.77.w2", "model.layers.28.block_sparse_moe.experts.78.w2", "model.layers.28.block_sparse_moe.experts.79.w2", "model.layers.28.block_sparse_moe.experts.80.w2", "model.layers.28.block_sparse_moe.experts.81.w2", "model.layers.28.block_sparse_moe.experts.82.w2", "model.layers.28.block_sparse_moe.experts.83.w2", "model.layers.28.block_sparse_moe.experts.84.w2", "model.layers.28.block_sparse_moe.experts.85.w2", "model.layers.28.block_sparse_moe.experts.86.w2", "model.layers.28.block_sparse_moe.experts.87.w2", "model.layers.28.block_sparse_moe.experts.88.w2", "model.layers.28.block_sparse_moe.experts.89.w2", "model.layers.28.block_sparse_moe.experts.90.w2", "model.layers.28.block_sparse_moe.experts.91.w2", "model.layers.28.block_sparse_moe.experts.92.w2", "model.layers.28.block_sparse_moe.experts.93.w2", "model.layers.28.block_sparse_moe.experts.94.w2", "model.layers.28.block_sparse_moe.experts.95.w2", "model.layers.28.block_sparse_moe.experts.96.w2", "model.layers.28.block_sparse_moe.experts.97.w2", "model.layers.28.block_sparse_moe.experts.98.w2", "model.layers.28.block_sparse_moe.experts.99.w2", "model.layers.28.block_sparse_moe.experts.100.w2", "model.layers.28.block_sparse_moe.experts.101.w2", "model.layers.28.block_sparse_moe.experts.102.w2", "model.layers.28.block_sparse_moe.experts.103.w2", "model.layers.28.block_sparse_moe.experts.104.w2", "model.layers.28.block_sparse_moe.experts.105.w2", "model.layers.28.block_sparse_moe.experts.106.w2", "model.layers.28.block_sparse_moe.experts.107.w2", "model.layers.28.block_sparse_moe.experts.108.w2", "model.layers.28.block_sparse_moe.experts.109.w2", "model.layers.28.block_sparse_moe.experts.110.w2", "model.layers.28.block_sparse_moe.experts.111.w2", "model.layers.28.block_sparse_moe.experts.112.w2", "model.layers.28.block_sparse_moe.experts.113.w2", "model.layers.28.block_sparse_moe.experts.114.w2", "model.layers.28.block_sparse_moe.experts.115.w2", "model.layers.28.block_sparse_moe.experts.116.w2", "model.layers.28.block_sparse_moe.experts.117.w2", "model.layers.28.block_sparse_moe.experts.118.w2", "model.layers.28.block_sparse_moe.experts.119.w2", "model.layers.28.block_sparse_moe.experts.120.w2", "model.layers.28.block_sparse_moe.experts.121.w2", "model.layers.28.block_sparse_moe.experts.122.w2", "model.layers.28.block_sparse_moe.experts.123.w2", "model.layers.28.block_sparse_moe.experts.124.w2", "model.layers.28.block_sparse_moe.experts.125.w2", "model.layers.28.block_sparse_moe.experts.126.w2", "model.layers.28.block_sparse_moe.experts.127.w2", "model.layers.28.block_sparse_moe.experts.128.w2", "model.layers.28.block_sparse_moe.experts.129.w2", "model.layers.28.block_sparse_moe.experts.130.w2", "model.layers.28.block_sparse_moe.experts.131.w2", "model.layers.28.block_sparse_moe.experts.132.w2", "model.layers.28.block_sparse_moe.experts.133.w2", "model.layers.28.block_sparse_moe.experts.134.w2", "model.layers.28.block_sparse_moe.experts.135.w2", "model.layers.28.block_sparse_moe.experts.136.w2", "model.layers.28.block_sparse_moe.experts.137.w2", "model.layers.28.block_sparse_moe.experts.138.w2", "model.layers.28.block_sparse_moe.experts.139.w2", "model.layers.28.block_sparse_moe.experts.140.w2", "model.layers.28.block_sparse_moe.experts.141.w2", "model.layers.28.block_sparse_moe.experts.142.w2", "model.layers.28.block_sparse_moe.experts.143.w2", "model.layers.28.block_sparse_moe.experts.144.w2", "model.layers.28.block_sparse_moe.experts.145.w2", "model.layers.28.block_sparse_moe.experts.146.w2", "model.layers.28.block_sparse_moe.experts.147.w2", "model.layers.28.block_sparse_moe.experts.148.w2", "model.layers.28.block_sparse_moe.experts.149.w2", "model.layers.28.block_sparse_moe.experts.150.w2", "model.layers.28.block_sparse_moe.experts.151.w2", "model.layers.28.block_sparse_moe.experts.152.w2", "model.layers.28.block_sparse_moe.experts.153.w2", "model.layers.28.block_sparse_moe.experts.154.w2", "model.layers.28.block_sparse_moe.experts.155.w2", "model.layers.28.block_sparse_moe.experts.156.w2", "model.layers.28.block_sparse_moe.experts.157.w2", "model.layers.28.block_sparse_moe.experts.158.w2", "model.layers.28.block_sparse_moe.experts.159.w2", "model.layers.28.block_sparse_moe.experts.160.w2", "model.layers.28.block_sparse_moe.experts.161.w2", "model.layers.28.block_sparse_moe.experts.162.w2", "model.layers.28.block_sparse_moe.experts.163.w2", "model.layers.28.block_sparse_moe.experts.164.w2", "model.layers.28.block_sparse_moe.experts.165.w2", "model.layers.28.block_sparse_moe.experts.166.w2", "model.layers.28.block_sparse_moe.experts.167.w2", "model.layers.28.block_sparse_moe.experts.168.w2", "model.layers.28.block_sparse_moe.experts.169.w2", "model.layers.28.block_sparse_moe.experts.170.w2", "model.layers.28.block_sparse_moe.experts.171.w2", "model.layers.28.block_sparse_moe.experts.172.w2", "model.layers.28.block_sparse_moe.experts.173.w2", "model.layers.28.block_sparse_moe.experts.174.w2", "model.layers.28.block_sparse_moe.experts.175.w2", "model.layers.28.block_sparse_moe.experts.176.w2", "model.layers.28.block_sparse_moe.experts.177.w2", "model.layers.28.block_sparse_moe.experts.178.w2", "model.layers.28.block_sparse_moe.experts.179.w2", "model.layers.28.block_sparse_moe.experts.180.w2", "model.layers.28.block_sparse_moe.experts.181.w2", "model.layers.28.block_sparse_moe.experts.182.w2", "model.layers.28.block_sparse_moe.experts.183.w2", "model.layers.28.block_sparse_moe.experts.184.w2", "model.layers.28.block_sparse_moe.experts.185.w2", "model.layers.28.block_sparse_moe.experts.186.w2", "model.layers.28.block_sparse_moe.experts.187.w2", "model.layers.28.block_sparse_moe.experts.188.w2", "model.layers.28.block_sparse_moe.experts.189.w2", "model.layers.28.block_sparse_moe.experts.190.w2", "model.layers.28.block_sparse_moe.experts.191.w2", "model.layers.28.block_sparse_moe.experts.192.w2", "model.layers.28.block_sparse_moe.experts.193.w2", "model.layers.28.block_sparse_moe.experts.194.w2", "model.layers.28.block_sparse_moe.experts.195.w2", "model.layers.28.block_sparse_moe.experts.196.w2", "model.layers.28.block_sparse_moe.experts.197.w2", "model.layers.28.block_sparse_moe.experts.198.w2", "model.layers.28.block_sparse_moe.experts.199.w2", "model.layers.28.block_sparse_moe.experts.200.w2", "model.layers.28.block_sparse_moe.experts.201.w2", "model.layers.28.block_sparse_moe.experts.202.w2", "model.layers.28.block_sparse_moe.experts.203.w2", "model.layers.28.block_sparse_moe.experts.204.w2", "model.layers.28.block_sparse_moe.experts.205.w2", "model.layers.28.block_sparse_moe.experts.206.w2", "model.layers.28.block_sparse_moe.experts.207.w2", "model.layers.28.block_sparse_moe.experts.208.w2", "model.layers.28.block_sparse_moe.experts.209.w2", "model.layers.28.block_sparse_moe.experts.210.w2", "model.layers.28.block_sparse_moe.experts.211.w2", "model.layers.28.block_sparse_moe.experts.212.w2", "model.layers.28.block_sparse_moe.experts.213.w2", "model.layers.28.block_sparse_moe.experts.214.w2", "model.layers.28.block_sparse_moe.experts.215.w2", "model.layers.28.block_sparse_moe.experts.216.w2", "model.layers.28.block_sparse_moe.experts.217.w2", "model.layers.28.block_sparse_moe.experts.218.w2", "model.layers.28.block_sparse_moe.experts.219.w2", "model.layers.28.block_sparse_moe.experts.220.w2", "model.layers.28.block_sparse_moe.experts.221.w2", "model.layers.28.block_sparse_moe.experts.222.w2", "model.layers.28.block_sparse_moe.experts.223.w2", "model.layers.28.block_sparse_moe.experts.224.w2", "model.layers.28.block_sparse_moe.experts.225.w2", "model.layers.28.block_sparse_moe.experts.226.w2", "model.layers.28.block_sparse_moe.experts.227.w2", "model.layers.28.block_sparse_moe.experts.228.w2", "model.layers.28.block_sparse_moe.experts.229.w2", "model.layers.28.block_sparse_moe.experts.230.w2", "model.layers.28.block_sparse_moe.experts.231.w2", "model.layers.28.block_sparse_moe.experts.232.w2", "model.layers.28.block_sparse_moe.experts.233.w2", "model.layers.28.block_sparse_moe.experts.234.w2", "model.layers.28.block_sparse_moe.experts.235.w2", "model.layers.28.block_sparse_moe.experts.236.w2", "model.layers.28.block_sparse_moe.experts.237.w2", "model.layers.28.block_sparse_moe.experts.238.w2", "model.layers.28.block_sparse_moe.experts.239.w2", "model.layers.28.block_sparse_moe.experts.240.w2", "model.layers.28.block_sparse_moe.experts.241.w2", "model.layers.28.block_sparse_moe.experts.242.w2", "model.layers.28.block_sparse_moe.experts.243.w2", "model.layers.28.block_sparse_moe.experts.244.w2", "model.layers.28.block_sparse_moe.experts.245.w2", "model.layers.28.block_sparse_moe.experts.246.w2", "model.layers.28.block_sparse_moe.experts.247.w2", "model.layers.28.block_sparse_moe.experts.248.w2", "model.layers.28.block_sparse_moe.experts.249.w2", "model.layers.28.block_sparse_moe.experts.250.w2", "model.layers.28.block_sparse_moe.experts.251.w2", "model.layers.28.block_sparse_moe.experts.252.w2", "model.layers.28.block_sparse_moe.experts.253.w2", "model.layers.28.block_sparse_moe.experts.254.w2", "model.layers.28.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00020221490412951348, "dbits": 1207959552 } ] }, { "idx": 145, "layers": [ "model.layers.29.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00234395954757928, "dbits": 18874368 } ] }, { "idx": 146, "layers": [ "model.layers.29.self_attn.k_proj", "model.layers.29.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0010496083647012766, "dbits": 6291456 } ] }, { "idx": 147, "layers": [ "model.layers.29.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0022556770592927905, "dbits": 18874368 } ] }, { "idx": 148, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w1", "model.layers.29.block_sparse_moe.experts.1.w1", "model.layers.29.block_sparse_moe.experts.2.w1", "model.layers.29.block_sparse_moe.experts.3.w1", "model.layers.29.block_sparse_moe.experts.4.w1", "model.layers.29.block_sparse_moe.experts.5.w1", "model.layers.29.block_sparse_moe.experts.6.w1", "model.layers.29.block_sparse_moe.experts.7.w1", "model.layers.29.block_sparse_moe.experts.8.w1", "model.layers.29.block_sparse_moe.experts.9.w1", "model.layers.29.block_sparse_moe.experts.10.w1", "model.layers.29.block_sparse_moe.experts.11.w1", "model.layers.29.block_sparse_moe.experts.12.w1", "model.layers.29.block_sparse_moe.experts.13.w1", "model.layers.29.block_sparse_moe.experts.14.w1", "model.layers.29.block_sparse_moe.experts.15.w1", "model.layers.29.block_sparse_moe.experts.16.w1", "model.layers.29.block_sparse_moe.experts.17.w1", "model.layers.29.block_sparse_moe.experts.18.w1", "model.layers.29.block_sparse_moe.experts.19.w1", "model.layers.29.block_sparse_moe.experts.20.w1", "model.layers.29.block_sparse_moe.experts.21.w1", "model.layers.29.block_sparse_moe.experts.22.w1", "model.layers.29.block_sparse_moe.experts.23.w1", "model.layers.29.block_sparse_moe.experts.24.w1", "model.layers.29.block_sparse_moe.experts.25.w1", "model.layers.29.block_sparse_moe.experts.26.w1", "model.layers.29.block_sparse_moe.experts.27.w1", "model.layers.29.block_sparse_moe.experts.28.w1", "model.layers.29.block_sparse_moe.experts.29.w1", "model.layers.29.block_sparse_moe.experts.30.w1", "model.layers.29.block_sparse_moe.experts.31.w1", "model.layers.29.block_sparse_moe.experts.32.w1", "model.layers.29.block_sparse_moe.experts.33.w1", "model.layers.29.block_sparse_moe.experts.34.w1", "model.layers.29.block_sparse_moe.experts.35.w1", "model.layers.29.block_sparse_moe.experts.36.w1", "model.layers.29.block_sparse_moe.experts.37.w1", "model.layers.29.block_sparse_moe.experts.38.w1", "model.layers.29.block_sparse_moe.experts.39.w1", "model.layers.29.block_sparse_moe.experts.40.w1", "model.layers.29.block_sparse_moe.experts.41.w1", "model.layers.29.block_sparse_moe.experts.42.w1", "model.layers.29.block_sparse_moe.experts.43.w1", "model.layers.29.block_sparse_moe.experts.44.w1", "model.layers.29.block_sparse_moe.experts.45.w1", "model.layers.29.block_sparse_moe.experts.46.w1", "model.layers.29.block_sparse_moe.experts.47.w1", "model.layers.29.block_sparse_moe.experts.48.w1", "model.layers.29.block_sparse_moe.experts.49.w1", "model.layers.29.block_sparse_moe.experts.50.w1", "model.layers.29.block_sparse_moe.experts.51.w1", "model.layers.29.block_sparse_moe.experts.52.w1", "model.layers.29.block_sparse_moe.experts.53.w1", "model.layers.29.block_sparse_moe.experts.54.w1", "model.layers.29.block_sparse_moe.experts.55.w1", "model.layers.29.block_sparse_moe.experts.56.w1", "model.layers.29.block_sparse_moe.experts.57.w1", "model.layers.29.block_sparse_moe.experts.58.w1", "model.layers.29.block_sparse_moe.experts.59.w1", "model.layers.29.block_sparse_moe.experts.60.w1", "model.layers.29.block_sparse_moe.experts.61.w1", "model.layers.29.block_sparse_moe.experts.62.w1", "model.layers.29.block_sparse_moe.experts.63.w1", "model.layers.29.block_sparse_moe.experts.64.w1", "model.layers.29.block_sparse_moe.experts.65.w1", "model.layers.29.block_sparse_moe.experts.66.w1", "model.layers.29.block_sparse_moe.experts.67.w1", "model.layers.29.block_sparse_moe.experts.68.w1", "model.layers.29.block_sparse_moe.experts.69.w1", "model.layers.29.block_sparse_moe.experts.70.w1", "model.layers.29.block_sparse_moe.experts.71.w1", "model.layers.29.block_sparse_moe.experts.72.w1", "model.layers.29.block_sparse_moe.experts.73.w1", "model.layers.29.block_sparse_moe.experts.74.w1", "model.layers.29.block_sparse_moe.experts.75.w1", "model.layers.29.block_sparse_moe.experts.76.w1", "model.layers.29.block_sparse_moe.experts.77.w1", "model.layers.29.block_sparse_moe.experts.78.w1", "model.layers.29.block_sparse_moe.experts.79.w1", "model.layers.29.block_sparse_moe.experts.80.w1", "model.layers.29.block_sparse_moe.experts.81.w1", "model.layers.29.block_sparse_moe.experts.82.w1", "model.layers.29.block_sparse_moe.experts.83.w1", "model.layers.29.block_sparse_moe.experts.84.w1", "model.layers.29.block_sparse_moe.experts.85.w1", "model.layers.29.block_sparse_moe.experts.86.w1", "model.layers.29.block_sparse_moe.experts.87.w1", "model.layers.29.block_sparse_moe.experts.88.w1", "model.layers.29.block_sparse_moe.experts.89.w1", "model.layers.29.block_sparse_moe.experts.90.w1", "model.layers.29.block_sparse_moe.experts.91.w1", "model.layers.29.block_sparse_moe.experts.92.w1", "model.layers.29.block_sparse_moe.experts.93.w1", "model.layers.29.block_sparse_moe.experts.94.w1", "model.layers.29.block_sparse_moe.experts.95.w1", "model.layers.29.block_sparse_moe.experts.96.w1", "model.layers.29.block_sparse_moe.experts.97.w1", "model.layers.29.block_sparse_moe.experts.98.w1", "model.layers.29.block_sparse_moe.experts.99.w1", "model.layers.29.block_sparse_moe.experts.100.w1", "model.layers.29.block_sparse_moe.experts.101.w1", "model.layers.29.block_sparse_moe.experts.102.w1", "model.layers.29.block_sparse_moe.experts.103.w1", "model.layers.29.block_sparse_moe.experts.104.w1", "model.layers.29.block_sparse_moe.experts.105.w1", "model.layers.29.block_sparse_moe.experts.106.w1", "model.layers.29.block_sparse_moe.experts.107.w1", "model.layers.29.block_sparse_moe.experts.108.w1", "model.layers.29.block_sparse_moe.experts.109.w1", "model.layers.29.block_sparse_moe.experts.110.w1", "model.layers.29.block_sparse_moe.experts.111.w1", "model.layers.29.block_sparse_moe.experts.112.w1", "model.layers.29.block_sparse_moe.experts.113.w1", "model.layers.29.block_sparse_moe.experts.114.w1", "model.layers.29.block_sparse_moe.experts.115.w1", "model.layers.29.block_sparse_moe.experts.116.w1", "model.layers.29.block_sparse_moe.experts.117.w1", "model.layers.29.block_sparse_moe.experts.118.w1", "model.layers.29.block_sparse_moe.experts.119.w1", "model.layers.29.block_sparse_moe.experts.120.w1", "model.layers.29.block_sparse_moe.experts.121.w1", "model.layers.29.block_sparse_moe.experts.122.w1", "model.layers.29.block_sparse_moe.experts.123.w1", "model.layers.29.block_sparse_moe.experts.124.w1", "model.layers.29.block_sparse_moe.experts.125.w1", "model.layers.29.block_sparse_moe.experts.126.w1", "model.layers.29.block_sparse_moe.experts.127.w1", "model.layers.29.block_sparse_moe.experts.128.w1", "model.layers.29.block_sparse_moe.experts.129.w1", "model.layers.29.block_sparse_moe.experts.130.w1", "model.layers.29.block_sparse_moe.experts.131.w1", "model.layers.29.block_sparse_moe.experts.132.w1", "model.layers.29.block_sparse_moe.experts.133.w1", "model.layers.29.block_sparse_moe.experts.134.w1", "model.layers.29.block_sparse_moe.experts.135.w1", "model.layers.29.block_sparse_moe.experts.136.w1", "model.layers.29.block_sparse_moe.experts.137.w1", "model.layers.29.block_sparse_moe.experts.138.w1", "model.layers.29.block_sparse_moe.experts.139.w1", "model.layers.29.block_sparse_moe.experts.140.w1", "model.layers.29.block_sparse_moe.experts.141.w1", "model.layers.29.block_sparse_moe.experts.142.w1", "model.layers.29.block_sparse_moe.experts.143.w1", "model.layers.29.block_sparse_moe.experts.144.w1", "model.layers.29.block_sparse_moe.experts.145.w1", "model.layers.29.block_sparse_moe.experts.146.w1", "model.layers.29.block_sparse_moe.experts.147.w1", "model.layers.29.block_sparse_moe.experts.148.w1", "model.layers.29.block_sparse_moe.experts.149.w1", "model.layers.29.block_sparse_moe.experts.150.w1", "model.layers.29.block_sparse_moe.experts.151.w1", "model.layers.29.block_sparse_moe.experts.152.w1", "model.layers.29.block_sparse_moe.experts.153.w1", "model.layers.29.block_sparse_moe.experts.154.w1", "model.layers.29.block_sparse_moe.experts.155.w1", "model.layers.29.block_sparse_moe.experts.156.w1", "model.layers.29.block_sparse_moe.experts.157.w1", "model.layers.29.block_sparse_moe.experts.158.w1", "model.layers.29.block_sparse_moe.experts.159.w1", "model.layers.29.block_sparse_moe.experts.160.w1", "model.layers.29.block_sparse_moe.experts.161.w1", "model.layers.29.block_sparse_moe.experts.162.w1", "model.layers.29.block_sparse_moe.experts.163.w1", "model.layers.29.block_sparse_moe.experts.164.w1", "model.layers.29.block_sparse_moe.experts.165.w1", "model.layers.29.block_sparse_moe.experts.166.w1", "model.layers.29.block_sparse_moe.experts.167.w1", "model.layers.29.block_sparse_moe.experts.168.w1", "model.layers.29.block_sparse_moe.experts.169.w1", "model.layers.29.block_sparse_moe.experts.170.w1", "model.layers.29.block_sparse_moe.experts.171.w1", "model.layers.29.block_sparse_moe.experts.172.w1", "model.layers.29.block_sparse_moe.experts.173.w1", "model.layers.29.block_sparse_moe.experts.174.w1", "model.layers.29.block_sparse_moe.experts.175.w1", "model.layers.29.block_sparse_moe.experts.176.w1", "model.layers.29.block_sparse_moe.experts.177.w1", "model.layers.29.block_sparse_moe.experts.178.w1", "model.layers.29.block_sparse_moe.experts.179.w1", "model.layers.29.block_sparse_moe.experts.180.w1", "model.layers.29.block_sparse_moe.experts.181.w1", "model.layers.29.block_sparse_moe.experts.182.w1", "model.layers.29.block_sparse_moe.experts.183.w1", "model.layers.29.block_sparse_moe.experts.184.w1", "model.layers.29.block_sparse_moe.experts.185.w1", "model.layers.29.block_sparse_moe.experts.186.w1", "model.layers.29.block_sparse_moe.experts.187.w1", "model.layers.29.block_sparse_moe.experts.188.w1", "model.layers.29.block_sparse_moe.experts.189.w1", "model.layers.29.block_sparse_moe.experts.190.w1", "model.layers.29.block_sparse_moe.experts.191.w1", "model.layers.29.block_sparse_moe.experts.192.w1", "model.layers.29.block_sparse_moe.experts.193.w1", "model.layers.29.block_sparse_moe.experts.194.w1", "model.layers.29.block_sparse_moe.experts.195.w1", "model.layers.29.block_sparse_moe.experts.196.w1", "model.layers.29.block_sparse_moe.experts.197.w1", "model.layers.29.block_sparse_moe.experts.198.w1", "model.layers.29.block_sparse_moe.experts.199.w1", "model.layers.29.block_sparse_moe.experts.200.w1", "model.layers.29.block_sparse_moe.experts.201.w1", "model.layers.29.block_sparse_moe.experts.202.w1", "model.layers.29.block_sparse_moe.experts.203.w1", "model.layers.29.block_sparse_moe.experts.204.w1", "model.layers.29.block_sparse_moe.experts.205.w1", "model.layers.29.block_sparse_moe.experts.206.w1", "model.layers.29.block_sparse_moe.experts.207.w1", "model.layers.29.block_sparse_moe.experts.208.w1", "model.layers.29.block_sparse_moe.experts.209.w1", "model.layers.29.block_sparse_moe.experts.210.w1", "model.layers.29.block_sparse_moe.experts.211.w1", "model.layers.29.block_sparse_moe.experts.212.w1", "model.layers.29.block_sparse_moe.experts.213.w1", "model.layers.29.block_sparse_moe.experts.214.w1", "model.layers.29.block_sparse_moe.experts.215.w1", "model.layers.29.block_sparse_moe.experts.216.w1", "model.layers.29.block_sparse_moe.experts.217.w1", "model.layers.29.block_sparse_moe.experts.218.w1", "model.layers.29.block_sparse_moe.experts.219.w1", "model.layers.29.block_sparse_moe.experts.220.w1", "model.layers.29.block_sparse_moe.experts.221.w1", "model.layers.29.block_sparse_moe.experts.222.w1", "model.layers.29.block_sparse_moe.experts.223.w1", "model.layers.29.block_sparse_moe.experts.224.w1", "model.layers.29.block_sparse_moe.experts.225.w1", "model.layers.29.block_sparse_moe.experts.226.w1", "model.layers.29.block_sparse_moe.experts.227.w1", "model.layers.29.block_sparse_moe.experts.228.w1", "model.layers.29.block_sparse_moe.experts.229.w1", "model.layers.29.block_sparse_moe.experts.230.w1", "model.layers.29.block_sparse_moe.experts.231.w1", "model.layers.29.block_sparse_moe.experts.232.w1", "model.layers.29.block_sparse_moe.experts.233.w1", "model.layers.29.block_sparse_moe.experts.234.w1", "model.layers.29.block_sparse_moe.experts.235.w1", "model.layers.29.block_sparse_moe.experts.236.w1", "model.layers.29.block_sparse_moe.experts.237.w1", "model.layers.29.block_sparse_moe.experts.238.w1", "model.layers.29.block_sparse_moe.experts.239.w1", "model.layers.29.block_sparse_moe.experts.240.w1", "model.layers.29.block_sparse_moe.experts.241.w1", "model.layers.29.block_sparse_moe.experts.242.w1", "model.layers.29.block_sparse_moe.experts.243.w1", "model.layers.29.block_sparse_moe.experts.244.w1", "model.layers.29.block_sparse_moe.experts.245.w1", "model.layers.29.block_sparse_moe.experts.246.w1", "model.layers.29.block_sparse_moe.experts.247.w1", "model.layers.29.block_sparse_moe.experts.248.w1", "model.layers.29.block_sparse_moe.experts.249.w1", "model.layers.29.block_sparse_moe.experts.250.w1", "model.layers.29.block_sparse_moe.experts.251.w1", "model.layers.29.block_sparse_moe.experts.252.w1", "model.layers.29.block_sparse_moe.experts.253.w1", "model.layers.29.block_sparse_moe.experts.254.w1", "model.layers.29.block_sparse_moe.experts.255.w1", "model.layers.29.block_sparse_moe.experts.0.w3", "model.layers.29.block_sparse_moe.experts.1.w3", "model.layers.29.block_sparse_moe.experts.2.w3", "model.layers.29.block_sparse_moe.experts.3.w3", "model.layers.29.block_sparse_moe.experts.4.w3", "model.layers.29.block_sparse_moe.experts.5.w3", "model.layers.29.block_sparse_moe.experts.6.w3", "model.layers.29.block_sparse_moe.experts.7.w3", "model.layers.29.block_sparse_moe.experts.8.w3", "model.layers.29.block_sparse_moe.experts.9.w3", "model.layers.29.block_sparse_moe.experts.10.w3", "model.layers.29.block_sparse_moe.experts.11.w3", "model.layers.29.block_sparse_moe.experts.12.w3", "model.layers.29.block_sparse_moe.experts.13.w3", "model.layers.29.block_sparse_moe.experts.14.w3", "model.layers.29.block_sparse_moe.experts.15.w3", "model.layers.29.block_sparse_moe.experts.16.w3", "model.layers.29.block_sparse_moe.experts.17.w3", "model.layers.29.block_sparse_moe.experts.18.w3", "model.layers.29.block_sparse_moe.experts.19.w3", "model.layers.29.block_sparse_moe.experts.20.w3", "model.layers.29.block_sparse_moe.experts.21.w3", "model.layers.29.block_sparse_moe.experts.22.w3", "model.layers.29.block_sparse_moe.experts.23.w3", "model.layers.29.block_sparse_moe.experts.24.w3", "model.layers.29.block_sparse_moe.experts.25.w3", "model.layers.29.block_sparse_moe.experts.26.w3", "model.layers.29.block_sparse_moe.experts.27.w3", "model.layers.29.block_sparse_moe.experts.28.w3", "model.layers.29.block_sparse_moe.experts.29.w3", "model.layers.29.block_sparse_moe.experts.30.w3", "model.layers.29.block_sparse_moe.experts.31.w3", "model.layers.29.block_sparse_moe.experts.32.w3", "model.layers.29.block_sparse_moe.experts.33.w3", "model.layers.29.block_sparse_moe.experts.34.w3", "model.layers.29.block_sparse_moe.experts.35.w3", "model.layers.29.block_sparse_moe.experts.36.w3", "model.layers.29.block_sparse_moe.experts.37.w3", "model.layers.29.block_sparse_moe.experts.38.w3", "model.layers.29.block_sparse_moe.experts.39.w3", "model.layers.29.block_sparse_moe.experts.40.w3", "model.layers.29.block_sparse_moe.experts.41.w3", "model.layers.29.block_sparse_moe.experts.42.w3", "model.layers.29.block_sparse_moe.experts.43.w3", "model.layers.29.block_sparse_moe.experts.44.w3", "model.layers.29.block_sparse_moe.experts.45.w3", "model.layers.29.block_sparse_moe.experts.46.w3", "model.layers.29.block_sparse_moe.experts.47.w3", "model.layers.29.block_sparse_moe.experts.48.w3", "model.layers.29.block_sparse_moe.experts.49.w3", "model.layers.29.block_sparse_moe.experts.50.w3", "model.layers.29.block_sparse_moe.experts.51.w3", "model.layers.29.block_sparse_moe.experts.52.w3", "model.layers.29.block_sparse_moe.experts.53.w3", "model.layers.29.block_sparse_moe.experts.54.w3", "model.layers.29.block_sparse_moe.experts.55.w3", "model.layers.29.block_sparse_moe.experts.56.w3", "model.layers.29.block_sparse_moe.experts.57.w3", "model.layers.29.block_sparse_moe.experts.58.w3", "model.layers.29.block_sparse_moe.experts.59.w3", "model.layers.29.block_sparse_moe.experts.60.w3", "model.layers.29.block_sparse_moe.experts.61.w3", "model.layers.29.block_sparse_moe.experts.62.w3", "model.layers.29.block_sparse_moe.experts.63.w3", "model.layers.29.block_sparse_moe.experts.64.w3", "model.layers.29.block_sparse_moe.experts.65.w3", "model.layers.29.block_sparse_moe.experts.66.w3", "model.layers.29.block_sparse_moe.experts.67.w3", "model.layers.29.block_sparse_moe.experts.68.w3", "model.layers.29.block_sparse_moe.experts.69.w3", "model.layers.29.block_sparse_moe.experts.70.w3", "model.layers.29.block_sparse_moe.experts.71.w3", "model.layers.29.block_sparse_moe.experts.72.w3", "model.layers.29.block_sparse_moe.experts.73.w3", "model.layers.29.block_sparse_moe.experts.74.w3", "model.layers.29.block_sparse_moe.experts.75.w3", "model.layers.29.block_sparse_moe.experts.76.w3", "model.layers.29.block_sparse_moe.experts.77.w3", "model.layers.29.block_sparse_moe.experts.78.w3", "model.layers.29.block_sparse_moe.experts.79.w3", "model.layers.29.block_sparse_moe.experts.80.w3", "model.layers.29.block_sparse_moe.experts.81.w3", "model.layers.29.block_sparse_moe.experts.82.w3", "model.layers.29.block_sparse_moe.experts.83.w3", "model.layers.29.block_sparse_moe.experts.84.w3", "model.layers.29.block_sparse_moe.experts.85.w3", "model.layers.29.block_sparse_moe.experts.86.w3", "model.layers.29.block_sparse_moe.experts.87.w3", "model.layers.29.block_sparse_moe.experts.88.w3", "model.layers.29.block_sparse_moe.experts.89.w3", "model.layers.29.block_sparse_moe.experts.90.w3", "model.layers.29.block_sparse_moe.experts.91.w3", "model.layers.29.block_sparse_moe.experts.92.w3", "model.layers.29.block_sparse_moe.experts.93.w3", "model.layers.29.block_sparse_moe.experts.94.w3", "model.layers.29.block_sparse_moe.experts.95.w3", "model.layers.29.block_sparse_moe.experts.96.w3", "model.layers.29.block_sparse_moe.experts.97.w3", "model.layers.29.block_sparse_moe.experts.98.w3", "model.layers.29.block_sparse_moe.experts.99.w3", "model.layers.29.block_sparse_moe.experts.100.w3", "model.layers.29.block_sparse_moe.experts.101.w3", "model.layers.29.block_sparse_moe.experts.102.w3", "model.layers.29.block_sparse_moe.experts.103.w3", "model.layers.29.block_sparse_moe.experts.104.w3", "model.layers.29.block_sparse_moe.experts.105.w3", "model.layers.29.block_sparse_moe.experts.106.w3", "model.layers.29.block_sparse_moe.experts.107.w3", "model.layers.29.block_sparse_moe.experts.108.w3", "model.layers.29.block_sparse_moe.experts.109.w3", "model.layers.29.block_sparse_moe.experts.110.w3", "model.layers.29.block_sparse_moe.experts.111.w3", "model.layers.29.block_sparse_moe.experts.112.w3", "model.layers.29.block_sparse_moe.experts.113.w3", "model.layers.29.block_sparse_moe.experts.114.w3", "model.layers.29.block_sparse_moe.experts.115.w3", "model.layers.29.block_sparse_moe.experts.116.w3", "model.layers.29.block_sparse_moe.experts.117.w3", "model.layers.29.block_sparse_moe.experts.118.w3", "model.layers.29.block_sparse_moe.experts.119.w3", "model.layers.29.block_sparse_moe.experts.120.w3", "model.layers.29.block_sparse_moe.experts.121.w3", "model.layers.29.block_sparse_moe.experts.122.w3", "model.layers.29.block_sparse_moe.experts.123.w3", "model.layers.29.block_sparse_moe.experts.124.w3", "model.layers.29.block_sparse_moe.experts.125.w3", "model.layers.29.block_sparse_moe.experts.126.w3", "model.layers.29.block_sparse_moe.experts.127.w3", "model.layers.29.block_sparse_moe.experts.128.w3", "model.layers.29.block_sparse_moe.experts.129.w3", "model.layers.29.block_sparse_moe.experts.130.w3", "model.layers.29.block_sparse_moe.experts.131.w3", "model.layers.29.block_sparse_moe.experts.132.w3", "model.layers.29.block_sparse_moe.experts.133.w3", "model.layers.29.block_sparse_moe.experts.134.w3", "model.layers.29.block_sparse_moe.experts.135.w3", "model.layers.29.block_sparse_moe.experts.136.w3", "model.layers.29.block_sparse_moe.experts.137.w3", "model.layers.29.block_sparse_moe.experts.138.w3", "model.layers.29.block_sparse_moe.experts.139.w3", "model.layers.29.block_sparse_moe.experts.140.w3", "model.layers.29.block_sparse_moe.experts.141.w3", "model.layers.29.block_sparse_moe.experts.142.w3", "model.layers.29.block_sparse_moe.experts.143.w3", "model.layers.29.block_sparse_moe.experts.144.w3", "model.layers.29.block_sparse_moe.experts.145.w3", "model.layers.29.block_sparse_moe.experts.146.w3", "model.layers.29.block_sparse_moe.experts.147.w3", "model.layers.29.block_sparse_moe.experts.148.w3", "model.layers.29.block_sparse_moe.experts.149.w3", "model.layers.29.block_sparse_moe.experts.150.w3", "model.layers.29.block_sparse_moe.experts.151.w3", "model.layers.29.block_sparse_moe.experts.152.w3", "model.layers.29.block_sparse_moe.experts.153.w3", "model.layers.29.block_sparse_moe.experts.154.w3", "model.layers.29.block_sparse_moe.experts.155.w3", "model.layers.29.block_sparse_moe.experts.156.w3", "model.layers.29.block_sparse_moe.experts.157.w3", "model.layers.29.block_sparse_moe.experts.158.w3", "model.layers.29.block_sparse_moe.experts.159.w3", "model.layers.29.block_sparse_moe.experts.160.w3", "model.layers.29.block_sparse_moe.experts.161.w3", "model.layers.29.block_sparse_moe.experts.162.w3", "model.layers.29.block_sparse_moe.experts.163.w3", "model.layers.29.block_sparse_moe.experts.164.w3", "model.layers.29.block_sparse_moe.experts.165.w3", "model.layers.29.block_sparse_moe.experts.166.w3", "model.layers.29.block_sparse_moe.experts.167.w3", "model.layers.29.block_sparse_moe.experts.168.w3", "model.layers.29.block_sparse_moe.experts.169.w3", "model.layers.29.block_sparse_moe.experts.170.w3", "model.layers.29.block_sparse_moe.experts.171.w3", "model.layers.29.block_sparse_moe.experts.172.w3", "model.layers.29.block_sparse_moe.experts.173.w3", "model.layers.29.block_sparse_moe.experts.174.w3", "model.layers.29.block_sparse_moe.experts.175.w3", "model.layers.29.block_sparse_moe.experts.176.w3", "model.layers.29.block_sparse_moe.experts.177.w3", "model.layers.29.block_sparse_moe.experts.178.w3", "model.layers.29.block_sparse_moe.experts.179.w3", "model.layers.29.block_sparse_moe.experts.180.w3", "model.layers.29.block_sparse_moe.experts.181.w3", "model.layers.29.block_sparse_moe.experts.182.w3", "model.layers.29.block_sparse_moe.experts.183.w3", "model.layers.29.block_sparse_moe.experts.184.w3", "model.layers.29.block_sparse_moe.experts.185.w3", "model.layers.29.block_sparse_moe.experts.186.w3", "model.layers.29.block_sparse_moe.experts.187.w3", "model.layers.29.block_sparse_moe.experts.188.w3", "model.layers.29.block_sparse_moe.experts.189.w3", "model.layers.29.block_sparse_moe.experts.190.w3", "model.layers.29.block_sparse_moe.experts.191.w3", "model.layers.29.block_sparse_moe.experts.192.w3", "model.layers.29.block_sparse_moe.experts.193.w3", "model.layers.29.block_sparse_moe.experts.194.w3", "model.layers.29.block_sparse_moe.experts.195.w3", "model.layers.29.block_sparse_moe.experts.196.w3", "model.layers.29.block_sparse_moe.experts.197.w3", "model.layers.29.block_sparse_moe.experts.198.w3", "model.layers.29.block_sparse_moe.experts.199.w3", "model.layers.29.block_sparse_moe.experts.200.w3", "model.layers.29.block_sparse_moe.experts.201.w3", "model.layers.29.block_sparse_moe.experts.202.w3", "model.layers.29.block_sparse_moe.experts.203.w3", "model.layers.29.block_sparse_moe.experts.204.w3", "model.layers.29.block_sparse_moe.experts.205.w3", "model.layers.29.block_sparse_moe.experts.206.w3", "model.layers.29.block_sparse_moe.experts.207.w3", "model.layers.29.block_sparse_moe.experts.208.w3", "model.layers.29.block_sparse_moe.experts.209.w3", "model.layers.29.block_sparse_moe.experts.210.w3", "model.layers.29.block_sparse_moe.experts.211.w3", "model.layers.29.block_sparse_moe.experts.212.w3", "model.layers.29.block_sparse_moe.experts.213.w3", "model.layers.29.block_sparse_moe.experts.214.w3", "model.layers.29.block_sparse_moe.experts.215.w3", "model.layers.29.block_sparse_moe.experts.216.w3", "model.layers.29.block_sparse_moe.experts.217.w3", "model.layers.29.block_sparse_moe.experts.218.w3", "model.layers.29.block_sparse_moe.experts.219.w3", "model.layers.29.block_sparse_moe.experts.220.w3", "model.layers.29.block_sparse_moe.experts.221.w3", "model.layers.29.block_sparse_moe.experts.222.w3", "model.layers.29.block_sparse_moe.experts.223.w3", "model.layers.29.block_sparse_moe.experts.224.w3", "model.layers.29.block_sparse_moe.experts.225.w3", "model.layers.29.block_sparse_moe.experts.226.w3", "model.layers.29.block_sparse_moe.experts.227.w3", "model.layers.29.block_sparse_moe.experts.228.w3", "model.layers.29.block_sparse_moe.experts.229.w3", "model.layers.29.block_sparse_moe.experts.230.w3", "model.layers.29.block_sparse_moe.experts.231.w3", "model.layers.29.block_sparse_moe.experts.232.w3", "model.layers.29.block_sparse_moe.experts.233.w3", "model.layers.29.block_sparse_moe.experts.234.w3", "model.layers.29.block_sparse_moe.experts.235.w3", "model.layers.29.block_sparse_moe.experts.236.w3", "model.layers.29.block_sparse_moe.experts.237.w3", "model.layers.29.block_sparse_moe.experts.238.w3", "model.layers.29.block_sparse_moe.experts.239.w3", "model.layers.29.block_sparse_moe.experts.240.w3", "model.layers.29.block_sparse_moe.experts.241.w3", "model.layers.29.block_sparse_moe.experts.242.w3", "model.layers.29.block_sparse_moe.experts.243.w3", "model.layers.29.block_sparse_moe.experts.244.w3", "model.layers.29.block_sparse_moe.experts.245.w3", "model.layers.29.block_sparse_moe.experts.246.w3", "model.layers.29.block_sparse_moe.experts.247.w3", "model.layers.29.block_sparse_moe.experts.248.w3", "model.layers.29.block_sparse_moe.experts.249.w3", "model.layers.29.block_sparse_moe.experts.250.w3", "model.layers.29.block_sparse_moe.experts.251.w3", "model.layers.29.block_sparse_moe.experts.252.w3", "model.layers.29.block_sparse_moe.experts.253.w3", "model.layers.29.block_sparse_moe.experts.254.w3", "model.layers.29.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0003663472831249154, "dbits": 2415919104 } ] }, { "idx": 149, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w2", "model.layers.29.block_sparse_moe.experts.1.w2", "model.layers.29.block_sparse_moe.experts.2.w2", "model.layers.29.block_sparse_moe.experts.3.w2", "model.layers.29.block_sparse_moe.experts.4.w2", "model.layers.29.block_sparse_moe.experts.5.w2", "model.layers.29.block_sparse_moe.experts.6.w2", "model.layers.29.block_sparse_moe.experts.7.w2", "model.layers.29.block_sparse_moe.experts.8.w2", "model.layers.29.block_sparse_moe.experts.9.w2", "model.layers.29.block_sparse_moe.experts.10.w2", "model.layers.29.block_sparse_moe.experts.11.w2", "model.layers.29.block_sparse_moe.experts.12.w2", "model.layers.29.block_sparse_moe.experts.13.w2", "model.layers.29.block_sparse_moe.experts.14.w2", "model.layers.29.block_sparse_moe.experts.15.w2", "model.layers.29.block_sparse_moe.experts.16.w2", "model.layers.29.block_sparse_moe.experts.17.w2", "model.layers.29.block_sparse_moe.experts.18.w2", "model.layers.29.block_sparse_moe.experts.19.w2", "model.layers.29.block_sparse_moe.experts.20.w2", "model.layers.29.block_sparse_moe.experts.21.w2", "model.layers.29.block_sparse_moe.experts.22.w2", "model.layers.29.block_sparse_moe.experts.23.w2", "model.layers.29.block_sparse_moe.experts.24.w2", "model.layers.29.block_sparse_moe.experts.25.w2", "model.layers.29.block_sparse_moe.experts.26.w2", "model.layers.29.block_sparse_moe.experts.27.w2", "model.layers.29.block_sparse_moe.experts.28.w2", "model.layers.29.block_sparse_moe.experts.29.w2", "model.layers.29.block_sparse_moe.experts.30.w2", "model.layers.29.block_sparse_moe.experts.31.w2", "model.layers.29.block_sparse_moe.experts.32.w2", "model.layers.29.block_sparse_moe.experts.33.w2", "model.layers.29.block_sparse_moe.experts.34.w2", "model.layers.29.block_sparse_moe.experts.35.w2", "model.layers.29.block_sparse_moe.experts.36.w2", "model.layers.29.block_sparse_moe.experts.37.w2", "model.layers.29.block_sparse_moe.experts.38.w2", "model.layers.29.block_sparse_moe.experts.39.w2", "model.layers.29.block_sparse_moe.experts.40.w2", "model.layers.29.block_sparse_moe.experts.41.w2", "model.layers.29.block_sparse_moe.experts.42.w2", "model.layers.29.block_sparse_moe.experts.43.w2", "model.layers.29.block_sparse_moe.experts.44.w2", "model.layers.29.block_sparse_moe.experts.45.w2", "model.layers.29.block_sparse_moe.experts.46.w2", "model.layers.29.block_sparse_moe.experts.47.w2", "model.layers.29.block_sparse_moe.experts.48.w2", "model.layers.29.block_sparse_moe.experts.49.w2", "model.layers.29.block_sparse_moe.experts.50.w2", "model.layers.29.block_sparse_moe.experts.51.w2", "model.layers.29.block_sparse_moe.experts.52.w2", "model.layers.29.block_sparse_moe.experts.53.w2", "model.layers.29.block_sparse_moe.experts.54.w2", "model.layers.29.block_sparse_moe.experts.55.w2", "model.layers.29.block_sparse_moe.experts.56.w2", "model.layers.29.block_sparse_moe.experts.57.w2", "model.layers.29.block_sparse_moe.experts.58.w2", "model.layers.29.block_sparse_moe.experts.59.w2", "model.layers.29.block_sparse_moe.experts.60.w2", "model.layers.29.block_sparse_moe.experts.61.w2", "model.layers.29.block_sparse_moe.experts.62.w2", "model.layers.29.block_sparse_moe.experts.63.w2", "model.layers.29.block_sparse_moe.experts.64.w2", "model.layers.29.block_sparse_moe.experts.65.w2", "model.layers.29.block_sparse_moe.experts.66.w2", "model.layers.29.block_sparse_moe.experts.67.w2", "model.layers.29.block_sparse_moe.experts.68.w2", "model.layers.29.block_sparse_moe.experts.69.w2", "model.layers.29.block_sparse_moe.experts.70.w2", "model.layers.29.block_sparse_moe.experts.71.w2", "model.layers.29.block_sparse_moe.experts.72.w2", "model.layers.29.block_sparse_moe.experts.73.w2", "model.layers.29.block_sparse_moe.experts.74.w2", "model.layers.29.block_sparse_moe.experts.75.w2", "model.layers.29.block_sparse_moe.experts.76.w2", "model.layers.29.block_sparse_moe.experts.77.w2", "model.layers.29.block_sparse_moe.experts.78.w2", "model.layers.29.block_sparse_moe.experts.79.w2", "model.layers.29.block_sparse_moe.experts.80.w2", "model.layers.29.block_sparse_moe.experts.81.w2", "model.layers.29.block_sparse_moe.experts.82.w2", "model.layers.29.block_sparse_moe.experts.83.w2", "model.layers.29.block_sparse_moe.experts.84.w2", "model.layers.29.block_sparse_moe.experts.85.w2", "model.layers.29.block_sparse_moe.experts.86.w2", "model.layers.29.block_sparse_moe.experts.87.w2", "model.layers.29.block_sparse_moe.experts.88.w2", "model.layers.29.block_sparse_moe.experts.89.w2", "model.layers.29.block_sparse_moe.experts.90.w2", "model.layers.29.block_sparse_moe.experts.91.w2", "model.layers.29.block_sparse_moe.experts.92.w2", "model.layers.29.block_sparse_moe.experts.93.w2", "model.layers.29.block_sparse_moe.experts.94.w2", "model.layers.29.block_sparse_moe.experts.95.w2", "model.layers.29.block_sparse_moe.experts.96.w2", "model.layers.29.block_sparse_moe.experts.97.w2", "model.layers.29.block_sparse_moe.experts.98.w2", "model.layers.29.block_sparse_moe.experts.99.w2", "model.layers.29.block_sparse_moe.experts.100.w2", "model.layers.29.block_sparse_moe.experts.101.w2", "model.layers.29.block_sparse_moe.experts.102.w2", "model.layers.29.block_sparse_moe.experts.103.w2", "model.layers.29.block_sparse_moe.experts.104.w2", "model.layers.29.block_sparse_moe.experts.105.w2", "model.layers.29.block_sparse_moe.experts.106.w2", "model.layers.29.block_sparse_moe.experts.107.w2", "model.layers.29.block_sparse_moe.experts.108.w2", "model.layers.29.block_sparse_moe.experts.109.w2", "model.layers.29.block_sparse_moe.experts.110.w2", "model.layers.29.block_sparse_moe.experts.111.w2", "model.layers.29.block_sparse_moe.experts.112.w2", "model.layers.29.block_sparse_moe.experts.113.w2", "model.layers.29.block_sparse_moe.experts.114.w2", "model.layers.29.block_sparse_moe.experts.115.w2", "model.layers.29.block_sparse_moe.experts.116.w2", "model.layers.29.block_sparse_moe.experts.117.w2", "model.layers.29.block_sparse_moe.experts.118.w2", "model.layers.29.block_sparse_moe.experts.119.w2", "model.layers.29.block_sparse_moe.experts.120.w2", "model.layers.29.block_sparse_moe.experts.121.w2", "model.layers.29.block_sparse_moe.experts.122.w2", "model.layers.29.block_sparse_moe.experts.123.w2", "model.layers.29.block_sparse_moe.experts.124.w2", "model.layers.29.block_sparse_moe.experts.125.w2", "model.layers.29.block_sparse_moe.experts.126.w2", "model.layers.29.block_sparse_moe.experts.127.w2", "model.layers.29.block_sparse_moe.experts.128.w2", "model.layers.29.block_sparse_moe.experts.129.w2", "model.layers.29.block_sparse_moe.experts.130.w2", "model.layers.29.block_sparse_moe.experts.131.w2", "model.layers.29.block_sparse_moe.experts.132.w2", "model.layers.29.block_sparse_moe.experts.133.w2", "model.layers.29.block_sparse_moe.experts.134.w2", "model.layers.29.block_sparse_moe.experts.135.w2", "model.layers.29.block_sparse_moe.experts.136.w2", "model.layers.29.block_sparse_moe.experts.137.w2", "model.layers.29.block_sparse_moe.experts.138.w2", "model.layers.29.block_sparse_moe.experts.139.w2", "model.layers.29.block_sparse_moe.experts.140.w2", "model.layers.29.block_sparse_moe.experts.141.w2", "model.layers.29.block_sparse_moe.experts.142.w2", "model.layers.29.block_sparse_moe.experts.143.w2", "model.layers.29.block_sparse_moe.experts.144.w2", "model.layers.29.block_sparse_moe.experts.145.w2", "model.layers.29.block_sparse_moe.experts.146.w2", "model.layers.29.block_sparse_moe.experts.147.w2", "model.layers.29.block_sparse_moe.experts.148.w2", "model.layers.29.block_sparse_moe.experts.149.w2", "model.layers.29.block_sparse_moe.experts.150.w2", "model.layers.29.block_sparse_moe.experts.151.w2", "model.layers.29.block_sparse_moe.experts.152.w2", "model.layers.29.block_sparse_moe.experts.153.w2", "model.layers.29.block_sparse_moe.experts.154.w2", "model.layers.29.block_sparse_moe.experts.155.w2", "model.layers.29.block_sparse_moe.experts.156.w2", "model.layers.29.block_sparse_moe.experts.157.w2", "model.layers.29.block_sparse_moe.experts.158.w2", "model.layers.29.block_sparse_moe.experts.159.w2", "model.layers.29.block_sparse_moe.experts.160.w2", "model.layers.29.block_sparse_moe.experts.161.w2", "model.layers.29.block_sparse_moe.experts.162.w2", "model.layers.29.block_sparse_moe.experts.163.w2", "model.layers.29.block_sparse_moe.experts.164.w2", "model.layers.29.block_sparse_moe.experts.165.w2", "model.layers.29.block_sparse_moe.experts.166.w2", "model.layers.29.block_sparse_moe.experts.167.w2", "model.layers.29.block_sparse_moe.experts.168.w2", "model.layers.29.block_sparse_moe.experts.169.w2", "model.layers.29.block_sparse_moe.experts.170.w2", "model.layers.29.block_sparse_moe.experts.171.w2", "model.layers.29.block_sparse_moe.experts.172.w2", "model.layers.29.block_sparse_moe.experts.173.w2", "model.layers.29.block_sparse_moe.experts.174.w2", "model.layers.29.block_sparse_moe.experts.175.w2", "model.layers.29.block_sparse_moe.experts.176.w2", "model.layers.29.block_sparse_moe.experts.177.w2", "model.layers.29.block_sparse_moe.experts.178.w2", "model.layers.29.block_sparse_moe.experts.179.w2", "model.layers.29.block_sparse_moe.experts.180.w2", "model.layers.29.block_sparse_moe.experts.181.w2", "model.layers.29.block_sparse_moe.experts.182.w2", "model.layers.29.block_sparse_moe.experts.183.w2", "model.layers.29.block_sparse_moe.experts.184.w2", "model.layers.29.block_sparse_moe.experts.185.w2", "model.layers.29.block_sparse_moe.experts.186.w2", "model.layers.29.block_sparse_moe.experts.187.w2", "model.layers.29.block_sparse_moe.experts.188.w2", "model.layers.29.block_sparse_moe.experts.189.w2", "model.layers.29.block_sparse_moe.experts.190.w2", "model.layers.29.block_sparse_moe.experts.191.w2", "model.layers.29.block_sparse_moe.experts.192.w2", "model.layers.29.block_sparse_moe.experts.193.w2", "model.layers.29.block_sparse_moe.experts.194.w2", "model.layers.29.block_sparse_moe.experts.195.w2", "model.layers.29.block_sparse_moe.experts.196.w2", "model.layers.29.block_sparse_moe.experts.197.w2", "model.layers.29.block_sparse_moe.experts.198.w2", "model.layers.29.block_sparse_moe.experts.199.w2", "model.layers.29.block_sparse_moe.experts.200.w2", "model.layers.29.block_sparse_moe.experts.201.w2", "model.layers.29.block_sparse_moe.experts.202.w2", "model.layers.29.block_sparse_moe.experts.203.w2", "model.layers.29.block_sparse_moe.experts.204.w2", "model.layers.29.block_sparse_moe.experts.205.w2", "model.layers.29.block_sparse_moe.experts.206.w2", "model.layers.29.block_sparse_moe.experts.207.w2", "model.layers.29.block_sparse_moe.experts.208.w2", "model.layers.29.block_sparse_moe.experts.209.w2", "model.layers.29.block_sparse_moe.experts.210.w2", "model.layers.29.block_sparse_moe.experts.211.w2", "model.layers.29.block_sparse_moe.experts.212.w2", "model.layers.29.block_sparse_moe.experts.213.w2", "model.layers.29.block_sparse_moe.experts.214.w2", "model.layers.29.block_sparse_moe.experts.215.w2", "model.layers.29.block_sparse_moe.experts.216.w2", "model.layers.29.block_sparse_moe.experts.217.w2", "model.layers.29.block_sparse_moe.experts.218.w2", "model.layers.29.block_sparse_moe.experts.219.w2", "model.layers.29.block_sparse_moe.experts.220.w2", "model.layers.29.block_sparse_moe.experts.221.w2", "model.layers.29.block_sparse_moe.experts.222.w2", "model.layers.29.block_sparse_moe.experts.223.w2", "model.layers.29.block_sparse_moe.experts.224.w2", "model.layers.29.block_sparse_moe.experts.225.w2", "model.layers.29.block_sparse_moe.experts.226.w2", "model.layers.29.block_sparse_moe.experts.227.w2", "model.layers.29.block_sparse_moe.experts.228.w2", "model.layers.29.block_sparse_moe.experts.229.w2", "model.layers.29.block_sparse_moe.experts.230.w2", "model.layers.29.block_sparse_moe.experts.231.w2", "model.layers.29.block_sparse_moe.experts.232.w2", "model.layers.29.block_sparse_moe.experts.233.w2", "model.layers.29.block_sparse_moe.experts.234.w2", "model.layers.29.block_sparse_moe.experts.235.w2", "model.layers.29.block_sparse_moe.experts.236.w2", "model.layers.29.block_sparse_moe.experts.237.w2", "model.layers.29.block_sparse_moe.experts.238.w2", "model.layers.29.block_sparse_moe.experts.239.w2", "model.layers.29.block_sparse_moe.experts.240.w2", "model.layers.29.block_sparse_moe.experts.241.w2", "model.layers.29.block_sparse_moe.experts.242.w2", "model.layers.29.block_sparse_moe.experts.243.w2", "model.layers.29.block_sparse_moe.experts.244.w2", "model.layers.29.block_sparse_moe.experts.245.w2", "model.layers.29.block_sparse_moe.experts.246.w2", "model.layers.29.block_sparse_moe.experts.247.w2", "model.layers.29.block_sparse_moe.experts.248.w2", "model.layers.29.block_sparse_moe.experts.249.w2", "model.layers.29.block_sparse_moe.experts.250.w2", "model.layers.29.block_sparse_moe.experts.251.w2", "model.layers.29.block_sparse_moe.experts.252.w2", "model.layers.29.block_sparse_moe.experts.253.w2", "model.layers.29.block_sparse_moe.experts.254.w2", "model.layers.29.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006167033687233953, "dbits": 1207959552 } ] }, { "idx": 150, "layers": [ "model.layers.30.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0011333843693137197, "dbits": 18874368 } ] }, { "idx": 151, "layers": [ "model.layers.30.self_attn.k_proj", "model.layers.30.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0027658522129058866, "dbits": 6291456 } ] }, { "idx": 152, "layers": [ "model.layers.30.self_attn.o_proj" ], "candidates": [ { "dkld": 0.008279522508382792, "dbits": 18874368 } ] }, { "idx": 153, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w1", "model.layers.30.block_sparse_moe.experts.1.w1", "model.layers.30.block_sparse_moe.experts.2.w1", "model.layers.30.block_sparse_moe.experts.3.w1", "model.layers.30.block_sparse_moe.experts.4.w1", "model.layers.30.block_sparse_moe.experts.5.w1", "model.layers.30.block_sparse_moe.experts.6.w1", "model.layers.30.block_sparse_moe.experts.7.w1", "model.layers.30.block_sparse_moe.experts.8.w1", "model.layers.30.block_sparse_moe.experts.9.w1", "model.layers.30.block_sparse_moe.experts.10.w1", "model.layers.30.block_sparse_moe.experts.11.w1", "model.layers.30.block_sparse_moe.experts.12.w1", "model.layers.30.block_sparse_moe.experts.13.w1", "model.layers.30.block_sparse_moe.experts.14.w1", "model.layers.30.block_sparse_moe.experts.15.w1", "model.layers.30.block_sparse_moe.experts.16.w1", "model.layers.30.block_sparse_moe.experts.17.w1", "model.layers.30.block_sparse_moe.experts.18.w1", "model.layers.30.block_sparse_moe.experts.19.w1", "model.layers.30.block_sparse_moe.experts.20.w1", "model.layers.30.block_sparse_moe.experts.21.w1", "model.layers.30.block_sparse_moe.experts.22.w1", "model.layers.30.block_sparse_moe.experts.23.w1", "model.layers.30.block_sparse_moe.experts.24.w1", "model.layers.30.block_sparse_moe.experts.25.w1", "model.layers.30.block_sparse_moe.experts.26.w1", "model.layers.30.block_sparse_moe.experts.27.w1", "model.layers.30.block_sparse_moe.experts.28.w1", "model.layers.30.block_sparse_moe.experts.29.w1", "model.layers.30.block_sparse_moe.experts.30.w1", "model.layers.30.block_sparse_moe.experts.31.w1", "model.layers.30.block_sparse_moe.experts.32.w1", "model.layers.30.block_sparse_moe.experts.33.w1", "model.layers.30.block_sparse_moe.experts.34.w1", "model.layers.30.block_sparse_moe.experts.35.w1", "model.layers.30.block_sparse_moe.experts.36.w1", "model.layers.30.block_sparse_moe.experts.37.w1", "model.layers.30.block_sparse_moe.experts.38.w1", "model.layers.30.block_sparse_moe.experts.39.w1", "model.layers.30.block_sparse_moe.experts.40.w1", "model.layers.30.block_sparse_moe.experts.41.w1", "model.layers.30.block_sparse_moe.experts.42.w1", "model.layers.30.block_sparse_moe.experts.43.w1", "model.layers.30.block_sparse_moe.experts.44.w1", "model.layers.30.block_sparse_moe.experts.45.w1", "model.layers.30.block_sparse_moe.experts.46.w1", "model.layers.30.block_sparse_moe.experts.47.w1", "model.layers.30.block_sparse_moe.experts.48.w1", "model.layers.30.block_sparse_moe.experts.49.w1", "model.layers.30.block_sparse_moe.experts.50.w1", "model.layers.30.block_sparse_moe.experts.51.w1", "model.layers.30.block_sparse_moe.experts.52.w1", "model.layers.30.block_sparse_moe.experts.53.w1", "model.layers.30.block_sparse_moe.experts.54.w1", "model.layers.30.block_sparse_moe.experts.55.w1", "model.layers.30.block_sparse_moe.experts.56.w1", "model.layers.30.block_sparse_moe.experts.57.w1", "model.layers.30.block_sparse_moe.experts.58.w1", "model.layers.30.block_sparse_moe.experts.59.w1", "model.layers.30.block_sparse_moe.experts.60.w1", "model.layers.30.block_sparse_moe.experts.61.w1", "model.layers.30.block_sparse_moe.experts.62.w1", "model.layers.30.block_sparse_moe.experts.63.w1", "model.layers.30.block_sparse_moe.experts.64.w1", "model.layers.30.block_sparse_moe.experts.65.w1", "model.layers.30.block_sparse_moe.experts.66.w1", "model.layers.30.block_sparse_moe.experts.67.w1", "model.layers.30.block_sparse_moe.experts.68.w1", "model.layers.30.block_sparse_moe.experts.69.w1", "model.layers.30.block_sparse_moe.experts.70.w1", "model.layers.30.block_sparse_moe.experts.71.w1", "model.layers.30.block_sparse_moe.experts.72.w1", "model.layers.30.block_sparse_moe.experts.73.w1", "model.layers.30.block_sparse_moe.experts.74.w1", "model.layers.30.block_sparse_moe.experts.75.w1", "model.layers.30.block_sparse_moe.experts.76.w1", "model.layers.30.block_sparse_moe.experts.77.w1", "model.layers.30.block_sparse_moe.experts.78.w1", "model.layers.30.block_sparse_moe.experts.79.w1", "model.layers.30.block_sparse_moe.experts.80.w1", "model.layers.30.block_sparse_moe.experts.81.w1", "model.layers.30.block_sparse_moe.experts.82.w1", "model.layers.30.block_sparse_moe.experts.83.w1", "model.layers.30.block_sparse_moe.experts.84.w1", "model.layers.30.block_sparse_moe.experts.85.w1", "model.layers.30.block_sparse_moe.experts.86.w1", "model.layers.30.block_sparse_moe.experts.87.w1", "model.layers.30.block_sparse_moe.experts.88.w1", "model.layers.30.block_sparse_moe.experts.89.w1", "model.layers.30.block_sparse_moe.experts.90.w1", "model.layers.30.block_sparse_moe.experts.91.w1", "model.layers.30.block_sparse_moe.experts.92.w1", "model.layers.30.block_sparse_moe.experts.93.w1", "model.layers.30.block_sparse_moe.experts.94.w1", "model.layers.30.block_sparse_moe.experts.95.w1", "model.layers.30.block_sparse_moe.experts.96.w1", "model.layers.30.block_sparse_moe.experts.97.w1", "model.layers.30.block_sparse_moe.experts.98.w1", "model.layers.30.block_sparse_moe.experts.99.w1", "model.layers.30.block_sparse_moe.experts.100.w1", "model.layers.30.block_sparse_moe.experts.101.w1", "model.layers.30.block_sparse_moe.experts.102.w1", "model.layers.30.block_sparse_moe.experts.103.w1", "model.layers.30.block_sparse_moe.experts.104.w1", "model.layers.30.block_sparse_moe.experts.105.w1", "model.layers.30.block_sparse_moe.experts.106.w1", "model.layers.30.block_sparse_moe.experts.107.w1", "model.layers.30.block_sparse_moe.experts.108.w1", "model.layers.30.block_sparse_moe.experts.109.w1", "model.layers.30.block_sparse_moe.experts.110.w1", "model.layers.30.block_sparse_moe.experts.111.w1", "model.layers.30.block_sparse_moe.experts.112.w1", "model.layers.30.block_sparse_moe.experts.113.w1", "model.layers.30.block_sparse_moe.experts.114.w1", "model.layers.30.block_sparse_moe.experts.115.w1", "model.layers.30.block_sparse_moe.experts.116.w1", "model.layers.30.block_sparse_moe.experts.117.w1", "model.layers.30.block_sparse_moe.experts.118.w1", "model.layers.30.block_sparse_moe.experts.119.w1", "model.layers.30.block_sparse_moe.experts.120.w1", "model.layers.30.block_sparse_moe.experts.121.w1", "model.layers.30.block_sparse_moe.experts.122.w1", "model.layers.30.block_sparse_moe.experts.123.w1", "model.layers.30.block_sparse_moe.experts.124.w1", "model.layers.30.block_sparse_moe.experts.125.w1", "model.layers.30.block_sparse_moe.experts.126.w1", "model.layers.30.block_sparse_moe.experts.127.w1", "model.layers.30.block_sparse_moe.experts.128.w1", "model.layers.30.block_sparse_moe.experts.129.w1", "model.layers.30.block_sparse_moe.experts.130.w1", "model.layers.30.block_sparse_moe.experts.131.w1", "model.layers.30.block_sparse_moe.experts.132.w1", "model.layers.30.block_sparse_moe.experts.133.w1", "model.layers.30.block_sparse_moe.experts.134.w1", "model.layers.30.block_sparse_moe.experts.135.w1", "model.layers.30.block_sparse_moe.experts.136.w1", "model.layers.30.block_sparse_moe.experts.137.w1", "model.layers.30.block_sparse_moe.experts.138.w1", "model.layers.30.block_sparse_moe.experts.139.w1", "model.layers.30.block_sparse_moe.experts.140.w1", "model.layers.30.block_sparse_moe.experts.141.w1", "model.layers.30.block_sparse_moe.experts.142.w1", "model.layers.30.block_sparse_moe.experts.143.w1", "model.layers.30.block_sparse_moe.experts.144.w1", "model.layers.30.block_sparse_moe.experts.145.w1", "model.layers.30.block_sparse_moe.experts.146.w1", "model.layers.30.block_sparse_moe.experts.147.w1", "model.layers.30.block_sparse_moe.experts.148.w1", "model.layers.30.block_sparse_moe.experts.149.w1", "model.layers.30.block_sparse_moe.experts.150.w1", "model.layers.30.block_sparse_moe.experts.151.w1", "model.layers.30.block_sparse_moe.experts.152.w1", "model.layers.30.block_sparse_moe.experts.153.w1", "model.layers.30.block_sparse_moe.experts.154.w1", "model.layers.30.block_sparse_moe.experts.155.w1", "model.layers.30.block_sparse_moe.experts.156.w1", "model.layers.30.block_sparse_moe.experts.157.w1", "model.layers.30.block_sparse_moe.experts.158.w1", "model.layers.30.block_sparse_moe.experts.159.w1", "model.layers.30.block_sparse_moe.experts.160.w1", "model.layers.30.block_sparse_moe.experts.161.w1", "model.layers.30.block_sparse_moe.experts.162.w1", "model.layers.30.block_sparse_moe.experts.163.w1", "model.layers.30.block_sparse_moe.experts.164.w1", "model.layers.30.block_sparse_moe.experts.165.w1", "model.layers.30.block_sparse_moe.experts.166.w1", "model.layers.30.block_sparse_moe.experts.167.w1", "model.layers.30.block_sparse_moe.experts.168.w1", "model.layers.30.block_sparse_moe.experts.169.w1", "model.layers.30.block_sparse_moe.experts.170.w1", "model.layers.30.block_sparse_moe.experts.171.w1", "model.layers.30.block_sparse_moe.experts.172.w1", "model.layers.30.block_sparse_moe.experts.173.w1", "model.layers.30.block_sparse_moe.experts.174.w1", "model.layers.30.block_sparse_moe.experts.175.w1", "model.layers.30.block_sparse_moe.experts.176.w1", "model.layers.30.block_sparse_moe.experts.177.w1", "model.layers.30.block_sparse_moe.experts.178.w1", "model.layers.30.block_sparse_moe.experts.179.w1", "model.layers.30.block_sparse_moe.experts.180.w1", "model.layers.30.block_sparse_moe.experts.181.w1", "model.layers.30.block_sparse_moe.experts.182.w1", "model.layers.30.block_sparse_moe.experts.183.w1", "model.layers.30.block_sparse_moe.experts.184.w1", "model.layers.30.block_sparse_moe.experts.185.w1", "model.layers.30.block_sparse_moe.experts.186.w1", "model.layers.30.block_sparse_moe.experts.187.w1", "model.layers.30.block_sparse_moe.experts.188.w1", "model.layers.30.block_sparse_moe.experts.189.w1", "model.layers.30.block_sparse_moe.experts.190.w1", "model.layers.30.block_sparse_moe.experts.191.w1", "model.layers.30.block_sparse_moe.experts.192.w1", "model.layers.30.block_sparse_moe.experts.193.w1", "model.layers.30.block_sparse_moe.experts.194.w1", "model.layers.30.block_sparse_moe.experts.195.w1", "model.layers.30.block_sparse_moe.experts.196.w1", "model.layers.30.block_sparse_moe.experts.197.w1", "model.layers.30.block_sparse_moe.experts.198.w1", "model.layers.30.block_sparse_moe.experts.199.w1", "model.layers.30.block_sparse_moe.experts.200.w1", "model.layers.30.block_sparse_moe.experts.201.w1", "model.layers.30.block_sparse_moe.experts.202.w1", "model.layers.30.block_sparse_moe.experts.203.w1", "model.layers.30.block_sparse_moe.experts.204.w1", "model.layers.30.block_sparse_moe.experts.205.w1", "model.layers.30.block_sparse_moe.experts.206.w1", "model.layers.30.block_sparse_moe.experts.207.w1", "model.layers.30.block_sparse_moe.experts.208.w1", "model.layers.30.block_sparse_moe.experts.209.w1", "model.layers.30.block_sparse_moe.experts.210.w1", "model.layers.30.block_sparse_moe.experts.211.w1", "model.layers.30.block_sparse_moe.experts.212.w1", "model.layers.30.block_sparse_moe.experts.213.w1", "model.layers.30.block_sparse_moe.experts.214.w1", "model.layers.30.block_sparse_moe.experts.215.w1", "model.layers.30.block_sparse_moe.experts.216.w1", "model.layers.30.block_sparse_moe.experts.217.w1", "model.layers.30.block_sparse_moe.experts.218.w1", "model.layers.30.block_sparse_moe.experts.219.w1", "model.layers.30.block_sparse_moe.experts.220.w1", "model.layers.30.block_sparse_moe.experts.221.w1", "model.layers.30.block_sparse_moe.experts.222.w1", "model.layers.30.block_sparse_moe.experts.223.w1", "model.layers.30.block_sparse_moe.experts.224.w1", "model.layers.30.block_sparse_moe.experts.225.w1", "model.layers.30.block_sparse_moe.experts.226.w1", "model.layers.30.block_sparse_moe.experts.227.w1", "model.layers.30.block_sparse_moe.experts.228.w1", "model.layers.30.block_sparse_moe.experts.229.w1", "model.layers.30.block_sparse_moe.experts.230.w1", "model.layers.30.block_sparse_moe.experts.231.w1", "model.layers.30.block_sparse_moe.experts.232.w1", "model.layers.30.block_sparse_moe.experts.233.w1", "model.layers.30.block_sparse_moe.experts.234.w1", "model.layers.30.block_sparse_moe.experts.235.w1", "model.layers.30.block_sparse_moe.experts.236.w1", "model.layers.30.block_sparse_moe.experts.237.w1", "model.layers.30.block_sparse_moe.experts.238.w1", "model.layers.30.block_sparse_moe.experts.239.w1", "model.layers.30.block_sparse_moe.experts.240.w1", "model.layers.30.block_sparse_moe.experts.241.w1", "model.layers.30.block_sparse_moe.experts.242.w1", "model.layers.30.block_sparse_moe.experts.243.w1", "model.layers.30.block_sparse_moe.experts.244.w1", "model.layers.30.block_sparse_moe.experts.245.w1", "model.layers.30.block_sparse_moe.experts.246.w1", "model.layers.30.block_sparse_moe.experts.247.w1", "model.layers.30.block_sparse_moe.experts.248.w1", "model.layers.30.block_sparse_moe.experts.249.w1", "model.layers.30.block_sparse_moe.experts.250.w1", "model.layers.30.block_sparse_moe.experts.251.w1", "model.layers.30.block_sparse_moe.experts.252.w1", "model.layers.30.block_sparse_moe.experts.253.w1", "model.layers.30.block_sparse_moe.experts.254.w1", "model.layers.30.block_sparse_moe.experts.255.w1", "model.layers.30.block_sparse_moe.experts.0.w3", "model.layers.30.block_sparse_moe.experts.1.w3", "model.layers.30.block_sparse_moe.experts.2.w3", "model.layers.30.block_sparse_moe.experts.3.w3", "model.layers.30.block_sparse_moe.experts.4.w3", "model.layers.30.block_sparse_moe.experts.5.w3", "model.layers.30.block_sparse_moe.experts.6.w3", "model.layers.30.block_sparse_moe.experts.7.w3", "model.layers.30.block_sparse_moe.experts.8.w3", "model.layers.30.block_sparse_moe.experts.9.w3", "model.layers.30.block_sparse_moe.experts.10.w3", "model.layers.30.block_sparse_moe.experts.11.w3", "model.layers.30.block_sparse_moe.experts.12.w3", "model.layers.30.block_sparse_moe.experts.13.w3", "model.layers.30.block_sparse_moe.experts.14.w3", "model.layers.30.block_sparse_moe.experts.15.w3", "model.layers.30.block_sparse_moe.experts.16.w3", "model.layers.30.block_sparse_moe.experts.17.w3", "model.layers.30.block_sparse_moe.experts.18.w3", "model.layers.30.block_sparse_moe.experts.19.w3", "model.layers.30.block_sparse_moe.experts.20.w3", "model.layers.30.block_sparse_moe.experts.21.w3", "model.layers.30.block_sparse_moe.experts.22.w3", "model.layers.30.block_sparse_moe.experts.23.w3", "model.layers.30.block_sparse_moe.experts.24.w3", "model.layers.30.block_sparse_moe.experts.25.w3", "model.layers.30.block_sparse_moe.experts.26.w3", "model.layers.30.block_sparse_moe.experts.27.w3", "model.layers.30.block_sparse_moe.experts.28.w3", "model.layers.30.block_sparse_moe.experts.29.w3", "model.layers.30.block_sparse_moe.experts.30.w3", "model.layers.30.block_sparse_moe.experts.31.w3", "model.layers.30.block_sparse_moe.experts.32.w3", "model.layers.30.block_sparse_moe.experts.33.w3", "model.layers.30.block_sparse_moe.experts.34.w3", "model.layers.30.block_sparse_moe.experts.35.w3", "model.layers.30.block_sparse_moe.experts.36.w3", "model.layers.30.block_sparse_moe.experts.37.w3", "model.layers.30.block_sparse_moe.experts.38.w3", "model.layers.30.block_sparse_moe.experts.39.w3", "model.layers.30.block_sparse_moe.experts.40.w3", "model.layers.30.block_sparse_moe.experts.41.w3", "model.layers.30.block_sparse_moe.experts.42.w3", "model.layers.30.block_sparse_moe.experts.43.w3", "model.layers.30.block_sparse_moe.experts.44.w3", "model.layers.30.block_sparse_moe.experts.45.w3", "model.layers.30.block_sparse_moe.experts.46.w3", "model.layers.30.block_sparse_moe.experts.47.w3", "model.layers.30.block_sparse_moe.experts.48.w3", "model.layers.30.block_sparse_moe.experts.49.w3", "model.layers.30.block_sparse_moe.experts.50.w3", "model.layers.30.block_sparse_moe.experts.51.w3", "model.layers.30.block_sparse_moe.experts.52.w3", "model.layers.30.block_sparse_moe.experts.53.w3", "model.layers.30.block_sparse_moe.experts.54.w3", "model.layers.30.block_sparse_moe.experts.55.w3", "model.layers.30.block_sparse_moe.experts.56.w3", "model.layers.30.block_sparse_moe.experts.57.w3", "model.layers.30.block_sparse_moe.experts.58.w3", "model.layers.30.block_sparse_moe.experts.59.w3", "model.layers.30.block_sparse_moe.experts.60.w3", "model.layers.30.block_sparse_moe.experts.61.w3", "model.layers.30.block_sparse_moe.experts.62.w3", "model.layers.30.block_sparse_moe.experts.63.w3", "model.layers.30.block_sparse_moe.experts.64.w3", "model.layers.30.block_sparse_moe.experts.65.w3", "model.layers.30.block_sparse_moe.experts.66.w3", "model.layers.30.block_sparse_moe.experts.67.w3", "model.layers.30.block_sparse_moe.experts.68.w3", "model.layers.30.block_sparse_moe.experts.69.w3", "model.layers.30.block_sparse_moe.experts.70.w3", "model.layers.30.block_sparse_moe.experts.71.w3", "model.layers.30.block_sparse_moe.experts.72.w3", "model.layers.30.block_sparse_moe.experts.73.w3", "model.layers.30.block_sparse_moe.experts.74.w3", "model.layers.30.block_sparse_moe.experts.75.w3", "model.layers.30.block_sparse_moe.experts.76.w3", "model.layers.30.block_sparse_moe.experts.77.w3", "model.layers.30.block_sparse_moe.experts.78.w3", "model.layers.30.block_sparse_moe.experts.79.w3", "model.layers.30.block_sparse_moe.experts.80.w3", "model.layers.30.block_sparse_moe.experts.81.w3", "model.layers.30.block_sparse_moe.experts.82.w3", "model.layers.30.block_sparse_moe.experts.83.w3", "model.layers.30.block_sparse_moe.experts.84.w3", "model.layers.30.block_sparse_moe.experts.85.w3", "model.layers.30.block_sparse_moe.experts.86.w3", "model.layers.30.block_sparse_moe.experts.87.w3", "model.layers.30.block_sparse_moe.experts.88.w3", "model.layers.30.block_sparse_moe.experts.89.w3", "model.layers.30.block_sparse_moe.experts.90.w3", "model.layers.30.block_sparse_moe.experts.91.w3", "model.layers.30.block_sparse_moe.experts.92.w3", "model.layers.30.block_sparse_moe.experts.93.w3", "model.layers.30.block_sparse_moe.experts.94.w3", "model.layers.30.block_sparse_moe.experts.95.w3", "model.layers.30.block_sparse_moe.experts.96.w3", "model.layers.30.block_sparse_moe.experts.97.w3", "model.layers.30.block_sparse_moe.experts.98.w3", "model.layers.30.block_sparse_moe.experts.99.w3", "model.layers.30.block_sparse_moe.experts.100.w3", "model.layers.30.block_sparse_moe.experts.101.w3", "model.layers.30.block_sparse_moe.experts.102.w3", "model.layers.30.block_sparse_moe.experts.103.w3", "model.layers.30.block_sparse_moe.experts.104.w3", "model.layers.30.block_sparse_moe.experts.105.w3", "model.layers.30.block_sparse_moe.experts.106.w3", "model.layers.30.block_sparse_moe.experts.107.w3", "model.layers.30.block_sparse_moe.experts.108.w3", "model.layers.30.block_sparse_moe.experts.109.w3", "model.layers.30.block_sparse_moe.experts.110.w3", "model.layers.30.block_sparse_moe.experts.111.w3", "model.layers.30.block_sparse_moe.experts.112.w3", "model.layers.30.block_sparse_moe.experts.113.w3", "model.layers.30.block_sparse_moe.experts.114.w3", "model.layers.30.block_sparse_moe.experts.115.w3", "model.layers.30.block_sparse_moe.experts.116.w3", "model.layers.30.block_sparse_moe.experts.117.w3", "model.layers.30.block_sparse_moe.experts.118.w3", "model.layers.30.block_sparse_moe.experts.119.w3", "model.layers.30.block_sparse_moe.experts.120.w3", "model.layers.30.block_sparse_moe.experts.121.w3", "model.layers.30.block_sparse_moe.experts.122.w3", "model.layers.30.block_sparse_moe.experts.123.w3", "model.layers.30.block_sparse_moe.experts.124.w3", "model.layers.30.block_sparse_moe.experts.125.w3", "model.layers.30.block_sparse_moe.experts.126.w3", "model.layers.30.block_sparse_moe.experts.127.w3", "model.layers.30.block_sparse_moe.experts.128.w3", "model.layers.30.block_sparse_moe.experts.129.w3", "model.layers.30.block_sparse_moe.experts.130.w3", "model.layers.30.block_sparse_moe.experts.131.w3", "model.layers.30.block_sparse_moe.experts.132.w3", "model.layers.30.block_sparse_moe.experts.133.w3", "model.layers.30.block_sparse_moe.experts.134.w3", "model.layers.30.block_sparse_moe.experts.135.w3", "model.layers.30.block_sparse_moe.experts.136.w3", "model.layers.30.block_sparse_moe.experts.137.w3", "model.layers.30.block_sparse_moe.experts.138.w3", "model.layers.30.block_sparse_moe.experts.139.w3", "model.layers.30.block_sparse_moe.experts.140.w3", "model.layers.30.block_sparse_moe.experts.141.w3", "model.layers.30.block_sparse_moe.experts.142.w3", "model.layers.30.block_sparse_moe.experts.143.w3", "model.layers.30.block_sparse_moe.experts.144.w3", "model.layers.30.block_sparse_moe.experts.145.w3", "model.layers.30.block_sparse_moe.experts.146.w3", "model.layers.30.block_sparse_moe.experts.147.w3", "model.layers.30.block_sparse_moe.experts.148.w3", "model.layers.30.block_sparse_moe.experts.149.w3", "model.layers.30.block_sparse_moe.experts.150.w3", "model.layers.30.block_sparse_moe.experts.151.w3", "model.layers.30.block_sparse_moe.experts.152.w3", "model.layers.30.block_sparse_moe.experts.153.w3", "model.layers.30.block_sparse_moe.experts.154.w3", "model.layers.30.block_sparse_moe.experts.155.w3", "model.layers.30.block_sparse_moe.experts.156.w3", "model.layers.30.block_sparse_moe.experts.157.w3", "model.layers.30.block_sparse_moe.experts.158.w3", "model.layers.30.block_sparse_moe.experts.159.w3", "model.layers.30.block_sparse_moe.experts.160.w3", "model.layers.30.block_sparse_moe.experts.161.w3", "model.layers.30.block_sparse_moe.experts.162.w3", "model.layers.30.block_sparse_moe.experts.163.w3", "model.layers.30.block_sparse_moe.experts.164.w3", "model.layers.30.block_sparse_moe.experts.165.w3", "model.layers.30.block_sparse_moe.experts.166.w3", "model.layers.30.block_sparse_moe.experts.167.w3", "model.layers.30.block_sparse_moe.experts.168.w3", "model.layers.30.block_sparse_moe.experts.169.w3", "model.layers.30.block_sparse_moe.experts.170.w3", "model.layers.30.block_sparse_moe.experts.171.w3", "model.layers.30.block_sparse_moe.experts.172.w3", "model.layers.30.block_sparse_moe.experts.173.w3", "model.layers.30.block_sparse_moe.experts.174.w3", "model.layers.30.block_sparse_moe.experts.175.w3", "model.layers.30.block_sparse_moe.experts.176.w3", "model.layers.30.block_sparse_moe.experts.177.w3", "model.layers.30.block_sparse_moe.experts.178.w3", "model.layers.30.block_sparse_moe.experts.179.w3", "model.layers.30.block_sparse_moe.experts.180.w3", "model.layers.30.block_sparse_moe.experts.181.w3", "model.layers.30.block_sparse_moe.experts.182.w3", "model.layers.30.block_sparse_moe.experts.183.w3", "model.layers.30.block_sparse_moe.experts.184.w3", "model.layers.30.block_sparse_moe.experts.185.w3", "model.layers.30.block_sparse_moe.experts.186.w3", "model.layers.30.block_sparse_moe.experts.187.w3", "model.layers.30.block_sparse_moe.experts.188.w3", "model.layers.30.block_sparse_moe.experts.189.w3", "model.layers.30.block_sparse_moe.experts.190.w3", "model.layers.30.block_sparse_moe.experts.191.w3", "model.layers.30.block_sparse_moe.experts.192.w3", "model.layers.30.block_sparse_moe.experts.193.w3", "model.layers.30.block_sparse_moe.experts.194.w3", "model.layers.30.block_sparse_moe.experts.195.w3", "model.layers.30.block_sparse_moe.experts.196.w3", "model.layers.30.block_sparse_moe.experts.197.w3", "model.layers.30.block_sparse_moe.experts.198.w3", "model.layers.30.block_sparse_moe.experts.199.w3", "model.layers.30.block_sparse_moe.experts.200.w3", "model.layers.30.block_sparse_moe.experts.201.w3", "model.layers.30.block_sparse_moe.experts.202.w3", "model.layers.30.block_sparse_moe.experts.203.w3", "model.layers.30.block_sparse_moe.experts.204.w3", "model.layers.30.block_sparse_moe.experts.205.w3", "model.layers.30.block_sparse_moe.experts.206.w3", "model.layers.30.block_sparse_moe.experts.207.w3", "model.layers.30.block_sparse_moe.experts.208.w3", "model.layers.30.block_sparse_moe.experts.209.w3", "model.layers.30.block_sparse_moe.experts.210.w3", "model.layers.30.block_sparse_moe.experts.211.w3", "model.layers.30.block_sparse_moe.experts.212.w3", "model.layers.30.block_sparse_moe.experts.213.w3", "model.layers.30.block_sparse_moe.experts.214.w3", "model.layers.30.block_sparse_moe.experts.215.w3", "model.layers.30.block_sparse_moe.experts.216.w3", "model.layers.30.block_sparse_moe.experts.217.w3", "model.layers.30.block_sparse_moe.experts.218.w3", "model.layers.30.block_sparse_moe.experts.219.w3", "model.layers.30.block_sparse_moe.experts.220.w3", "model.layers.30.block_sparse_moe.experts.221.w3", "model.layers.30.block_sparse_moe.experts.222.w3", "model.layers.30.block_sparse_moe.experts.223.w3", "model.layers.30.block_sparse_moe.experts.224.w3", "model.layers.30.block_sparse_moe.experts.225.w3", "model.layers.30.block_sparse_moe.experts.226.w3", "model.layers.30.block_sparse_moe.experts.227.w3", "model.layers.30.block_sparse_moe.experts.228.w3", "model.layers.30.block_sparse_moe.experts.229.w3", "model.layers.30.block_sparse_moe.experts.230.w3", "model.layers.30.block_sparse_moe.experts.231.w3", "model.layers.30.block_sparse_moe.experts.232.w3", "model.layers.30.block_sparse_moe.experts.233.w3", "model.layers.30.block_sparse_moe.experts.234.w3", "model.layers.30.block_sparse_moe.experts.235.w3", "model.layers.30.block_sparse_moe.experts.236.w3", "model.layers.30.block_sparse_moe.experts.237.w3", "model.layers.30.block_sparse_moe.experts.238.w3", "model.layers.30.block_sparse_moe.experts.239.w3", "model.layers.30.block_sparse_moe.experts.240.w3", "model.layers.30.block_sparse_moe.experts.241.w3", "model.layers.30.block_sparse_moe.experts.242.w3", "model.layers.30.block_sparse_moe.experts.243.w3", "model.layers.30.block_sparse_moe.experts.244.w3", "model.layers.30.block_sparse_moe.experts.245.w3", "model.layers.30.block_sparse_moe.experts.246.w3", "model.layers.30.block_sparse_moe.experts.247.w3", "model.layers.30.block_sparse_moe.experts.248.w3", "model.layers.30.block_sparse_moe.experts.249.w3", "model.layers.30.block_sparse_moe.experts.250.w3", "model.layers.30.block_sparse_moe.experts.251.w3", "model.layers.30.block_sparse_moe.experts.252.w3", "model.layers.30.block_sparse_moe.experts.253.w3", "model.layers.30.block_sparse_moe.experts.254.w3", "model.layers.30.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -6.4079649746418e-05, "dbits": 2415919104 } ] }, { "idx": 154, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w2", "model.layers.30.block_sparse_moe.experts.1.w2", "model.layers.30.block_sparse_moe.experts.2.w2", "model.layers.30.block_sparse_moe.experts.3.w2", "model.layers.30.block_sparse_moe.experts.4.w2", "model.layers.30.block_sparse_moe.experts.5.w2", "model.layers.30.block_sparse_moe.experts.6.w2", "model.layers.30.block_sparse_moe.experts.7.w2", "model.layers.30.block_sparse_moe.experts.8.w2", "model.layers.30.block_sparse_moe.experts.9.w2", "model.layers.30.block_sparse_moe.experts.10.w2", "model.layers.30.block_sparse_moe.experts.11.w2", "model.layers.30.block_sparse_moe.experts.12.w2", "model.layers.30.block_sparse_moe.experts.13.w2", "model.layers.30.block_sparse_moe.experts.14.w2", "model.layers.30.block_sparse_moe.experts.15.w2", "model.layers.30.block_sparse_moe.experts.16.w2", "model.layers.30.block_sparse_moe.experts.17.w2", "model.layers.30.block_sparse_moe.experts.18.w2", "model.layers.30.block_sparse_moe.experts.19.w2", "model.layers.30.block_sparse_moe.experts.20.w2", "model.layers.30.block_sparse_moe.experts.21.w2", "model.layers.30.block_sparse_moe.experts.22.w2", "model.layers.30.block_sparse_moe.experts.23.w2", "model.layers.30.block_sparse_moe.experts.24.w2", "model.layers.30.block_sparse_moe.experts.25.w2", "model.layers.30.block_sparse_moe.experts.26.w2", "model.layers.30.block_sparse_moe.experts.27.w2", "model.layers.30.block_sparse_moe.experts.28.w2", "model.layers.30.block_sparse_moe.experts.29.w2", "model.layers.30.block_sparse_moe.experts.30.w2", "model.layers.30.block_sparse_moe.experts.31.w2", "model.layers.30.block_sparse_moe.experts.32.w2", "model.layers.30.block_sparse_moe.experts.33.w2", "model.layers.30.block_sparse_moe.experts.34.w2", "model.layers.30.block_sparse_moe.experts.35.w2", "model.layers.30.block_sparse_moe.experts.36.w2", "model.layers.30.block_sparse_moe.experts.37.w2", "model.layers.30.block_sparse_moe.experts.38.w2", "model.layers.30.block_sparse_moe.experts.39.w2", "model.layers.30.block_sparse_moe.experts.40.w2", "model.layers.30.block_sparse_moe.experts.41.w2", "model.layers.30.block_sparse_moe.experts.42.w2", "model.layers.30.block_sparse_moe.experts.43.w2", "model.layers.30.block_sparse_moe.experts.44.w2", "model.layers.30.block_sparse_moe.experts.45.w2", "model.layers.30.block_sparse_moe.experts.46.w2", "model.layers.30.block_sparse_moe.experts.47.w2", "model.layers.30.block_sparse_moe.experts.48.w2", "model.layers.30.block_sparse_moe.experts.49.w2", "model.layers.30.block_sparse_moe.experts.50.w2", "model.layers.30.block_sparse_moe.experts.51.w2", "model.layers.30.block_sparse_moe.experts.52.w2", "model.layers.30.block_sparse_moe.experts.53.w2", "model.layers.30.block_sparse_moe.experts.54.w2", "model.layers.30.block_sparse_moe.experts.55.w2", "model.layers.30.block_sparse_moe.experts.56.w2", "model.layers.30.block_sparse_moe.experts.57.w2", "model.layers.30.block_sparse_moe.experts.58.w2", "model.layers.30.block_sparse_moe.experts.59.w2", "model.layers.30.block_sparse_moe.experts.60.w2", "model.layers.30.block_sparse_moe.experts.61.w2", "model.layers.30.block_sparse_moe.experts.62.w2", "model.layers.30.block_sparse_moe.experts.63.w2", "model.layers.30.block_sparse_moe.experts.64.w2", "model.layers.30.block_sparse_moe.experts.65.w2", "model.layers.30.block_sparse_moe.experts.66.w2", "model.layers.30.block_sparse_moe.experts.67.w2", "model.layers.30.block_sparse_moe.experts.68.w2", "model.layers.30.block_sparse_moe.experts.69.w2", "model.layers.30.block_sparse_moe.experts.70.w2", "model.layers.30.block_sparse_moe.experts.71.w2", "model.layers.30.block_sparse_moe.experts.72.w2", "model.layers.30.block_sparse_moe.experts.73.w2", "model.layers.30.block_sparse_moe.experts.74.w2", "model.layers.30.block_sparse_moe.experts.75.w2", "model.layers.30.block_sparse_moe.experts.76.w2", "model.layers.30.block_sparse_moe.experts.77.w2", "model.layers.30.block_sparse_moe.experts.78.w2", "model.layers.30.block_sparse_moe.experts.79.w2", "model.layers.30.block_sparse_moe.experts.80.w2", "model.layers.30.block_sparse_moe.experts.81.w2", "model.layers.30.block_sparse_moe.experts.82.w2", "model.layers.30.block_sparse_moe.experts.83.w2", "model.layers.30.block_sparse_moe.experts.84.w2", "model.layers.30.block_sparse_moe.experts.85.w2", "model.layers.30.block_sparse_moe.experts.86.w2", "model.layers.30.block_sparse_moe.experts.87.w2", "model.layers.30.block_sparse_moe.experts.88.w2", "model.layers.30.block_sparse_moe.experts.89.w2", "model.layers.30.block_sparse_moe.experts.90.w2", "model.layers.30.block_sparse_moe.experts.91.w2", "model.layers.30.block_sparse_moe.experts.92.w2", "model.layers.30.block_sparse_moe.experts.93.w2", "model.layers.30.block_sparse_moe.experts.94.w2", "model.layers.30.block_sparse_moe.experts.95.w2", "model.layers.30.block_sparse_moe.experts.96.w2", "model.layers.30.block_sparse_moe.experts.97.w2", "model.layers.30.block_sparse_moe.experts.98.w2", "model.layers.30.block_sparse_moe.experts.99.w2", "model.layers.30.block_sparse_moe.experts.100.w2", "model.layers.30.block_sparse_moe.experts.101.w2", "model.layers.30.block_sparse_moe.experts.102.w2", "model.layers.30.block_sparse_moe.experts.103.w2", "model.layers.30.block_sparse_moe.experts.104.w2", "model.layers.30.block_sparse_moe.experts.105.w2", "model.layers.30.block_sparse_moe.experts.106.w2", "model.layers.30.block_sparse_moe.experts.107.w2", "model.layers.30.block_sparse_moe.experts.108.w2", "model.layers.30.block_sparse_moe.experts.109.w2", "model.layers.30.block_sparse_moe.experts.110.w2", "model.layers.30.block_sparse_moe.experts.111.w2", "model.layers.30.block_sparse_moe.experts.112.w2", "model.layers.30.block_sparse_moe.experts.113.w2", "model.layers.30.block_sparse_moe.experts.114.w2", "model.layers.30.block_sparse_moe.experts.115.w2", "model.layers.30.block_sparse_moe.experts.116.w2", "model.layers.30.block_sparse_moe.experts.117.w2", "model.layers.30.block_sparse_moe.experts.118.w2", "model.layers.30.block_sparse_moe.experts.119.w2", "model.layers.30.block_sparse_moe.experts.120.w2", "model.layers.30.block_sparse_moe.experts.121.w2", "model.layers.30.block_sparse_moe.experts.122.w2", "model.layers.30.block_sparse_moe.experts.123.w2", "model.layers.30.block_sparse_moe.experts.124.w2", "model.layers.30.block_sparse_moe.experts.125.w2", "model.layers.30.block_sparse_moe.experts.126.w2", "model.layers.30.block_sparse_moe.experts.127.w2", "model.layers.30.block_sparse_moe.experts.128.w2", "model.layers.30.block_sparse_moe.experts.129.w2", "model.layers.30.block_sparse_moe.experts.130.w2", "model.layers.30.block_sparse_moe.experts.131.w2", "model.layers.30.block_sparse_moe.experts.132.w2", "model.layers.30.block_sparse_moe.experts.133.w2", "model.layers.30.block_sparse_moe.experts.134.w2", "model.layers.30.block_sparse_moe.experts.135.w2", "model.layers.30.block_sparse_moe.experts.136.w2", "model.layers.30.block_sparse_moe.experts.137.w2", "model.layers.30.block_sparse_moe.experts.138.w2", "model.layers.30.block_sparse_moe.experts.139.w2", "model.layers.30.block_sparse_moe.experts.140.w2", "model.layers.30.block_sparse_moe.experts.141.w2", "model.layers.30.block_sparse_moe.experts.142.w2", "model.layers.30.block_sparse_moe.experts.143.w2", "model.layers.30.block_sparse_moe.experts.144.w2", "model.layers.30.block_sparse_moe.experts.145.w2", "model.layers.30.block_sparse_moe.experts.146.w2", "model.layers.30.block_sparse_moe.experts.147.w2", "model.layers.30.block_sparse_moe.experts.148.w2", "model.layers.30.block_sparse_moe.experts.149.w2", "model.layers.30.block_sparse_moe.experts.150.w2", "model.layers.30.block_sparse_moe.experts.151.w2", "model.layers.30.block_sparse_moe.experts.152.w2", "model.layers.30.block_sparse_moe.experts.153.w2", "model.layers.30.block_sparse_moe.experts.154.w2", "model.layers.30.block_sparse_moe.experts.155.w2", "model.layers.30.block_sparse_moe.experts.156.w2", "model.layers.30.block_sparse_moe.experts.157.w2", "model.layers.30.block_sparse_moe.experts.158.w2", "model.layers.30.block_sparse_moe.experts.159.w2", "model.layers.30.block_sparse_moe.experts.160.w2", "model.layers.30.block_sparse_moe.experts.161.w2", "model.layers.30.block_sparse_moe.experts.162.w2", "model.layers.30.block_sparse_moe.experts.163.w2", "model.layers.30.block_sparse_moe.experts.164.w2", "model.layers.30.block_sparse_moe.experts.165.w2", "model.layers.30.block_sparse_moe.experts.166.w2", "model.layers.30.block_sparse_moe.experts.167.w2", "model.layers.30.block_sparse_moe.experts.168.w2", "model.layers.30.block_sparse_moe.experts.169.w2", "model.layers.30.block_sparse_moe.experts.170.w2", "model.layers.30.block_sparse_moe.experts.171.w2", "model.layers.30.block_sparse_moe.experts.172.w2", "model.layers.30.block_sparse_moe.experts.173.w2", "model.layers.30.block_sparse_moe.experts.174.w2", "model.layers.30.block_sparse_moe.experts.175.w2", "model.layers.30.block_sparse_moe.experts.176.w2", "model.layers.30.block_sparse_moe.experts.177.w2", "model.layers.30.block_sparse_moe.experts.178.w2", "model.layers.30.block_sparse_moe.experts.179.w2", "model.layers.30.block_sparse_moe.experts.180.w2", "model.layers.30.block_sparse_moe.experts.181.w2", "model.layers.30.block_sparse_moe.experts.182.w2", "model.layers.30.block_sparse_moe.experts.183.w2", "model.layers.30.block_sparse_moe.experts.184.w2", "model.layers.30.block_sparse_moe.experts.185.w2", "model.layers.30.block_sparse_moe.experts.186.w2", "model.layers.30.block_sparse_moe.experts.187.w2", "model.layers.30.block_sparse_moe.experts.188.w2", "model.layers.30.block_sparse_moe.experts.189.w2", "model.layers.30.block_sparse_moe.experts.190.w2", "model.layers.30.block_sparse_moe.experts.191.w2", "model.layers.30.block_sparse_moe.experts.192.w2", "model.layers.30.block_sparse_moe.experts.193.w2", "model.layers.30.block_sparse_moe.experts.194.w2", "model.layers.30.block_sparse_moe.experts.195.w2", "model.layers.30.block_sparse_moe.experts.196.w2", "model.layers.30.block_sparse_moe.experts.197.w2", "model.layers.30.block_sparse_moe.experts.198.w2", "model.layers.30.block_sparse_moe.experts.199.w2", "model.layers.30.block_sparse_moe.experts.200.w2", "model.layers.30.block_sparse_moe.experts.201.w2", "model.layers.30.block_sparse_moe.experts.202.w2", "model.layers.30.block_sparse_moe.experts.203.w2", "model.layers.30.block_sparse_moe.experts.204.w2", "model.layers.30.block_sparse_moe.experts.205.w2", "model.layers.30.block_sparse_moe.experts.206.w2", "model.layers.30.block_sparse_moe.experts.207.w2", "model.layers.30.block_sparse_moe.experts.208.w2", "model.layers.30.block_sparse_moe.experts.209.w2", "model.layers.30.block_sparse_moe.experts.210.w2", "model.layers.30.block_sparse_moe.experts.211.w2", "model.layers.30.block_sparse_moe.experts.212.w2", "model.layers.30.block_sparse_moe.experts.213.w2", "model.layers.30.block_sparse_moe.experts.214.w2", "model.layers.30.block_sparse_moe.experts.215.w2", "model.layers.30.block_sparse_moe.experts.216.w2", "model.layers.30.block_sparse_moe.experts.217.w2", "model.layers.30.block_sparse_moe.experts.218.w2", "model.layers.30.block_sparse_moe.experts.219.w2", "model.layers.30.block_sparse_moe.experts.220.w2", "model.layers.30.block_sparse_moe.experts.221.w2", "model.layers.30.block_sparse_moe.experts.222.w2", "model.layers.30.block_sparse_moe.experts.223.w2", "model.layers.30.block_sparse_moe.experts.224.w2", "model.layers.30.block_sparse_moe.experts.225.w2", "model.layers.30.block_sparse_moe.experts.226.w2", "model.layers.30.block_sparse_moe.experts.227.w2", "model.layers.30.block_sparse_moe.experts.228.w2", "model.layers.30.block_sparse_moe.experts.229.w2", "model.layers.30.block_sparse_moe.experts.230.w2", "model.layers.30.block_sparse_moe.experts.231.w2", "model.layers.30.block_sparse_moe.experts.232.w2", "model.layers.30.block_sparse_moe.experts.233.w2", "model.layers.30.block_sparse_moe.experts.234.w2", "model.layers.30.block_sparse_moe.experts.235.w2", "model.layers.30.block_sparse_moe.experts.236.w2", "model.layers.30.block_sparse_moe.experts.237.w2", "model.layers.30.block_sparse_moe.experts.238.w2", "model.layers.30.block_sparse_moe.experts.239.w2", "model.layers.30.block_sparse_moe.experts.240.w2", "model.layers.30.block_sparse_moe.experts.241.w2", "model.layers.30.block_sparse_moe.experts.242.w2", "model.layers.30.block_sparse_moe.experts.243.w2", "model.layers.30.block_sparse_moe.experts.244.w2", "model.layers.30.block_sparse_moe.experts.245.w2", "model.layers.30.block_sparse_moe.experts.246.w2", "model.layers.30.block_sparse_moe.experts.247.w2", "model.layers.30.block_sparse_moe.experts.248.w2", "model.layers.30.block_sparse_moe.experts.249.w2", "model.layers.30.block_sparse_moe.experts.250.w2", "model.layers.30.block_sparse_moe.experts.251.w2", "model.layers.30.block_sparse_moe.experts.252.w2", "model.layers.30.block_sparse_moe.experts.253.w2", "model.layers.30.block_sparse_moe.experts.254.w2", "model.layers.30.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00015796218067407053, "dbits": 1207959552 } ] }, { "idx": 155, "layers": [ "model.layers.31.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00147305019199849, "dbits": 18874368 } ] }, { "idx": 156, "layers": [ "model.layers.31.self_attn.k_proj", "model.layers.31.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0007015522569417953, "dbits": 6291456 } ] }, { "idx": 157, "layers": [ "model.layers.31.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0009726988151669558, "dbits": 18874368 } ] }, { "idx": 158, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w1", "model.layers.31.block_sparse_moe.experts.1.w1", "model.layers.31.block_sparse_moe.experts.2.w1", "model.layers.31.block_sparse_moe.experts.3.w1", "model.layers.31.block_sparse_moe.experts.4.w1", "model.layers.31.block_sparse_moe.experts.5.w1", "model.layers.31.block_sparse_moe.experts.6.w1", "model.layers.31.block_sparse_moe.experts.7.w1", "model.layers.31.block_sparse_moe.experts.8.w1", "model.layers.31.block_sparse_moe.experts.9.w1", "model.layers.31.block_sparse_moe.experts.10.w1", "model.layers.31.block_sparse_moe.experts.11.w1", "model.layers.31.block_sparse_moe.experts.12.w1", "model.layers.31.block_sparse_moe.experts.13.w1", "model.layers.31.block_sparse_moe.experts.14.w1", "model.layers.31.block_sparse_moe.experts.15.w1", "model.layers.31.block_sparse_moe.experts.16.w1", "model.layers.31.block_sparse_moe.experts.17.w1", "model.layers.31.block_sparse_moe.experts.18.w1", "model.layers.31.block_sparse_moe.experts.19.w1", "model.layers.31.block_sparse_moe.experts.20.w1", "model.layers.31.block_sparse_moe.experts.21.w1", "model.layers.31.block_sparse_moe.experts.22.w1", "model.layers.31.block_sparse_moe.experts.23.w1", "model.layers.31.block_sparse_moe.experts.24.w1", "model.layers.31.block_sparse_moe.experts.25.w1", "model.layers.31.block_sparse_moe.experts.26.w1", "model.layers.31.block_sparse_moe.experts.27.w1", "model.layers.31.block_sparse_moe.experts.28.w1", "model.layers.31.block_sparse_moe.experts.29.w1", "model.layers.31.block_sparse_moe.experts.30.w1", "model.layers.31.block_sparse_moe.experts.31.w1", "model.layers.31.block_sparse_moe.experts.32.w1", "model.layers.31.block_sparse_moe.experts.33.w1", "model.layers.31.block_sparse_moe.experts.34.w1", "model.layers.31.block_sparse_moe.experts.35.w1", "model.layers.31.block_sparse_moe.experts.36.w1", "model.layers.31.block_sparse_moe.experts.37.w1", "model.layers.31.block_sparse_moe.experts.38.w1", "model.layers.31.block_sparse_moe.experts.39.w1", "model.layers.31.block_sparse_moe.experts.40.w1", "model.layers.31.block_sparse_moe.experts.41.w1", "model.layers.31.block_sparse_moe.experts.42.w1", "model.layers.31.block_sparse_moe.experts.43.w1", "model.layers.31.block_sparse_moe.experts.44.w1", "model.layers.31.block_sparse_moe.experts.45.w1", "model.layers.31.block_sparse_moe.experts.46.w1", "model.layers.31.block_sparse_moe.experts.47.w1", "model.layers.31.block_sparse_moe.experts.48.w1", "model.layers.31.block_sparse_moe.experts.49.w1", "model.layers.31.block_sparse_moe.experts.50.w1", "model.layers.31.block_sparse_moe.experts.51.w1", "model.layers.31.block_sparse_moe.experts.52.w1", "model.layers.31.block_sparse_moe.experts.53.w1", "model.layers.31.block_sparse_moe.experts.54.w1", "model.layers.31.block_sparse_moe.experts.55.w1", "model.layers.31.block_sparse_moe.experts.56.w1", "model.layers.31.block_sparse_moe.experts.57.w1", "model.layers.31.block_sparse_moe.experts.58.w1", "model.layers.31.block_sparse_moe.experts.59.w1", "model.layers.31.block_sparse_moe.experts.60.w1", "model.layers.31.block_sparse_moe.experts.61.w1", "model.layers.31.block_sparse_moe.experts.62.w1", "model.layers.31.block_sparse_moe.experts.63.w1", "model.layers.31.block_sparse_moe.experts.64.w1", "model.layers.31.block_sparse_moe.experts.65.w1", "model.layers.31.block_sparse_moe.experts.66.w1", "model.layers.31.block_sparse_moe.experts.67.w1", "model.layers.31.block_sparse_moe.experts.68.w1", "model.layers.31.block_sparse_moe.experts.69.w1", "model.layers.31.block_sparse_moe.experts.70.w1", "model.layers.31.block_sparse_moe.experts.71.w1", "model.layers.31.block_sparse_moe.experts.72.w1", "model.layers.31.block_sparse_moe.experts.73.w1", "model.layers.31.block_sparse_moe.experts.74.w1", "model.layers.31.block_sparse_moe.experts.75.w1", "model.layers.31.block_sparse_moe.experts.76.w1", "model.layers.31.block_sparse_moe.experts.77.w1", "model.layers.31.block_sparse_moe.experts.78.w1", "model.layers.31.block_sparse_moe.experts.79.w1", "model.layers.31.block_sparse_moe.experts.80.w1", "model.layers.31.block_sparse_moe.experts.81.w1", "model.layers.31.block_sparse_moe.experts.82.w1", "model.layers.31.block_sparse_moe.experts.83.w1", "model.layers.31.block_sparse_moe.experts.84.w1", "model.layers.31.block_sparse_moe.experts.85.w1", "model.layers.31.block_sparse_moe.experts.86.w1", "model.layers.31.block_sparse_moe.experts.87.w1", "model.layers.31.block_sparse_moe.experts.88.w1", "model.layers.31.block_sparse_moe.experts.89.w1", "model.layers.31.block_sparse_moe.experts.90.w1", "model.layers.31.block_sparse_moe.experts.91.w1", "model.layers.31.block_sparse_moe.experts.92.w1", "model.layers.31.block_sparse_moe.experts.93.w1", "model.layers.31.block_sparse_moe.experts.94.w1", "model.layers.31.block_sparse_moe.experts.95.w1", "model.layers.31.block_sparse_moe.experts.96.w1", "model.layers.31.block_sparse_moe.experts.97.w1", "model.layers.31.block_sparse_moe.experts.98.w1", "model.layers.31.block_sparse_moe.experts.99.w1", "model.layers.31.block_sparse_moe.experts.100.w1", "model.layers.31.block_sparse_moe.experts.101.w1", "model.layers.31.block_sparse_moe.experts.102.w1", "model.layers.31.block_sparse_moe.experts.103.w1", "model.layers.31.block_sparse_moe.experts.104.w1", "model.layers.31.block_sparse_moe.experts.105.w1", "model.layers.31.block_sparse_moe.experts.106.w1", "model.layers.31.block_sparse_moe.experts.107.w1", "model.layers.31.block_sparse_moe.experts.108.w1", "model.layers.31.block_sparse_moe.experts.109.w1", "model.layers.31.block_sparse_moe.experts.110.w1", "model.layers.31.block_sparse_moe.experts.111.w1", "model.layers.31.block_sparse_moe.experts.112.w1", "model.layers.31.block_sparse_moe.experts.113.w1", "model.layers.31.block_sparse_moe.experts.114.w1", "model.layers.31.block_sparse_moe.experts.115.w1", "model.layers.31.block_sparse_moe.experts.116.w1", "model.layers.31.block_sparse_moe.experts.117.w1", "model.layers.31.block_sparse_moe.experts.118.w1", "model.layers.31.block_sparse_moe.experts.119.w1", "model.layers.31.block_sparse_moe.experts.120.w1", "model.layers.31.block_sparse_moe.experts.121.w1", "model.layers.31.block_sparse_moe.experts.122.w1", "model.layers.31.block_sparse_moe.experts.123.w1", "model.layers.31.block_sparse_moe.experts.124.w1", "model.layers.31.block_sparse_moe.experts.125.w1", "model.layers.31.block_sparse_moe.experts.126.w1", "model.layers.31.block_sparse_moe.experts.127.w1", "model.layers.31.block_sparse_moe.experts.128.w1", "model.layers.31.block_sparse_moe.experts.129.w1", "model.layers.31.block_sparse_moe.experts.130.w1", "model.layers.31.block_sparse_moe.experts.131.w1", "model.layers.31.block_sparse_moe.experts.132.w1", "model.layers.31.block_sparse_moe.experts.133.w1", "model.layers.31.block_sparse_moe.experts.134.w1", "model.layers.31.block_sparse_moe.experts.135.w1", "model.layers.31.block_sparse_moe.experts.136.w1", "model.layers.31.block_sparse_moe.experts.137.w1", "model.layers.31.block_sparse_moe.experts.138.w1", "model.layers.31.block_sparse_moe.experts.139.w1", "model.layers.31.block_sparse_moe.experts.140.w1", "model.layers.31.block_sparse_moe.experts.141.w1", "model.layers.31.block_sparse_moe.experts.142.w1", "model.layers.31.block_sparse_moe.experts.143.w1", "model.layers.31.block_sparse_moe.experts.144.w1", "model.layers.31.block_sparse_moe.experts.145.w1", "model.layers.31.block_sparse_moe.experts.146.w1", "model.layers.31.block_sparse_moe.experts.147.w1", "model.layers.31.block_sparse_moe.experts.148.w1", "model.layers.31.block_sparse_moe.experts.149.w1", "model.layers.31.block_sparse_moe.experts.150.w1", "model.layers.31.block_sparse_moe.experts.151.w1", "model.layers.31.block_sparse_moe.experts.152.w1", "model.layers.31.block_sparse_moe.experts.153.w1", "model.layers.31.block_sparse_moe.experts.154.w1", "model.layers.31.block_sparse_moe.experts.155.w1", "model.layers.31.block_sparse_moe.experts.156.w1", "model.layers.31.block_sparse_moe.experts.157.w1", "model.layers.31.block_sparse_moe.experts.158.w1", "model.layers.31.block_sparse_moe.experts.159.w1", "model.layers.31.block_sparse_moe.experts.160.w1", "model.layers.31.block_sparse_moe.experts.161.w1", "model.layers.31.block_sparse_moe.experts.162.w1", "model.layers.31.block_sparse_moe.experts.163.w1", "model.layers.31.block_sparse_moe.experts.164.w1", "model.layers.31.block_sparse_moe.experts.165.w1", "model.layers.31.block_sparse_moe.experts.166.w1", "model.layers.31.block_sparse_moe.experts.167.w1", "model.layers.31.block_sparse_moe.experts.168.w1", "model.layers.31.block_sparse_moe.experts.169.w1", "model.layers.31.block_sparse_moe.experts.170.w1", "model.layers.31.block_sparse_moe.experts.171.w1", "model.layers.31.block_sparse_moe.experts.172.w1", "model.layers.31.block_sparse_moe.experts.173.w1", "model.layers.31.block_sparse_moe.experts.174.w1", "model.layers.31.block_sparse_moe.experts.175.w1", "model.layers.31.block_sparse_moe.experts.176.w1", "model.layers.31.block_sparse_moe.experts.177.w1", "model.layers.31.block_sparse_moe.experts.178.w1", "model.layers.31.block_sparse_moe.experts.179.w1", "model.layers.31.block_sparse_moe.experts.180.w1", "model.layers.31.block_sparse_moe.experts.181.w1", "model.layers.31.block_sparse_moe.experts.182.w1", "model.layers.31.block_sparse_moe.experts.183.w1", "model.layers.31.block_sparse_moe.experts.184.w1", "model.layers.31.block_sparse_moe.experts.185.w1", "model.layers.31.block_sparse_moe.experts.186.w1", "model.layers.31.block_sparse_moe.experts.187.w1", "model.layers.31.block_sparse_moe.experts.188.w1", "model.layers.31.block_sparse_moe.experts.189.w1", "model.layers.31.block_sparse_moe.experts.190.w1", "model.layers.31.block_sparse_moe.experts.191.w1", "model.layers.31.block_sparse_moe.experts.192.w1", "model.layers.31.block_sparse_moe.experts.193.w1", "model.layers.31.block_sparse_moe.experts.194.w1", "model.layers.31.block_sparse_moe.experts.195.w1", "model.layers.31.block_sparse_moe.experts.196.w1", "model.layers.31.block_sparse_moe.experts.197.w1", "model.layers.31.block_sparse_moe.experts.198.w1", "model.layers.31.block_sparse_moe.experts.199.w1", "model.layers.31.block_sparse_moe.experts.200.w1", "model.layers.31.block_sparse_moe.experts.201.w1", "model.layers.31.block_sparse_moe.experts.202.w1", "model.layers.31.block_sparse_moe.experts.203.w1", "model.layers.31.block_sparse_moe.experts.204.w1", "model.layers.31.block_sparse_moe.experts.205.w1", "model.layers.31.block_sparse_moe.experts.206.w1", "model.layers.31.block_sparse_moe.experts.207.w1", "model.layers.31.block_sparse_moe.experts.208.w1", "model.layers.31.block_sparse_moe.experts.209.w1", "model.layers.31.block_sparse_moe.experts.210.w1", "model.layers.31.block_sparse_moe.experts.211.w1", "model.layers.31.block_sparse_moe.experts.212.w1", "model.layers.31.block_sparse_moe.experts.213.w1", "model.layers.31.block_sparse_moe.experts.214.w1", "model.layers.31.block_sparse_moe.experts.215.w1", "model.layers.31.block_sparse_moe.experts.216.w1", "model.layers.31.block_sparse_moe.experts.217.w1", "model.layers.31.block_sparse_moe.experts.218.w1", "model.layers.31.block_sparse_moe.experts.219.w1", "model.layers.31.block_sparse_moe.experts.220.w1", "model.layers.31.block_sparse_moe.experts.221.w1", "model.layers.31.block_sparse_moe.experts.222.w1", "model.layers.31.block_sparse_moe.experts.223.w1", "model.layers.31.block_sparse_moe.experts.224.w1", "model.layers.31.block_sparse_moe.experts.225.w1", "model.layers.31.block_sparse_moe.experts.226.w1", "model.layers.31.block_sparse_moe.experts.227.w1", "model.layers.31.block_sparse_moe.experts.228.w1", "model.layers.31.block_sparse_moe.experts.229.w1", "model.layers.31.block_sparse_moe.experts.230.w1", "model.layers.31.block_sparse_moe.experts.231.w1", "model.layers.31.block_sparse_moe.experts.232.w1", "model.layers.31.block_sparse_moe.experts.233.w1", "model.layers.31.block_sparse_moe.experts.234.w1", "model.layers.31.block_sparse_moe.experts.235.w1", "model.layers.31.block_sparse_moe.experts.236.w1", "model.layers.31.block_sparse_moe.experts.237.w1", "model.layers.31.block_sparse_moe.experts.238.w1", "model.layers.31.block_sparse_moe.experts.239.w1", "model.layers.31.block_sparse_moe.experts.240.w1", "model.layers.31.block_sparse_moe.experts.241.w1", "model.layers.31.block_sparse_moe.experts.242.w1", "model.layers.31.block_sparse_moe.experts.243.w1", "model.layers.31.block_sparse_moe.experts.244.w1", "model.layers.31.block_sparse_moe.experts.245.w1", "model.layers.31.block_sparse_moe.experts.246.w1", "model.layers.31.block_sparse_moe.experts.247.w1", "model.layers.31.block_sparse_moe.experts.248.w1", "model.layers.31.block_sparse_moe.experts.249.w1", "model.layers.31.block_sparse_moe.experts.250.w1", "model.layers.31.block_sparse_moe.experts.251.w1", "model.layers.31.block_sparse_moe.experts.252.w1", "model.layers.31.block_sparse_moe.experts.253.w1", "model.layers.31.block_sparse_moe.experts.254.w1", "model.layers.31.block_sparse_moe.experts.255.w1", "model.layers.31.block_sparse_moe.experts.0.w3", "model.layers.31.block_sparse_moe.experts.1.w3", "model.layers.31.block_sparse_moe.experts.2.w3", "model.layers.31.block_sparse_moe.experts.3.w3", "model.layers.31.block_sparse_moe.experts.4.w3", "model.layers.31.block_sparse_moe.experts.5.w3", "model.layers.31.block_sparse_moe.experts.6.w3", "model.layers.31.block_sparse_moe.experts.7.w3", "model.layers.31.block_sparse_moe.experts.8.w3", "model.layers.31.block_sparse_moe.experts.9.w3", "model.layers.31.block_sparse_moe.experts.10.w3", "model.layers.31.block_sparse_moe.experts.11.w3", "model.layers.31.block_sparse_moe.experts.12.w3", "model.layers.31.block_sparse_moe.experts.13.w3", "model.layers.31.block_sparse_moe.experts.14.w3", "model.layers.31.block_sparse_moe.experts.15.w3", "model.layers.31.block_sparse_moe.experts.16.w3", "model.layers.31.block_sparse_moe.experts.17.w3", "model.layers.31.block_sparse_moe.experts.18.w3", "model.layers.31.block_sparse_moe.experts.19.w3", "model.layers.31.block_sparse_moe.experts.20.w3", "model.layers.31.block_sparse_moe.experts.21.w3", "model.layers.31.block_sparse_moe.experts.22.w3", "model.layers.31.block_sparse_moe.experts.23.w3", "model.layers.31.block_sparse_moe.experts.24.w3", "model.layers.31.block_sparse_moe.experts.25.w3", "model.layers.31.block_sparse_moe.experts.26.w3", "model.layers.31.block_sparse_moe.experts.27.w3", "model.layers.31.block_sparse_moe.experts.28.w3", "model.layers.31.block_sparse_moe.experts.29.w3", "model.layers.31.block_sparse_moe.experts.30.w3", "model.layers.31.block_sparse_moe.experts.31.w3", "model.layers.31.block_sparse_moe.experts.32.w3", "model.layers.31.block_sparse_moe.experts.33.w3", "model.layers.31.block_sparse_moe.experts.34.w3", "model.layers.31.block_sparse_moe.experts.35.w3", "model.layers.31.block_sparse_moe.experts.36.w3", "model.layers.31.block_sparse_moe.experts.37.w3", "model.layers.31.block_sparse_moe.experts.38.w3", "model.layers.31.block_sparse_moe.experts.39.w3", "model.layers.31.block_sparse_moe.experts.40.w3", "model.layers.31.block_sparse_moe.experts.41.w3", "model.layers.31.block_sparse_moe.experts.42.w3", "model.layers.31.block_sparse_moe.experts.43.w3", "model.layers.31.block_sparse_moe.experts.44.w3", "model.layers.31.block_sparse_moe.experts.45.w3", "model.layers.31.block_sparse_moe.experts.46.w3", "model.layers.31.block_sparse_moe.experts.47.w3", "model.layers.31.block_sparse_moe.experts.48.w3", "model.layers.31.block_sparse_moe.experts.49.w3", "model.layers.31.block_sparse_moe.experts.50.w3", "model.layers.31.block_sparse_moe.experts.51.w3", "model.layers.31.block_sparse_moe.experts.52.w3", "model.layers.31.block_sparse_moe.experts.53.w3", "model.layers.31.block_sparse_moe.experts.54.w3", "model.layers.31.block_sparse_moe.experts.55.w3", "model.layers.31.block_sparse_moe.experts.56.w3", "model.layers.31.block_sparse_moe.experts.57.w3", "model.layers.31.block_sparse_moe.experts.58.w3", "model.layers.31.block_sparse_moe.experts.59.w3", "model.layers.31.block_sparse_moe.experts.60.w3", "model.layers.31.block_sparse_moe.experts.61.w3", "model.layers.31.block_sparse_moe.experts.62.w3", "model.layers.31.block_sparse_moe.experts.63.w3", "model.layers.31.block_sparse_moe.experts.64.w3", "model.layers.31.block_sparse_moe.experts.65.w3", "model.layers.31.block_sparse_moe.experts.66.w3", "model.layers.31.block_sparse_moe.experts.67.w3", "model.layers.31.block_sparse_moe.experts.68.w3", "model.layers.31.block_sparse_moe.experts.69.w3", "model.layers.31.block_sparse_moe.experts.70.w3", "model.layers.31.block_sparse_moe.experts.71.w3", "model.layers.31.block_sparse_moe.experts.72.w3", "model.layers.31.block_sparse_moe.experts.73.w3", "model.layers.31.block_sparse_moe.experts.74.w3", "model.layers.31.block_sparse_moe.experts.75.w3", "model.layers.31.block_sparse_moe.experts.76.w3", "model.layers.31.block_sparse_moe.experts.77.w3", "model.layers.31.block_sparse_moe.experts.78.w3", "model.layers.31.block_sparse_moe.experts.79.w3", "model.layers.31.block_sparse_moe.experts.80.w3", "model.layers.31.block_sparse_moe.experts.81.w3", "model.layers.31.block_sparse_moe.experts.82.w3", "model.layers.31.block_sparse_moe.experts.83.w3", "model.layers.31.block_sparse_moe.experts.84.w3", "model.layers.31.block_sparse_moe.experts.85.w3", "model.layers.31.block_sparse_moe.experts.86.w3", "model.layers.31.block_sparse_moe.experts.87.w3", "model.layers.31.block_sparse_moe.experts.88.w3", "model.layers.31.block_sparse_moe.experts.89.w3", "model.layers.31.block_sparse_moe.experts.90.w3", "model.layers.31.block_sparse_moe.experts.91.w3", "model.layers.31.block_sparse_moe.experts.92.w3", "model.layers.31.block_sparse_moe.experts.93.w3", "model.layers.31.block_sparse_moe.experts.94.w3", "model.layers.31.block_sparse_moe.experts.95.w3", "model.layers.31.block_sparse_moe.experts.96.w3", "model.layers.31.block_sparse_moe.experts.97.w3", "model.layers.31.block_sparse_moe.experts.98.w3", "model.layers.31.block_sparse_moe.experts.99.w3", "model.layers.31.block_sparse_moe.experts.100.w3", "model.layers.31.block_sparse_moe.experts.101.w3", "model.layers.31.block_sparse_moe.experts.102.w3", "model.layers.31.block_sparse_moe.experts.103.w3", "model.layers.31.block_sparse_moe.experts.104.w3", "model.layers.31.block_sparse_moe.experts.105.w3", "model.layers.31.block_sparse_moe.experts.106.w3", "model.layers.31.block_sparse_moe.experts.107.w3", "model.layers.31.block_sparse_moe.experts.108.w3", "model.layers.31.block_sparse_moe.experts.109.w3", "model.layers.31.block_sparse_moe.experts.110.w3", "model.layers.31.block_sparse_moe.experts.111.w3", "model.layers.31.block_sparse_moe.experts.112.w3", "model.layers.31.block_sparse_moe.experts.113.w3", "model.layers.31.block_sparse_moe.experts.114.w3", "model.layers.31.block_sparse_moe.experts.115.w3", "model.layers.31.block_sparse_moe.experts.116.w3", "model.layers.31.block_sparse_moe.experts.117.w3", "model.layers.31.block_sparse_moe.experts.118.w3", "model.layers.31.block_sparse_moe.experts.119.w3", "model.layers.31.block_sparse_moe.experts.120.w3", "model.layers.31.block_sparse_moe.experts.121.w3", "model.layers.31.block_sparse_moe.experts.122.w3", "model.layers.31.block_sparse_moe.experts.123.w3", "model.layers.31.block_sparse_moe.experts.124.w3", "model.layers.31.block_sparse_moe.experts.125.w3", "model.layers.31.block_sparse_moe.experts.126.w3", "model.layers.31.block_sparse_moe.experts.127.w3", "model.layers.31.block_sparse_moe.experts.128.w3", "model.layers.31.block_sparse_moe.experts.129.w3", "model.layers.31.block_sparse_moe.experts.130.w3", "model.layers.31.block_sparse_moe.experts.131.w3", "model.layers.31.block_sparse_moe.experts.132.w3", "model.layers.31.block_sparse_moe.experts.133.w3", "model.layers.31.block_sparse_moe.experts.134.w3", "model.layers.31.block_sparse_moe.experts.135.w3", "model.layers.31.block_sparse_moe.experts.136.w3", "model.layers.31.block_sparse_moe.experts.137.w3", "model.layers.31.block_sparse_moe.experts.138.w3", "model.layers.31.block_sparse_moe.experts.139.w3", "model.layers.31.block_sparse_moe.experts.140.w3", "model.layers.31.block_sparse_moe.experts.141.w3", "model.layers.31.block_sparse_moe.experts.142.w3", "model.layers.31.block_sparse_moe.experts.143.w3", "model.layers.31.block_sparse_moe.experts.144.w3", "model.layers.31.block_sparse_moe.experts.145.w3", "model.layers.31.block_sparse_moe.experts.146.w3", "model.layers.31.block_sparse_moe.experts.147.w3", "model.layers.31.block_sparse_moe.experts.148.w3", "model.layers.31.block_sparse_moe.experts.149.w3", "model.layers.31.block_sparse_moe.experts.150.w3", "model.layers.31.block_sparse_moe.experts.151.w3", "model.layers.31.block_sparse_moe.experts.152.w3", "model.layers.31.block_sparse_moe.experts.153.w3", "model.layers.31.block_sparse_moe.experts.154.w3", "model.layers.31.block_sparse_moe.experts.155.w3", "model.layers.31.block_sparse_moe.experts.156.w3", "model.layers.31.block_sparse_moe.experts.157.w3", "model.layers.31.block_sparse_moe.experts.158.w3", "model.layers.31.block_sparse_moe.experts.159.w3", "model.layers.31.block_sparse_moe.experts.160.w3", "model.layers.31.block_sparse_moe.experts.161.w3", "model.layers.31.block_sparse_moe.experts.162.w3", "model.layers.31.block_sparse_moe.experts.163.w3", "model.layers.31.block_sparse_moe.experts.164.w3", "model.layers.31.block_sparse_moe.experts.165.w3", "model.layers.31.block_sparse_moe.experts.166.w3", "model.layers.31.block_sparse_moe.experts.167.w3", "model.layers.31.block_sparse_moe.experts.168.w3", "model.layers.31.block_sparse_moe.experts.169.w3", "model.layers.31.block_sparse_moe.experts.170.w3", "model.layers.31.block_sparse_moe.experts.171.w3", "model.layers.31.block_sparse_moe.experts.172.w3", "model.layers.31.block_sparse_moe.experts.173.w3", "model.layers.31.block_sparse_moe.experts.174.w3", "model.layers.31.block_sparse_moe.experts.175.w3", "model.layers.31.block_sparse_moe.experts.176.w3", "model.layers.31.block_sparse_moe.experts.177.w3", "model.layers.31.block_sparse_moe.experts.178.w3", "model.layers.31.block_sparse_moe.experts.179.w3", "model.layers.31.block_sparse_moe.experts.180.w3", "model.layers.31.block_sparse_moe.experts.181.w3", "model.layers.31.block_sparse_moe.experts.182.w3", "model.layers.31.block_sparse_moe.experts.183.w3", "model.layers.31.block_sparse_moe.experts.184.w3", "model.layers.31.block_sparse_moe.experts.185.w3", "model.layers.31.block_sparse_moe.experts.186.w3", "model.layers.31.block_sparse_moe.experts.187.w3", "model.layers.31.block_sparse_moe.experts.188.w3", "model.layers.31.block_sparse_moe.experts.189.w3", "model.layers.31.block_sparse_moe.experts.190.w3", "model.layers.31.block_sparse_moe.experts.191.w3", "model.layers.31.block_sparse_moe.experts.192.w3", "model.layers.31.block_sparse_moe.experts.193.w3", "model.layers.31.block_sparse_moe.experts.194.w3", "model.layers.31.block_sparse_moe.experts.195.w3", "model.layers.31.block_sparse_moe.experts.196.w3", "model.layers.31.block_sparse_moe.experts.197.w3", "model.layers.31.block_sparse_moe.experts.198.w3", "model.layers.31.block_sparse_moe.experts.199.w3", "model.layers.31.block_sparse_moe.experts.200.w3", "model.layers.31.block_sparse_moe.experts.201.w3", "model.layers.31.block_sparse_moe.experts.202.w3", "model.layers.31.block_sparse_moe.experts.203.w3", "model.layers.31.block_sparse_moe.experts.204.w3", "model.layers.31.block_sparse_moe.experts.205.w3", "model.layers.31.block_sparse_moe.experts.206.w3", "model.layers.31.block_sparse_moe.experts.207.w3", "model.layers.31.block_sparse_moe.experts.208.w3", "model.layers.31.block_sparse_moe.experts.209.w3", "model.layers.31.block_sparse_moe.experts.210.w3", "model.layers.31.block_sparse_moe.experts.211.w3", "model.layers.31.block_sparse_moe.experts.212.w3", "model.layers.31.block_sparse_moe.experts.213.w3", "model.layers.31.block_sparse_moe.experts.214.w3", "model.layers.31.block_sparse_moe.experts.215.w3", "model.layers.31.block_sparse_moe.experts.216.w3", "model.layers.31.block_sparse_moe.experts.217.w3", "model.layers.31.block_sparse_moe.experts.218.w3", "model.layers.31.block_sparse_moe.experts.219.w3", "model.layers.31.block_sparse_moe.experts.220.w3", "model.layers.31.block_sparse_moe.experts.221.w3", "model.layers.31.block_sparse_moe.experts.222.w3", "model.layers.31.block_sparse_moe.experts.223.w3", "model.layers.31.block_sparse_moe.experts.224.w3", "model.layers.31.block_sparse_moe.experts.225.w3", "model.layers.31.block_sparse_moe.experts.226.w3", "model.layers.31.block_sparse_moe.experts.227.w3", "model.layers.31.block_sparse_moe.experts.228.w3", "model.layers.31.block_sparse_moe.experts.229.w3", "model.layers.31.block_sparse_moe.experts.230.w3", "model.layers.31.block_sparse_moe.experts.231.w3", "model.layers.31.block_sparse_moe.experts.232.w3", "model.layers.31.block_sparse_moe.experts.233.w3", "model.layers.31.block_sparse_moe.experts.234.w3", "model.layers.31.block_sparse_moe.experts.235.w3", "model.layers.31.block_sparse_moe.experts.236.w3", "model.layers.31.block_sparse_moe.experts.237.w3", "model.layers.31.block_sparse_moe.experts.238.w3", "model.layers.31.block_sparse_moe.experts.239.w3", "model.layers.31.block_sparse_moe.experts.240.w3", "model.layers.31.block_sparse_moe.experts.241.w3", "model.layers.31.block_sparse_moe.experts.242.w3", "model.layers.31.block_sparse_moe.experts.243.w3", "model.layers.31.block_sparse_moe.experts.244.w3", "model.layers.31.block_sparse_moe.experts.245.w3", "model.layers.31.block_sparse_moe.experts.246.w3", "model.layers.31.block_sparse_moe.experts.247.w3", "model.layers.31.block_sparse_moe.experts.248.w3", "model.layers.31.block_sparse_moe.experts.249.w3", "model.layers.31.block_sparse_moe.experts.250.w3", "model.layers.31.block_sparse_moe.experts.251.w3", "model.layers.31.block_sparse_moe.experts.252.w3", "model.layers.31.block_sparse_moe.experts.253.w3", "model.layers.31.block_sparse_moe.experts.254.w3", "model.layers.31.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00020487084984779913, "dbits": 2415919104 } ] }, { "idx": 159, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w2", "model.layers.31.block_sparse_moe.experts.1.w2", "model.layers.31.block_sparse_moe.experts.2.w2", "model.layers.31.block_sparse_moe.experts.3.w2", "model.layers.31.block_sparse_moe.experts.4.w2", "model.layers.31.block_sparse_moe.experts.5.w2", "model.layers.31.block_sparse_moe.experts.6.w2", "model.layers.31.block_sparse_moe.experts.7.w2", "model.layers.31.block_sparse_moe.experts.8.w2", "model.layers.31.block_sparse_moe.experts.9.w2", "model.layers.31.block_sparse_moe.experts.10.w2", "model.layers.31.block_sparse_moe.experts.11.w2", "model.layers.31.block_sparse_moe.experts.12.w2", "model.layers.31.block_sparse_moe.experts.13.w2", "model.layers.31.block_sparse_moe.experts.14.w2", "model.layers.31.block_sparse_moe.experts.15.w2", "model.layers.31.block_sparse_moe.experts.16.w2", "model.layers.31.block_sparse_moe.experts.17.w2", "model.layers.31.block_sparse_moe.experts.18.w2", "model.layers.31.block_sparse_moe.experts.19.w2", "model.layers.31.block_sparse_moe.experts.20.w2", "model.layers.31.block_sparse_moe.experts.21.w2", "model.layers.31.block_sparse_moe.experts.22.w2", "model.layers.31.block_sparse_moe.experts.23.w2", "model.layers.31.block_sparse_moe.experts.24.w2", "model.layers.31.block_sparse_moe.experts.25.w2", "model.layers.31.block_sparse_moe.experts.26.w2", "model.layers.31.block_sparse_moe.experts.27.w2", "model.layers.31.block_sparse_moe.experts.28.w2", "model.layers.31.block_sparse_moe.experts.29.w2", "model.layers.31.block_sparse_moe.experts.30.w2", "model.layers.31.block_sparse_moe.experts.31.w2", "model.layers.31.block_sparse_moe.experts.32.w2", "model.layers.31.block_sparse_moe.experts.33.w2", "model.layers.31.block_sparse_moe.experts.34.w2", "model.layers.31.block_sparse_moe.experts.35.w2", "model.layers.31.block_sparse_moe.experts.36.w2", "model.layers.31.block_sparse_moe.experts.37.w2", "model.layers.31.block_sparse_moe.experts.38.w2", "model.layers.31.block_sparse_moe.experts.39.w2", "model.layers.31.block_sparse_moe.experts.40.w2", "model.layers.31.block_sparse_moe.experts.41.w2", "model.layers.31.block_sparse_moe.experts.42.w2", "model.layers.31.block_sparse_moe.experts.43.w2", "model.layers.31.block_sparse_moe.experts.44.w2", "model.layers.31.block_sparse_moe.experts.45.w2", "model.layers.31.block_sparse_moe.experts.46.w2", "model.layers.31.block_sparse_moe.experts.47.w2", "model.layers.31.block_sparse_moe.experts.48.w2", "model.layers.31.block_sparse_moe.experts.49.w2", "model.layers.31.block_sparse_moe.experts.50.w2", "model.layers.31.block_sparse_moe.experts.51.w2", "model.layers.31.block_sparse_moe.experts.52.w2", "model.layers.31.block_sparse_moe.experts.53.w2", "model.layers.31.block_sparse_moe.experts.54.w2", "model.layers.31.block_sparse_moe.experts.55.w2", "model.layers.31.block_sparse_moe.experts.56.w2", "model.layers.31.block_sparse_moe.experts.57.w2", "model.layers.31.block_sparse_moe.experts.58.w2", "model.layers.31.block_sparse_moe.experts.59.w2", "model.layers.31.block_sparse_moe.experts.60.w2", "model.layers.31.block_sparse_moe.experts.61.w2", "model.layers.31.block_sparse_moe.experts.62.w2", "model.layers.31.block_sparse_moe.experts.63.w2", "model.layers.31.block_sparse_moe.experts.64.w2", "model.layers.31.block_sparse_moe.experts.65.w2", "model.layers.31.block_sparse_moe.experts.66.w2", "model.layers.31.block_sparse_moe.experts.67.w2", "model.layers.31.block_sparse_moe.experts.68.w2", "model.layers.31.block_sparse_moe.experts.69.w2", "model.layers.31.block_sparse_moe.experts.70.w2", "model.layers.31.block_sparse_moe.experts.71.w2", "model.layers.31.block_sparse_moe.experts.72.w2", "model.layers.31.block_sparse_moe.experts.73.w2", "model.layers.31.block_sparse_moe.experts.74.w2", "model.layers.31.block_sparse_moe.experts.75.w2", "model.layers.31.block_sparse_moe.experts.76.w2", "model.layers.31.block_sparse_moe.experts.77.w2", "model.layers.31.block_sparse_moe.experts.78.w2", "model.layers.31.block_sparse_moe.experts.79.w2", "model.layers.31.block_sparse_moe.experts.80.w2", "model.layers.31.block_sparse_moe.experts.81.w2", "model.layers.31.block_sparse_moe.experts.82.w2", "model.layers.31.block_sparse_moe.experts.83.w2", "model.layers.31.block_sparse_moe.experts.84.w2", "model.layers.31.block_sparse_moe.experts.85.w2", "model.layers.31.block_sparse_moe.experts.86.w2", "model.layers.31.block_sparse_moe.experts.87.w2", "model.layers.31.block_sparse_moe.experts.88.w2", "model.layers.31.block_sparse_moe.experts.89.w2", "model.layers.31.block_sparse_moe.experts.90.w2", "model.layers.31.block_sparse_moe.experts.91.w2", "model.layers.31.block_sparse_moe.experts.92.w2", "model.layers.31.block_sparse_moe.experts.93.w2", "model.layers.31.block_sparse_moe.experts.94.w2", "model.layers.31.block_sparse_moe.experts.95.w2", "model.layers.31.block_sparse_moe.experts.96.w2", "model.layers.31.block_sparse_moe.experts.97.w2", "model.layers.31.block_sparse_moe.experts.98.w2", "model.layers.31.block_sparse_moe.experts.99.w2", "model.layers.31.block_sparse_moe.experts.100.w2", "model.layers.31.block_sparse_moe.experts.101.w2", "model.layers.31.block_sparse_moe.experts.102.w2", "model.layers.31.block_sparse_moe.experts.103.w2", "model.layers.31.block_sparse_moe.experts.104.w2", "model.layers.31.block_sparse_moe.experts.105.w2", "model.layers.31.block_sparse_moe.experts.106.w2", "model.layers.31.block_sparse_moe.experts.107.w2", "model.layers.31.block_sparse_moe.experts.108.w2", "model.layers.31.block_sparse_moe.experts.109.w2", "model.layers.31.block_sparse_moe.experts.110.w2", "model.layers.31.block_sparse_moe.experts.111.w2", "model.layers.31.block_sparse_moe.experts.112.w2", "model.layers.31.block_sparse_moe.experts.113.w2", "model.layers.31.block_sparse_moe.experts.114.w2", "model.layers.31.block_sparse_moe.experts.115.w2", "model.layers.31.block_sparse_moe.experts.116.w2", "model.layers.31.block_sparse_moe.experts.117.w2", "model.layers.31.block_sparse_moe.experts.118.w2", "model.layers.31.block_sparse_moe.experts.119.w2", "model.layers.31.block_sparse_moe.experts.120.w2", "model.layers.31.block_sparse_moe.experts.121.w2", "model.layers.31.block_sparse_moe.experts.122.w2", "model.layers.31.block_sparse_moe.experts.123.w2", "model.layers.31.block_sparse_moe.experts.124.w2", "model.layers.31.block_sparse_moe.experts.125.w2", "model.layers.31.block_sparse_moe.experts.126.w2", "model.layers.31.block_sparse_moe.experts.127.w2", "model.layers.31.block_sparse_moe.experts.128.w2", "model.layers.31.block_sparse_moe.experts.129.w2", "model.layers.31.block_sparse_moe.experts.130.w2", "model.layers.31.block_sparse_moe.experts.131.w2", "model.layers.31.block_sparse_moe.experts.132.w2", "model.layers.31.block_sparse_moe.experts.133.w2", "model.layers.31.block_sparse_moe.experts.134.w2", "model.layers.31.block_sparse_moe.experts.135.w2", "model.layers.31.block_sparse_moe.experts.136.w2", "model.layers.31.block_sparse_moe.experts.137.w2", "model.layers.31.block_sparse_moe.experts.138.w2", "model.layers.31.block_sparse_moe.experts.139.w2", "model.layers.31.block_sparse_moe.experts.140.w2", "model.layers.31.block_sparse_moe.experts.141.w2", "model.layers.31.block_sparse_moe.experts.142.w2", "model.layers.31.block_sparse_moe.experts.143.w2", "model.layers.31.block_sparse_moe.experts.144.w2", "model.layers.31.block_sparse_moe.experts.145.w2", "model.layers.31.block_sparse_moe.experts.146.w2", "model.layers.31.block_sparse_moe.experts.147.w2", "model.layers.31.block_sparse_moe.experts.148.w2", "model.layers.31.block_sparse_moe.experts.149.w2", "model.layers.31.block_sparse_moe.experts.150.w2", "model.layers.31.block_sparse_moe.experts.151.w2", "model.layers.31.block_sparse_moe.experts.152.w2", "model.layers.31.block_sparse_moe.experts.153.w2", "model.layers.31.block_sparse_moe.experts.154.w2", "model.layers.31.block_sparse_moe.experts.155.w2", "model.layers.31.block_sparse_moe.experts.156.w2", "model.layers.31.block_sparse_moe.experts.157.w2", "model.layers.31.block_sparse_moe.experts.158.w2", "model.layers.31.block_sparse_moe.experts.159.w2", "model.layers.31.block_sparse_moe.experts.160.w2", "model.layers.31.block_sparse_moe.experts.161.w2", "model.layers.31.block_sparse_moe.experts.162.w2", "model.layers.31.block_sparse_moe.experts.163.w2", "model.layers.31.block_sparse_moe.experts.164.w2", "model.layers.31.block_sparse_moe.experts.165.w2", "model.layers.31.block_sparse_moe.experts.166.w2", "model.layers.31.block_sparse_moe.experts.167.w2", "model.layers.31.block_sparse_moe.experts.168.w2", "model.layers.31.block_sparse_moe.experts.169.w2", "model.layers.31.block_sparse_moe.experts.170.w2", "model.layers.31.block_sparse_moe.experts.171.w2", "model.layers.31.block_sparse_moe.experts.172.w2", "model.layers.31.block_sparse_moe.experts.173.w2", "model.layers.31.block_sparse_moe.experts.174.w2", "model.layers.31.block_sparse_moe.experts.175.w2", "model.layers.31.block_sparse_moe.experts.176.w2", "model.layers.31.block_sparse_moe.experts.177.w2", "model.layers.31.block_sparse_moe.experts.178.w2", "model.layers.31.block_sparse_moe.experts.179.w2", "model.layers.31.block_sparse_moe.experts.180.w2", "model.layers.31.block_sparse_moe.experts.181.w2", "model.layers.31.block_sparse_moe.experts.182.w2", "model.layers.31.block_sparse_moe.experts.183.w2", "model.layers.31.block_sparse_moe.experts.184.w2", "model.layers.31.block_sparse_moe.experts.185.w2", "model.layers.31.block_sparse_moe.experts.186.w2", "model.layers.31.block_sparse_moe.experts.187.w2", "model.layers.31.block_sparse_moe.experts.188.w2", "model.layers.31.block_sparse_moe.experts.189.w2", "model.layers.31.block_sparse_moe.experts.190.w2", "model.layers.31.block_sparse_moe.experts.191.w2", "model.layers.31.block_sparse_moe.experts.192.w2", "model.layers.31.block_sparse_moe.experts.193.w2", "model.layers.31.block_sparse_moe.experts.194.w2", "model.layers.31.block_sparse_moe.experts.195.w2", "model.layers.31.block_sparse_moe.experts.196.w2", "model.layers.31.block_sparse_moe.experts.197.w2", "model.layers.31.block_sparse_moe.experts.198.w2", "model.layers.31.block_sparse_moe.experts.199.w2", "model.layers.31.block_sparse_moe.experts.200.w2", "model.layers.31.block_sparse_moe.experts.201.w2", "model.layers.31.block_sparse_moe.experts.202.w2", "model.layers.31.block_sparse_moe.experts.203.w2", "model.layers.31.block_sparse_moe.experts.204.w2", "model.layers.31.block_sparse_moe.experts.205.w2", "model.layers.31.block_sparse_moe.experts.206.w2", "model.layers.31.block_sparse_moe.experts.207.w2", "model.layers.31.block_sparse_moe.experts.208.w2", "model.layers.31.block_sparse_moe.experts.209.w2", "model.layers.31.block_sparse_moe.experts.210.w2", "model.layers.31.block_sparse_moe.experts.211.w2", "model.layers.31.block_sparse_moe.experts.212.w2", "model.layers.31.block_sparse_moe.experts.213.w2", "model.layers.31.block_sparse_moe.experts.214.w2", "model.layers.31.block_sparse_moe.experts.215.w2", "model.layers.31.block_sparse_moe.experts.216.w2", "model.layers.31.block_sparse_moe.experts.217.w2", "model.layers.31.block_sparse_moe.experts.218.w2", "model.layers.31.block_sparse_moe.experts.219.w2", "model.layers.31.block_sparse_moe.experts.220.w2", "model.layers.31.block_sparse_moe.experts.221.w2", "model.layers.31.block_sparse_moe.experts.222.w2", "model.layers.31.block_sparse_moe.experts.223.w2", "model.layers.31.block_sparse_moe.experts.224.w2", "model.layers.31.block_sparse_moe.experts.225.w2", "model.layers.31.block_sparse_moe.experts.226.w2", "model.layers.31.block_sparse_moe.experts.227.w2", "model.layers.31.block_sparse_moe.experts.228.w2", "model.layers.31.block_sparse_moe.experts.229.w2", "model.layers.31.block_sparse_moe.experts.230.w2", "model.layers.31.block_sparse_moe.experts.231.w2", "model.layers.31.block_sparse_moe.experts.232.w2", "model.layers.31.block_sparse_moe.experts.233.w2", "model.layers.31.block_sparse_moe.experts.234.w2", "model.layers.31.block_sparse_moe.experts.235.w2", "model.layers.31.block_sparse_moe.experts.236.w2", "model.layers.31.block_sparse_moe.experts.237.w2", "model.layers.31.block_sparse_moe.experts.238.w2", "model.layers.31.block_sparse_moe.experts.239.w2", "model.layers.31.block_sparse_moe.experts.240.w2", "model.layers.31.block_sparse_moe.experts.241.w2", "model.layers.31.block_sparse_moe.experts.242.w2", "model.layers.31.block_sparse_moe.experts.243.w2", "model.layers.31.block_sparse_moe.experts.244.w2", "model.layers.31.block_sparse_moe.experts.245.w2", "model.layers.31.block_sparse_moe.experts.246.w2", "model.layers.31.block_sparse_moe.experts.247.w2", "model.layers.31.block_sparse_moe.experts.248.w2", "model.layers.31.block_sparse_moe.experts.249.w2", "model.layers.31.block_sparse_moe.experts.250.w2", "model.layers.31.block_sparse_moe.experts.251.w2", "model.layers.31.block_sparse_moe.experts.252.w2", "model.layers.31.block_sparse_moe.experts.253.w2", "model.layers.31.block_sparse_moe.experts.254.w2", "model.layers.31.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00018091313540935516, "dbits": 1207959552 } ] }, { "idx": 160, "layers": [ "model.layers.32.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0013777488842606628, "dbits": 18874368 } ] }, { "idx": 161, "layers": [ "model.layers.32.self_attn.k_proj", "model.layers.32.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0015111062675714493, "dbits": 6291456 } ] }, { "idx": 162, "layers": [ "model.layers.32.self_attn.o_proj" ], "candidates": [ { "dkld": -0.003153693117201331, "dbits": 18874368 } ] }, { "idx": 163, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w1", "model.layers.32.block_sparse_moe.experts.1.w1", "model.layers.32.block_sparse_moe.experts.2.w1", "model.layers.32.block_sparse_moe.experts.3.w1", "model.layers.32.block_sparse_moe.experts.4.w1", "model.layers.32.block_sparse_moe.experts.5.w1", "model.layers.32.block_sparse_moe.experts.6.w1", "model.layers.32.block_sparse_moe.experts.7.w1", "model.layers.32.block_sparse_moe.experts.8.w1", "model.layers.32.block_sparse_moe.experts.9.w1", "model.layers.32.block_sparse_moe.experts.10.w1", "model.layers.32.block_sparse_moe.experts.11.w1", "model.layers.32.block_sparse_moe.experts.12.w1", "model.layers.32.block_sparse_moe.experts.13.w1", "model.layers.32.block_sparse_moe.experts.14.w1", "model.layers.32.block_sparse_moe.experts.15.w1", "model.layers.32.block_sparse_moe.experts.16.w1", "model.layers.32.block_sparse_moe.experts.17.w1", "model.layers.32.block_sparse_moe.experts.18.w1", "model.layers.32.block_sparse_moe.experts.19.w1", "model.layers.32.block_sparse_moe.experts.20.w1", "model.layers.32.block_sparse_moe.experts.21.w1", "model.layers.32.block_sparse_moe.experts.22.w1", "model.layers.32.block_sparse_moe.experts.23.w1", "model.layers.32.block_sparse_moe.experts.24.w1", "model.layers.32.block_sparse_moe.experts.25.w1", "model.layers.32.block_sparse_moe.experts.26.w1", "model.layers.32.block_sparse_moe.experts.27.w1", "model.layers.32.block_sparse_moe.experts.28.w1", "model.layers.32.block_sparse_moe.experts.29.w1", "model.layers.32.block_sparse_moe.experts.30.w1", "model.layers.32.block_sparse_moe.experts.31.w1", "model.layers.32.block_sparse_moe.experts.32.w1", "model.layers.32.block_sparse_moe.experts.33.w1", "model.layers.32.block_sparse_moe.experts.34.w1", "model.layers.32.block_sparse_moe.experts.35.w1", "model.layers.32.block_sparse_moe.experts.36.w1", "model.layers.32.block_sparse_moe.experts.37.w1", "model.layers.32.block_sparse_moe.experts.38.w1", "model.layers.32.block_sparse_moe.experts.39.w1", "model.layers.32.block_sparse_moe.experts.40.w1", "model.layers.32.block_sparse_moe.experts.41.w1", "model.layers.32.block_sparse_moe.experts.42.w1", "model.layers.32.block_sparse_moe.experts.43.w1", "model.layers.32.block_sparse_moe.experts.44.w1", "model.layers.32.block_sparse_moe.experts.45.w1", "model.layers.32.block_sparse_moe.experts.46.w1", "model.layers.32.block_sparse_moe.experts.47.w1", "model.layers.32.block_sparse_moe.experts.48.w1", "model.layers.32.block_sparse_moe.experts.49.w1", "model.layers.32.block_sparse_moe.experts.50.w1", "model.layers.32.block_sparse_moe.experts.51.w1", "model.layers.32.block_sparse_moe.experts.52.w1", "model.layers.32.block_sparse_moe.experts.53.w1", "model.layers.32.block_sparse_moe.experts.54.w1", "model.layers.32.block_sparse_moe.experts.55.w1", "model.layers.32.block_sparse_moe.experts.56.w1", "model.layers.32.block_sparse_moe.experts.57.w1", "model.layers.32.block_sparse_moe.experts.58.w1", "model.layers.32.block_sparse_moe.experts.59.w1", "model.layers.32.block_sparse_moe.experts.60.w1", "model.layers.32.block_sparse_moe.experts.61.w1", "model.layers.32.block_sparse_moe.experts.62.w1", "model.layers.32.block_sparse_moe.experts.63.w1", "model.layers.32.block_sparse_moe.experts.64.w1", "model.layers.32.block_sparse_moe.experts.65.w1", "model.layers.32.block_sparse_moe.experts.66.w1", "model.layers.32.block_sparse_moe.experts.67.w1", "model.layers.32.block_sparse_moe.experts.68.w1", "model.layers.32.block_sparse_moe.experts.69.w1", "model.layers.32.block_sparse_moe.experts.70.w1", "model.layers.32.block_sparse_moe.experts.71.w1", "model.layers.32.block_sparse_moe.experts.72.w1", "model.layers.32.block_sparse_moe.experts.73.w1", "model.layers.32.block_sparse_moe.experts.74.w1", "model.layers.32.block_sparse_moe.experts.75.w1", "model.layers.32.block_sparse_moe.experts.76.w1", "model.layers.32.block_sparse_moe.experts.77.w1", "model.layers.32.block_sparse_moe.experts.78.w1", "model.layers.32.block_sparse_moe.experts.79.w1", "model.layers.32.block_sparse_moe.experts.80.w1", "model.layers.32.block_sparse_moe.experts.81.w1", "model.layers.32.block_sparse_moe.experts.82.w1", "model.layers.32.block_sparse_moe.experts.83.w1", "model.layers.32.block_sparse_moe.experts.84.w1", "model.layers.32.block_sparse_moe.experts.85.w1", "model.layers.32.block_sparse_moe.experts.86.w1", "model.layers.32.block_sparse_moe.experts.87.w1", "model.layers.32.block_sparse_moe.experts.88.w1", "model.layers.32.block_sparse_moe.experts.89.w1", "model.layers.32.block_sparse_moe.experts.90.w1", "model.layers.32.block_sparse_moe.experts.91.w1", "model.layers.32.block_sparse_moe.experts.92.w1", "model.layers.32.block_sparse_moe.experts.93.w1", "model.layers.32.block_sparse_moe.experts.94.w1", "model.layers.32.block_sparse_moe.experts.95.w1", "model.layers.32.block_sparse_moe.experts.96.w1", "model.layers.32.block_sparse_moe.experts.97.w1", "model.layers.32.block_sparse_moe.experts.98.w1", "model.layers.32.block_sparse_moe.experts.99.w1", "model.layers.32.block_sparse_moe.experts.100.w1", "model.layers.32.block_sparse_moe.experts.101.w1", "model.layers.32.block_sparse_moe.experts.102.w1", "model.layers.32.block_sparse_moe.experts.103.w1", "model.layers.32.block_sparse_moe.experts.104.w1", "model.layers.32.block_sparse_moe.experts.105.w1", "model.layers.32.block_sparse_moe.experts.106.w1", "model.layers.32.block_sparse_moe.experts.107.w1", "model.layers.32.block_sparse_moe.experts.108.w1", "model.layers.32.block_sparse_moe.experts.109.w1", "model.layers.32.block_sparse_moe.experts.110.w1", "model.layers.32.block_sparse_moe.experts.111.w1", "model.layers.32.block_sparse_moe.experts.112.w1", "model.layers.32.block_sparse_moe.experts.113.w1", "model.layers.32.block_sparse_moe.experts.114.w1", "model.layers.32.block_sparse_moe.experts.115.w1", "model.layers.32.block_sparse_moe.experts.116.w1", "model.layers.32.block_sparse_moe.experts.117.w1", "model.layers.32.block_sparse_moe.experts.118.w1", "model.layers.32.block_sparse_moe.experts.119.w1", "model.layers.32.block_sparse_moe.experts.120.w1", "model.layers.32.block_sparse_moe.experts.121.w1", "model.layers.32.block_sparse_moe.experts.122.w1", "model.layers.32.block_sparse_moe.experts.123.w1", "model.layers.32.block_sparse_moe.experts.124.w1", "model.layers.32.block_sparse_moe.experts.125.w1", "model.layers.32.block_sparse_moe.experts.126.w1", "model.layers.32.block_sparse_moe.experts.127.w1", "model.layers.32.block_sparse_moe.experts.128.w1", "model.layers.32.block_sparse_moe.experts.129.w1", "model.layers.32.block_sparse_moe.experts.130.w1", "model.layers.32.block_sparse_moe.experts.131.w1", "model.layers.32.block_sparse_moe.experts.132.w1", "model.layers.32.block_sparse_moe.experts.133.w1", "model.layers.32.block_sparse_moe.experts.134.w1", "model.layers.32.block_sparse_moe.experts.135.w1", "model.layers.32.block_sparse_moe.experts.136.w1", "model.layers.32.block_sparse_moe.experts.137.w1", "model.layers.32.block_sparse_moe.experts.138.w1", "model.layers.32.block_sparse_moe.experts.139.w1", "model.layers.32.block_sparse_moe.experts.140.w1", "model.layers.32.block_sparse_moe.experts.141.w1", "model.layers.32.block_sparse_moe.experts.142.w1", "model.layers.32.block_sparse_moe.experts.143.w1", "model.layers.32.block_sparse_moe.experts.144.w1", "model.layers.32.block_sparse_moe.experts.145.w1", "model.layers.32.block_sparse_moe.experts.146.w1", "model.layers.32.block_sparse_moe.experts.147.w1", "model.layers.32.block_sparse_moe.experts.148.w1", "model.layers.32.block_sparse_moe.experts.149.w1", "model.layers.32.block_sparse_moe.experts.150.w1", "model.layers.32.block_sparse_moe.experts.151.w1", "model.layers.32.block_sparse_moe.experts.152.w1", "model.layers.32.block_sparse_moe.experts.153.w1", "model.layers.32.block_sparse_moe.experts.154.w1", "model.layers.32.block_sparse_moe.experts.155.w1", "model.layers.32.block_sparse_moe.experts.156.w1", "model.layers.32.block_sparse_moe.experts.157.w1", "model.layers.32.block_sparse_moe.experts.158.w1", "model.layers.32.block_sparse_moe.experts.159.w1", "model.layers.32.block_sparse_moe.experts.160.w1", "model.layers.32.block_sparse_moe.experts.161.w1", "model.layers.32.block_sparse_moe.experts.162.w1", "model.layers.32.block_sparse_moe.experts.163.w1", "model.layers.32.block_sparse_moe.experts.164.w1", "model.layers.32.block_sparse_moe.experts.165.w1", "model.layers.32.block_sparse_moe.experts.166.w1", "model.layers.32.block_sparse_moe.experts.167.w1", "model.layers.32.block_sparse_moe.experts.168.w1", "model.layers.32.block_sparse_moe.experts.169.w1", "model.layers.32.block_sparse_moe.experts.170.w1", "model.layers.32.block_sparse_moe.experts.171.w1", "model.layers.32.block_sparse_moe.experts.172.w1", "model.layers.32.block_sparse_moe.experts.173.w1", "model.layers.32.block_sparse_moe.experts.174.w1", "model.layers.32.block_sparse_moe.experts.175.w1", "model.layers.32.block_sparse_moe.experts.176.w1", "model.layers.32.block_sparse_moe.experts.177.w1", "model.layers.32.block_sparse_moe.experts.178.w1", "model.layers.32.block_sparse_moe.experts.179.w1", "model.layers.32.block_sparse_moe.experts.180.w1", "model.layers.32.block_sparse_moe.experts.181.w1", "model.layers.32.block_sparse_moe.experts.182.w1", "model.layers.32.block_sparse_moe.experts.183.w1", "model.layers.32.block_sparse_moe.experts.184.w1", "model.layers.32.block_sparse_moe.experts.185.w1", "model.layers.32.block_sparse_moe.experts.186.w1", "model.layers.32.block_sparse_moe.experts.187.w1", "model.layers.32.block_sparse_moe.experts.188.w1", "model.layers.32.block_sparse_moe.experts.189.w1", "model.layers.32.block_sparse_moe.experts.190.w1", "model.layers.32.block_sparse_moe.experts.191.w1", "model.layers.32.block_sparse_moe.experts.192.w1", "model.layers.32.block_sparse_moe.experts.193.w1", "model.layers.32.block_sparse_moe.experts.194.w1", "model.layers.32.block_sparse_moe.experts.195.w1", "model.layers.32.block_sparse_moe.experts.196.w1", "model.layers.32.block_sparse_moe.experts.197.w1", "model.layers.32.block_sparse_moe.experts.198.w1", "model.layers.32.block_sparse_moe.experts.199.w1", "model.layers.32.block_sparse_moe.experts.200.w1", "model.layers.32.block_sparse_moe.experts.201.w1", "model.layers.32.block_sparse_moe.experts.202.w1", "model.layers.32.block_sparse_moe.experts.203.w1", "model.layers.32.block_sparse_moe.experts.204.w1", "model.layers.32.block_sparse_moe.experts.205.w1", "model.layers.32.block_sparse_moe.experts.206.w1", "model.layers.32.block_sparse_moe.experts.207.w1", "model.layers.32.block_sparse_moe.experts.208.w1", "model.layers.32.block_sparse_moe.experts.209.w1", "model.layers.32.block_sparse_moe.experts.210.w1", "model.layers.32.block_sparse_moe.experts.211.w1", "model.layers.32.block_sparse_moe.experts.212.w1", "model.layers.32.block_sparse_moe.experts.213.w1", "model.layers.32.block_sparse_moe.experts.214.w1", "model.layers.32.block_sparse_moe.experts.215.w1", "model.layers.32.block_sparse_moe.experts.216.w1", "model.layers.32.block_sparse_moe.experts.217.w1", "model.layers.32.block_sparse_moe.experts.218.w1", "model.layers.32.block_sparse_moe.experts.219.w1", "model.layers.32.block_sparse_moe.experts.220.w1", "model.layers.32.block_sparse_moe.experts.221.w1", "model.layers.32.block_sparse_moe.experts.222.w1", "model.layers.32.block_sparse_moe.experts.223.w1", "model.layers.32.block_sparse_moe.experts.224.w1", "model.layers.32.block_sparse_moe.experts.225.w1", "model.layers.32.block_sparse_moe.experts.226.w1", "model.layers.32.block_sparse_moe.experts.227.w1", "model.layers.32.block_sparse_moe.experts.228.w1", "model.layers.32.block_sparse_moe.experts.229.w1", "model.layers.32.block_sparse_moe.experts.230.w1", "model.layers.32.block_sparse_moe.experts.231.w1", "model.layers.32.block_sparse_moe.experts.232.w1", "model.layers.32.block_sparse_moe.experts.233.w1", "model.layers.32.block_sparse_moe.experts.234.w1", "model.layers.32.block_sparse_moe.experts.235.w1", "model.layers.32.block_sparse_moe.experts.236.w1", "model.layers.32.block_sparse_moe.experts.237.w1", "model.layers.32.block_sparse_moe.experts.238.w1", "model.layers.32.block_sparse_moe.experts.239.w1", "model.layers.32.block_sparse_moe.experts.240.w1", "model.layers.32.block_sparse_moe.experts.241.w1", "model.layers.32.block_sparse_moe.experts.242.w1", "model.layers.32.block_sparse_moe.experts.243.w1", "model.layers.32.block_sparse_moe.experts.244.w1", "model.layers.32.block_sparse_moe.experts.245.w1", "model.layers.32.block_sparse_moe.experts.246.w1", "model.layers.32.block_sparse_moe.experts.247.w1", "model.layers.32.block_sparse_moe.experts.248.w1", "model.layers.32.block_sparse_moe.experts.249.w1", "model.layers.32.block_sparse_moe.experts.250.w1", "model.layers.32.block_sparse_moe.experts.251.w1", "model.layers.32.block_sparse_moe.experts.252.w1", "model.layers.32.block_sparse_moe.experts.253.w1", "model.layers.32.block_sparse_moe.experts.254.w1", "model.layers.32.block_sparse_moe.experts.255.w1", "model.layers.32.block_sparse_moe.experts.0.w3", "model.layers.32.block_sparse_moe.experts.1.w3", "model.layers.32.block_sparse_moe.experts.2.w3", "model.layers.32.block_sparse_moe.experts.3.w3", "model.layers.32.block_sparse_moe.experts.4.w3", "model.layers.32.block_sparse_moe.experts.5.w3", "model.layers.32.block_sparse_moe.experts.6.w3", "model.layers.32.block_sparse_moe.experts.7.w3", "model.layers.32.block_sparse_moe.experts.8.w3", "model.layers.32.block_sparse_moe.experts.9.w3", "model.layers.32.block_sparse_moe.experts.10.w3", "model.layers.32.block_sparse_moe.experts.11.w3", "model.layers.32.block_sparse_moe.experts.12.w3", "model.layers.32.block_sparse_moe.experts.13.w3", "model.layers.32.block_sparse_moe.experts.14.w3", "model.layers.32.block_sparse_moe.experts.15.w3", "model.layers.32.block_sparse_moe.experts.16.w3", "model.layers.32.block_sparse_moe.experts.17.w3", "model.layers.32.block_sparse_moe.experts.18.w3", "model.layers.32.block_sparse_moe.experts.19.w3", "model.layers.32.block_sparse_moe.experts.20.w3", "model.layers.32.block_sparse_moe.experts.21.w3", "model.layers.32.block_sparse_moe.experts.22.w3", "model.layers.32.block_sparse_moe.experts.23.w3", "model.layers.32.block_sparse_moe.experts.24.w3", "model.layers.32.block_sparse_moe.experts.25.w3", "model.layers.32.block_sparse_moe.experts.26.w3", "model.layers.32.block_sparse_moe.experts.27.w3", "model.layers.32.block_sparse_moe.experts.28.w3", "model.layers.32.block_sparse_moe.experts.29.w3", "model.layers.32.block_sparse_moe.experts.30.w3", "model.layers.32.block_sparse_moe.experts.31.w3", "model.layers.32.block_sparse_moe.experts.32.w3", "model.layers.32.block_sparse_moe.experts.33.w3", "model.layers.32.block_sparse_moe.experts.34.w3", "model.layers.32.block_sparse_moe.experts.35.w3", "model.layers.32.block_sparse_moe.experts.36.w3", "model.layers.32.block_sparse_moe.experts.37.w3", "model.layers.32.block_sparse_moe.experts.38.w3", "model.layers.32.block_sparse_moe.experts.39.w3", "model.layers.32.block_sparse_moe.experts.40.w3", "model.layers.32.block_sparse_moe.experts.41.w3", "model.layers.32.block_sparse_moe.experts.42.w3", "model.layers.32.block_sparse_moe.experts.43.w3", "model.layers.32.block_sparse_moe.experts.44.w3", "model.layers.32.block_sparse_moe.experts.45.w3", "model.layers.32.block_sparse_moe.experts.46.w3", "model.layers.32.block_sparse_moe.experts.47.w3", "model.layers.32.block_sparse_moe.experts.48.w3", "model.layers.32.block_sparse_moe.experts.49.w3", "model.layers.32.block_sparse_moe.experts.50.w3", "model.layers.32.block_sparse_moe.experts.51.w3", "model.layers.32.block_sparse_moe.experts.52.w3", "model.layers.32.block_sparse_moe.experts.53.w3", "model.layers.32.block_sparse_moe.experts.54.w3", "model.layers.32.block_sparse_moe.experts.55.w3", "model.layers.32.block_sparse_moe.experts.56.w3", "model.layers.32.block_sparse_moe.experts.57.w3", "model.layers.32.block_sparse_moe.experts.58.w3", "model.layers.32.block_sparse_moe.experts.59.w3", "model.layers.32.block_sparse_moe.experts.60.w3", "model.layers.32.block_sparse_moe.experts.61.w3", "model.layers.32.block_sparse_moe.experts.62.w3", "model.layers.32.block_sparse_moe.experts.63.w3", "model.layers.32.block_sparse_moe.experts.64.w3", "model.layers.32.block_sparse_moe.experts.65.w3", "model.layers.32.block_sparse_moe.experts.66.w3", "model.layers.32.block_sparse_moe.experts.67.w3", "model.layers.32.block_sparse_moe.experts.68.w3", "model.layers.32.block_sparse_moe.experts.69.w3", "model.layers.32.block_sparse_moe.experts.70.w3", "model.layers.32.block_sparse_moe.experts.71.w3", "model.layers.32.block_sparse_moe.experts.72.w3", "model.layers.32.block_sparse_moe.experts.73.w3", "model.layers.32.block_sparse_moe.experts.74.w3", "model.layers.32.block_sparse_moe.experts.75.w3", "model.layers.32.block_sparse_moe.experts.76.w3", "model.layers.32.block_sparse_moe.experts.77.w3", "model.layers.32.block_sparse_moe.experts.78.w3", "model.layers.32.block_sparse_moe.experts.79.w3", "model.layers.32.block_sparse_moe.experts.80.w3", "model.layers.32.block_sparse_moe.experts.81.w3", "model.layers.32.block_sparse_moe.experts.82.w3", "model.layers.32.block_sparse_moe.experts.83.w3", "model.layers.32.block_sparse_moe.experts.84.w3", "model.layers.32.block_sparse_moe.experts.85.w3", "model.layers.32.block_sparse_moe.experts.86.w3", "model.layers.32.block_sparse_moe.experts.87.w3", "model.layers.32.block_sparse_moe.experts.88.w3", "model.layers.32.block_sparse_moe.experts.89.w3", "model.layers.32.block_sparse_moe.experts.90.w3", "model.layers.32.block_sparse_moe.experts.91.w3", "model.layers.32.block_sparse_moe.experts.92.w3", "model.layers.32.block_sparse_moe.experts.93.w3", "model.layers.32.block_sparse_moe.experts.94.w3", "model.layers.32.block_sparse_moe.experts.95.w3", "model.layers.32.block_sparse_moe.experts.96.w3", "model.layers.32.block_sparse_moe.experts.97.w3", "model.layers.32.block_sparse_moe.experts.98.w3", "model.layers.32.block_sparse_moe.experts.99.w3", "model.layers.32.block_sparse_moe.experts.100.w3", "model.layers.32.block_sparse_moe.experts.101.w3", "model.layers.32.block_sparse_moe.experts.102.w3", "model.layers.32.block_sparse_moe.experts.103.w3", "model.layers.32.block_sparse_moe.experts.104.w3", "model.layers.32.block_sparse_moe.experts.105.w3", "model.layers.32.block_sparse_moe.experts.106.w3", "model.layers.32.block_sparse_moe.experts.107.w3", "model.layers.32.block_sparse_moe.experts.108.w3", "model.layers.32.block_sparse_moe.experts.109.w3", "model.layers.32.block_sparse_moe.experts.110.w3", "model.layers.32.block_sparse_moe.experts.111.w3", "model.layers.32.block_sparse_moe.experts.112.w3", "model.layers.32.block_sparse_moe.experts.113.w3", "model.layers.32.block_sparse_moe.experts.114.w3", "model.layers.32.block_sparse_moe.experts.115.w3", "model.layers.32.block_sparse_moe.experts.116.w3", "model.layers.32.block_sparse_moe.experts.117.w3", "model.layers.32.block_sparse_moe.experts.118.w3", "model.layers.32.block_sparse_moe.experts.119.w3", "model.layers.32.block_sparse_moe.experts.120.w3", "model.layers.32.block_sparse_moe.experts.121.w3", "model.layers.32.block_sparse_moe.experts.122.w3", "model.layers.32.block_sparse_moe.experts.123.w3", "model.layers.32.block_sparse_moe.experts.124.w3", "model.layers.32.block_sparse_moe.experts.125.w3", "model.layers.32.block_sparse_moe.experts.126.w3", "model.layers.32.block_sparse_moe.experts.127.w3", "model.layers.32.block_sparse_moe.experts.128.w3", "model.layers.32.block_sparse_moe.experts.129.w3", "model.layers.32.block_sparse_moe.experts.130.w3", "model.layers.32.block_sparse_moe.experts.131.w3", "model.layers.32.block_sparse_moe.experts.132.w3", "model.layers.32.block_sparse_moe.experts.133.w3", "model.layers.32.block_sparse_moe.experts.134.w3", "model.layers.32.block_sparse_moe.experts.135.w3", "model.layers.32.block_sparse_moe.experts.136.w3", "model.layers.32.block_sparse_moe.experts.137.w3", "model.layers.32.block_sparse_moe.experts.138.w3", "model.layers.32.block_sparse_moe.experts.139.w3", "model.layers.32.block_sparse_moe.experts.140.w3", "model.layers.32.block_sparse_moe.experts.141.w3", "model.layers.32.block_sparse_moe.experts.142.w3", "model.layers.32.block_sparse_moe.experts.143.w3", "model.layers.32.block_sparse_moe.experts.144.w3", "model.layers.32.block_sparse_moe.experts.145.w3", "model.layers.32.block_sparse_moe.experts.146.w3", "model.layers.32.block_sparse_moe.experts.147.w3", "model.layers.32.block_sparse_moe.experts.148.w3", "model.layers.32.block_sparse_moe.experts.149.w3", "model.layers.32.block_sparse_moe.experts.150.w3", "model.layers.32.block_sparse_moe.experts.151.w3", "model.layers.32.block_sparse_moe.experts.152.w3", "model.layers.32.block_sparse_moe.experts.153.w3", "model.layers.32.block_sparse_moe.experts.154.w3", "model.layers.32.block_sparse_moe.experts.155.w3", "model.layers.32.block_sparse_moe.experts.156.w3", "model.layers.32.block_sparse_moe.experts.157.w3", "model.layers.32.block_sparse_moe.experts.158.w3", "model.layers.32.block_sparse_moe.experts.159.w3", "model.layers.32.block_sparse_moe.experts.160.w3", "model.layers.32.block_sparse_moe.experts.161.w3", "model.layers.32.block_sparse_moe.experts.162.w3", "model.layers.32.block_sparse_moe.experts.163.w3", "model.layers.32.block_sparse_moe.experts.164.w3", "model.layers.32.block_sparse_moe.experts.165.w3", "model.layers.32.block_sparse_moe.experts.166.w3", "model.layers.32.block_sparse_moe.experts.167.w3", "model.layers.32.block_sparse_moe.experts.168.w3", "model.layers.32.block_sparse_moe.experts.169.w3", "model.layers.32.block_sparse_moe.experts.170.w3", "model.layers.32.block_sparse_moe.experts.171.w3", "model.layers.32.block_sparse_moe.experts.172.w3", "model.layers.32.block_sparse_moe.experts.173.w3", "model.layers.32.block_sparse_moe.experts.174.w3", "model.layers.32.block_sparse_moe.experts.175.w3", "model.layers.32.block_sparse_moe.experts.176.w3", "model.layers.32.block_sparse_moe.experts.177.w3", "model.layers.32.block_sparse_moe.experts.178.w3", "model.layers.32.block_sparse_moe.experts.179.w3", "model.layers.32.block_sparse_moe.experts.180.w3", "model.layers.32.block_sparse_moe.experts.181.w3", "model.layers.32.block_sparse_moe.experts.182.w3", "model.layers.32.block_sparse_moe.experts.183.w3", "model.layers.32.block_sparse_moe.experts.184.w3", "model.layers.32.block_sparse_moe.experts.185.w3", "model.layers.32.block_sparse_moe.experts.186.w3", "model.layers.32.block_sparse_moe.experts.187.w3", "model.layers.32.block_sparse_moe.experts.188.w3", "model.layers.32.block_sparse_moe.experts.189.w3", "model.layers.32.block_sparse_moe.experts.190.w3", "model.layers.32.block_sparse_moe.experts.191.w3", "model.layers.32.block_sparse_moe.experts.192.w3", "model.layers.32.block_sparse_moe.experts.193.w3", "model.layers.32.block_sparse_moe.experts.194.w3", "model.layers.32.block_sparse_moe.experts.195.w3", "model.layers.32.block_sparse_moe.experts.196.w3", "model.layers.32.block_sparse_moe.experts.197.w3", "model.layers.32.block_sparse_moe.experts.198.w3", "model.layers.32.block_sparse_moe.experts.199.w3", "model.layers.32.block_sparse_moe.experts.200.w3", "model.layers.32.block_sparse_moe.experts.201.w3", "model.layers.32.block_sparse_moe.experts.202.w3", "model.layers.32.block_sparse_moe.experts.203.w3", "model.layers.32.block_sparse_moe.experts.204.w3", "model.layers.32.block_sparse_moe.experts.205.w3", "model.layers.32.block_sparse_moe.experts.206.w3", "model.layers.32.block_sparse_moe.experts.207.w3", "model.layers.32.block_sparse_moe.experts.208.w3", "model.layers.32.block_sparse_moe.experts.209.w3", "model.layers.32.block_sparse_moe.experts.210.w3", "model.layers.32.block_sparse_moe.experts.211.w3", "model.layers.32.block_sparse_moe.experts.212.w3", "model.layers.32.block_sparse_moe.experts.213.w3", "model.layers.32.block_sparse_moe.experts.214.w3", "model.layers.32.block_sparse_moe.experts.215.w3", "model.layers.32.block_sparse_moe.experts.216.w3", "model.layers.32.block_sparse_moe.experts.217.w3", "model.layers.32.block_sparse_moe.experts.218.w3", "model.layers.32.block_sparse_moe.experts.219.w3", "model.layers.32.block_sparse_moe.experts.220.w3", "model.layers.32.block_sparse_moe.experts.221.w3", "model.layers.32.block_sparse_moe.experts.222.w3", "model.layers.32.block_sparse_moe.experts.223.w3", "model.layers.32.block_sparse_moe.experts.224.w3", "model.layers.32.block_sparse_moe.experts.225.w3", "model.layers.32.block_sparse_moe.experts.226.w3", "model.layers.32.block_sparse_moe.experts.227.w3", "model.layers.32.block_sparse_moe.experts.228.w3", "model.layers.32.block_sparse_moe.experts.229.w3", "model.layers.32.block_sparse_moe.experts.230.w3", "model.layers.32.block_sparse_moe.experts.231.w3", "model.layers.32.block_sparse_moe.experts.232.w3", "model.layers.32.block_sparse_moe.experts.233.w3", "model.layers.32.block_sparse_moe.experts.234.w3", "model.layers.32.block_sparse_moe.experts.235.w3", "model.layers.32.block_sparse_moe.experts.236.w3", "model.layers.32.block_sparse_moe.experts.237.w3", "model.layers.32.block_sparse_moe.experts.238.w3", "model.layers.32.block_sparse_moe.experts.239.w3", "model.layers.32.block_sparse_moe.experts.240.w3", "model.layers.32.block_sparse_moe.experts.241.w3", "model.layers.32.block_sparse_moe.experts.242.w3", "model.layers.32.block_sparse_moe.experts.243.w3", "model.layers.32.block_sparse_moe.experts.244.w3", "model.layers.32.block_sparse_moe.experts.245.w3", "model.layers.32.block_sparse_moe.experts.246.w3", "model.layers.32.block_sparse_moe.experts.247.w3", "model.layers.32.block_sparse_moe.experts.248.w3", "model.layers.32.block_sparse_moe.experts.249.w3", "model.layers.32.block_sparse_moe.experts.250.w3", "model.layers.32.block_sparse_moe.experts.251.w3", "model.layers.32.block_sparse_moe.experts.252.w3", "model.layers.32.block_sparse_moe.experts.253.w3", "model.layers.32.block_sparse_moe.experts.254.w3", "model.layers.32.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0007541544735431699, "dbits": 2415919104 } ] }, { "idx": 164, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w2", "model.layers.32.block_sparse_moe.experts.1.w2", "model.layers.32.block_sparse_moe.experts.2.w2", "model.layers.32.block_sparse_moe.experts.3.w2", "model.layers.32.block_sparse_moe.experts.4.w2", "model.layers.32.block_sparse_moe.experts.5.w2", "model.layers.32.block_sparse_moe.experts.6.w2", "model.layers.32.block_sparse_moe.experts.7.w2", "model.layers.32.block_sparse_moe.experts.8.w2", "model.layers.32.block_sparse_moe.experts.9.w2", "model.layers.32.block_sparse_moe.experts.10.w2", "model.layers.32.block_sparse_moe.experts.11.w2", "model.layers.32.block_sparse_moe.experts.12.w2", "model.layers.32.block_sparse_moe.experts.13.w2", "model.layers.32.block_sparse_moe.experts.14.w2", "model.layers.32.block_sparse_moe.experts.15.w2", "model.layers.32.block_sparse_moe.experts.16.w2", "model.layers.32.block_sparse_moe.experts.17.w2", "model.layers.32.block_sparse_moe.experts.18.w2", "model.layers.32.block_sparse_moe.experts.19.w2", "model.layers.32.block_sparse_moe.experts.20.w2", "model.layers.32.block_sparse_moe.experts.21.w2", "model.layers.32.block_sparse_moe.experts.22.w2", "model.layers.32.block_sparse_moe.experts.23.w2", "model.layers.32.block_sparse_moe.experts.24.w2", "model.layers.32.block_sparse_moe.experts.25.w2", "model.layers.32.block_sparse_moe.experts.26.w2", "model.layers.32.block_sparse_moe.experts.27.w2", "model.layers.32.block_sparse_moe.experts.28.w2", "model.layers.32.block_sparse_moe.experts.29.w2", "model.layers.32.block_sparse_moe.experts.30.w2", "model.layers.32.block_sparse_moe.experts.31.w2", "model.layers.32.block_sparse_moe.experts.32.w2", "model.layers.32.block_sparse_moe.experts.33.w2", "model.layers.32.block_sparse_moe.experts.34.w2", "model.layers.32.block_sparse_moe.experts.35.w2", "model.layers.32.block_sparse_moe.experts.36.w2", "model.layers.32.block_sparse_moe.experts.37.w2", "model.layers.32.block_sparse_moe.experts.38.w2", "model.layers.32.block_sparse_moe.experts.39.w2", "model.layers.32.block_sparse_moe.experts.40.w2", "model.layers.32.block_sparse_moe.experts.41.w2", "model.layers.32.block_sparse_moe.experts.42.w2", "model.layers.32.block_sparse_moe.experts.43.w2", "model.layers.32.block_sparse_moe.experts.44.w2", "model.layers.32.block_sparse_moe.experts.45.w2", "model.layers.32.block_sparse_moe.experts.46.w2", "model.layers.32.block_sparse_moe.experts.47.w2", "model.layers.32.block_sparse_moe.experts.48.w2", "model.layers.32.block_sparse_moe.experts.49.w2", "model.layers.32.block_sparse_moe.experts.50.w2", "model.layers.32.block_sparse_moe.experts.51.w2", "model.layers.32.block_sparse_moe.experts.52.w2", "model.layers.32.block_sparse_moe.experts.53.w2", "model.layers.32.block_sparse_moe.experts.54.w2", "model.layers.32.block_sparse_moe.experts.55.w2", "model.layers.32.block_sparse_moe.experts.56.w2", "model.layers.32.block_sparse_moe.experts.57.w2", "model.layers.32.block_sparse_moe.experts.58.w2", "model.layers.32.block_sparse_moe.experts.59.w2", "model.layers.32.block_sparse_moe.experts.60.w2", "model.layers.32.block_sparse_moe.experts.61.w2", "model.layers.32.block_sparse_moe.experts.62.w2", "model.layers.32.block_sparse_moe.experts.63.w2", "model.layers.32.block_sparse_moe.experts.64.w2", "model.layers.32.block_sparse_moe.experts.65.w2", "model.layers.32.block_sparse_moe.experts.66.w2", "model.layers.32.block_sparse_moe.experts.67.w2", "model.layers.32.block_sparse_moe.experts.68.w2", "model.layers.32.block_sparse_moe.experts.69.w2", "model.layers.32.block_sparse_moe.experts.70.w2", "model.layers.32.block_sparse_moe.experts.71.w2", "model.layers.32.block_sparse_moe.experts.72.w2", "model.layers.32.block_sparse_moe.experts.73.w2", "model.layers.32.block_sparse_moe.experts.74.w2", "model.layers.32.block_sparse_moe.experts.75.w2", "model.layers.32.block_sparse_moe.experts.76.w2", "model.layers.32.block_sparse_moe.experts.77.w2", "model.layers.32.block_sparse_moe.experts.78.w2", "model.layers.32.block_sparse_moe.experts.79.w2", "model.layers.32.block_sparse_moe.experts.80.w2", "model.layers.32.block_sparse_moe.experts.81.w2", "model.layers.32.block_sparse_moe.experts.82.w2", "model.layers.32.block_sparse_moe.experts.83.w2", "model.layers.32.block_sparse_moe.experts.84.w2", "model.layers.32.block_sparse_moe.experts.85.w2", "model.layers.32.block_sparse_moe.experts.86.w2", "model.layers.32.block_sparse_moe.experts.87.w2", "model.layers.32.block_sparse_moe.experts.88.w2", "model.layers.32.block_sparse_moe.experts.89.w2", "model.layers.32.block_sparse_moe.experts.90.w2", "model.layers.32.block_sparse_moe.experts.91.w2", "model.layers.32.block_sparse_moe.experts.92.w2", "model.layers.32.block_sparse_moe.experts.93.w2", "model.layers.32.block_sparse_moe.experts.94.w2", "model.layers.32.block_sparse_moe.experts.95.w2", "model.layers.32.block_sparse_moe.experts.96.w2", "model.layers.32.block_sparse_moe.experts.97.w2", "model.layers.32.block_sparse_moe.experts.98.w2", "model.layers.32.block_sparse_moe.experts.99.w2", "model.layers.32.block_sparse_moe.experts.100.w2", "model.layers.32.block_sparse_moe.experts.101.w2", "model.layers.32.block_sparse_moe.experts.102.w2", "model.layers.32.block_sparse_moe.experts.103.w2", "model.layers.32.block_sparse_moe.experts.104.w2", "model.layers.32.block_sparse_moe.experts.105.w2", "model.layers.32.block_sparse_moe.experts.106.w2", "model.layers.32.block_sparse_moe.experts.107.w2", "model.layers.32.block_sparse_moe.experts.108.w2", "model.layers.32.block_sparse_moe.experts.109.w2", "model.layers.32.block_sparse_moe.experts.110.w2", "model.layers.32.block_sparse_moe.experts.111.w2", "model.layers.32.block_sparse_moe.experts.112.w2", "model.layers.32.block_sparse_moe.experts.113.w2", "model.layers.32.block_sparse_moe.experts.114.w2", "model.layers.32.block_sparse_moe.experts.115.w2", "model.layers.32.block_sparse_moe.experts.116.w2", "model.layers.32.block_sparse_moe.experts.117.w2", "model.layers.32.block_sparse_moe.experts.118.w2", "model.layers.32.block_sparse_moe.experts.119.w2", "model.layers.32.block_sparse_moe.experts.120.w2", "model.layers.32.block_sparse_moe.experts.121.w2", "model.layers.32.block_sparse_moe.experts.122.w2", "model.layers.32.block_sparse_moe.experts.123.w2", "model.layers.32.block_sparse_moe.experts.124.w2", "model.layers.32.block_sparse_moe.experts.125.w2", "model.layers.32.block_sparse_moe.experts.126.w2", "model.layers.32.block_sparse_moe.experts.127.w2", "model.layers.32.block_sparse_moe.experts.128.w2", "model.layers.32.block_sparse_moe.experts.129.w2", "model.layers.32.block_sparse_moe.experts.130.w2", "model.layers.32.block_sparse_moe.experts.131.w2", "model.layers.32.block_sparse_moe.experts.132.w2", "model.layers.32.block_sparse_moe.experts.133.w2", "model.layers.32.block_sparse_moe.experts.134.w2", "model.layers.32.block_sparse_moe.experts.135.w2", "model.layers.32.block_sparse_moe.experts.136.w2", "model.layers.32.block_sparse_moe.experts.137.w2", "model.layers.32.block_sparse_moe.experts.138.w2", "model.layers.32.block_sparse_moe.experts.139.w2", "model.layers.32.block_sparse_moe.experts.140.w2", "model.layers.32.block_sparse_moe.experts.141.w2", "model.layers.32.block_sparse_moe.experts.142.w2", "model.layers.32.block_sparse_moe.experts.143.w2", "model.layers.32.block_sparse_moe.experts.144.w2", "model.layers.32.block_sparse_moe.experts.145.w2", "model.layers.32.block_sparse_moe.experts.146.w2", "model.layers.32.block_sparse_moe.experts.147.w2", "model.layers.32.block_sparse_moe.experts.148.w2", "model.layers.32.block_sparse_moe.experts.149.w2", "model.layers.32.block_sparse_moe.experts.150.w2", "model.layers.32.block_sparse_moe.experts.151.w2", "model.layers.32.block_sparse_moe.experts.152.w2", "model.layers.32.block_sparse_moe.experts.153.w2", "model.layers.32.block_sparse_moe.experts.154.w2", "model.layers.32.block_sparse_moe.experts.155.w2", "model.layers.32.block_sparse_moe.experts.156.w2", "model.layers.32.block_sparse_moe.experts.157.w2", "model.layers.32.block_sparse_moe.experts.158.w2", "model.layers.32.block_sparse_moe.experts.159.w2", "model.layers.32.block_sparse_moe.experts.160.w2", "model.layers.32.block_sparse_moe.experts.161.w2", "model.layers.32.block_sparse_moe.experts.162.w2", "model.layers.32.block_sparse_moe.experts.163.w2", "model.layers.32.block_sparse_moe.experts.164.w2", "model.layers.32.block_sparse_moe.experts.165.w2", "model.layers.32.block_sparse_moe.experts.166.w2", "model.layers.32.block_sparse_moe.experts.167.w2", "model.layers.32.block_sparse_moe.experts.168.w2", "model.layers.32.block_sparse_moe.experts.169.w2", "model.layers.32.block_sparse_moe.experts.170.w2", "model.layers.32.block_sparse_moe.experts.171.w2", "model.layers.32.block_sparse_moe.experts.172.w2", "model.layers.32.block_sparse_moe.experts.173.w2", "model.layers.32.block_sparse_moe.experts.174.w2", "model.layers.32.block_sparse_moe.experts.175.w2", "model.layers.32.block_sparse_moe.experts.176.w2", "model.layers.32.block_sparse_moe.experts.177.w2", "model.layers.32.block_sparse_moe.experts.178.w2", "model.layers.32.block_sparse_moe.experts.179.w2", "model.layers.32.block_sparse_moe.experts.180.w2", "model.layers.32.block_sparse_moe.experts.181.w2", "model.layers.32.block_sparse_moe.experts.182.w2", "model.layers.32.block_sparse_moe.experts.183.w2", "model.layers.32.block_sparse_moe.experts.184.w2", "model.layers.32.block_sparse_moe.experts.185.w2", "model.layers.32.block_sparse_moe.experts.186.w2", "model.layers.32.block_sparse_moe.experts.187.w2", "model.layers.32.block_sparse_moe.experts.188.w2", "model.layers.32.block_sparse_moe.experts.189.w2", "model.layers.32.block_sparse_moe.experts.190.w2", "model.layers.32.block_sparse_moe.experts.191.w2", "model.layers.32.block_sparse_moe.experts.192.w2", "model.layers.32.block_sparse_moe.experts.193.w2", "model.layers.32.block_sparse_moe.experts.194.w2", "model.layers.32.block_sparse_moe.experts.195.w2", "model.layers.32.block_sparse_moe.experts.196.w2", "model.layers.32.block_sparse_moe.experts.197.w2", "model.layers.32.block_sparse_moe.experts.198.w2", "model.layers.32.block_sparse_moe.experts.199.w2", "model.layers.32.block_sparse_moe.experts.200.w2", "model.layers.32.block_sparse_moe.experts.201.w2", "model.layers.32.block_sparse_moe.experts.202.w2", "model.layers.32.block_sparse_moe.experts.203.w2", "model.layers.32.block_sparse_moe.experts.204.w2", "model.layers.32.block_sparse_moe.experts.205.w2", "model.layers.32.block_sparse_moe.experts.206.w2", "model.layers.32.block_sparse_moe.experts.207.w2", "model.layers.32.block_sparse_moe.experts.208.w2", "model.layers.32.block_sparse_moe.experts.209.w2", "model.layers.32.block_sparse_moe.experts.210.w2", "model.layers.32.block_sparse_moe.experts.211.w2", "model.layers.32.block_sparse_moe.experts.212.w2", "model.layers.32.block_sparse_moe.experts.213.w2", "model.layers.32.block_sparse_moe.experts.214.w2", "model.layers.32.block_sparse_moe.experts.215.w2", "model.layers.32.block_sparse_moe.experts.216.w2", "model.layers.32.block_sparse_moe.experts.217.w2", "model.layers.32.block_sparse_moe.experts.218.w2", "model.layers.32.block_sparse_moe.experts.219.w2", "model.layers.32.block_sparse_moe.experts.220.w2", "model.layers.32.block_sparse_moe.experts.221.w2", "model.layers.32.block_sparse_moe.experts.222.w2", "model.layers.32.block_sparse_moe.experts.223.w2", "model.layers.32.block_sparse_moe.experts.224.w2", "model.layers.32.block_sparse_moe.experts.225.w2", "model.layers.32.block_sparse_moe.experts.226.w2", "model.layers.32.block_sparse_moe.experts.227.w2", "model.layers.32.block_sparse_moe.experts.228.w2", "model.layers.32.block_sparse_moe.experts.229.w2", "model.layers.32.block_sparse_moe.experts.230.w2", "model.layers.32.block_sparse_moe.experts.231.w2", "model.layers.32.block_sparse_moe.experts.232.w2", "model.layers.32.block_sparse_moe.experts.233.w2", "model.layers.32.block_sparse_moe.experts.234.w2", "model.layers.32.block_sparse_moe.experts.235.w2", "model.layers.32.block_sparse_moe.experts.236.w2", "model.layers.32.block_sparse_moe.experts.237.w2", "model.layers.32.block_sparse_moe.experts.238.w2", "model.layers.32.block_sparse_moe.experts.239.w2", "model.layers.32.block_sparse_moe.experts.240.w2", "model.layers.32.block_sparse_moe.experts.241.w2", "model.layers.32.block_sparse_moe.experts.242.w2", "model.layers.32.block_sparse_moe.experts.243.w2", "model.layers.32.block_sparse_moe.experts.244.w2", "model.layers.32.block_sparse_moe.experts.245.w2", "model.layers.32.block_sparse_moe.experts.246.w2", "model.layers.32.block_sparse_moe.experts.247.w2", "model.layers.32.block_sparse_moe.experts.248.w2", "model.layers.32.block_sparse_moe.experts.249.w2", "model.layers.32.block_sparse_moe.experts.250.w2", "model.layers.32.block_sparse_moe.experts.251.w2", "model.layers.32.block_sparse_moe.experts.252.w2", "model.layers.32.block_sparse_moe.experts.253.w2", "model.layers.32.block_sparse_moe.experts.254.w2", "model.layers.32.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0002817403525114115, "dbits": 1207959552 } ] }, { "idx": 165, "layers": [ "model.layers.33.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0003202710300684031, "dbits": 18874368 } ] }, { "idx": 166, "layers": [ "model.layers.33.self_attn.k_proj", "model.layers.33.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00034785196185112277, "dbits": 6291456 } ] }, { "idx": 167, "layers": [ "model.layers.33.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0014206873252987917, "dbits": 18874368 } ] }, { "idx": 168, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w1", "model.layers.33.block_sparse_moe.experts.1.w1", "model.layers.33.block_sparse_moe.experts.2.w1", "model.layers.33.block_sparse_moe.experts.3.w1", "model.layers.33.block_sparse_moe.experts.4.w1", "model.layers.33.block_sparse_moe.experts.5.w1", "model.layers.33.block_sparse_moe.experts.6.w1", "model.layers.33.block_sparse_moe.experts.7.w1", "model.layers.33.block_sparse_moe.experts.8.w1", "model.layers.33.block_sparse_moe.experts.9.w1", "model.layers.33.block_sparse_moe.experts.10.w1", "model.layers.33.block_sparse_moe.experts.11.w1", "model.layers.33.block_sparse_moe.experts.12.w1", "model.layers.33.block_sparse_moe.experts.13.w1", "model.layers.33.block_sparse_moe.experts.14.w1", "model.layers.33.block_sparse_moe.experts.15.w1", "model.layers.33.block_sparse_moe.experts.16.w1", "model.layers.33.block_sparse_moe.experts.17.w1", "model.layers.33.block_sparse_moe.experts.18.w1", "model.layers.33.block_sparse_moe.experts.19.w1", "model.layers.33.block_sparse_moe.experts.20.w1", "model.layers.33.block_sparse_moe.experts.21.w1", "model.layers.33.block_sparse_moe.experts.22.w1", "model.layers.33.block_sparse_moe.experts.23.w1", "model.layers.33.block_sparse_moe.experts.24.w1", "model.layers.33.block_sparse_moe.experts.25.w1", "model.layers.33.block_sparse_moe.experts.26.w1", "model.layers.33.block_sparse_moe.experts.27.w1", "model.layers.33.block_sparse_moe.experts.28.w1", "model.layers.33.block_sparse_moe.experts.29.w1", "model.layers.33.block_sparse_moe.experts.30.w1", "model.layers.33.block_sparse_moe.experts.31.w1", "model.layers.33.block_sparse_moe.experts.32.w1", "model.layers.33.block_sparse_moe.experts.33.w1", "model.layers.33.block_sparse_moe.experts.34.w1", "model.layers.33.block_sparse_moe.experts.35.w1", "model.layers.33.block_sparse_moe.experts.36.w1", "model.layers.33.block_sparse_moe.experts.37.w1", "model.layers.33.block_sparse_moe.experts.38.w1", "model.layers.33.block_sparse_moe.experts.39.w1", "model.layers.33.block_sparse_moe.experts.40.w1", "model.layers.33.block_sparse_moe.experts.41.w1", "model.layers.33.block_sparse_moe.experts.42.w1", "model.layers.33.block_sparse_moe.experts.43.w1", "model.layers.33.block_sparse_moe.experts.44.w1", "model.layers.33.block_sparse_moe.experts.45.w1", "model.layers.33.block_sparse_moe.experts.46.w1", "model.layers.33.block_sparse_moe.experts.47.w1", "model.layers.33.block_sparse_moe.experts.48.w1", "model.layers.33.block_sparse_moe.experts.49.w1", "model.layers.33.block_sparse_moe.experts.50.w1", "model.layers.33.block_sparse_moe.experts.51.w1", "model.layers.33.block_sparse_moe.experts.52.w1", "model.layers.33.block_sparse_moe.experts.53.w1", "model.layers.33.block_sparse_moe.experts.54.w1", "model.layers.33.block_sparse_moe.experts.55.w1", "model.layers.33.block_sparse_moe.experts.56.w1", "model.layers.33.block_sparse_moe.experts.57.w1", "model.layers.33.block_sparse_moe.experts.58.w1", "model.layers.33.block_sparse_moe.experts.59.w1", "model.layers.33.block_sparse_moe.experts.60.w1", "model.layers.33.block_sparse_moe.experts.61.w1", "model.layers.33.block_sparse_moe.experts.62.w1", "model.layers.33.block_sparse_moe.experts.63.w1", "model.layers.33.block_sparse_moe.experts.64.w1", "model.layers.33.block_sparse_moe.experts.65.w1", "model.layers.33.block_sparse_moe.experts.66.w1", "model.layers.33.block_sparse_moe.experts.67.w1", "model.layers.33.block_sparse_moe.experts.68.w1", "model.layers.33.block_sparse_moe.experts.69.w1", "model.layers.33.block_sparse_moe.experts.70.w1", "model.layers.33.block_sparse_moe.experts.71.w1", "model.layers.33.block_sparse_moe.experts.72.w1", "model.layers.33.block_sparse_moe.experts.73.w1", "model.layers.33.block_sparse_moe.experts.74.w1", "model.layers.33.block_sparse_moe.experts.75.w1", "model.layers.33.block_sparse_moe.experts.76.w1", "model.layers.33.block_sparse_moe.experts.77.w1", "model.layers.33.block_sparse_moe.experts.78.w1", "model.layers.33.block_sparse_moe.experts.79.w1", "model.layers.33.block_sparse_moe.experts.80.w1", "model.layers.33.block_sparse_moe.experts.81.w1", "model.layers.33.block_sparse_moe.experts.82.w1", "model.layers.33.block_sparse_moe.experts.83.w1", "model.layers.33.block_sparse_moe.experts.84.w1", "model.layers.33.block_sparse_moe.experts.85.w1", "model.layers.33.block_sparse_moe.experts.86.w1", "model.layers.33.block_sparse_moe.experts.87.w1", "model.layers.33.block_sparse_moe.experts.88.w1", "model.layers.33.block_sparse_moe.experts.89.w1", "model.layers.33.block_sparse_moe.experts.90.w1", "model.layers.33.block_sparse_moe.experts.91.w1", "model.layers.33.block_sparse_moe.experts.92.w1", "model.layers.33.block_sparse_moe.experts.93.w1", "model.layers.33.block_sparse_moe.experts.94.w1", "model.layers.33.block_sparse_moe.experts.95.w1", "model.layers.33.block_sparse_moe.experts.96.w1", "model.layers.33.block_sparse_moe.experts.97.w1", "model.layers.33.block_sparse_moe.experts.98.w1", "model.layers.33.block_sparse_moe.experts.99.w1", "model.layers.33.block_sparse_moe.experts.100.w1", "model.layers.33.block_sparse_moe.experts.101.w1", "model.layers.33.block_sparse_moe.experts.102.w1", "model.layers.33.block_sparse_moe.experts.103.w1", "model.layers.33.block_sparse_moe.experts.104.w1", "model.layers.33.block_sparse_moe.experts.105.w1", "model.layers.33.block_sparse_moe.experts.106.w1", "model.layers.33.block_sparse_moe.experts.107.w1", "model.layers.33.block_sparse_moe.experts.108.w1", "model.layers.33.block_sparse_moe.experts.109.w1", "model.layers.33.block_sparse_moe.experts.110.w1", "model.layers.33.block_sparse_moe.experts.111.w1", "model.layers.33.block_sparse_moe.experts.112.w1", "model.layers.33.block_sparse_moe.experts.113.w1", "model.layers.33.block_sparse_moe.experts.114.w1", "model.layers.33.block_sparse_moe.experts.115.w1", "model.layers.33.block_sparse_moe.experts.116.w1", "model.layers.33.block_sparse_moe.experts.117.w1", "model.layers.33.block_sparse_moe.experts.118.w1", "model.layers.33.block_sparse_moe.experts.119.w1", "model.layers.33.block_sparse_moe.experts.120.w1", "model.layers.33.block_sparse_moe.experts.121.w1", "model.layers.33.block_sparse_moe.experts.122.w1", "model.layers.33.block_sparse_moe.experts.123.w1", "model.layers.33.block_sparse_moe.experts.124.w1", "model.layers.33.block_sparse_moe.experts.125.w1", "model.layers.33.block_sparse_moe.experts.126.w1", "model.layers.33.block_sparse_moe.experts.127.w1", "model.layers.33.block_sparse_moe.experts.128.w1", "model.layers.33.block_sparse_moe.experts.129.w1", "model.layers.33.block_sparse_moe.experts.130.w1", "model.layers.33.block_sparse_moe.experts.131.w1", "model.layers.33.block_sparse_moe.experts.132.w1", "model.layers.33.block_sparse_moe.experts.133.w1", "model.layers.33.block_sparse_moe.experts.134.w1", "model.layers.33.block_sparse_moe.experts.135.w1", "model.layers.33.block_sparse_moe.experts.136.w1", "model.layers.33.block_sparse_moe.experts.137.w1", "model.layers.33.block_sparse_moe.experts.138.w1", "model.layers.33.block_sparse_moe.experts.139.w1", "model.layers.33.block_sparse_moe.experts.140.w1", "model.layers.33.block_sparse_moe.experts.141.w1", "model.layers.33.block_sparse_moe.experts.142.w1", "model.layers.33.block_sparse_moe.experts.143.w1", "model.layers.33.block_sparse_moe.experts.144.w1", "model.layers.33.block_sparse_moe.experts.145.w1", "model.layers.33.block_sparse_moe.experts.146.w1", "model.layers.33.block_sparse_moe.experts.147.w1", "model.layers.33.block_sparse_moe.experts.148.w1", "model.layers.33.block_sparse_moe.experts.149.w1", "model.layers.33.block_sparse_moe.experts.150.w1", "model.layers.33.block_sparse_moe.experts.151.w1", "model.layers.33.block_sparse_moe.experts.152.w1", "model.layers.33.block_sparse_moe.experts.153.w1", "model.layers.33.block_sparse_moe.experts.154.w1", "model.layers.33.block_sparse_moe.experts.155.w1", "model.layers.33.block_sparse_moe.experts.156.w1", "model.layers.33.block_sparse_moe.experts.157.w1", "model.layers.33.block_sparse_moe.experts.158.w1", "model.layers.33.block_sparse_moe.experts.159.w1", "model.layers.33.block_sparse_moe.experts.160.w1", "model.layers.33.block_sparse_moe.experts.161.w1", "model.layers.33.block_sparse_moe.experts.162.w1", "model.layers.33.block_sparse_moe.experts.163.w1", "model.layers.33.block_sparse_moe.experts.164.w1", "model.layers.33.block_sparse_moe.experts.165.w1", "model.layers.33.block_sparse_moe.experts.166.w1", "model.layers.33.block_sparse_moe.experts.167.w1", "model.layers.33.block_sparse_moe.experts.168.w1", "model.layers.33.block_sparse_moe.experts.169.w1", "model.layers.33.block_sparse_moe.experts.170.w1", "model.layers.33.block_sparse_moe.experts.171.w1", "model.layers.33.block_sparse_moe.experts.172.w1", "model.layers.33.block_sparse_moe.experts.173.w1", "model.layers.33.block_sparse_moe.experts.174.w1", "model.layers.33.block_sparse_moe.experts.175.w1", "model.layers.33.block_sparse_moe.experts.176.w1", "model.layers.33.block_sparse_moe.experts.177.w1", "model.layers.33.block_sparse_moe.experts.178.w1", "model.layers.33.block_sparse_moe.experts.179.w1", "model.layers.33.block_sparse_moe.experts.180.w1", "model.layers.33.block_sparse_moe.experts.181.w1", "model.layers.33.block_sparse_moe.experts.182.w1", "model.layers.33.block_sparse_moe.experts.183.w1", "model.layers.33.block_sparse_moe.experts.184.w1", "model.layers.33.block_sparse_moe.experts.185.w1", "model.layers.33.block_sparse_moe.experts.186.w1", "model.layers.33.block_sparse_moe.experts.187.w1", "model.layers.33.block_sparse_moe.experts.188.w1", "model.layers.33.block_sparse_moe.experts.189.w1", "model.layers.33.block_sparse_moe.experts.190.w1", "model.layers.33.block_sparse_moe.experts.191.w1", "model.layers.33.block_sparse_moe.experts.192.w1", "model.layers.33.block_sparse_moe.experts.193.w1", "model.layers.33.block_sparse_moe.experts.194.w1", "model.layers.33.block_sparse_moe.experts.195.w1", "model.layers.33.block_sparse_moe.experts.196.w1", "model.layers.33.block_sparse_moe.experts.197.w1", "model.layers.33.block_sparse_moe.experts.198.w1", "model.layers.33.block_sparse_moe.experts.199.w1", "model.layers.33.block_sparse_moe.experts.200.w1", "model.layers.33.block_sparse_moe.experts.201.w1", "model.layers.33.block_sparse_moe.experts.202.w1", "model.layers.33.block_sparse_moe.experts.203.w1", "model.layers.33.block_sparse_moe.experts.204.w1", "model.layers.33.block_sparse_moe.experts.205.w1", "model.layers.33.block_sparse_moe.experts.206.w1", "model.layers.33.block_sparse_moe.experts.207.w1", "model.layers.33.block_sparse_moe.experts.208.w1", "model.layers.33.block_sparse_moe.experts.209.w1", "model.layers.33.block_sparse_moe.experts.210.w1", "model.layers.33.block_sparse_moe.experts.211.w1", "model.layers.33.block_sparse_moe.experts.212.w1", "model.layers.33.block_sparse_moe.experts.213.w1", "model.layers.33.block_sparse_moe.experts.214.w1", "model.layers.33.block_sparse_moe.experts.215.w1", "model.layers.33.block_sparse_moe.experts.216.w1", "model.layers.33.block_sparse_moe.experts.217.w1", "model.layers.33.block_sparse_moe.experts.218.w1", "model.layers.33.block_sparse_moe.experts.219.w1", "model.layers.33.block_sparse_moe.experts.220.w1", "model.layers.33.block_sparse_moe.experts.221.w1", "model.layers.33.block_sparse_moe.experts.222.w1", "model.layers.33.block_sparse_moe.experts.223.w1", "model.layers.33.block_sparse_moe.experts.224.w1", "model.layers.33.block_sparse_moe.experts.225.w1", "model.layers.33.block_sparse_moe.experts.226.w1", "model.layers.33.block_sparse_moe.experts.227.w1", "model.layers.33.block_sparse_moe.experts.228.w1", "model.layers.33.block_sparse_moe.experts.229.w1", "model.layers.33.block_sparse_moe.experts.230.w1", "model.layers.33.block_sparse_moe.experts.231.w1", "model.layers.33.block_sparse_moe.experts.232.w1", "model.layers.33.block_sparse_moe.experts.233.w1", "model.layers.33.block_sparse_moe.experts.234.w1", "model.layers.33.block_sparse_moe.experts.235.w1", "model.layers.33.block_sparse_moe.experts.236.w1", "model.layers.33.block_sparse_moe.experts.237.w1", "model.layers.33.block_sparse_moe.experts.238.w1", "model.layers.33.block_sparse_moe.experts.239.w1", "model.layers.33.block_sparse_moe.experts.240.w1", "model.layers.33.block_sparse_moe.experts.241.w1", "model.layers.33.block_sparse_moe.experts.242.w1", "model.layers.33.block_sparse_moe.experts.243.w1", "model.layers.33.block_sparse_moe.experts.244.w1", "model.layers.33.block_sparse_moe.experts.245.w1", "model.layers.33.block_sparse_moe.experts.246.w1", "model.layers.33.block_sparse_moe.experts.247.w1", "model.layers.33.block_sparse_moe.experts.248.w1", "model.layers.33.block_sparse_moe.experts.249.w1", "model.layers.33.block_sparse_moe.experts.250.w1", "model.layers.33.block_sparse_moe.experts.251.w1", "model.layers.33.block_sparse_moe.experts.252.w1", "model.layers.33.block_sparse_moe.experts.253.w1", "model.layers.33.block_sparse_moe.experts.254.w1", "model.layers.33.block_sparse_moe.experts.255.w1", "model.layers.33.block_sparse_moe.experts.0.w3", "model.layers.33.block_sparse_moe.experts.1.w3", "model.layers.33.block_sparse_moe.experts.2.w3", "model.layers.33.block_sparse_moe.experts.3.w3", "model.layers.33.block_sparse_moe.experts.4.w3", "model.layers.33.block_sparse_moe.experts.5.w3", "model.layers.33.block_sparse_moe.experts.6.w3", "model.layers.33.block_sparse_moe.experts.7.w3", "model.layers.33.block_sparse_moe.experts.8.w3", "model.layers.33.block_sparse_moe.experts.9.w3", "model.layers.33.block_sparse_moe.experts.10.w3", "model.layers.33.block_sparse_moe.experts.11.w3", "model.layers.33.block_sparse_moe.experts.12.w3", "model.layers.33.block_sparse_moe.experts.13.w3", "model.layers.33.block_sparse_moe.experts.14.w3", "model.layers.33.block_sparse_moe.experts.15.w3", "model.layers.33.block_sparse_moe.experts.16.w3", "model.layers.33.block_sparse_moe.experts.17.w3", "model.layers.33.block_sparse_moe.experts.18.w3", "model.layers.33.block_sparse_moe.experts.19.w3", "model.layers.33.block_sparse_moe.experts.20.w3", "model.layers.33.block_sparse_moe.experts.21.w3", "model.layers.33.block_sparse_moe.experts.22.w3", "model.layers.33.block_sparse_moe.experts.23.w3", "model.layers.33.block_sparse_moe.experts.24.w3", "model.layers.33.block_sparse_moe.experts.25.w3", "model.layers.33.block_sparse_moe.experts.26.w3", "model.layers.33.block_sparse_moe.experts.27.w3", "model.layers.33.block_sparse_moe.experts.28.w3", "model.layers.33.block_sparse_moe.experts.29.w3", "model.layers.33.block_sparse_moe.experts.30.w3", "model.layers.33.block_sparse_moe.experts.31.w3", "model.layers.33.block_sparse_moe.experts.32.w3", "model.layers.33.block_sparse_moe.experts.33.w3", "model.layers.33.block_sparse_moe.experts.34.w3", "model.layers.33.block_sparse_moe.experts.35.w3", "model.layers.33.block_sparse_moe.experts.36.w3", "model.layers.33.block_sparse_moe.experts.37.w3", "model.layers.33.block_sparse_moe.experts.38.w3", "model.layers.33.block_sparse_moe.experts.39.w3", "model.layers.33.block_sparse_moe.experts.40.w3", "model.layers.33.block_sparse_moe.experts.41.w3", "model.layers.33.block_sparse_moe.experts.42.w3", "model.layers.33.block_sparse_moe.experts.43.w3", "model.layers.33.block_sparse_moe.experts.44.w3", "model.layers.33.block_sparse_moe.experts.45.w3", "model.layers.33.block_sparse_moe.experts.46.w3", "model.layers.33.block_sparse_moe.experts.47.w3", "model.layers.33.block_sparse_moe.experts.48.w3", "model.layers.33.block_sparse_moe.experts.49.w3", "model.layers.33.block_sparse_moe.experts.50.w3", "model.layers.33.block_sparse_moe.experts.51.w3", "model.layers.33.block_sparse_moe.experts.52.w3", "model.layers.33.block_sparse_moe.experts.53.w3", "model.layers.33.block_sparse_moe.experts.54.w3", "model.layers.33.block_sparse_moe.experts.55.w3", "model.layers.33.block_sparse_moe.experts.56.w3", "model.layers.33.block_sparse_moe.experts.57.w3", "model.layers.33.block_sparse_moe.experts.58.w3", "model.layers.33.block_sparse_moe.experts.59.w3", "model.layers.33.block_sparse_moe.experts.60.w3", "model.layers.33.block_sparse_moe.experts.61.w3", "model.layers.33.block_sparse_moe.experts.62.w3", "model.layers.33.block_sparse_moe.experts.63.w3", "model.layers.33.block_sparse_moe.experts.64.w3", "model.layers.33.block_sparse_moe.experts.65.w3", "model.layers.33.block_sparse_moe.experts.66.w3", "model.layers.33.block_sparse_moe.experts.67.w3", "model.layers.33.block_sparse_moe.experts.68.w3", "model.layers.33.block_sparse_moe.experts.69.w3", "model.layers.33.block_sparse_moe.experts.70.w3", "model.layers.33.block_sparse_moe.experts.71.w3", "model.layers.33.block_sparse_moe.experts.72.w3", "model.layers.33.block_sparse_moe.experts.73.w3", "model.layers.33.block_sparse_moe.experts.74.w3", "model.layers.33.block_sparse_moe.experts.75.w3", "model.layers.33.block_sparse_moe.experts.76.w3", "model.layers.33.block_sparse_moe.experts.77.w3", "model.layers.33.block_sparse_moe.experts.78.w3", "model.layers.33.block_sparse_moe.experts.79.w3", "model.layers.33.block_sparse_moe.experts.80.w3", "model.layers.33.block_sparse_moe.experts.81.w3", "model.layers.33.block_sparse_moe.experts.82.w3", "model.layers.33.block_sparse_moe.experts.83.w3", "model.layers.33.block_sparse_moe.experts.84.w3", "model.layers.33.block_sparse_moe.experts.85.w3", "model.layers.33.block_sparse_moe.experts.86.w3", "model.layers.33.block_sparse_moe.experts.87.w3", "model.layers.33.block_sparse_moe.experts.88.w3", "model.layers.33.block_sparse_moe.experts.89.w3", "model.layers.33.block_sparse_moe.experts.90.w3", "model.layers.33.block_sparse_moe.experts.91.w3", "model.layers.33.block_sparse_moe.experts.92.w3", "model.layers.33.block_sparse_moe.experts.93.w3", "model.layers.33.block_sparse_moe.experts.94.w3", "model.layers.33.block_sparse_moe.experts.95.w3", "model.layers.33.block_sparse_moe.experts.96.w3", "model.layers.33.block_sparse_moe.experts.97.w3", "model.layers.33.block_sparse_moe.experts.98.w3", "model.layers.33.block_sparse_moe.experts.99.w3", "model.layers.33.block_sparse_moe.experts.100.w3", "model.layers.33.block_sparse_moe.experts.101.w3", "model.layers.33.block_sparse_moe.experts.102.w3", "model.layers.33.block_sparse_moe.experts.103.w3", "model.layers.33.block_sparse_moe.experts.104.w3", "model.layers.33.block_sparse_moe.experts.105.w3", "model.layers.33.block_sparse_moe.experts.106.w3", "model.layers.33.block_sparse_moe.experts.107.w3", "model.layers.33.block_sparse_moe.experts.108.w3", "model.layers.33.block_sparse_moe.experts.109.w3", "model.layers.33.block_sparse_moe.experts.110.w3", "model.layers.33.block_sparse_moe.experts.111.w3", "model.layers.33.block_sparse_moe.experts.112.w3", "model.layers.33.block_sparse_moe.experts.113.w3", "model.layers.33.block_sparse_moe.experts.114.w3", "model.layers.33.block_sparse_moe.experts.115.w3", "model.layers.33.block_sparse_moe.experts.116.w3", "model.layers.33.block_sparse_moe.experts.117.w3", "model.layers.33.block_sparse_moe.experts.118.w3", "model.layers.33.block_sparse_moe.experts.119.w3", "model.layers.33.block_sparse_moe.experts.120.w3", "model.layers.33.block_sparse_moe.experts.121.w3", "model.layers.33.block_sparse_moe.experts.122.w3", "model.layers.33.block_sparse_moe.experts.123.w3", "model.layers.33.block_sparse_moe.experts.124.w3", "model.layers.33.block_sparse_moe.experts.125.w3", "model.layers.33.block_sparse_moe.experts.126.w3", "model.layers.33.block_sparse_moe.experts.127.w3", "model.layers.33.block_sparse_moe.experts.128.w3", "model.layers.33.block_sparse_moe.experts.129.w3", "model.layers.33.block_sparse_moe.experts.130.w3", "model.layers.33.block_sparse_moe.experts.131.w3", "model.layers.33.block_sparse_moe.experts.132.w3", "model.layers.33.block_sparse_moe.experts.133.w3", "model.layers.33.block_sparse_moe.experts.134.w3", "model.layers.33.block_sparse_moe.experts.135.w3", "model.layers.33.block_sparse_moe.experts.136.w3", "model.layers.33.block_sparse_moe.experts.137.w3", "model.layers.33.block_sparse_moe.experts.138.w3", "model.layers.33.block_sparse_moe.experts.139.w3", "model.layers.33.block_sparse_moe.experts.140.w3", "model.layers.33.block_sparse_moe.experts.141.w3", "model.layers.33.block_sparse_moe.experts.142.w3", "model.layers.33.block_sparse_moe.experts.143.w3", "model.layers.33.block_sparse_moe.experts.144.w3", "model.layers.33.block_sparse_moe.experts.145.w3", "model.layers.33.block_sparse_moe.experts.146.w3", "model.layers.33.block_sparse_moe.experts.147.w3", "model.layers.33.block_sparse_moe.experts.148.w3", "model.layers.33.block_sparse_moe.experts.149.w3", "model.layers.33.block_sparse_moe.experts.150.w3", "model.layers.33.block_sparse_moe.experts.151.w3", "model.layers.33.block_sparse_moe.experts.152.w3", "model.layers.33.block_sparse_moe.experts.153.w3", "model.layers.33.block_sparse_moe.experts.154.w3", "model.layers.33.block_sparse_moe.experts.155.w3", "model.layers.33.block_sparse_moe.experts.156.w3", "model.layers.33.block_sparse_moe.experts.157.w3", "model.layers.33.block_sparse_moe.experts.158.w3", "model.layers.33.block_sparse_moe.experts.159.w3", "model.layers.33.block_sparse_moe.experts.160.w3", "model.layers.33.block_sparse_moe.experts.161.w3", "model.layers.33.block_sparse_moe.experts.162.w3", "model.layers.33.block_sparse_moe.experts.163.w3", "model.layers.33.block_sparse_moe.experts.164.w3", "model.layers.33.block_sparse_moe.experts.165.w3", "model.layers.33.block_sparse_moe.experts.166.w3", "model.layers.33.block_sparse_moe.experts.167.w3", "model.layers.33.block_sparse_moe.experts.168.w3", "model.layers.33.block_sparse_moe.experts.169.w3", "model.layers.33.block_sparse_moe.experts.170.w3", "model.layers.33.block_sparse_moe.experts.171.w3", "model.layers.33.block_sparse_moe.experts.172.w3", "model.layers.33.block_sparse_moe.experts.173.w3", "model.layers.33.block_sparse_moe.experts.174.w3", "model.layers.33.block_sparse_moe.experts.175.w3", "model.layers.33.block_sparse_moe.experts.176.w3", "model.layers.33.block_sparse_moe.experts.177.w3", "model.layers.33.block_sparse_moe.experts.178.w3", "model.layers.33.block_sparse_moe.experts.179.w3", "model.layers.33.block_sparse_moe.experts.180.w3", "model.layers.33.block_sparse_moe.experts.181.w3", "model.layers.33.block_sparse_moe.experts.182.w3", "model.layers.33.block_sparse_moe.experts.183.w3", "model.layers.33.block_sparse_moe.experts.184.w3", "model.layers.33.block_sparse_moe.experts.185.w3", "model.layers.33.block_sparse_moe.experts.186.w3", "model.layers.33.block_sparse_moe.experts.187.w3", "model.layers.33.block_sparse_moe.experts.188.w3", "model.layers.33.block_sparse_moe.experts.189.w3", "model.layers.33.block_sparse_moe.experts.190.w3", "model.layers.33.block_sparse_moe.experts.191.w3", "model.layers.33.block_sparse_moe.experts.192.w3", "model.layers.33.block_sparse_moe.experts.193.w3", "model.layers.33.block_sparse_moe.experts.194.w3", "model.layers.33.block_sparse_moe.experts.195.w3", "model.layers.33.block_sparse_moe.experts.196.w3", "model.layers.33.block_sparse_moe.experts.197.w3", "model.layers.33.block_sparse_moe.experts.198.w3", "model.layers.33.block_sparse_moe.experts.199.w3", "model.layers.33.block_sparse_moe.experts.200.w3", "model.layers.33.block_sparse_moe.experts.201.w3", "model.layers.33.block_sparse_moe.experts.202.w3", "model.layers.33.block_sparse_moe.experts.203.w3", "model.layers.33.block_sparse_moe.experts.204.w3", "model.layers.33.block_sparse_moe.experts.205.w3", "model.layers.33.block_sparse_moe.experts.206.w3", "model.layers.33.block_sparse_moe.experts.207.w3", "model.layers.33.block_sparse_moe.experts.208.w3", "model.layers.33.block_sparse_moe.experts.209.w3", "model.layers.33.block_sparse_moe.experts.210.w3", "model.layers.33.block_sparse_moe.experts.211.w3", "model.layers.33.block_sparse_moe.experts.212.w3", "model.layers.33.block_sparse_moe.experts.213.w3", "model.layers.33.block_sparse_moe.experts.214.w3", "model.layers.33.block_sparse_moe.experts.215.w3", "model.layers.33.block_sparse_moe.experts.216.w3", "model.layers.33.block_sparse_moe.experts.217.w3", "model.layers.33.block_sparse_moe.experts.218.w3", "model.layers.33.block_sparse_moe.experts.219.w3", "model.layers.33.block_sparse_moe.experts.220.w3", "model.layers.33.block_sparse_moe.experts.221.w3", "model.layers.33.block_sparse_moe.experts.222.w3", "model.layers.33.block_sparse_moe.experts.223.w3", "model.layers.33.block_sparse_moe.experts.224.w3", "model.layers.33.block_sparse_moe.experts.225.w3", "model.layers.33.block_sparse_moe.experts.226.w3", "model.layers.33.block_sparse_moe.experts.227.w3", "model.layers.33.block_sparse_moe.experts.228.w3", "model.layers.33.block_sparse_moe.experts.229.w3", "model.layers.33.block_sparse_moe.experts.230.w3", "model.layers.33.block_sparse_moe.experts.231.w3", "model.layers.33.block_sparse_moe.experts.232.w3", "model.layers.33.block_sparse_moe.experts.233.w3", "model.layers.33.block_sparse_moe.experts.234.w3", "model.layers.33.block_sparse_moe.experts.235.w3", "model.layers.33.block_sparse_moe.experts.236.w3", "model.layers.33.block_sparse_moe.experts.237.w3", "model.layers.33.block_sparse_moe.experts.238.w3", "model.layers.33.block_sparse_moe.experts.239.w3", "model.layers.33.block_sparse_moe.experts.240.w3", "model.layers.33.block_sparse_moe.experts.241.w3", "model.layers.33.block_sparse_moe.experts.242.w3", "model.layers.33.block_sparse_moe.experts.243.w3", "model.layers.33.block_sparse_moe.experts.244.w3", "model.layers.33.block_sparse_moe.experts.245.w3", "model.layers.33.block_sparse_moe.experts.246.w3", "model.layers.33.block_sparse_moe.experts.247.w3", "model.layers.33.block_sparse_moe.experts.248.w3", "model.layers.33.block_sparse_moe.experts.249.w3", "model.layers.33.block_sparse_moe.experts.250.w3", "model.layers.33.block_sparse_moe.experts.251.w3", "model.layers.33.block_sparse_moe.experts.252.w3", "model.layers.33.block_sparse_moe.experts.253.w3", "model.layers.33.block_sparse_moe.experts.254.w3", "model.layers.33.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001161750592291358, "dbits": 2415919104 } ] }, { "idx": 169, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w2", "model.layers.33.block_sparse_moe.experts.1.w2", "model.layers.33.block_sparse_moe.experts.2.w2", "model.layers.33.block_sparse_moe.experts.3.w2", "model.layers.33.block_sparse_moe.experts.4.w2", "model.layers.33.block_sparse_moe.experts.5.w2", "model.layers.33.block_sparse_moe.experts.6.w2", "model.layers.33.block_sparse_moe.experts.7.w2", "model.layers.33.block_sparse_moe.experts.8.w2", "model.layers.33.block_sparse_moe.experts.9.w2", "model.layers.33.block_sparse_moe.experts.10.w2", "model.layers.33.block_sparse_moe.experts.11.w2", "model.layers.33.block_sparse_moe.experts.12.w2", "model.layers.33.block_sparse_moe.experts.13.w2", "model.layers.33.block_sparse_moe.experts.14.w2", "model.layers.33.block_sparse_moe.experts.15.w2", "model.layers.33.block_sparse_moe.experts.16.w2", "model.layers.33.block_sparse_moe.experts.17.w2", "model.layers.33.block_sparse_moe.experts.18.w2", "model.layers.33.block_sparse_moe.experts.19.w2", "model.layers.33.block_sparse_moe.experts.20.w2", "model.layers.33.block_sparse_moe.experts.21.w2", "model.layers.33.block_sparse_moe.experts.22.w2", "model.layers.33.block_sparse_moe.experts.23.w2", "model.layers.33.block_sparse_moe.experts.24.w2", "model.layers.33.block_sparse_moe.experts.25.w2", "model.layers.33.block_sparse_moe.experts.26.w2", "model.layers.33.block_sparse_moe.experts.27.w2", "model.layers.33.block_sparse_moe.experts.28.w2", "model.layers.33.block_sparse_moe.experts.29.w2", "model.layers.33.block_sparse_moe.experts.30.w2", "model.layers.33.block_sparse_moe.experts.31.w2", "model.layers.33.block_sparse_moe.experts.32.w2", "model.layers.33.block_sparse_moe.experts.33.w2", "model.layers.33.block_sparse_moe.experts.34.w2", "model.layers.33.block_sparse_moe.experts.35.w2", "model.layers.33.block_sparse_moe.experts.36.w2", "model.layers.33.block_sparse_moe.experts.37.w2", "model.layers.33.block_sparse_moe.experts.38.w2", "model.layers.33.block_sparse_moe.experts.39.w2", "model.layers.33.block_sparse_moe.experts.40.w2", "model.layers.33.block_sparse_moe.experts.41.w2", "model.layers.33.block_sparse_moe.experts.42.w2", "model.layers.33.block_sparse_moe.experts.43.w2", "model.layers.33.block_sparse_moe.experts.44.w2", "model.layers.33.block_sparse_moe.experts.45.w2", "model.layers.33.block_sparse_moe.experts.46.w2", "model.layers.33.block_sparse_moe.experts.47.w2", "model.layers.33.block_sparse_moe.experts.48.w2", "model.layers.33.block_sparse_moe.experts.49.w2", "model.layers.33.block_sparse_moe.experts.50.w2", "model.layers.33.block_sparse_moe.experts.51.w2", "model.layers.33.block_sparse_moe.experts.52.w2", "model.layers.33.block_sparse_moe.experts.53.w2", "model.layers.33.block_sparse_moe.experts.54.w2", "model.layers.33.block_sparse_moe.experts.55.w2", "model.layers.33.block_sparse_moe.experts.56.w2", "model.layers.33.block_sparse_moe.experts.57.w2", "model.layers.33.block_sparse_moe.experts.58.w2", "model.layers.33.block_sparse_moe.experts.59.w2", "model.layers.33.block_sparse_moe.experts.60.w2", "model.layers.33.block_sparse_moe.experts.61.w2", "model.layers.33.block_sparse_moe.experts.62.w2", "model.layers.33.block_sparse_moe.experts.63.w2", "model.layers.33.block_sparse_moe.experts.64.w2", "model.layers.33.block_sparse_moe.experts.65.w2", "model.layers.33.block_sparse_moe.experts.66.w2", "model.layers.33.block_sparse_moe.experts.67.w2", "model.layers.33.block_sparse_moe.experts.68.w2", "model.layers.33.block_sparse_moe.experts.69.w2", "model.layers.33.block_sparse_moe.experts.70.w2", "model.layers.33.block_sparse_moe.experts.71.w2", "model.layers.33.block_sparse_moe.experts.72.w2", "model.layers.33.block_sparse_moe.experts.73.w2", "model.layers.33.block_sparse_moe.experts.74.w2", "model.layers.33.block_sparse_moe.experts.75.w2", "model.layers.33.block_sparse_moe.experts.76.w2", "model.layers.33.block_sparse_moe.experts.77.w2", "model.layers.33.block_sparse_moe.experts.78.w2", "model.layers.33.block_sparse_moe.experts.79.w2", "model.layers.33.block_sparse_moe.experts.80.w2", "model.layers.33.block_sparse_moe.experts.81.w2", "model.layers.33.block_sparse_moe.experts.82.w2", "model.layers.33.block_sparse_moe.experts.83.w2", "model.layers.33.block_sparse_moe.experts.84.w2", "model.layers.33.block_sparse_moe.experts.85.w2", "model.layers.33.block_sparse_moe.experts.86.w2", "model.layers.33.block_sparse_moe.experts.87.w2", "model.layers.33.block_sparse_moe.experts.88.w2", "model.layers.33.block_sparse_moe.experts.89.w2", "model.layers.33.block_sparse_moe.experts.90.w2", "model.layers.33.block_sparse_moe.experts.91.w2", "model.layers.33.block_sparse_moe.experts.92.w2", "model.layers.33.block_sparse_moe.experts.93.w2", "model.layers.33.block_sparse_moe.experts.94.w2", "model.layers.33.block_sparse_moe.experts.95.w2", "model.layers.33.block_sparse_moe.experts.96.w2", "model.layers.33.block_sparse_moe.experts.97.w2", "model.layers.33.block_sparse_moe.experts.98.w2", "model.layers.33.block_sparse_moe.experts.99.w2", "model.layers.33.block_sparse_moe.experts.100.w2", "model.layers.33.block_sparse_moe.experts.101.w2", "model.layers.33.block_sparse_moe.experts.102.w2", "model.layers.33.block_sparse_moe.experts.103.w2", "model.layers.33.block_sparse_moe.experts.104.w2", "model.layers.33.block_sparse_moe.experts.105.w2", "model.layers.33.block_sparse_moe.experts.106.w2", "model.layers.33.block_sparse_moe.experts.107.w2", "model.layers.33.block_sparse_moe.experts.108.w2", "model.layers.33.block_sparse_moe.experts.109.w2", "model.layers.33.block_sparse_moe.experts.110.w2", "model.layers.33.block_sparse_moe.experts.111.w2", "model.layers.33.block_sparse_moe.experts.112.w2", "model.layers.33.block_sparse_moe.experts.113.w2", "model.layers.33.block_sparse_moe.experts.114.w2", "model.layers.33.block_sparse_moe.experts.115.w2", "model.layers.33.block_sparse_moe.experts.116.w2", "model.layers.33.block_sparse_moe.experts.117.w2", "model.layers.33.block_sparse_moe.experts.118.w2", "model.layers.33.block_sparse_moe.experts.119.w2", "model.layers.33.block_sparse_moe.experts.120.w2", "model.layers.33.block_sparse_moe.experts.121.w2", "model.layers.33.block_sparse_moe.experts.122.w2", "model.layers.33.block_sparse_moe.experts.123.w2", "model.layers.33.block_sparse_moe.experts.124.w2", "model.layers.33.block_sparse_moe.experts.125.w2", "model.layers.33.block_sparse_moe.experts.126.w2", "model.layers.33.block_sparse_moe.experts.127.w2", "model.layers.33.block_sparse_moe.experts.128.w2", "model.layers.33.block_sparse_moe.experts.129.w2", "model.layers.33.block_sparse_moe.experts.130.w2", "model.layers.33.block_sparse_moe.experts.131.w2", "model.layers.33.block_sparse_moe.experts.132.w2", "model.layers.33.block_sparse_moe.experts.133.w2", "model.layers.33.block_sparse_moe.experts.134.w2", "model.layers.33.block_sparse_moe.experts.135.w2", "model.layers.33.block_sparse_moe.experts.136.w2", "model.layers.33.block_sparse_moe.experts.137.w2", "model.layers.33.block_sparse_moe.experts.138.w2", "model.layers.33.block_sparse_moe.experts.139.w2", "model.layers.33.block_sparse_moe.experts.140.w2", "model.layers.33.block_sparse_moe.experts.141.w2", "model.layers.33.block_sparse_moe.experts.142.w2", "model.layers.33.block_sparse_moe.experts.143.w2", "model.layers.33.block_sparse_moe.experts.144.w2", "model.layers.33.block_sparse_moe.experts.145.w2", "model.layers.33.block_sparse_moe.experts.146.w2", "model.layers.33.block_sparse_moe.experts.147.w2", "model.layers.33.block_sparse_moe.experts.148.w2", "model.layers.33.block_sparse_moe.experts.149.w2", "model.layers.33.block_sparse_moe.experts.150.w2", "model.layers.33.block_sparse_moe.experts.151.w2", "model.layers.33.block_sparse_moe.experts.152.w2", "model.layers.33.block_sparse_moe.experts.153.w2", "model.layers.33.block_sparse_moe.experts.154.w2", "model.layers.33.block_sparse_moe.experts.155.w2", "model.layers.33.block_sparse_moe.experts.156.w2", "model.layers.33.block_sparse_moe.experts.157.w2", "model.layers.33.block_sparse_moe.experts.158.w2", "model.layers.33.block_sparse_moe.experts.159.w2", "model.layers.33.block_sparse_moe.experts.160.w2", "model.layers.33.block_sparse_moe.experts.161.w2", "model.layers.33.block_sparse_moe.experts.162.w2", "model.layers.33.block_sparse_moe.experts.163.w2", "model.layers.33.block_sparse_moe.experts.164.w2", "model.layers.33.block_sparse_moe.experts.165.w2", "model.layers.33.block_sparse_moe.experts.166.w2", "model.layers.33.block_sparse_moe.experts.167.w2", "model.layers.33.block_sparse_moe.experts.168.w2", "model.layers.33.block_sparse_moe.experts.169.w2", "model.layers.33.block_sparse_moe.experts.170.w2", "model.layers.33.block_sparse_moe.experts.171.w2", "model.layers.33.block_sparse_moe.experts.172.w2", "model.layers.33.block_sparse_moe.experts.173.w2", "model.layers.33.block_sparse_moe.experts.174.w2", "model.layers.33.block_sparse_moe.experts.175.w2", "model.layers.33.block_sparse_moe.experts.176.w2", "model.layers.33.block_sparse_moe.experts.177.w2", "model.layers.33.block_sparse_moe.experts.178.w2", "model.layers.33.block_sparse_moe.experts.179.w2", "model.layers.33.block_sparse_moe.experts.180.w2", "model.layers.33.block_sparse_moe.experts.181.w2", "model.layers.33.block_sparse_moe.experts.182.w2", "model.layers.33.block_sparse_moe.experts.183.w2", "model.layers.33.block_sparse_moe.experts.184.w2", "model.layers.33.block_sparse_moe.experts.185.w2", "model.layers.33.block_sparse_moe.experts.186.w2", "model.layers.33.block_sparse_moe.experts.187.w2", "model.layers.33.block_sparse_moe.experts.188.w2", "model.layers.33.block_sparse_moe.experts.189.w2", "model.layers.33.block_sparse_moe.experts.190.w2", "model.layers.33.block_sparse_moe.experts.191.w2", "model.layers.33.block_sparse_moe.experts.192.w2", "model.layers.33.block_sparse_moe.experts.193.w2", "model.layers.33.block_sparse_moe.experts.194.w2", "model.layers.33.block_sparse_moe.experts.195.w2", "model.layers.33.block_sparse_moe.experts.196.w2", "model.layers.33.block_sparse_moe.experts.197.w2", "model.layers.33.block_sparse_moe.experts.198.w2", "model.layers.33.block_sparse_moe.experts.199.w2", "model.layers.33.block_sparse_moe.experts.200.w2", "model.layers.33.block_sparse_moe.experts.201.w2", "model.layers.33.block_sparse_moe.experts.202.w2", "model.layers.33.block_sparse_moe.experts.203.w2", "model.layers.33.block_sparse_moe.experts.204.w2", "model.layers.33.block_sparse_moe.experts.205.w2", "model.layers.33.block_sparse_moe.experts.206.w2", "model.layers.33.block_sparse_moe.experts.207.w2", "model.layers.33.block_sparse_moe.experts.208.w2", "model.layers.33.block_sparse_moe.experts.209.w2", "model.layers.33.block_sparse_moe.experts.210.w2", "model.layers.33.block_sparse_moe.experts.211.w2", "model.layers.33.block_sparse_moe.experts.212.w2", "model.layers.33.block_sparse_moe.experts.213.w2", "model.layers.33.block_sparse_moe.experts.214.w2", "model.layers.33.block_sparse_moe.experts.215.w2", "model.layers.33.block_sparse_moe.experts.216.w2", "model.layers.33.block_sparse_moe.experts.217.w2", "model.layers.33.block_sparse_moe.experts.218.w2", "model.layers.33.block_sparse_moe.experts.219.w2", "model.layers.33.block_sparse_moe.experts.220.w2", "model.layers.33.block_sparse_moe.experts.221.w2", "model.layers.33.block_sparse_moe.experts.222.w2", "model.layers.33.block_sparse_moe.experts.223.w2", "model.layers.33.block_sparse_moe.experts.224.w2", "model.layers.33.block_sparse_moe.experts.225.w2", "model.layers.33.block_sparse_moe.experts.226.w2", "model.layers.33.block_sparse_moe.experts.227.w2", "model.layers.33.block_sparse_moe.experts.228.w2", "model.layers.33.block_sparse_moe.experts.229.w2", "model.layers.33.block_sparse_moe.experts.230.w2", "model.layers.33.block_sparse_moe.experts.231.w2", "model.layers.33.block_sparse_moe.experts.232.w2", "model.layers.33.block_sparse_moe.experts.233.w2", "model.layers.33.block_sparse_moe.experts.234.w2", "model.layers.33.block_sparse_moe.experts.235.w2", "model.layers.33.block_sparse_moe.experts.236.w2", "model.layers.33.block_sparse_moe.experts.237.w2", "model.layers.33.block_sparse_moe.experts.238.w2", "model.layers.33.block_sparse_moe.experts.239.w2", "model.layers.33.block_sparse_moe.experts.240.w2", "model.layers.33.block_sparse_moe.experts.241.w2", "model.layers.33.block_sparse_moe.experts.242.w2", "model.layers.33.block_sparse_moe.experts.243.w2", "model.layers.33.block_sparse_moe.experts.244.w2", "model.layers.33.block_sparse_moe.experts.245.w2", "model.layers.33.block_sparse_moe.experts.246.w2", "model.layers.33.block_sparse_moe.experts.247.w2", "model.layers.33.block_sparse_moe.experts.248.w2", "model.layers.33.block_sparse_moe.experts.249.w2", "model.layers.33.block_sparse_moe.experts.250.w2", "model.layers.33.block_sparse_moe.experts.251.w2", "model.layers.33.block_sparse_moe.experts.252.w2", "model.layers.33.block_sparse_moe.experts.253.w2", "model.layers.33.block_sparse_moe.experts.254.w2", "model.layers.33.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -4.166513681412576e-05, "dbits": 1207959552 } ] }, { "idx": 170, "layers": [ "model.layers.34.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0002844901755452156, "dbits": 18874368 } ] }, { "idx": 171, "layers": [ "model.layers.34.self_attn.k_proj", "model.layers.34.self_attn.v_proj" ], "candidates": [ { "dkld": 0.002198797278106207, "dbits": 6291456 } ] }, { "idx": 172, "layers": [ "model.layers.34.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0004972623661160386, "dbits": 18874368 } ] }, { "idx": 173, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w1", "model.layers.34.block_sparse_moe.experts.1.w1", "model.layers.34.block_sparse_moe.experts.2.w1", "model.layers.34.block_sparse_moe.experts.3.w1", "model.layers.34.block_sparse_moe.experts.4.w1", "model.layers.34.block_sparse_moe.experts.5.w1", "model.layers.34.block_sparse_moe.experts.6.w1", "model.layers.34.block_sparse_moe.experts.7.w1", "model.layers.34.block_sparse_moe.experts.8.w1", "model.layers.34.block_sparse_moe.experts.9.w1", "model.layers.34.block_sparse_moe.experts.10.w1", "model.layers.34.block_sparse_moe.experts.11.w1", "model.layers.34.block_sparse_moe.experts.12.w1", "model.layers.34.block_sparse_moe.experts.13.w1", "model.layers.34.block_sparse_moe.experts.14.w1", "model.layers.34.block_sparse_moe.experts.15.w1", "model.layers.34.block_sparse_moe.experts.16.w1", "model.layers.34.block_sparse_moe.experts.17.w1", "model.layers.34.block_sparse_moe.experts.18.w1", "model.layers.34.block_sparse_moe.experts.19.w1", "model.layers.34.block_sparse_moe.experts.20.w1", "model.layers.34.block_sparse_moe.experts.21.w1", "model.layers.34.block_sparse_moe.experts.22.w1", "model.layers.34.block_sparse_moe.experts.23.w1", "model.layers.34.block_sparse_moe.experts.24.w1", "model.layers.34.block_sparse_moe.experts.25.w1", "model.layers.34.block_sparse_moe.experts.26.w1", "model.layers.34.block_sparse_moe.experts.27.w1", "model.layers.34.block_sparse_moe.experts.28.w1", "model.layers.34.block_sparse_moe.experts.29.w1", "model.layers.34.block_sparse_moe.experts.30.w1", "model.layers.34.block_sparse_moe.experts.31.w1", "model.layers.34.block_sparse_moe.experts.32.w1", "model.layers.34.block_sparse_moe.experts.33.w1", "model.layers.34.block_sparse_moe.experts.34.w1", "model.layers.34.block_sparse_moe.experts.35.w1", "model.layers.34.block_sparse_moe.experts.36.w1", "model.layers.34.block_sparse_moe.experts.37.w1", "model.layers.34.block_sparse_moe.experts.38.w1", "model.layers.34.block_sparse_moe.experts.39.w1", "model.layers.34.block_sparse_moe.experts.40.w1", "model.layers.34.block_sparse_moe.experts.41.w1", "model.layers.34.block_sparse_moe.experts.42.w1", "model.layers.34.block_sparse_moe.experts.43.w1", "model.layers.34.block_sparse_moe.experts.44.w1", "model.layers.34.block_sparse_moe.experts.45.w1", "model.layers.34.block_sparse_moe.experts.46.w1", "model.layers.34.block_sparse_moe.experts.47.w1", "model.layers.34.block_sparse_moe.experts.48.w1", "model.layers.34.block_sparse_moe.experts.49.w1", "model.layers.34.block_sparse_moe.experts.50.w1", "model.layers.34.block_sparse_moe.experts.51.w1", "model.layers.34.block_sparse_moe.experts.52.w1", "model.layers.34.block_sparse_moe.experts.53.w1", "model.layers.34.block_sparse_moe.experts.54.w1", "model.layers.34.block_sparse_moe.experts.55.w1", "model.layers.34.block_sparse_moe.experts.56.w1", "model.layers.34.block_sparse_moe.experts.57.w1", "model.layers.34.block_sparse_moe.experts.58.w1", "model.layers.34.block_sparse_moe.experts.59.w1", "model.layers.34.block_sparse_moe.experts.60.w1", "model.layers.34.block_sparse_moe.experts.61.w1", "model.layers.34.block_sparse_moe.experts.62.w1", "model.layers.34.block_sparse_moe.experts.63.w1", "model.layers.34.block_sparse_moe.experts.64.w1", "model.layers.34.block_sparse_moe.experts.65.w1", "model.layers.34.block_sparse_moe.experts.66.w1", "model.layers.34.block_sparse_moe.experts.67.w1", "model.layers.34.block_sparse_moe.experts.68.w1", "model.layers.34.block_sparse_moe.experts.69.w1", "model.layers.34.block_sparse_moe.experts.70.w1", "model.layers.34.block_sparse_moe.experts.71.w1", "model.layers.34.block_sparse_moe.experts.72.w1", "model.layers.34.block_sparse_moe.experts.73.w1", "model.layers.34.block_sparse_moe.experts.74.w1", "model.layers.34.block_sparse_moe.experts.75.w1", "model.layers.34.block_sparse_moe.experts.76.w1", "model.layers.34.block_sparse_moe.experts.77.w1", "model.layers.34.block_sparse_moe.experts.78.w1", "model.layers.34.block_sparse_moe.experts.79.w1", "model.layers.34.block_sparse_moe.experts.80.w1", "model.layers.34.block_sparse_moe.experts.81.w1", "model.layers.34.block_sparse_moe.experts.82.w1", "model.layers.34.block_sparse_moe.experts.83.w1", "model.layers.34.block_sparse_moe.experts.84.w1", "model.layers.34.block_sparse_moe.experts.85.w1", "model.layers.34.block_sparse_moe.experts.86.w1", "model.layers.34.block_sparse_moe.experts.87.w1", "model.layers.34.block_sparse_moe.experts.88.w1", "model.layers.34.block_sparse_moe.experts.89.w1", "model.layers.34.block_sparse_moe.experts.90.w1", "model.layers.34.block_sparse_moe.experts.91.w1", "model.layers.34.block_sparse_moe.experts.92.w1", "model.layers.34.block_sparse_moe.experts.93.w1", "model.layers.34.block_sparse_moe.experts.94.w1", "model.layers.34.block_sparse_moe.experts.95.w1", "model.layers.34.block_sparse_moe.experts.96.w1", "model.layers.34.block_sparse_moe.experts.97.w1", "model.layers.34.block_sparse_moe.experts.98.w1", "model.layers.34.block_sparse_moe.experts.99.w1", "model.layers.34.block_sparse_moe.experts.100.w1", "model.layers.34.block_sparse_moe.experts.101.w1", "model.layers.34.block_sparse_moe.experts.102.w1", "model.layers.34.block_sparse_moe.experts.103.w1", "model.layers.34.block_sparse_moe.experts.104.w1", "model.layers.34.block_sparse_moe.experts.105.w1", "model.layers.34.block_sparse_moe.experts.106.w1", "model.layers.34.block_sparse_moe.experts.107.w1", "model.layers.34.block_sparse_moe.experts.108.w1", "model.layers.34.block_sparse_moe.experts.109.w1", "model.layers.34.block_sparse_moe.experts.110.w1", "model.layers.34.block_sparse_moe.experts.111.w1", "model.layers.34.block_sparse_moe.experts.112.w1", "model.layers.34.block_sparse_moe.experts.113.w1", "model.layers.34.block_sparse_moe.experts.114.w1", "model.layers.34.block_sparse_moe.experts.115.w1", "model.layers.34.block_sparse_moe.experts.116.w1", "model.layers.34.block_sparse_moe.experts.117.w1", "model.layers.34.block_sparse_moe.experts.118.w1", "model.layers.34.block_sparse_moe.experts.119.w1", "model.layers.34.block_sparse_moe.experts.120.w1", "model.layers.34.block_sparse_moe.experts.121.w1", "model.layers.34.block_sparse_moe.experts.122.w1", "model.layers.34.block_sparse_moe.experts.123.w1", "model.layers.34.block_sparse_moe.experts.124.w1", "model.layers.34.block_sparse_moe.experts.125.w1", "model.layers.34.block_sparse_moe.experts.126.w1", "model.layers.34.block_sparse_moe.experts.127.w1", "model.layers.34.block_sparse_moe.experts.128.w1", "model.layers.34.block_sparse_moe.experts.129.w1", "model.layers.34.block_sparse_moe.experts.130.w1", "model.layers.34.block_sparse_moe.experts.131.w1", "model.layers.34.block_sparse_moe.experts.132.w1", "model.layers.34.block_sparse_moe.experts.133.w1", "model.layers.34.block_sparse_moe.experts.134.w1", "model.layers.34.block_sparse_moe.experts.135.w1", "model.layers.34.block_sparse_moe.experts.136.w1", "model.layers.34.block_sparse_moe.experts.137.w1", "model.layers.34.block_sparse_moe.experts.138.w1", "model.layers.34.block_sparse_moe.experts.139.w1", "model.layers.34.block_sparse_moe.experts.140.w1", "model.layers.34.block_sparse_moe.experts.141.w1", "model.layers.34.block_sparse_moe.experts.142.w1", "model.layers.34.block_sparse_moe.experts.143.w1", "model.layers.34.block_sparse_moe.experts.144.w1", "model.layers.34.block_sparse_moe.experts.145.w1", "model.layers.34.block_sparse_moe.experts.146.w1", "model.layers.34.block_sparse_moe.experts.147.w1", "model.layers.34.block_sparse_moe.experts.148.w1", "model.layers.34.block_sparse_moe.experts.149.w1", "model.layers.34.block_sparse_moe.experts.150.w1", "model.layers.34.block_sparse_moe.experts.151.w1", "model.layers.34.block_sparse_moe.experts.152.w1", "model.layers.34.block_sparse_moe.experts.153.w1", "model.layers.34.block_sparse_moe.experts.154.w1", "model.layers.34.block_sparse_moe.experts.155.w1", "model.layers.34.block_sparse_moe.experts.156.w1", "model.layers.34.block_sparse_moe.experts.157.w1", "model.layers.34.block_sparse_moe.experts.158.w1", "model.layers.34.block_sparse_moe.experts.159.w1", "model.layers.34.block_sparse_moe.experts.160.w1", "model.layers.34.block_sparse_moe.experts.161.w1", "model.layers.34.block_sparse_moe.experts.162.w1", "model.layers.34.block_sparse_moe.experts.163.w1", "model.layers.34.block_sparse_moe.experts.164.w1", "model.layers.34.block_sparse_moe.experts.165.w1", "model.layers.34.block_sparse_moe.experts.166.w1", "model.layers.34.block_sparse_moe.experts.167.w1", "model.layers.34.block_sparse_moe.experts.168.w1", "model.layers.34.block_sparse_moe.experts.169.w1", "model.layers.34.block_sparse_moe.experts.170.w1", "model.layers.34.block_sparse_moe.experts.171.w1", "model.layers.34.block_sparse_moe.experts.172.w1", "model.layers.34.block_sparse_moe.experts.173.w1", "model.layers.34.block_sparse_moe.experts.174.w1", "model.layers.34.block_sparse_moe.experts.175.w1", "model.layers.34.block_sparse_moe.experts.176.w1", "model.layers.34.block_sparse_moe.experts.177.w1", "model.layers.34.block_sparse_moe.experts.178.w1", "model.layers.34.block_sparse_moe.experts.179.w1", "model.layers.34.block_sparse_moe.experts.180.w1", "model.layers.34.block_sparse_moe.experts.181.w1", "model.layers.34.block_sparse_moe.experts.182.w1", "model.layers.34.block_sparse_moe.experts.183.w1", "model.layers.34.block_sparse_moe.experts.184.w1", "model.layers.34.block_sparse_moe.experts.185.w1", "model.layers.34.block_sparse_moe.experts.186.w1", "model.layers.34.block_sparse_moe.experts.187.w1", "model.layers.34.block_sparse_moe.experts.188.w1", "model.layers.34.block_sparse_moe.experts.189.w1", "model.layers.34.block_sparse_moe.experts.190.w1", "model.layers.34.block_sparse_moe.experts.191.w1", "model.layers.34.block_sparse_moe.experts.192.w1", "model.layers.34.block_sparse_moe.experts.193.w1", "model.layers.34.block_sparse_moe.experts.194.w1", "model.layers.34.block_sparse_moe.experts.195.w1", "model.layers.34.block_sparse_moe.experts.196.w1", "model.layers.34.block_sparse_moe.experts.197.w1", "model.layers.34.block_sparse_moe.experts.198.w1", "model.layers.34.block_sparse_moe.experts.199.w1", "model.layers.34.block_sparse_moe.experts.200.w1", "model.layers.34.block_sparse_moe.experts.201.w1", "model.layers.34.block_sparse_moe.experts.202.w1", "model.layers.34.block_sparse_moe.experts.203.w1", "model.layers.34.block_sparse_moe.experts.204.w1", "model.layers.34.block_sparse_moe.experts.205.w1", "model.layers.34.block_sparse_moe.experts.206.w1", "model.layers.34.block_sparse_moe.experts.207.w1", "model.layers.34.block_sparse_moe.experts.208.w1", "model.layers.34.block_sparse_moe.experts.209.w1", "model.layers.34.block_sparse_moe.experts.210.w1", "model.layers.34.block_sparse_moe.experts.211.w1", "model.layers.34.block_sparse_moe.experts.212.w1", "model.layers.34.block_sparse_moe.experts.213.w1", "model.layers.34.block_sparse_moe.experts.214.w1", "model.layers.34.block_sparse_moe.experts.215.w1", "model.layers.34.block_sparse_moe.experts.216.w1", "model.layers.34.block_sparse_moe.experts.217.w1", "model.layers.34.block_sparse_moe.experts.218.w1", "model.layers.34.block_sparse_moe.experts.219.w1", "model.layers.34.block_sparse_moe.experts.220.w1", "model.layers.34.block_sparse_moe.experts.221.w1", "model.layers.34.block_sparse_moe.experts.222.w1", "model.layers.34.block_sparse_moe.experts.223.w1", "model.layers.34.block_sparse_moe.experts.224.w1", "model.layers.34.block_sparse_moe.experts.225.w1", "model.layers.34.block_sparse_moe.experts.226.w1", "model.layers.34.block_sparse_moe.experts.227.w1", "model.layers.34.block_sparse_moe.experts.228.w1", "model.layers.34.block_sparse_moe.experts.229.w1", "model.layers.34.block_sparse_moe.experts.230.w1", "model.layers.34.block_sparse_moe.experts.231.w1", "model.layers.34.block_sparse_moe.experts.232.w1", "model.layers.34.block_sparse_moe.experts.233.w1", "model.layers.34.block_sparse_moe.experts.234.w1", "model.layers.34.block_sparse_moe.experts.235.w1", "model.layers.34.block_sparse_moe.experts.236.w1", "model.layers.34.block_sparse_moe.experts.237.w1", "model.layers.34.block_sparse_moe.experts.238.w1", "model.layers.34.block_sparse_moe.experts.239.w1", "model.layers.34.block_sparse_moe.experts.240.w1", "model.layers.34.block_sparse_moe.experts.241.w1", "model.layers.34.block_sparse_moe.experts.242.w1", "model.layers.34.block_sparse_moe.experts.243.w1", "model.layers.34.block_sparse_moe.experts.244.w1", "model.layers.34.block_sparse_moe.experts.245.w1", "model.layers.34.block_sparse_moe.experts.246.w1", "model.layers.34.block_sparse_moe.experts.247.w1", "model.layers.34.block_sparse_moe.experts.248.w1", "model.layers.34.block_sparse_moe.experts.249.w1", "model.layers.34.block_sparse_moe.experts.250.w1", "model.layers.34.block_sparse_moe.experts.251.w1", "model.layers.34.block_sparse_moe.experts.252.w1", "model.layers.34.block_sparse_moe.experts.253.w1", "model.layers.34.block_sparse_moe.experts.254.w1", "model.layers.34.block_sparse_moe.experts.255.w1", "model.layers.34.block_sparse_moe.experts.0.w3", "model.layers.34.block_sparse_moe.experts.1.w3", "model.layers.34.block_sparse_moe.experts.2.w3", "model.layers.34.block_sparse_moe.experts.3.w3", "model.layers.34.block_sparse_moe.experts.4.w3", "model.layers.34.block_sparse_moe.experts.5.w3", "model.layers.34.block_sparse_moe.experts.6.w3", "model.layers.34.block_sparse_moe.experts.7.w3", "model.layers.34.block_sparse_moe.experts.8.w3", "model.layers.34.block_sparse_moe.experts.9.w3", "model.layers.34.block_sparse_moe.experts.10.w3", "model.layers.34.block_sparse_moe.experts.11.w3", "model.layers.34.block_sparse_moe.experts.12.w3", "model.layers.34.block_sparse_moe.experts.13.w3", "model.layers.34.block_sparse_moe.experts.14.w3", "model.layers.34.block_sparse_moe.experts.15.w3", "model.layers.34.block_sparse_moe.experts.16.w3", "model.layers.34.block_sparse_moe.experts.17.w3", "model.layers.34.block_sparse_moe.experts.18.w3", "model.layers.34.block_sparse_moe.experts.19.w3", "model.layers.34.block_sparse_moe.experts.20.w3", "model.layers.34.block_sparse_moe.experts.21.w3", "model.layers.34.block_sparse_moe.experts.22.w3", "model.layers.34.block_sparse_moe.experts.23.w3", "model.layers.34.block_sparse_moe.experts.24.w3", "model.layers.34.block_sparse_moe.experts.25.w3", "model.layers.34.block_sparse_moe.experts.26.w3", "model.layers.34.block_sparse_moe.experts.27.w3", "model.layers.34.block_sparse_moe.experts.28.w3", "model.layers.34.block_sparse_moe.experts.29.w3", "model.layers.34.block_sparse_moe.experts.30.w3", "model.layers.34.block_sparse_moe.experts.31.w3", "model.layers.34.block_sparse_moe.experts.32.w3", "model.layers.34.block_sparse_moe.experts.33.w3", "model.layers.34.block_sparse_moe.experts.34.w3", "model.layers.34.block_sparse_moe.experts.35.w3", "model.layers.34.block_sparse_moe.experts.36.w3", "model.layers.34.block_sparse_moe.experts.37.w3", "model.layers.34.block_sparse_moe.experts.38.w3", "model.layers.34.block_sparse_moe.experts.39.w3", "model.layers.34.block_sparse_moe.experts.40.w3", "model.layers.34.block_sparse_moe.experts.41.w3", "model.layers.34.block_sparse_moe.experts.42.w3", "model.layers.34.block_sparse_moe.experts.43.w3", "model.layers.34.block_sparse_moe.experts.44.w3", "model.layers.34.block_sparse_moe.experts.45.w3", "model.layers.34.block_sparse_moe.experts.46.w3", "model.layers.34.block_sparse_moe.experts.47.w3", "model.layers.34.block_sparse_moe.experts.48.w3", "model.layers.34.block_sparse_moe.experts.49.w3", "model.layers.34.block_sparse_moe.experts.50.w3", "model.layers.34.block_sparse_moe.experts.51.w3", "model.layers.34.block_sparse_moe.experts.52.w3", "model.layers.34.block_sparse_moe.experts.53.w3", "model.layers.34.block_sparse_moe.experts.54.w3", "model.layers.34.block_sparse_moe.experts.55.w3", "model.layers.34.block_sparse_moe.experts.56.w3", "model.layers.34.block_sparse_moe.experts.57.w3", "model.layers.34.block_sparse_moe.experts.58.w3", "model.layers.34.block_sparse_moe.experts.59.w3", "model.layers.34.block_sparse_moe.experts.60.w3", "model.layers.34.block_sparse_moe.experts.61.w3", "model.layers.34.block_sparse_moe.experts.62.w3", "model.layers.34.block_sparse_moe.experts.63.w3", "model.layers.34.block_sparse_moe.experts.64.w3", "model.layers.34.block_sparse_moe.experts.65.w3", "model.layers.34.block_sparse_moe.experts.66.w3", "model.layers.34.block_sparse_moe.experts.67.w3", "model.layers.34.block_sparse_moe.experts.68.w3", "model.layers.34.block_sparse_moe.experts.69.w3", "model.layers.34.block_sparse_moe.experts.70.w3", "model.layers.34.block_sparse_moe.experts.71.w3", "model.layers.34.block_sparse_moe.experts.72.w3", "model.layers.34.block_sparse_moe.experts.73.w3", "model.layers.34.block_sparse_moe.experts.74.w3", "model.layers.34.block_sparse_moe.experts.75.w3", "model.layers.34.block_sparse_moe.experts.76.w3", "model.layers.34.block_sparse_moe.experts.77.w3", "model.layers.34.block_sparse_moe.experts.78.w3", "model.layers.34.block_sparse_moe.experts.79.w3", "model.layers.34.block_sparse_moe.experts.80.w3", "model.layers.34.block_sparse_moe.experts.81.w3", "model.layers.34.block_sparse_moe.experts.82.w3", "model.layers.34.block_sparse_moe.experts.83.w3", "model.layers.34.block_sparse_moe.experts.84.w3", "model.layers.34.block_sparse_moe.experts.85.w3", "model.layers.34.block_sparse_moe.experts.86.w3", "model.layers.34.block_sparse_moe.experts.87.w3", "model.layers.34.block_sparse_moe.experts.88.w3", "model.layers.34.block_sparse_moe.experts.89.w3", "model.layers.34.block_sparse_moe.experts.90.w3", "model.layers.34.block_sparse_moe.experts.91.w3", "model.layers.34.block_sparse_moe.experts.92.w3", "model.layers.34.block_sparse_moe.experts.93.w3", "model.layers.34.block_sparse_moe.experts.94.w3", "model.layers.34.block_sparse_moe.experts.95.w3", "model.layers.34.block_sparse_moe.experts.96.w3", "model.layers.34.block_sparse_moe.experts.97.w3", "model.layers.34.block_sparse_moe.experts.98.w3", "model.layers.34.block_sparse_moe.experts.99.w3", "model.layers.34.block_sparse_moe.experts.100.w3", "model.layers.34.block_sparse_moe.experts.101.w3", "model.layers.34.block_sparse_moe.experts.102.w3", "model.layers.34.block_sparse_moe.experts.103.w3", "model.layers.34.block_sparse_moe.experts.104.w3", "model.layers.34.block_sparse_moe.experts.105.w3", "model.layers.34.block_sparse_moe.experts.106.w3", "model.layers.34.block_sparse_moe.experts.107.w3", "model.layers.34.block_sparse_moe.experts.108.w3", "model.layers.34.block_sparse_moe.experts.109.w3", "model.layers.34.block_sparse_moe.experts.110.w3", "model.layers.34.block_sparse_moe.experts.111.w3", "model.layers.34.block_sparse_moe.experts.112.w3", "model.layers.34.block_sparse_moe.experts.113.w3", "model.layers.34.block_sparse_moe.experts.114.w3", "model.layers.34.block_sparse_moe.experts.115.w3", "model.layers.34.block_sparse_moe.experts.116.w3", "model.layers.34.block_sparse_moe.experts.117.w3", "model.layers.34.block_sparse_moe.experts.118.w3", "model.layers.34.block_sparse_moe.experts.119.w3", "model.layers.34.block_sparse_moe.experts.120.w3", "model.layers.34.block_sparse_moe.experts.121.w3", "model.layers.34.block_sparse_moe.experts.122.w3", "model.layers.34.block_sparse_moe.experts.123.w3", "model.layers.34.block_sparse_moe.experts.124.w3", "model.layers.34.block_sparse_moe.experts.125.w3", "model.layers.34.block_sparse_moe.experts.126.w3", "model.layers.34.block_sparse_moe.experts.127.w3", "model.layers.34.block_sparse_moe.experts.128.w3", "model.layers.34.block_sparse_moe.experts.129.w3", "model.layers.34.block_sparse_moe.experts.130.w3", "model.layers.34.block_sparse_moe.experts.131.w3", "model.layers.34.block_sparse_moe.experts.132.w3", "model.layers.34.block_sparse_moe.experts.133.w3", "model.layers.34.block_sparse_moe.experts.134.w3", "model.layers.34.block_sparse_moe.experts.135.w3", "model.layers.34.block_sparse_moe.experts.136.w3", "model.layers.34.block_sparse_moe.experts.137.w3", "model.layers.34.block_sparse_moe.experts.138.w3", "model.layers.34.block_sparse_moe.experts.139.w3", "model.layers.34.block_sparse_moe.experts.140.w3", "model.layers.34.block_sparse_moe.experts.141.w3", "model.layers.34.block_sparse_moe.experts.142.w3", "model.layers.34.block_sparse_moe.experts.143.w3", "model.layers.34.block_sparse_moe.experts.144.w3", "model.layers.34.block_sparse_moe.experts.145.w3", "model.layers.34.block_sparse_moe.experts.146.w3", "model.layers.34.block_sparse_moe.experts.147.w3", "model.layers.34.block_sparse_moe.experts.148.w3", "model.layers.34.block_sparse_moe.experts.149.w3", "model.layers.34.block_sparse_moe.experts.150.w3", "model.layers.34.block_sparse_moe.experts.151.w3", "model.layers.34.block_sparse_moe.experts.152.w3", "model.layers.34.block_sparse_moe.experts.153.w3", "model.layers.34.block_sparse_moe.experts.154.w3", "model.layers.34.block_sparse_moe.experts.155.w3", "model.layers.34.block_sparse_moe.experts.156.w3", "model.layers.34.block_sparse_moe.experts.157.w3", "model.layers.34.block_sparse_moe.experts.158.w3", "model.layers.34.block_sparse_moe.experts.159.w3", "model.layers.34.block_sparse_moe.experts.160.w3", "model.layers.34.block_sparse_moe.experts.161.w3", "model.layers.34.block_sparse_moe.experts.162.w3", "model.layers.34.block_sparse_moe.experts.163.w3", "model.layers.34.block_sparse_moe.experts.164.w3", "model.layers.34.block_sparse_moe.experts.165.w3", "model.layers.34.block_sparse_moe.experts.166.w3", "model.layers.34.block_sparse_moe.experts.167.w3", "model.layers.34.block_sparse_moe.experts.168.w3", "model.layers.34.block_sparse_moe.experts.169.w3", "model.layers.34.block_sparse_moe.experts.170.w3", "model.layers.34.block_sparse_moe.experts.171.w3", "model.layers.34.block_sparse_moe.experts.172.w3", "model.layers.34.block_sparse_moe.experts.173.w3", "model.layers.34.block_sparse_moe.experts.174.w3", "model.layers.34.block_sparse_moe.experts.175.w3", "model.layers.34.block_sparse_moe.experts.176.w3", "model.layers.34.block_sparse_moe.experts.177.w3", "model.layers.34.block_sparse_moe.experts.178.w3", "model.layers.34.block_sparse_moe.experts.179.w3", "model.layers.34.block_sparse_moe.experts.180.w3", "model.layers.34.block_sparse_moe.experts.181.w3", "model.layers.34.block_sparse_moe.experts.182.w3", "model.layers.34.block_sparse_moe.experts.183.w3", "model.layers.34.block_sparse_moe.experts.184.w3", "model.layers.34.block_sparse_moe.experts.185.w3", "model.layers.34.block_sparse_moe.experts.186.w3", "model.layers.34.block_sparse_moe.experts.187.w3", "model.layers.34.block_sparse_moe.experts.188.w3", "model.layers.34.block_sparse_moe.experts.189.w3", "model.layers.34.block_sparse_moe.experts.190.w3", "model.layers.34.block_sparse_moe.experts.191.w3", "model.layers.34.block_sparse_moe.experts.192.w3", "model.layers.34.block_sparse_moe.experts.193.w3", "model.layers.34.block_sparse_moe.experts.194.w3", "model.layers.34.block_sparse_moe.experts.195.w3", "model.layers.34.block_sparse_moe.experts.196.w3", "model.layers.34.block_sparse_moe.experts.197.w3", "model.layers.34.block_sparse_moe.experts.198.w3", "model.layers.34.block_sparse_moe.experts.199.w3", "model.layers.34.block_sparse_moe.experts.200.w3", "model.layers.34.block_sparse_moe.experts.201.w3", "model.layers.34.block_sparse_moe.experts.202.w3", "model.layers.34.block_sparse_moe.experts.203.w3", "model.layers.34.block_sparse_moe.experts.204.w3", "model.layers.34.block_sparse_moe.experts.205.w3", "model.layers.34.block_sparse_moe.experts.206.w3", "model.layers.34.block_sparse_moe.experts.207.w3", "model.layers.34.block_sparse_moe.experts.208.w3", "model.layers.34.block_sparse_moe.experts.209.w3", "model.layers.34.block_sparse_moe.experts.210.w3", "model.layers.34.block_sparse_moe.experts.211.w3", "model.layers.34.block_sparse_moe.experts.212.w3", "model.layers.34.block_sparse_moe.experts.213.w3", "model.layers.34.block_sparse_moe.experts.214.w3", "model.layers.34.block_sparse_moe.experts.215.w3", "model.layers.34.block_sparse_moe.experts.216.w3", "model.layers.34.block_sparse_moe.experts.217.w3", "model.layers.34.block_sparse_moe.experts.218.w3", "model.layers.34.block_sparse_moe.experts.219.w3", "model.layers.34.block_sparse_moe.experts.220.w3", "model.layers.34.block_sparse_moe.experts.221.w3", "model.layers.34.block_sparse_moe.experts.222.w3", "model.layers.34.block_sparse_moe.experts.223.w3", "model.layers.34.block_sparse_moe.experts.224.w3", "model.layers.34.block_sparse_moe.experts.225.w3", "model.layers.34.block_sparse_moe.experts.226.w3", "model.layers.34.block_sparse_moe.experts.227.w3", "model.layers.34.block_sparse_moe.experts.228.w3", "model.layers.34.block_sparse_moe.experts.229.w3", "model.layers.34.block_sparse_moe.experts.230.w3", "model.layers.34.block_sparse_moe.experts.231.w3", "model.layers.34.block_sparse_moe.experts.232.w3", "model.layers.34.block_sparse_moe.experts.233.w3", "model.layers.34.block_sparse_moe.experts.234.w3", "model.layers.34.block_sparse_moe.experts.235.w3", "model.layers.34.block_sparse_moe.experts.236.w3", "model.layers.34.block_sparse_moe.experts.237.w3", "model.layers.34.block_sparse_moe.experts.238.w3", "model.layers.34.block_sparse_moe.experts.239.w3", "model.layers.34.block_sparse_moe.experts.240.w3", "model.layers.34.block_sparse_moe.experts.241.w3", "model.layers.34.block_sparse_moe.experts.242.w3", "model.layers.34.block_sparse_moe.experts.243.w3", "model.layers.34.block_sparse_moe.experts.244.w3", "model.layers.34.block_sparse_moe.experts.245.w3", "model.layers.34.block_sparse_moe.experts.246.w3", "model.layers.34.block_sparse_moe.experts.247.w3", "model.layers.34.block_sparse_moe.experts.248.w3", "model.layers.34.block_sparse_moe.experts.249.w3", "model.layers.34.block_sparse_moe.experts.250.w3", "model.layers.34.block_sparse_moe.experts.251.w3", "model.layers.34.block_sparse_moe.experts.252.w3", "model.layers.34.block_sparse_moe.experts.253.w3", "model.layers.34.block_sparse_moe.experts.254.w3", "model.layers.34.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0002461610361933625, "dbits": 2415919104 } ] }, { "idx": 174, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w2", "model.layers.34.block_sparse_moe.experts.1.w2", "model.layers.34.block_sparse_moe.experts.2.w2", "model.layers.34.block_sparse_moe.experts.3.w2", "model.layers.34.block_sparse_moe.experts.4.w2", "model.layers.34.block_sparse_moe.experts.5.w2", "model.layers.34.block_sparse_moe.experts.6.w2", "model.layers.34.block_sparse_moe.experts.7.w2", "model.layers.34.block_sparse_moe.experts.8.w2", "model.layers.34.block_sparse_moe.experts.9.w2", "model.layers.34.block_sparse_moe.experts.10.w2", "model.layers.34.block_sparse_moe.experts.11.w2", "model.layers.34.block_sparse_moe.experts.12.w2", "model.layers.34.block_sparse_moe.experts.13.w2", "model.layers.34.block_sparse_moe.experts.14.w2", "model.layers.34.block_sparse_moe.experts.15.w2", "model.layers.34.block_sparse_moe.experts.16.w2", "model.layers.34.block_sparse_moe.experts.17.w2", "model.layers.34.block_sparse_moe.experts.18.w2", "model.layers.34.block_sparse_moe.experts.19.w2", "model.layers.34.block_sparse_moe.experts.20.w2", "model.layers.34.block_sparse_moe.experts.21.w2", "model.layers.34.block_sparse_moe.experts.22.w2", "model.layers.34.block_sparse_moe.experts.23.w2", "model.layers.34.block_sparse_moe.experts.24.w2", "model.layers.34.block_sparse_moe.experts.25.w2", "model.layers.34.block_sparse_moe.experts.26.w2", "model.layers.34.block_sparse_moe.experts.27.w2", "model.layers.34.block_sparse_moe.experts.28.w2", "model.layers.34.block_sparse_moe.experts.29.w2", "model.layers.34.block_sparse_moe.experts.30.w2", "model.layers.34.block_sparse_moe.experts.31.w2", "model.layers.34.block_sparse_moe.experts.32.w2", "model.layers.34.block_sparse_moe.experts.33.w2", "model.layers.34.block_sparse_moe.experts.34.w2", "model.layers.34.block_sparse_moe.experts.35.w2", "model.layers.34.block_sparse_moe.experts.36.w2", "model.layers.34.block_sparse_moe.experts.37.w2", "model.layers.34.block_sparse_moe.experts.38.w2", "model.layers.34.block_sparse_moe.experts.39.w2", "model.layers.34.block_sparse_moe.experts.40.w2", "model.layers.34.block_sparse_moe.experts.41.w2", "model.layers.34.block_sparse_moe.experts.42.w2", "model.layers.34.block_sparse_moe.experts.43.w2", "model.layers.34.block_sparse_moe.experts.44.w2", "model.layers.34.block_sparse_moe.experts.45.w2", "model.layers.34.block_sparse_moe.experts.46.w2", "model.layers.34.block_sparse_moe.experts.47.w2", "model.layers.34.block_sparse_moe.experts.48.w2", "model.layers.34.block_sparse_moe.experts.49.w2", "model.layers.34.block_sparse_moe.experts.50.w2", "model.layers.34.block_sparse_moe.experts.51.w2", "model.layers.34.block_sparse_moe.experts.52.w2", "model.layers.34.block_sparse_moe.experts.53.w2", "model.layers.34.block_sparse_moe.experts.54.w2", "model.layers.34.block_sparse_moe.experts.55.w2", "model.layers.34.block_sparse_moe.experts.56.w2", "model.layers.34.block_sparse_moe.experts.57.w2", "model.layers.34.block_sparse_moe.experts.58.w2", "model.layers.34.block_sparse_moe.experts.59.w2", "model.layers.34.block_sparse_moe.experts.60.w2", "model.layers.34.block_sparse_moe.experts.61.w2", "model.layers.34.block_sparse_moe.experts.62.w2", "model.layers.34.block_sparse_moe.experts.63.w2", "model.layers.34.block_sparse_moe.experts.64.w2", "model.layers.34.block_sparse_moe.experts.65.w2", "model.layers.34.block_sparse_moe.experts.66.w2", "model.layers.34.block_sparse_moe.experts.67.w2", "model.layers.34.block_sparse_moe.experts.68.w2", "model.layers.34.block_sparse_moe.experts.69.w2", "model.layers.34.block_sparse_moe.experts.70.w2", "model.layers.34.block_sparse_moe.experts.71.w2", "model.layers.34.block_sparse_moe.experts.72.w2", "model.layers.34.block_sparse_moe.experts.73.w2", "model.layers.34.block_sparse_moe.experts.74.w2", "model.layers.34.block_sparse_moe.experts.75.w2", "model.layers.34.block_sparse_moe.experts.76.w2", "model.layers.34.block_sparse_moe.experts.77.w2", "model.layers.34.block_sparse_moe.experts.78.w2", "model.layers.34.block_sparse_moe.experts.79.w2", "model.layers.34.block_sparse_moe.experts.80.w2", "model.layers.34.block_sparse_moe.experts.81.w2", "model.layers.34.block_sparse_moe.experts.82.w2", "model.layers.34.block_sparse_moe.experts.83.w2", "model.layers.34.block_sparse_moe.experts.84.w2", "model.layers.34.block_sparse_moe.experts.85.w2", "model.layers.34.block_sparse_moe.experts.86.w2", "model.layers.34.block_sparse_moe.experts.87.w2", "model.layers.34.block_sparse_moe.experts.88.w2", "model.layers.34.block_sparse_moe.experts.89.w2", "model.layers.34.block_sparse_moe.experts.90.w2", "model.layers.34.block_sparse_moe.experts.91.w2", "model.layers.34.block_sparse_moe.experts.92.w2", "model.layers.34.block_sparse_moe.experts.93.w2", "model.layers.34.block_sparse_moe.experts.94.w2", "model.layers.34.block_sparse_moe.experts.95.w2", "model.layers.34.block_sparse_moe.experts.96.w2", "model.layers.34.block_sparse_moe.experts.97.w2", "model.layers.34.block_sparse_moe.experts.98.w2", "model.layers.34.block_sparse_moe.experts.99.w2", "model.layers.34.block_sparse_moe.experts.100.w2", "model.layers.34.block_sparse_moe.experts.101.w2", "model.layers.34.block_sparse_moe.experts.102.w2", "model.layers.34.block_sparse_moe.experts.103.w2", "model.layers.34.block_sparse_moe.experts.104.w2", "model.layers.34.block_sparse_moe.experts.105.w2", "model.layers.34.block_sparse_moe.experts.106.w2", "model.layers.34.block_sparse_moe.experts.107.w2", "model.layers.34.block_sparse_moe.experts.108.w2", "model.layers.34.block_sparse_moe.experts.109.w2", "model.layers.34.block_sparse_moe.experts.110.w2", "model.layers.34.block_sparse_moe.experts.111.w2", "model.layers.34.block_sparse_moe.experts.112.w2", "model.layers.34.block_sparse_moe.experts.113.w2", "model.layers.34.block_sparse_moe.experts.114.w2", "model.layers.34.block_sparse_moe.experts.115.w2", "model.layers.34.block_sparse_moe.experts.116.w2", "model.layers.34.block_sparse_moe.experts.117.w2", "model.layers.34.block_sparse_moe.experts.118.w2", "model.layers.34.block_sparse_moe.experts.119.w2", "model.layers.34.block_sparse_moe.experts.120.w2", "model.layers.34.block_sparse_moe.experts.121.w2", "model.layers.34.block_sparse_moe.experts.122.w2", "model.layers.34.block_sparse_moe.experts.123.w2", "model.layers.34.block_sparse_moe.experts.124.w2", "model.layers.34.block_sparse_moe.experts.125.w2", "model.layers.34.block_sparse_moe.experts.126.w2", "model.layers.34.block_sparse_moe.experts.127.w2", "model.layers.34.block_sparse_moe.experts.128.w2", "model.layers.34.block_sparse_moe.experts.129.w2", "model.layers.34.block_sparse_moe.experts.130.w2", "model.layers.34.block_sparse_moe.experts.131.w2", "model.layers.34.block_sparse_moe.experts.132.w2", "model.layers.34.block_sparse_moe.experts.133.w2", "model.layers.34.block_sparse_moe.experts.134.w2", "model.layers.34.block_sparse_moe.experts.135.w2", "model.layers.34.block_sparse_moe.experts.136.w2", "model.layers.34.block_sparse_moe.experts.137.w2", "model.layers.34.block_sparse_moe.experts.138.w2", "model.layers.34.block_sparse_moe.experts.139.w2", "model.layers.34.block_sparse_moe.experts.140.w2", "model.layers.34.block_sparse_moe.experts.141.w2", "model.layers.34.block_sparse_moe.experts.142.w2", "model.layers.34.block_sparse_moe.experts.143.w2", "model.layers.34.block_sparse_moe.experts.144.w2", "model.layers.34.block_sparse_moe.experts.145.w2", "model.layers.34.block_sparse_moe.experts.146.w2", "model.layers.34.block_sparse_moe.experts.147.w2", "model.layers.34.block_sparse_moe.experts.148.w2", "model.layers.34.block_sparse_moe.experts.149.w2", "model.layers.34.block_sparse_moe.experts.150.w2", "model.layers.34.block_sparse_moe.experts.151.w2", "model.layers.34.block_sparse_moe.experts.152.w2", "model.layers.34.block_sparse_moe.experts.153.w2", "model.layers.34.block_sparse_moe.experts.154.w2", "model.layers.34.block_sparse_moe.experts.155.w2", "model.layers.34.block_sparse_moe.experts.156.w2", "model.layers.34.block_sparse_moe.experts.157.w2", "model.layers.34.block_sparse_moe.experts.158.w2", "model.layers.34.block_sparse_moe.experts.159.w2", "model.layers.34.block_sparse_moe.experts.160.w2", "model.layers.34.block_sparse_moe.experts.161.w2", "model.layers.34.block_sparse_moe.experts.162.w2", "model.layers.34.block_sparse_moe.experts.163.w2", "model.layers.34.block_sparse_moe.experts.164.w2", "model.layers.34.block_sparse_moe.experts.165.w2", "model.layers.34.block_sparse_moe.experts.166.w2", "model.layers.34.block_sparse_moe.experts.167.w2", "model.layers.34.block_sparse_moe.experts.168.w2", "model.layers.34.block_sparse_moe.experts.169.w2", "model.layers.34.block_sparse_moe.experts.170.w2", "model.layers.34.block_sparse_moe.experts.171.w2", "model.layers.34.block_sparse_moe.experts.172.w2", "model.layers.34.block_sparse_moe.experts.173.w2", "model.layers.34.block_sparse_moe.experts.174.w2", "model.layers.34.block_sparse_moe.experts.175.w2", "model.layers.34.block_sparse_moe.experts.176.w2", "model.layers.34.block_sparse_moe.experts.177.w2", "model.layers.34.block_sparse_moe.experts.178.w2", "model.layers.34.block_sparse_moe.experts.179.w2", "model.layers.34.block_sparse_moe.experts.180.w2", "model.layers.34.block_sparse_moe.experts.181.w2", "model.layers.34.block_sparse_moe.experts.182.w2", "model.layers.34.block_sparse_moe.experts.183.w2", "model.layers.34.block_sparse_moe.experts.184.w2", "model.layers.34.block_sparse_moe.experts.185.w2", "model.layers.34.block_sparse_moe.experts.186.w2", "model.layers.34.block_sparse_moe.experts.187.w2", "model.layers.34.block_sparse_moe.experts.188.w2", "model.layers.34.block_sparse_moe.experts.189.w2", "model.layers.34.block_sparse_moe.experts.190.w2", "model.layers.34.block_sparse_moe.experts.191.w2", "model.layers.34.block_sparse_moe.experts.192.w2", "model.layers.34.block_sparse_moe.experts.193.w2", "model.layers.34.block_sparse_moe.experts.194.w2", "model.layers.34.block_sparse_moe.experts.195.w2", "model.layers.34.block_sparse_moe.experts.196.w2", "model.layers.34.block_sparse_moe.experts.197.w2", "model.layers.34.block_sparse_moe.experts.198.w2", "model.layers.34.block_sparse_moe.experts.199.w2", "model.layers.34.block_sparse_moe.experts.200.w2", "model.layers.34.block_sparse_moe.experts.201.w2", "model.layers.34.block_sparse_moe.experts.202.w2", "model.layers.34.block_sparse_moe.experts.203.w2", "model.layers.34.block_sparse_moe.experts.204.w2", "model.layers.34.block_sparse_moe.experts.205.w2", "model.layers.34.block_sparse_moe.experts.206.w2", "model.layers.34.block_sparse_moe.experts.207.w2", "model.layers.34.block_sparse_moe.experts.208.w2", "model.layers.34.block_sparse_moe.experts.209.w2", "model.layers.34.block_sparse_moe.experts.210.w2", "model.layers.34.block_sparse_moe.experts.211.w2", "model.layers.34.block_sparse_moe.experts.212.w2", "model.layers.34.block_sparse_moe.experts.213.w2", "model.layers.34.block_sparse_moe.experts.214.w2", "model.layers.34.block_sparse_moe.experts.215.w2", "model.layers.34.block_sparse_moe.experts.216.w2", "model.layers.34.block_sparse_moe.experts.217.w2", "model.layers.34.block_sparse_moe.experts.218.w2", "model.layers.34.block_sparse_moe.experts.219.w2", "model.layers.34.block_sparse_moe.experts.220.w2", "model.layers.34.block_sparse_moe.experts.221.w2", "model.layers.34.block_sparse_moe.experts.222.w2", "model.layers.34.block_sparse_moe.experts.223.w2", "model.layers.34.block_sparse_moe.experts.224.w2", "model.layers.34.block_sparse_moe.experts.225.w2", "model.layers.34.block_sparse_moe.experts.226.w2", "model.layers.34.block_sparse_moe.experts.227.w2", "model.layers.34.block_sparse_moe.experts.228.w2", "model.layers.34.block_sparse_moe.experts.229.w2", "model.layers.34.block_sparse_moe.experts.230.w2", "model.layers.34.block_sparse_moe.experts.231.w2", "model.layers.34.block_sparse_moe.experts.232.w2", "model.layers.34.block_sparse_moe.experts.233.w2", "model.layers.34.block_sparse_moe.experts.234.w2", "model.layers.34.block_sparse_moe.experts.235.w2", "model.layers.34.block_sparse_moe.experts.236.w2", "model.layers.34.block_sparse_moe.experts.237.w2", "model.layers.34.block_sparse_moe.experts.238.w2", "model.layers.34.block_sparse_moe.experts.239.w2", "model.layers.34.block_sparse_moe.experts.240.w2", "model.layers.34.block_sparse_moe.experts.241.w2", "model.layers.34.block_sparse_moe.experts.242.w2", "model.layers.34.block_sparse_moe.experts.243.w2", "model.layers.34.block_sparse_moe.experts.244.w2", "model.layers.34.block_sparse_moe.experts.245.w2", "model.layers.34.block_sparse_moe.experts.246.w2", "model.layers.34.block_sparse_moe.experts.247.w2", "model.layers.34.block_sparse_moe.experts.248.w2", "model.layers.34.block_sparse_moe.experts.249.w2", "model.layers.34.block_sparse_moe.experts.250.w2", "model.layers.34.block_sparse_moe.experts.251.w2", "model.layers.34.block_sparse_moe.experts.252.w2", "model.layers.34.block_sparse_moe.experts.253.w2", "model.layers.34.block_sparse_moe.experts.254.w2", "model.layers.34.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -1.3425201177599821e-05, "dbits": 1207959552 } ] }, { "idx": 175, "layers": [ "model.layers.35.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0014068834483623588, "dbits": 18874368 } ] }, { "idx": 176, "layers": [ "model.layers.35.self_attn.k_proj", "model.layers.35.self_attn.v_proj" ], "candidates": [ { "dkld": -7.008723914624093e-05, "dbits": 6291456 } ] }, { "idx": 177, "layers": [ "model.layers.35.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0002937089651823127, "dbits": 18874368 } ] }, { "idx": 178, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w1", "model.layers.35.block_sparse_moe.experts.1.w1", "model.layers.35.block_sparse_moe.experts.2.w1", "model.layers.35.block_sparse_moe.experts.3.w1", "model.layers.35.block_sparse_moe.experts.4.w1", "model.layers.35.block_sparse_moe.experts.5.w1", "model.layers.35.block_sparse_moe.experts.6.w1", "model.layers.35.block_sparse_moe.experts.7.w1", "model.layers.35.block_sparse_moe.experts.8.w1", "model.layers.35.block_sparse_moe.experts.9.w1", "model.layers.35.block_sparse_moe.experts.10.w1", "model.layers.35.block_sparse_moe.experts.11.w1", "model.layers.35.block_sparse_moe.experts.12.w1", "model.layers.35.block_sparse_moe.experts.13.w1", "model.layers.35.block_sparse_moe.experts.14.w1", "model.layers.35.block_sparse_moe.experts.15.w1", "model.layers.35.block_sparse_moe.experts.16.w1", "model.layers.35.block_sparse_moe.experts.17.w1", "model.layers.35.block_sparse_moe.experts.18.w1", "model.layers.35.block_sparse_moe.experts.19.w1", "model.layers.35.block_sparse_moe.experts.20.w1", "model.layers.35.block_sparse_moe.experts.21.w1", "model.layers.35.block_sparse_moe.experts.22.w1", "model.layers.35.block_sparse_moe.experts.23.w1", "model.layers.35.block_sparse_moe.experts.24.w1", "model.layers.35.block_sparse_moe.experts.25.w1", "model.layers.35.block_sparse_moe.experts.26.w1", "model.layers.35.block_sparse_moe.experts.27.w1", "model.layers.35.block_sparse_moe.experts.28.w1", "model.layers.35.block_sparse_moe.experts.29.w1", "model.layers.35.block_sparse_moe.experts.30.w1", "model.layers.35.block_sparse_moe.experts.31.w1", "model.layers.35.block_sparse_moe.experts.32.w1", "model.layers.35.block_sparse_moe.experts.33.w1", "model.layers.35.block_sparse_moe.experts.34.w1", "model.layers.35.block_sparse_moe.experts.35.w1", "model.layers.35.block_sparse_moe.experts.36.w1", "model.layers.35.block_sparse_moe.experts.37.w1", "model.layers.35.block_sparse_moe.experts.38.w1", "model.layers.35.block_sparse_moe.experts.39.w1", "model.layers.35.block_sparse_moe.experts.40.w1", "model.layers.35.block_sparse_moe.experts.41.w1", "model.layers.35.block_sparse_moe.experts.42.w1", "model.layers.35.block_sparse_moe.experts.43.w1", "model.layers.35.block_sparse_moe.experts.44.w1", "model.layers.35.block_sparse_moe.experts.45.w1", "model.layers.35.block_sparse_moe.experts.46.w1", "model.layers.35.block_sparse_moe.experts.47.w1", "model.layers.35.block_sparse_moe.experts.48.w1", "model.layers.35.block_sparse_moe.experts.49.w1", "model.layers.35.block_sparse_moe.experts.50.w1", "model.layers.35.block_sparse_moe.experts.51.w1", "model.layers.35.block_sparse_moe.experts.52.w1", "model.layers.35.block_sparse_moe.experts.53.w1", "model.layers.35.block_sparse_moe.experts.54.w1", "model.layers.35.block_sparse_moe.experts.55.w1", "model.layers.35.block_sparse_moe.experts.56.w1", "model.layers.35.block_sparse_moe.experts.57.w1", "model.layers.35.block_sparse_moe.experts.58.w1", "model.layers.35.block_sparse_moe.experts.59.w1", "model.layers.35.block_sparse_moe.experts.60.w1", "model.layers.35.block_sparse_moe.experts.61.w1", "model.layers.35.block_sparse_moe.experts.62.w1", "model.layers.35.block_sparse_moe.experts.63.w1", "model.layers.35.block_sparse_moe.experts.64.w1", "model.layers.35.block_sparse_moe.experts.65.w1", "model.layers.35.block_sparse_moe.experts.66.w1", "model.layers.35.block_sparse_moe.experts.67.w1", "model.layers.35.block_sparse_moe.experts.68.w1", "model.layers.35.block_sparse_moe.experts.69.w1", "model.layers.35.block_sparse_moe.experts.70.w1", "model.layers.35.block_sparse_moe.experts.71.w1", "model.layers.35.block_sparse_moe.experts.72.w1", "model.layers.35.block_sparse_moe.experts.73.w1", "model.layers.35.block_sparse_moe.experts.74.w1", "model.layers.35.block_sparse_moe.experts.75.w1", "model.layers.35.block_sparse_moe.experts.76.w1", "model.layers.35.block_sparse_moe.experts.77.w1", "model.layers.35.block_sparse_moe.experts.78.w1", "model.layers.35.block_sparse_moe.experts.79.w1", "model.layers.35.block_sparse_moe.experts.80.w1", "model.layers.35.block_sparse_moe.experts.81.w1", "model.layers.35.block_sparse_moe.experts.82.w1", "model.layers.35.block_sparse_moe.experts.83.w1", "model.layers.35.block_sparse_moe.experts.84.w1", "model.layers.35.block_sparse_moe.experts.85.w1", "model.layers.35.block_sparse_moe.experts.86.w1", "model.layers.35.block_sparse_moe.experts.87.w1", "model.layers.35.block_sparse_moe.experts.88.w1", "model.layers.35.block_sparse_moe.experts.89.w1", "model.layers.35.block_sparse_moe.experts.90.w1", "model.layers.35.block_sparse_moe.experts.91.w1", "model.layers.35.block_sparse_moe.experts.92.w1", "model.layers.35.block_sparse_moe.experts.93.w1", "model.layers.35.block_sparse_moe.experts.94.w1", "model.layers.35.block_sparse_moe.experts.95.w1", "model.layers.35.block_sparse_moe.experts.96.w1", "model.layers.35.block_sparse_moe.experts.97.w1", "model.layers.35.block_sparse_moe.experts.98.w1", "model.layers.35.block_sparse_moe.experts.99.w1", "model.layers.35.block_sparse_moe.experts.100.w1", "model.layers.35.block_sparse_moe.experts.101.w1", "model.layers.35.block_sparse_moe.experts.102.w1", "model.layers.35.block_sparse_moe.experts.103.w1", "model.layers.35.block_sparse_moe.experts.104.w1", "model.layers.35.block_sparse_moe.experts.105.w1", "model.layers.35.block_sparse_moe.experts.106.w1", "model.layers.35.block_sparse_moe.experts.107.w1", "model.layers.35.block_sparse_moe.experts.108.w1", "model.layers.35.block_sparse_moe.experts.109.w1", "model.layers.35.block_sparse_moe.experts.110.w1", "model.layers.35.block_sparse_moe.experts.111.w1", "model.layers.35.block_sparse_moe.experts.112.w1", "model.layers.35.block_sparse_moe.experts.113.w1", "model.layers.35.block_sparse_moe.experts.114.w1", "model.layers.35.block_sparse_moe.experts.115.w1", "model.layers.35.block_sparse_moe.experts.116.w1", "model.layers.35.block_sparse_moe.experts.117.w1", "model.layers.35.block_sparse_moe.experts.118.w1", "model.layers.35.block_sparse_moe.experts.119.w1", "model.layers.35.block_sparse_moe.experts.120.w1", "model.layers.35.block_sparse_moe.experts.121.w1", "model.layers.35.block_sparse_moe.experts.122.w1", "model.layers.35.block_sparse_moe.experts.123.w1", "model.layers.35.block_sparse_moe.experts.124.w1", "model.layers.35.block_sparse_moe.experts.125.w1", "model.layers.35.block_sparse_moe.experts.126.w1", "model.layers.35.block_sparse_moe.experts.127.w1", "model.layers.35.block_sparse_moe.experts.128.w1", "model.layers.35.block_sparse_moe.experts.129.w1", "model.layers.35.block_sparse_moe.experts.130.w1", "model.layers.35.block_sparse_moe.experts.131.w1", "model.layers.35.block_sparse_moe.experts.132.w1", "model.layers.35.block_sparse_moe.experts.133.w1", "model.layers.35.block_sparse_moe.experts.134.w1", "model.layers.35.block_sparse_moe.experts.135.w1", "model.layers.35.block_sparse_moe.experts.136.w1", "model.layers.35.block_sparse_moe.experts.137.w1", "model.layers.35.block_sparse_moe.experts.138.w1", "model.layers.35.block_sparse_moe.experts.139.w1", "model.layers.35.block_sparse_moe.experts.140.w1", "model.layers.35.block_sparse_moe.experts.141.w1", "model.layers.35.block_sparse_moe.experts.142.w1", "model.layers.35.block_sparse_moe.experts.143.w1", "model.layers.35.block_sparse_moe.experts.144.w1", "model.layers.35.block_sparse_moe.experts.145.w1", "model.layers.35.block_sparse_moe.experts.146.w1", "model.layers.35.block_sparse_moe.experts.147.w1", "model.layers.35.block_sparse_moe.experts.148.w1", "model.layers.35.block_sparse_moe.experts.149.w1", "model.layers.35.block_sparse_moe.experts.150.w1", "model.layers.35.block_sparse_moe.experts.151.w1", "model.layers.35.block_sparse_moe.experts.152.w1", "model.layers.35.block_sparse_moe.experts.153.w1", "model.layers.35.block_sparse_moe.experts.154.w1", "model.layers.35.block_sparse_moe.experts.155.w1", "model.layers.35.block_sparse_moe.experts.156.w1", "model.layers.35.block_sparse_moe.experts.157.w1", "model.layers.35.block_sparse_moe.experts.158.w1", "model.layers.35.block_sparse_moe.experts.159.w1", "model.layers.35.block_sparse_moe.experts.160.w1", "model.layers.35.block_sparse_moe.experts.161.w1", "model.layers.35.block_sparse_moe.experts.162.w1", "model.layers.35.block_sparse_moe.experts.163.w1", "model.layers.35.block_sparse_moe.experts.164.w1", "model.layers.35.block_sparse_moe.experts.165.w1", "model.layers.35.block_sparse_moe.experts.166.w1", "model.layers.35.block_sparse_moe.experts.167.w1", "model.layers.35.block_sparse_moe.experts.168.w1", "model.layers.35.block_sparse_moe.experts.169.w1", "model.layers.35.block_sparse_moe.experts.170.w1", "model.layers.35.block_sparse_moe.experts.171.w1", "model.layers.35.block_sparse_moe.experts.172.w1", "model.layers.35.block_sparse_moe.experts.173.w1", "model.layers.35.block_sparse_moe.experts.174.w1", "model.layers.35.block_sparse_moe.experts.175.w1", "model.layers.35.block_sparse_moe.experts.176.w1", "model.layers.35.block_sparse_moe.experts.177.w1", "model.layers.35.block_sparse_moe.experts.178.w1", "model.layers.35.block_sparse_moe.experts.179.w1", "model.layers.35.block_sparse_moe.experts.180.w1", "model.layers.35.block_sparse_moe.experts.181.w1", "model.layers.35.block_sparse_moe.experts.182.w1", "model.layers.35.block_sparse_moe.experts.183.w1", "model.layers.35.block_sparse_moe.experts.184.w1", "model.layers.35.block_sparse_moe.experts.185.w1", "model.layers.35.block_sparse_moe.experts.186.w1", "model.layers.35.block_sparse_moe.experts.187.w1", "model.layers.35.block_sparse_moe.experts.188.w1", "model.layers.35.block_sparse_moe.experts.189.w1", "model.layers.35.block_sparse_moe.experts.190.w1", "model.layers.35.block_sparse_moe.experts.191.w1", "model.layers.35.block_sparse_moe.experts.192.w1", "model.layers.35.block_sparse_moe.experts.193.w1", "model.layers.35.block_sparse_moe.experts.194.w1", "model.layers.35.block_sparse_moe.experts.195.w1", "model.layers.35.block_sparse_moe.experts.196.w1", "model.layers.35.block_sparse_moe.experts.197.w1", "model.layers.35.block_sparse_moe.experts.198.w1", "model.layers.35.block_sparse_moe.experts.199.w1", "model.layers.35.block_sparse_moe.experts.200.w1", "model.layers.35.block_sparse_moe.experts.201.w1", "model.layers.35.block_sparse_moe.experts.202.w1", "model.layers.35.block_sparse_moe.experts.203.w1", "model.layers.35.block_sparse_moe.experts.204.w1", "model.layers.35.block_sparse_moe.experts.205.w1", "model.layers.35.block_sparse_moe.experts.206.w1", "model.layers.35.block_sparse_moe.experts.207.w1", "model.layers.35.block_sparse_moe.experts.208.w1", "model.layers.35.block_sparse_moe.experts.209.w1", "model.layers.35.block_sparse_moe.experts.210.w1", "model.layers.35.block_sparse_moe.experts.211.w1", "model.layers.35.block_sparse_moe.experts.212.w1", "model.layers.35.block_sparse_moe.experts.213.w1", "model.layers.35.block_sparse_moe.experts.214.w1", "model.layers.35.block_sparse_moe.experts.215.w1", "model.layers.35.block_sparse_moe.experts.216.w1", "model.layers.35.block_sparse_moe.experts.217.w1", "model.layers.35.block_sparse_moe.experts.218.w1", "model.layers.35.block_sparse_moe.experts.219.w1", "model.layers.35.block_sparse_moe.experts.220.w1", "model.layers.35.block_sparse_moe.experts.221.w1", "model.layers.35.block_sparse_moe.experts.222.w1", "model.layers.35.block_sparse_moe.experts.223.w1", "model.layers.35.block_sparse_moe.experts.224.w1", "model.layers.35.block_sparse_moe.experts.225.w1", "model.layers.35.block_sparse_moe.experts.226.w1", "model.layers.35.block_sparse_moe.experts.227.w1", "model.layers.35.block_sparse_moe.experts.228.w1", "model.layers.35.block_sparse_moe.experts.229.w1", "model.layers.35.block_sparse_moe.experts.230.w1", "model.layers.35.block_sparse_moe.experts.231.w1", "model.layers.35.block_sparse_moe.experts.232.w1", "model.layers.35.block_sparse_moe.experts.233.w1", "model.layers.35.block_sparse_moe.experts.234.w1", "model.layers.35.block_sparse_moe.experts.235.w1", "model.layers.35.block_sparse_moe.experts.236.w1", "model.layers.35.block_sparse_moe.experts.237.w1", "model.layers.35.block_sparse_moe.experts.238.w1", "model.layers.35.block_sparse_moe.experts.239.w1", "model.layers.35.block_sparse_moe.experts.240.w1", "model.layers.35.block_sparse_moe.experts.241.w1", "model.layers.35.block_sparse_moe.experts.242.w1", "model.layers.35.block_sparse_moe.experts.243.w1", "model.layers.35.block_sparse_moe.experts.244.w1", "model.layers.35.block_sparse_moe.experts.245.w1", "model.layers.35.block_sparse_moe.experts.246.w1", "model.layers.35.block_sparse_moe.experts.247.w1", "model.layers.35.block_sparse_moe.experts.248.w1", "model.layers.35.block_sparse_moe.experts.249.w1", "model.layers.35.block_sparse_moe.experts.250.w1", "model.layers.35.block_sparse_moe.experts.251.w1", "model.layers.35.block_sparse_moe.experts.252.w1", "model.layers.35.block_sparse_moe.experts.253.w1", "model.layers.35.block_sparse_moe.experts.254.w1", "model.layers.35.block_sparse_moe.experts.255.w1", "model.layers.35.block_sparse_moe.experts.0.w3", "model.layers.35.block_sparse_moe.experts.1.w3", "model.layers.35.block_sparse_moe.experts.2.w3", "model.layers.35.block_sparse_moe.experts.3.w3", "model.layers.35.block_sparse_moe.experts.4.w3", "model.layers.35.block_sparse_moe.experts.5.w3", "model.layers.35.block_sparse_moe.experts.6.w3", "model.layers.35.block_sparse_moe.experts.7.w3", "model.layers.35.block_sparse_moe.experts.8.w3", "model.layers.35.block_sparse_moe.experts.9.w3", "model.layers.35.block_sparse_moe.experts.10.w3", "model.layers.35.block_sparse_moe.experts.11.w3", "model.layers.35.block_sparse_moe.experts.12.w3", "model.layers.35.block_sparse_moe.experts.13.w3", "model.layers.35.block_sparse_moe.experts.14.w3", "model.layers.35.block_sparse_moe.experts.15.w3", "model.layers.35.block_sparse_moe.experts.16.w3", "model.layers.35.block_sparse_moe.experts.17.w3", "model.layers.35.block_sparse_moe.experts.18.w3", "model.layers.35.block_sparse_moe.experts.19.w3", "model.layers.35.block_sparse_moe.experts.20.w3", "model.layers.35.block_sparse_moe.experts.21.w3", "model.layers.35.block_sparse_moe.experts.22.w3", "model.layers.35.block_sparse_moe.experts.23.w3", "model.layers.35.block_sparse_moe.experts.24.w3", "model.layers.35.block_sparse_moe.experts.25.w3", "model.layers.35.block_sparse_moe.experts.26.w3", "model.layers.35.block_sparse_moe.experts.27.w3", "model.layers.35.block_sparse_moe.experts.28.w3", "model.layers.35.block_sparse_moe.experts.29.w3", "model.layers.35.block_sparse_moe.experts.30.w3", "model.layers.35.block_sparse_moe.experts.31.w3", "model.layers.35.block_sparse_moe.experts.32.w3", "model.layers.35.block_sparse_moe.experts.33.w3", "model.layers.35.block_sparse_moe.experts.34.w3", "model.layers.35.block_sparse_moe.experts.35.w3", "model.layers.35.block_sparse_moe.experts.36.w3", "model.layers.35.block_sparse_moe.experts.37.w3", "model.layers.35.block_sparse_moe.experts.38.w3", "model.layers.35.block_sparse_moe.experts.39.w3", "model.layers.35.block_sparse_moe.experts.40.w3", "model.layers.35.block_sparse_moe.experts.41.w3", "model.layers.35.block_sparse_moe.experts.42.w3", "model.layers.35.block_sparse_moe.experts.43.w3", "model.layers.35.block_sparse_moe.experts.44.w3", "model.layers.35.block_sparse_moe.experts.45.w3", "model.layers.35.block_sparse_moe.experts.46.w3", "model.layers.35.block_sparse_moe.experts.47.w3", "model.layers.35.block_sparse_moe.experts.48.w3", "model.layers.35.block_sparse_moe.experts.49.w3", "model.layers.35.block_sparse_moe.experts.50.w3", "model.layers.35.block_sparse_moe.experts.51.w3", "model.layers.35.block_sparse_moe.experts.52.w3", "model.layers.35.block_sparse_moe.experts.53.w3", "model.layers.35.block_sparse_moe.experts.54.w3", "model.layers.35.block_sparse_moe.experts.55.w3", "model.layers.35.block_sparse_moe.experts.56.w3", "model.layers.35.block_sparse_moe.experts.57.w3", "model.layers.35.block_sparse_moe.experts.58.w3", "model.layers.35.block_sparse_moe.experts.59.w3", "model.layers.35.block_sparse_moe.experts.60.w3", "model.layers.35.block_sparse_moe.experts.61.w3", "model.layers.35.block_sparse_moe.experts.62.w3", "model.layers.35.block_sparse_moe.experts.63.w3", "model.layers.35.block_sparse_moe.experts.64.w3", "model.layers.35.block_sparse_moe.experts.65.w3", "model.layers.35.block_sparse_moe.experts.66.w3", "model.layers.35.block_sparse_moe.experts.67.w3", "model.layers.35.block_sparse_moe.experts.68.w3", "model.layers.35.block_sparse_moe.experts.69.w3", "model.layers.35.block_sparse_moe.experts.70.w3", "model.layers.35.block_sparse_moe.experts.71.w3", "model.layers.35.block_sparse_moe.experts.72.w3", "model.layers.35.block_sparse_moe.experts.73.w3", "model.layers.35.block_sparse_moe.experts.74.w3", "model.layers.35.block_sparse_moe.experts.75.w3", "model.layers.35.block_sparse_moe.experts.76.w3", "model.layers.35.block_sparse_moe.experts.77.w3", "model.layers.35.block_sparse_moe.experts.78.w3", "model.layers.35.block_sparse_moe.experts.79.w3", "model.layers.35.block_sparse_moe.experts.80.w3", "model.layers.35.block_sparse_moe.experts.81.w3", "model.layers.35.block_sparse_moe.experts.82.w3", "model.layers.35.block_sparse_moe.experts.83.w3", "model.layers.35.block_sparse_moe.experts.84.w3", "model.layers.35.block_sparse_moe.experts.85.w3", "model.layers.35.block_sparse_moe.experts.86.w3", "model.layers.35.block_sparse_moe.experts.87.w3", "model.layers.35.block_sparse_moe.experts.88.w3", "model.layers.35.block_sparse_moe.experts.89.w3", "model.layers.35.block_sparse_moe.experts.90.w3", "model.layers.35.block_sparse_moe.experts.91.w3", "model.layers.35.block_sparse_moe.experts.92.w3", "model.layers.35.block_sparse_moe.experts.93.w3", "model.layers.35.block_sparse_moe.experts.94.w3", "model.layers.35.block_sparse_moe.experts.95.w3", "model.layers.35.block_sparse_moe.experts.96.w3", "model.layers.35.block_sparse_moe.experts.97.w3", "model.layers.35.block_sparse_moe.experts.98.w3", "model.layers.35.block_sparse_moe.experts.99.w3", "model.layers.35.block_sparse_moe.experts.100.w3", "model.layers.35.block_sparse_moe.experts.101.w3", "model.layers.35.block_sparse_moe.experts.102.w3", "model.layers.35.block_sparse_moe.experts.103.w3", "model.layers.35.block_sparse_moe.experts.104.w3", "model.layers.35.block_sparse_moe.experts.105.w3", "model.layers.35.block_sparse_moe.experts.106.w3", "model.layers.35.block_sparse_moe.experts.107.w3", "model.layers.35.block_sparse_moe.experts.108.w3", "model.layers.35.block_sparse_moe.experts.109.w3", "model.layers.35.block_sparse_moe.experts.110.w3", "model.layers.35.block_sparse_moe.experts.111.w3", "model.layers.35.block_sparse_moe.experts.112.w3", "model.layers.35.block_sparse_moe.experts.113.w3", "model.layers.35.block_sparse_moe.experts.114.w3", "model.layers.35.block_sparse_moe.experts.115.w3", "model.layers.35.block_sparse_moe.experts.116.w3", "model.layers.35.block_sparse_moe.experts.117.w3", "model.layers.35.block_sparse_moe.experts.118.w3", "model.layers.35.block_sparse_moe.experts.119.w3", "model.layers.35.block_sparse_moe.experts.120.w3", "model.layers.35.block_sparse_moe.experts.121.w3", "model.layers.35.block_sparse_moe.experts.122.w3", "model.layers.35.block_sparse_moe.experts.123.w3", "model.layers.35.block_sparse_moe.experts.124.w3", "model.layers.35.block_sparse_moe.experts.125.w3", "model.layers.35.block_sparse_moe.experts.126.w3", "model.layers.35.block_sparse_moe.experts.127.w3", "model.layers.35.block_sparse_moe.experts.128.w3", "model.layers.35.block_sparse_moe.experts.129.w3", "model.layers.35.block_sparse_moe.experts.130.w3", "model.layers.35.block_sparse_moe.experts.131.w3", "model.layers.35.block_sparse_moe.experts.132.w3", "model.layers.35.block_sparse_moe.experts.133.w3", "model.layers.35.block_sparse_moe.experts.134.w3", "model.layers.35.block_sparse_moe.experts.135.w3", "model.layers.35.block_sparse_moe.experts.136.w3", "model.layers.35.block_sparse_moe.experts.137.w3", "model.layers.35.block_sparse_moe.experts.138.w3", "model.layers.35.block_sparse_moe.experts.139.w3", "model.layers.35.block_sparse_moe.experts.140.w3", "model.layers.35.block_sparse_moe.experts.141.w3", "model.layers.35.block_sparse_moe.experts.142.w3", "model.layers.35.block_sparse_moe.experts.143.w3", "model.layers.35.block_sparse_moe.experts.144.w3", "model.layers.35.block_sparse_moe.experts.145.w3", "model.layers.35.block_sparse_moe.experts.146.w3", "model.layers.35.block_sparse_moe.experts.147.w3", "model.layers.35.block_sparse_moe.experts.148.w3", "model.layers.35.block_sparse_moe.experts.149.w3", "model.layers.35.block_sparse_moe.experts.150.w3", "model.layers.35.block_sparse_moe.experts.151.w3", "model.layers.35.block_sparse_moe.experts.152.w3", "model.layers.35.block_sparse_moe.experts.153.w3", "model.layers.35.block_sparse_moe.experts.154.w3", "model.layers.35.block_sparse_moe.experts.155.w3", "model.layers.35.block_sparse_moe.experts.156.w3", "model.layers.35.block_sparse_moe.experts.157.w3", "model.layers.35.block_sparse_moe.experts.158.w3", "model.layers.35.block_sparse_moe.experts.159.w3", "model.layers.35.block_sparse_moe.experts.160.w3", "model.layers.35.block_sparse_moe.experts.161.w3", "model.layers.35.block_sparse_moe.experts.162.w3", "model.layers.35.block_sparse_moe.experts.163.w3", "model.layers.35.block_sparse_moe.experts.164.w3", "model.layers.35.block_sparse_moe.experts.165.w3", "model.layers.35.block_sparse_moe.experts.166.w3", "model.layers.35.block_sparse_moe.experts.167.w3", "model.layers.35.block_sparse_moe.experts.168.w3", "model.layers.35.block_sparse_moe.experts.169.w3", "model.layers.35.block_sparse_moe.experts.170.w3", "model.layers.35.block_sparse_moe.experts.171.w3", "model.layers.35.block_sparse_moe.experts.172.w3", "model.layers.35.block_sparse_moe.experts.173.w3", "model.layers.35.block_sparse_moe.experts.174.w3", "model.layers.35.block_sparse_moe.experts.175.w3", "model.layers.35.block_sparse_moe.experts.176.w3", "model.layers.35.block_sparse_moe.experts.177.w3", "model.layers.35.block_sparse_moe.experts.178.w3", "model.layers.35.block_sparse_moe.experts.179.w3", "model.layers.35.block_sparse_moe.experts.180.w3", "model.layers.35.block_sparse_moe.experts.181.w3", "model.layers.35.block_sparse_moe.experts.182.w3", "model.layers.35.block_sparse_moe.experts.183.w3", "model.layers.35.block_sparse_moe.experts.184.w3", "model.layers.35.block_sparse_moe.experts.185.w3", "model.layers.35.block_sparse_moe.experts.186.w3", "model.layers.35.block_sparse_moe.experts.187.w3", "model.layers.35.block_sparse_moe.experts.188.w3", "model.layers.35.block_sparse_moe.experts.189.w3", "model.layers.35.block_sparse_moe.experts.190.w3", "model.layers.35.block_sparse_moe.experts.191.w3", "model.layers.35.block_sparse_moe.experts.192.w3", "model.layers.35.block_sparse_moe.experts.193.w3", "model.layers.35.block_sparse_moe.experts.194.w3", "model.layers.35.block_sparse_moe.experts.195.w3", "model.layers.35.block_sparse_moe.experts.196.w3", "model.layers.35.block_sparse_moe.experts.197.w3", "model.layers.35.block_sparse_moe.experts.198.w3", "model.layers.35.block_sparse_moe.experts.199.w3", "model.layers.35.block_sparse_moe.experts.200.w3", "model.layers.35.block_sparse_moe.experts.201.w3", "model.layers.35.block_sparse_moe.experts.202.w3", "model.layers.35.block_sparse_moe.experts.203.w3", "model.layers.35.block_sparse_moe.experts.204.w3", "model.layers.35.block_sparse_moe.experts.205.w3", "model.layers.35.block_sparse_moe.experts.206.w3", "model.layers.35.block_sparse_moe.experts.207.w3", "model.layers.35.block_sparse_moe.experts.208.w3", "model.layers.35.block_sparse_moe.experts.209.w3", "model.layers.35.block_sparse_moe.experts.210.w3", "model.layers.35.block_sparse_moe.experts.211.w3", "model.layers.35.block_sparse_moe.experts.212.w3", "model.layers.35.block_sparse_moe.experts.213.w3", "model.layers.35.block_sparse_moe.experts.214.w3", "model.layers.35.block_sparse_moe.experts.215.w3", "model.layers.35.block_sparse_moe.experts.216.w3", "model.layers.35.block_sparse_moe.experts.217.w3", "model.layers.35.block_sparse_moe.experts.218.w3", "model.layers.35.block_sparse_moe.experts.219.w3", "model.layers.35.block_sparse_moe.experts.220.w3", "model.layers.35.block_sparse_moe.experts.221.w3", "model.layers.35.block_sparse_moe.experts.222.w3", "model.layers.35.block_sparse_moe.experts.223.w3", "model.layers.35.block_sparse_moe.experts.224.w3", "model.layers.35.block_sparse_moe.experts.225.w3", "model.layers.35.block_sparse_moe.experts.226.w3", "model.layers.35.block_sparse_moe.experts.227.w3", "model.layers.35.block_sparse_moe.experts.228.w3", "model.layers.35.block_sparse_moe.experts.229.w3", "model.layers.35.block_sparse_moe.experts.230.w3", "model.layers.35.block_sparse_moe.experts.231.w3", "model.layers.35.block_sparse_moe.experts.232.w3", "model.layers.35.block_sparse_moe.experts.233.w3", "model.layers.35.block_sparse_moe.experts.234.w3", "model.layers.35.block_sparse_moe.experts.235.w3", "model.layers.35.block_sparse_moe.experts.236.w3", "model.layers.35.block_sparse_moe.experts.237.w3", "model.layers.35.block_sparse_moe.experts.238.w3", "model.layers.35.block_sparse_moe.experts.239.w3", "model.layers.35.block_sparse_moe.experts.240.w3", "model.layers.35.block_sparse_moe.experts.241.w3", "model.layers.35.block_sparse_moe.experts.242.w3", "model.layers.35.block_sparse_moe.experts.243.w3", "model.layers.35.block_sparse_moe.experts.244.w3", "model.layers.35.block_sparse_moe.experts.245.w3", "model.layers.35.block_sparse_moe.experts.246.w3", "model.layers.35.block_sparse_moe.experts.247.w3", "model.layers.35.block_sparse_moe.experts.248.w3", "model.layers.35.block_sparse_moe.experts.249.w3", "model.layers.35.block_sparse_moe.experts.250.w3", "model.layers.35.block_sparse_moe.experts.251.w3", "model.layers.35.block_sparse_moe.experts.252.w3", "model.layers.35.block_sparse_moe.experts.253.w3", "model.layers.35.block_sparse_moe.experts.254.w3", "model.layers.35.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -9.765345603228448e-05, "dbits": 2415919104 } ] }, { "idx": 179, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w2", "model.layers.35.block_sparse_moe.experts.1.w2", "model.layers.35.block_sparse_moe.experts.2.w2", "model.layers.35.block_sparse_moe.experts.3.w2", "model.layers.35.block_sparse_moe.experts.4.w2", "model.layers.35.block_sparse_moe.experts.5.w2", "model.layers.35.block_sparse_moe.experts.6.w2", "model.layers.35.block_sparse_moe.experts.7.w2", "model.layers.35.block_sparse_moe.experts.8.w2", "model.layers.35.block_sparse_moe.experts.9.w2", "model.layers.35.block_sparse_moe.experts.10.w2", "model.layers.35.block_sparse_moe.experts.11.w2", "model.layers.35.block_sparse_moe.experts.12.w2", "model.layers.35.block_sparse_moe.experts.13.w2", "model.layers.35.block_sparse_moe.experts.14.w2", "model.layers.35.block_sparse_moe.experts.15.w2", "model.layers.35.block_sparse_moe.experts.16.w2", "model.layers.35.block_sparse_moe.experts.17.w2", "model.layers.35.block_sparse_moe.experts.18.w2", "model.layers.35.block_sparse_moe.experts.19.w2", "model.layers.35.block_sparse_moe.experts.20.w2", "model.layers.35.block_sparse_moe.experts.21.w2", "model.layers.35.block_sparse_moe.experts.22.w2", "model.layers.35.block_sparse_moe.experts.23.w2", "model.layers.35.block_sparse_moe.experts.24.w2", "model.layers.35.block_sparse_moe.experts.25.w2", "model.layers.35.block_sparse_moe.experts.26.w2", "model.layers.35.block_sparse_moe.experts.27.w2", "model.layers.35.block_sparse_moe.experts.28.w2", "model.layers.35.block_sparse_moe.experts.29.w2", "model.layers.35.block_sparse_moe.experts.30.w2", "model.layers.35.block_sparse_moe.experts.31.w2", "model.layers.35.block_sparse_moe.experts.32.w2", "model.layers.35.block_sparse_moe.experts.33.w2", "model.layers.35.block_sparse_moe.experts.34.w2", "model.layers.35.block_sparse_moe.experts.35.w2", "model.layers.35.block_sparse_moe.experts.36.w2", "model.layers.35.block_sparse_moe.experts.37.w2", "model.layers.35.block_sparse_moe.experts.38.w2", "model.layers.35.block_sparse_moe.experts.39.w2", "model.layers.35.block_sparse_moe.experts.40.w2", "model.layers.35.block_sparse_moe.experts.41.w2", "model.layers.35.block_sparse_moe.experts.42.w2", "model.layers.35.block_sparse_moe.experts.43.w2", "model.layers.35.block_sparse_moe.experts.44.w2", "model.layers.35.block_sparse_moe.experts.45.w2", "model.layers.35.block_sparse_moe.experts.46.w2", "model.layers.35.block_sparse_moe.experts.47.w2", "model.layers.35.block_sparse_moe.experts.48.w2", "model.layers.35.block_sparse_moe.experts.49.w2", "model.layers.35.block_sparse_moe.experts.50.w2", "model.layers.35.block_sparse_moe.experts.51.w2", "model.layers.35.block_sparse_moe.experts.52.w2", "model.layers.35.block_sparse_moe.experts.53.w2", "model.layers.35.block_sparse_moe.experts.54.w2", "model.layers.35.block_sparse_moe.experts.55.w2", "model.layers.35.block_sparse_moe.experts.56.w2", "model.layers.35.block_sparse_moe.experts.57.w2", "model.layers.35.block_sparse_moe.experts.58.w2", "model.layers.35.block_sparse_moe.experts.59.w2", "model.layers.35.block_sparse_moe.experts.60.w2", "model.layers.35.block_sparse_moe.experts.61.w2", "model.layers.35.block_sparse_moe.experts.62.w2", "model.layers.35.block_sparse_moe.experts.63.w2", "model.layers.35.block_sparse_moe.experts.64.w2", "model.layers.35.block_sparse_moe.experts.65.w2", "model.layers.35.block_sparse_moe.experts.66.w2", "model.layers.35.block_sparse_moe.experts.67.w2", "model.layers.35.block_sparse_moe.experts.68.w2", "model.layers.35.block_sparse_moe.experts.69.w2", "model.layers.35.block_sparse_moe.experts.70.w2", "model.layers.35.block_sparse_moe.experts.71.w2", "model.layers.35.block_sparse_moe.experts.72.w2", "model.layers.35.block_sparse_moe.experts.73.w2", "model.layers.35.block_sparse_moe.experts.74.w2", "model.layers.35.block_sparse_moe.experts.75.w2", "model.layers.35.block_sparse_moe.experts.76.w2", "model.layers.35.block_sparse_moe.experts.77.w2", "model.layers.35.block_sparse_moe.experts.78.w2", "model.layers.35.block_sparse_moe.experts.79.w2", "model.layers.35.block_sparse_moe.experts.80.w2", "model.layers.35.block_sparse_moe.experts.81.w2", "model.layers.35.block_sparse_moe.experts.82.w2", "model.layers.35.block_sparse_moe.experts.83.w2", "model.layers.35.block_sparse_moe.experts.84.w2", "model.layers.35.block_sparse_moe.experts.85.w2", "model.layers.35.block_sparse_moe.experts.86.w2", "model.layers.35.block_sparse_moe.experts.87.w2", "model.layers.35.block_sparse_moe.experts.88.w2", "model.layers.35.block_sparse_moe.experts.89.w2", "model.layers.35.block_sparse_moe.experts.90.w2", "model.layers.35.block_sparse_moe.experts.91.w2", "model.layers.35.block_sparse_moe.experts.92.w2", "model.layers.35.block_sparse_moe.experts.93.w2", "model.layers.35.block_sparse_moe.experts.94.w2", "model.layers.35.block_sparse_moe.experts.95.w2", "model.layers.35.block_sparse_moe.experts.96.w2", "model.layers.35.block_sparse_moe.experts.97.w2", "model.layers.35.block_sparse_moe.experts.98.w2", "model.layers.35.block_sparse_moe.experts.99.w2", "model.layers.35.block_sparse_moe.experts.100.w2", "model.layers.35.block_sparse_moe.experts.101.w2", "model.layers.35.block_sparse_moe.experts.102.w2", "model.layers.35.block_sparse_moe.experts.103.w2", "model.layers.35.block_sparse_moe.experts.104.w2", "model.layers.35.block_sparse_moe.experts.105.w2", "model.layers.35.block_sparse_moe.experts.106.w2", "model.layers.35.block_sparse_moe.experts.107.w2", "model.layers.35.block_sparse_moe.experts.108.w2", "model.layers.35.block_sparse_moe.experts.109.w2", "model.layers.35.block_sparse_moe.experts.110.w2", "model.layers.35.block_sparse_moe.experts.111.w2", "model.layers.35.block_sparse_moe.experts.112.w2", "model.layers.35.block_sparse_moe.experts.113.w2", "model.layers.35.block_sparse_moe.experts.114.w2", "model.layers.35.block_sparse_moe.experts.115.w2", "model.layers.35.block_sparse_moe.experts.116.w2", "model.layers.35.block_sparse_moe.experts.117.w2", "model.layers.35.block_sparse_moe.experts.118.w2", "model.layers.35.block_sparse_moe.experts.119.w2", "model.layers.35.block_sparse_moe.experts.120.w2", "model.layers.35.block_sparse_moe.experts.121.w2", "model.layers.35.block_sparse_moe.experts.122.w2", "model.layers.35.block_sparse_moe.experts.123.w2", "model.layers.35.block_sparse_moe.experts.124.w2", "model.layers.35.block_sparse_moe.experts.125.w2", "model.layers.35.block_sparse_moe.experts.126.w2", "model.layers.35.block_sparse_moe.experts.127.w2", "model.layers.35.block_sparse_moe.experts.128.w2", "model.layers.35.block_sparse_moe.experts.129.w2", "model.layers.35.block_sparse_moe.experts.130.w2", "model.layers.35.block_sparse_moe.experts.131.w2", "model.layers.35.block_sparse_moe.experts.132.w2", "model.layers.35.block_sparse_moe.experts.133.w2", "model.layers.35.block_sparse_moe.experts.134.w2", "model.layers.35.block_sparse_moe.experts.135.w2", "model.layers.35.block_sparse_moe.experts.136.w2", "model.layers.35.block_sparse_moe.experts.137.w2", "model.layers.35.block_sparse_moe.experts.138.w2", "model.layers.35.block_sparse_moe.experts.139.w2", "model.layers.35.block_sparse_moe.experts.140.w2", "model.layers.35.block_sparse_moe.experts.141.w2", "model.layers.35.block_sparse_moe.experts.142.w2", "model.layers.35.block_sparse_moe.experts.143.w2", "model.layers.35.block_sparse_moe.experts.144.w2", "model.layers.35.block_sparse_moe.experts.145.w2", "model.layers.35.block_sparse_moe.experts.146.w2", "model.layers.35.block_sparse_moe.experts.147.w2", "model.layers.35.block_sparse_moe.experts.148.w2", "model.layers.35.block_sparse_moe.experts.149.w2", "model.layers.35.block_sparse_moe.experts.150.w2", "model.layers.35.block_sparse_moe.experts.151.w2", "model.layers.35.block_sparse_moe.experts.152.w2", "model.layers.35.block_sparse_moe.experts.153.w2", "model.layers.35.block_sparse_moe.experts.154.w2", "model.layers.35.block_sparse_moe.experts.155.w2", "model.layers.35.block_sparse_moe.experts.156.w2", "model.layers.35.block_sparse_moe.experts.157.w2", "model.layers.35.block_sparse_moe.experts.158.w2", "model.layers.35.block_sparse_moe.experts.159.w2", "model.layers.35.block_sparse_moe.experts.160.w2", "model.layers.35.block_sparse_moe.experts.161.w2", "model.layers.35.block_sparse_moe.experts.162.w2", "model.layers.35.block_sparse_moe.experts.163.w2", "model.layers.35.block_sparse_moe.experts.164.w2", "model.layers.35.block_sparse_moe.experts.165.w2", "model.layers.35.block_sparse_moe.experts.166.w2", "model.layers.35.block_sparse_moe.experts.167.w2", "model.layers.35.block_sparse_moe.experts.168.w2", "model.layers.35.block_sparse_moe.experts.169.w2", "model.layers.35.block_sparse_moe.experts.170.w2", "model.layers.35.block_sparse_moe.experts.171.w2", "model.layers.35.block_sparse_moe.experts.172.w2", "model.layers.35.block_sparse_moe.experts.173.w2", "model.layers.35.block_sparse_moe.experts.174.w2", "model.layers.35.block_sparse_moe.experts.175.w2", "model.layers.35.block_sparse_moe.experts.176.w2", "model.layers.35.block_sparse_moe.experts.177.w2", "model.layers.35.block_sparse_moe.experts.178.w2", "model.layers.35.block_sparse_moe.experts.179.w2", "model.layers.35.block_sparse_moe.experts.180.w2", "model.layers.35.block_sparse_moe.experts.181.w2", "model.layers.35.block_sparse_moe.experts.182.w2", "model.layers.35.block_sparse_moe.experts.183.w2", "model.layers.35.block_sparse_moe.experts.184.w2", "model.layers.35.block_sparse_moe.experts.185.w2", "model.layers.35.block_sparse_moe.experts.186.w2", "model.layers.35.block_sparse_moe.experts.187.w2", "model.layers.35.block_sparse_moe.experts.188.w2", "model.layers.35.block_sparse_moe.experts.189.w2", "model.layers.35.block_sparse_moe.experts.190.w2", "model.layers.35.block_sparse_moe.experts.191.w2", "model.layers.35.block_sparse_moe.experts.192.w2", "model.layers.35.block_sparse_moe.experts.193.w2", "model.layers.35.block_sparse_moe.experts.194.w2", "model.layers.35.block_sparse_moe.experts.195.w2", "model.layers.35.block_sparse_moe.experts.196.w2", "model.layers.35.block_sparse_moe.experts.197.w2", "model.layers.35.block_sparse_moe.experts.198.w2", "model.layers.35.block_sparse_moe.experts.199.w2", "model.layers.35.block_sparse_moe.experts.200.w2", "model.layers.35.block_sparse_moe.experts.201.w2", "model.layers.35.block_sparse_moe.experts.202.w2", "model.layers.35.block_sparse_moe.experts.203.w2", "model.layers.35.block_sparse_moe.experts.204.w2", "model.layers.35.block_sparse_moe.experts.205.w2", "model.layers.35.block_sparse_moe.experts.206.w2", "model.layers.35.block_sparse_moe.experts.207.w2", "model.layers.35.block_sparse_moe.experts.208.w2", "model.layers.35.block_sparse_moe.experts.209.w2", "model.layers.35.block_sparse_moe.experts.210.w2", "model.layers.35.block_sparse_moe.experts.211.w2", "model.layers.35.block_sparse_moe.experts.212.w2", "model.layers.35.block_sparse_moe.experts.213.w2", "model.layers.35.block_sparse_moe.experts.214.w2", "model.layers.35.block_sparse_moe.experts.215.w2", "model.layers.35.block_sparse_moe.experts.216.w2", "model.layers.35.block_sparse_moe.experts.217.w2", "model.layers.35.block_sparse_moe.experts.218.w2", "model.layers.35.block_sparse_moe.experts.219.w2", "model.layers.35.block_sparse_moe.experts.220.w2", "model.layers.35.block_sparse_moe.experts.221.w2", "model.layers.35.block_sparse_moe.experts.222.w2", "model.layers.35.block_sparse_moe.experts.223.w2", "model.layers.35.block_sparse_moe.experts.224.w2", "model.layers.35.block_sparse_moe.experts.225.w2", "model.layers.35.block_sparse_moe.experts.226.w2", "model.layers.35.block_sparse_moe.experts.227.w2", "model.layers.35.block_sparse_moe.experts.228.w2", "model.layers.35.block_sparse_moe.experts.229.w2", "model.layers.35.block_sparse_moe.experts.230.w2", "model.layers.35.block_sparse_moe.experts.231.w2", "model.layers.35.block_sparse_moe.experts.232.w2", "model.layers.35.block_sparse_moe.experts.233.w2", "model.layers.35.block_sparse_moe.experts.234.w2", "model.layers.35.block_sparse_moe.experts.235.w2", "model.layers.35.block_sparse_moe.experts.236.w2", "model.layers.35.block_sparse_moe.experts.237.w2", "model.layers.35.block_sparse_moe.experts.238.w2", "model.layers.35.block_sparse_moe.experts.239.w2", "model.layers.35.block_sparse_moe.experts.240.w2", "model.layers.35.block_sparse_moe.experts.241.w2", "model.layers.35.block_sparse_moe.experts.242.w2", "model.layers.35.block_sparse_moe.experts.243.w2", "model.layers.35.block_sparse_moe.experts.244.w2", "model.layers.35.block_sparse_moe.experts.245.w2", "model.layers.35.block_sparse_moe.experts.246.w2", "model.layers.35.block_sparse_moe.experts.247.w2", "model.layers.35.block_sparse_moe.experts.248.w2", "model.layers.35.block_sparse_moe.experts.249.w2", "model.layers.35.block_sparse_moe.experts.250.w2", "model.layers.35.block_sparse_moe.experts.251.w2", "model.layers.35.block_sparse_moe.experts.252.w2", "model.layers.35.block_sparse_moe.experts.253.w2", "model.layers.35.block_sparse_moe.experts.254.w2", "model.layers.35.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0001230444759130478, "dbits": 1207959552 } ] }, { "idx": 180, "layers": [ "model.layers.36.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0004625765606760951, "dbits": 18874368 } ] }, { "idx": 181, "layers": [ "model.layers.36.self_attn.k_proj", "model.layers.36.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0033997982740402166, "dbits": 6291456 } ] }, { "idx": 182, "layers": [ "model.layers.36.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0021368362009525355, "dbits": 18874368 } ] }, { "idx": 183, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w1", "model.layers.36.block_sparse_moe.experts.1.w1", "model.layers.36.block_sparse_moe.experts.2.w1", "model.layers.36.block_sparse_moe.experts.3.w1", "model.layers.36.block_sparse_moe.experts.4.w1", "model.layers.36.block_sparse_moe.experts.5.w1", "model.layers.36.block_sparse_moe.experts.6.w1", "model.layers.36.block_sparse_moe.experts.7.w1", "model.layers.36.block_sparse_moe.experts.8.w1", "model.layers.36.block_sparse_moe.experts.9.w1", "model.layers.36.block_sparse_moe.experts.10.w1", "model.layers.36.block_sparse_moe.experts.11.w1", "model.layers.36.block_sparse_moe.experts.12.w1", "model.layers.36.block_sparse_moe.experts.13.w1", "model.layers.36.block_sparse_moe.experts.14.w1", "model.layers.36.block_sparse_moe.experts.15.w1", "model.layers.36.block_sparse_moe.experts.16.w1", "model.layers.36.block_sparse_moe.experts.17.w1", "model.layers.36.block_sparse_moe.experts.18.w1", "model.layers.36.block_sparse_moe.experts.19.w1", "model.layers.36.block_sparse_moe.experts.20.w1", "model.layers.36.block_sparse_moe.experts.21.w1", "model.layers.36.block_sparse_moe.experts.22.w1", "model.layers.36.block_sparse_moe.experts.23.w1", "model.layers.36.block_sparse_moe.experts.24.w1", "model.layers.36.block_sparse_moe.experts.25.w1", "model.layers.36.block_sparse_moe.experts.26.w1", "model.layers.36.block_sparse_moe.experts.27.w1", "model.layers.36.block_sparse_moe.experts.28.w1", "model.layers.36.block_sparse_moe.experts.29.w1", "model.layers.36.block_sparse_moe.experts.30.w1", "model.layers.36.block_sparse_moe.experts.31.w1", "model.layers.36.block_sparse_moe.experts.32.w1", "model.layers.36.block_sparse_moe.experts.33.w1", "model.layers.36.block_sparse_moe.experts.34.w1", "model.layers.36.block_sparse_moe.experts.35.w1", "model.layers.36.block_sparse_moe.experts.36.w1", "model.layers.36.block_sparse_moe.experts.37.w1", "model.layers.36.block_sparse_moe.experts.38.w1", "model.layers.36.block_sparse_moe.experts.39.w1", "model.layers.36.block_sparse_moe.experts.40.w1", "model.layers.36.block_sparse_moe.experts.41.w1", "model.layers.36.block_sparse_moe.experts.42.w1", "model.layers.36.block_sparse_moe.experts.43.w1", "model.layers.36.block_sparse_moe.experts.44.w1", "model.layers.36.block_sparse_moe.experts.45.w1", "model.layers.36.block_sparse_moe.experts.46.w1", "model.layers.36.block_sparse_moe.experts.47.w1", "model.layers.36.block_sparse_moe.experts.48.w1", "model.layers.36.block_sparse_moe.experts.49.w1", "model.layers.36.block_sparse_moe.experts.50.w1", "model.layers.36.block_sparse_moe.experts.51.w1", "model.layers.36.block_sparse_moe.experts.52.w1", "model.layers.36.block_sparse_moe.experts.53.w1", "model.layers.36.block_sparse_moe.experts.54.w1", "model.layers.36.block_sparse_moe.experts.55.w1", "model.layers.36.block_sparse_moe.experts.56.w1", "model.layers.36.block_sparse_moe.experts.57.w1", "model.layers.36.block_sparse_moe.experts.58.w1", "model.layers.36.block_sparse_moe.experts.59.w1", "model.layers.36.block_sparse_moe.experts.60.w1", "model.layers.36.block_sparse_moe.experts.61.w1", "model.layers.36.block_sparse_moe.experts.62.w1", "model.layers.36.block_sparse_moe.experts.63.w1", "model.layers.36.block_sparse_moe.experts.64.w1", "model.layers.36.block_sparse_moe.experts.65.w1", "model.layers.36.block_sparse_moe.experts.66.w1", "model.layers.36.block_sparse_moe.experts.67.w1", "model.layers.36.block_sparse_moe.experts.68.w1", "model.layers.36.block_sparse_moe.experts.69.w1", "model.layers.36.block_sparse_moe.experts.70.w1", "model.layers.36.block_sparse_moe.experts.71.w1", "model.layers.36.block_sparse_moe.experts.72.w1", "model.layers.36.block_sparse_moe.experts.73.w1", "model.layers.36.block_sparse_moe.experts.74.w1", "model.layers.36.block_sparse_moe.experts.75.w1", "model.layers.36.block_sparse_moe.experts.76.w1", "model.layers.36.block_sparse_moe.experts.77.w1", "model.layers.36.block_sparse_moe.experts.78.w1", "model.layers.36.block_sparse_moe.experts.79.w1", "model.layers.36.block_sparse_moe.experts.80.w1", "model.layers.36.block_sparse_moe.experts.81.w1", "model.layers.36.block_sparse_moe.experts.82.w1", "model.layers.36.block_sparse_moe.experts.83.w1", "model.layers.36.block_sparse_moe.experts.84.w1", "model.layers.36.block_sparse_moe.experts.85.w1", "model.layers.36.block_sparse_moe.experts.86.w1", "model.layers.36.block_sparse_moe.experts.87.w1", "model.layers.36.block_sparse_moe.experts.88.w1", "model.layers.36.block_sparse_moe.experts.89.w1", "model.layers.36.block_sparse_moe.experts.90.w1", "model.layers.36.block_sparse_moe.experts.91.w1", "model.layers.36.block_sparse_moe.experts.92.w1", "model.layers.36.block_sparse_moe.experts.93.w1", "model.layers.36.block_sparse_moe.experts.94.w1", "model.layers.36.block_sparse_moe.experts.95.w1", "model.layers.36.block_sparse_moe.experts.96.w1", "model.layers.36.block_sparse_moe.experts.97.w1", "model.layers.36.block_sparse_moe.experts.98.w1", "model.layers.36.block_sparse_moe.experts.99.w1", "model.layers.36.block_sparse_moe.experts.100.w1", "model.layers.36.block_sparse_moe.experts.101.w1", "model.layers.36.block_sparse_moe.experts.102.w1", "model.layers.36.block_sparse_moe.experts.103.w1", "model.layers.36.block_sparse_moe.experts.104.w1", "model.layers.36.block_sparse_moe.experts.105.w1", "model.layers.36.block_sparse_moe.experts.106.w1", "model.layers.36.block_sparse_moe.experts.107.w1", "model.layers.36.block_sparse_moe.experts.108.w1", "model.layers.36.block_sparse_moe.experts.109.w1", "model.layers.36.block_sparse_moe.experts.110.w1", "model.layers.36.block_sparse_moe.experts.111.w1", "model.layers.36.block_sparse_moe.experts.112.w1", "model.layers.36.block_sparse_moe.experts.113.w1", "model.layers.36.block_sparse_moe.experts.114.w1", "model.layers.36.block_sparse_moe.experts.115.w1", "model.layers.36.block_sparse_moe.experts.116.w1", "model.layers.36.block_sparse_moe.experts.117.w1", "model.layers.36.block_sparse_moe.experts.118.w1", "model.layers.36.block_sparse_moe.experts.119.w1", "model.layers.36.block_sparse_moe.experts.120.w1", "model.layers.36.block_sparse_moe.experts.121.w1", "model.layers.36.block_sparse_moe.experts.122.w1", "model.layers.36.block_sparse_moe.experts.123.w1", "model.layers.36.block_sparse_moe.experts.124.w1", "model.layers.36.block_sparse_moe.experts.125.w1", "model.layers.36.block_sparse_moe.experts.126.w1", "model.layers.36.block_sparse_moe.experts.127.w1", "model.layers.36.block_sparse_moe.experts.128.w1", "model.layers.36.block_sparse_moe.experts.129.w1", "model.layers.36.block_sparse_moe.experts.130.w1", "model.layers.36.block_sparse_moe.experts.131.w1", "model.layers.36.block_sparse_moe.experts.132.w1", "model.layers.36.block_sparse_moe.experts.133.w1", "model.layers.36.block_sparse_moe.experts.134.w1", "model.layers.36.block_sparse_moe.experts.135.w1", "model.layers.36.block_sparse_moe.experts.136.w1", "model.layers.36.block_sparse_moe.experts.137.w1", "model.layers.36.block_sparse_moe.experts.138.w1", "model.layers.36.block_sparse_moe.experts.139.w1", "model.layers.36.block_sparse_moe.experts.140.w1", "model.layers.36.block_sparse_moe.experts.141.w1", "model.layers.36.block_sparse_moe.experts.142.w1", "model.layers.36.block_sparse_moe.experts.143.w1", "model.layers.36.block_sparse_moe.experts.144.w1", "model.layers.36.block_sparse_moe.experts.145.w1", "model.layers.36.block_sparse_moe.experts.146.w1", "model.layers.36.block_sparse_moe.experts.147.w1", "model.layers.36.block_sparse_moe.experts.148.w1", "model.layers.36.block_sparse_moe.experts.149.w1", "model.layers.36.block_sparse_moe.experts.150.w1", "model.layers.36.block_sparse_moe.experts.151.w1", "model.layers.36.block_sparse_moe.experts.152.w1", "model.layers.36.block_sparse_moe.experts.153.w1", "model.layers.36.block_sparse_moe.experts.154.w1", "model.layers.36.block_sparse_moe.experts.155.w1", "model.layers.36.block_sparse_moe.experts.156.w1", "model.layers.36.block_sparse_moe.experts.157.w1", "model.layers.36.block_sparse_moe.experts.158.w1", "model.layers.36.block_sparse_moe.experts.159.w1", "model.layers.36.block_sparse_moe.experts.160.w1", "model.layers.36.block_sparse_moe.experts.161.w1", "model.layers.36.block_sparse_moe.experts.162.w1", "model.layers.36.block_sparse_moe.experts.163.w1", "model.layers.36.block_sparse_moe.experts.164.w1", "model.layers.36.block_sparse_moe.experts.165.w1", "model.layers.36.block_sparse_moe.experts.166.w1", "model.layers.36.block_sparse_moe.experts.167.w1", "model.layers.36.block_sparse_moe.experts.168.w1", "model.layers.36.block_sparse_moe.experts.169.w1", "model.layers.36.block_sparse_moe.experts.170.w1", "model.layers.36.block_sparse_moe.experts.171.w1", "model.layers.36.block_sparse_moe.experts.172.w1", "model.layers.36.block_sparse_moe.experts.173.w1", "model.layers.36.block_sparse_moe.experts.174.w1", "model.layers.36.block_sparse_moe.experts.175.w1", "model.layers.36.block_sparse_moe.experts.176.w1", "model.layers.36.block_sparse_moe.experts.177.w1", "model.layers.36.block_sparse_moe.experts.178.w1", "model.layers.36.block_sparse_moe.experts.179.w1", "model.layers.36.block_sparse_moe.experts.180.w1", "model.layers.36.block_sparse_moe.experts.181.w1", "model.layers.36.block_sparse_moe.experts.182.w1", "model.layers.36.block_sparse_moe.experts.183.w1", "model.layers.36.block_sparse_moe.experts.184.w1", "model.layers.36.block_sparse_moe.experts.185.w1", "model.layers.36.block_sparse_moe.experts.186.w1", "model.layers.36.block_sparse_moe.experts.187.w1", "model.layers.36.block_sparse_moe.experts.188.w1", "model.layers.36.block_sparse_moe.experts.189.w1", "model.layers.36.block_sparse_moe.experts.190.w1", "model.layers.36.block_sparse_moe.experts.191.w1", "model.layers.36.block_sparse_moe.experts.192.w1", "model.layers.36.block_sparse_moe.experts.193.w1", "model.layers.36.block_sparse_moe.experts.194.w1", "model.layers.36.block_sparse_moe.experts.195.w1", "model.layers.36.block_sparse_moe.experts.196.w1", "model.layers.36.block_sparse_moe.experts.197.w1", "model.layers.36.block_sparse_moe.experts.198.w1", "model.layers.36.block_sparse_moe.experts.199.w1", "model.layers.36.block_sparse_moe.experts.200.w1", "model.layers.36.block_sparse_moe.experts.201.w1", "model.layers.36.block_sparse_moe.experts.202.w1", "model.layers.36.block_sparse_moe.experts.203.w1", "model.layers.36.block_sparse_moe.experts.204.w1", "model.layers.36.block_sparse_moe.experts.205.w1", "model.layers.36.block_sparse_moe.experts.206.w1", "model.layers.36.block_sparse_moe.experts.207.w1", "model.layers.36.block_sparse_moe.experts.208.w1", "model.layers.36.block_sparse_moe.experts.209.w1", "model.layers.36.block_sparse_moe.experts.210.w1", "model.layers.36.block_sparse_moe.experts.211.w1", "model.layers.36.block_sparse_moe.experts.212.w1", "model.layers.36.block_sparse_moe.experts.213.w1", "model.layers.36.block_sparse_moe.experts.214.w1", "model.layers.36.block_sparse_moe.experts.215.w1", "model.layers.36.block_sparse_moe.experts.216.w1", "model.layers.36.block_sparse_moe.experts.217.w1", "model.layers.36.block_sparse_moe.experts.218.w1", "model.layers.36.block_sparse_moe.experts.219.w1", "model.layers.36.block_sparse_moe.experts.220.w1", "model.layers.36.block_sparse_moe.experts.221.w1", "model.layers.36.block_sparse_moe.experts.222.w1", "model.layers.36.block_sparse_moe.experts.223.w1", "model.layers.36.block_sparse_moe.experts.224.w1", "model.layers.36.block_sparse_moe.experts.225.w1", "model.layers.36.block_sparse_moe.experts.226.w1", "model.layers.36.block_sparse_moe.experts.227.w1", "model.layers.36.block_sparse_moe.experts.228.w1", "model.layers.36.block_sparse_moe.experts.229.w1", "model.layers.36.block_sparse_moe.experts.230.w1", "model.layers.36.block_sparse_moe.experts.231.w1", "model.layers.36.block_sparse_moe.experts.232.w1", "model.layers.36.block_sparse_moe.experts.233.w1", "model.layers.36.block_sparse_moe.experts.234.w1", "model.layers.36.block_sparse_moe.experts.235.w1", "model.layers.36.block_sparse_moe.experts.236.w1", "model.layers.36.block_sparse_moe.experts.237.w1", "model.layers.36.block_sparse_moe.experts.238.w1", "model.layers.36.block_sparse_moe.experts.239.w1", "model.layers.36.block_sparse_moe.experts.240.w1", "model.layers.36.block_sparse_moe.experts.241.w1", "model.layers.36.block_sparse_moe.experts.242.w1", "model.layers.36.block_sparse_moe.experts.243.w1", "model.layers.36.block_sparse_moe.experts.244.w1", "model.layers.36.block_sparse_moe.experts.245.w1", "model.layers.36.block_sparse_moe.experts.246.w1", "model.layers.36.block_sparse_moe.experts.247.w1", "model.layers.36.block_sparse_moe.experts.248.w1", "model.layers.36.block_sparse_moe.experts.249.w1", "model.layers.36.block_sparse_moe.experts.250.w1", "model.layers.36.block_sparse_moe.experts.251.w1", "model.layers.36.block_sparse_moe.experts.252.w1", "model.layers.36.block_sparse_moe.experts.253.w1", "model.layers.36.block_sparse_moe.experts.254.w1", "model.layers.36.block_sparse_moe.experts.255.w1", "model.layers.36.block_sparse_moe.experts.0.w3", "model.layers.36.block_sparse_moe.experts.1.w3", "model.layers.36.block_sparse_moe.experts.2.w3", "model.layers.36.block_sparse_moe.experts.3.w3", "model.layers.36.block_sparse_moe.experts.4.w3", "model.layers.36.block_sparse_moe.experts.5.w3", "model.layers.36.block_sparse_moe.experts.6.w3", "model.layers.36.block_sparse_moe.experts.7.w3", "model.layers.36.block_sparse_moe.experts.8.w3", "model.layers.36.block_sparse_moe.experts.9.w3", "model.layers.36.block_sparse_moe.experts.10.w3", "model.layers.36.block_sparse_moe.experts.11.w3", "model.layers.36.block_sparse_moe.experts.12.w3", "model.layers.36.block_sparse_moe.experts.13.w3", "model.layers.36.block_sparse_moe.experts.14.w3", "model.layers.36.block_sparse_moe.experts.15.w3", "model.layers.36.block_sparse_moe.experts.16.w3", "model.layers.36.block_sparse_moe.experts.17.w3", "model.layers.36.block_sparse_moe.experts.18.w3", "model.layers.36.block_sparse_moe.experts.19.w3", "model.layers.36.block_sparse_moe.experts.20.w3", "model.layers.36.block_sparse_moe.experts.21.w3", "model.layers.36.block_sparse_moe.experts.22.w3", "model.layers.36.block_sparse_moe.experts.23.w3", "model.layers.36.block_sparse_moe.experts.24.w3", "model.layers.36.block_sparse_moe.experts.25.w3", "model.layers.36.block_sparse_moe.experts.26.w3", "model.layers.36.block_sparse_moe.experts.27.w3", "model.layers.36.block_sparse_moe.experts.28.w3", "model.layers.36.block_sparse_moe.experts.29.w3", "model.layers.36.block_sparse_moe.experts.30.w3", "model.layers.36.block_sparse_moe.experts.31.w3", "model.layers.36.block_sparse_moe.experts.32.w3", "model.layers.36.block_sparse_moe.experts.33.w3", "model.layers.36.block_sparse_moe.experts.34.w3", "model.layers.36.block_sparse_moe.experts.35.w3", "model.layers.36.block_sparse_moe.experts.36.w3", "model.layers.36.block_sparse_moe.experts.37.w3", "model.layers.36.block_sparse_moe.experts.38.w3", "model.layers.36.block_sparse_moe.experts.39.w3", "model.layers.36.block_sparse_moe.experts.40.w3", "model.layers.36.block_sparse_moe.experts.41.w3", "model.layers.36.block_sparse_moe.experts.42.w3", "model.layers.36.block_sparse_moe.experts.43.w3", "model.layers.36.block_sparse_moe.experts.44.w3", "model.layers.36.block_sparse_moe.experts.45.w3", "model.layers.36.block_sparse_moe.experts.46.w3", "model.layers.36.block_sparse_moe.experts.47.w3", "model.layers.36.block_sparse_moe.experts.48.w3", "model.layers.36.block_sparse_moe.experts.49.w3", "model.layers.36.block_sparse_moe.experts.50.w3", "model.layers.36.block_sparse_moe.experts.51.w3", "model.layers.36.block_sparse_moe.experts.52.w3", "model.layers.36.block_sparse_moe.experts.53.w3", "model.layers.36.block_sparse_moe.experts.54.w3", "model.layers.36.block_sparse_moe.experts.55.w3", "model.layers.36.block_sparse_moe.experts.56.w3", "model.layers.36.block_sparse_moe.experts.57.w3", "model.layers.36.block_sparse_moe.experts.58.w3", "model.layers.36.block_sparse_moe.experts.59.w3", "model.layers.36.block_sparse_moe.experts.60.w3", "model.layers.36.block_sparse_moe.experts.61.w3", "model.layers.36.block_sparse_moe.experts.62.w3", "model.layers.36.block_sparse_moe.experts.63.w3", "model.layers.36.block_sparse_moe.experts.64.w3", "model.layers.36.block_sparse_moe.experts.65.w3", "model.layers.36.block_sparse_moe.experts.66.w3", "model.layers.36.block_sparse_moe.experts.67.w3", "model.layers.36.block_sparse_moe.experts.68.w3", "model.layers.36.block_sparse_moe.experts.69.w3", "model.layers.36.block_sparse_moe.experts.70.w3", "model.layers.36.block_sparse_moe.experts.71.w3", "model.layers.36.block_sparse_moe.experts.72.w3", "model.layers.36.block_sparse_moe.experts.73.w3", "model.layers.36.block_sparse_moe.experts.74.w3", "model.layers.36.block_sparse_moe.experts.75.w3", "model.layers.36.block_sparse_moe.experts.76.w3", "model.layers.36.block_sparse_moe.experts.77.w3", "model.layers.36.block_sparse_moe.experts.78.w3", "model.layers.36.block_sparse_moe.experts.79.w3", "model.layers.36.block_sparse_moe.experts.80.w3", "model.layers.36.block_sparse_moe.experts.81.w3", "model.layers.36.block_sparse_moe.experts.82.w3", "model.layers.36.block_sparse_moe.experts.83.w3", "model.layers.36.block_sparse_moe.experts.84.w3", "model.layers.36.block_sparse_moe.experts.85.w3", "model.layers.36.block_sparse_moe.experts.86.w3", "model.layers.36.block_sparse_moe.experts.87.w3", "model.layers.36.block_sparse_moe.experts.88.w3", "model.layers.36.block_sparse_moe.experts.89.w3", "model.layers.36.block_sparse_moe.experts.90.w3", "model.layers.36.block_sparse_moe.experts.91.w3", "model.layers.36.block_sparse_moe.experts.92.w3", "model.layers.36.block_sparse_moe.experts.93.w3", "model.layers.36.block_sparse_moe.experts.94.w3", "model.layers.36.block_sparse_moe.experts.95.w3", "model.layers.36.block_sparse_moe.experts.96.w3", "model.layers.36.block_sparse_moe.experts.97.w3", "model.layers.36.block_sparse_moe.experts.98.w3", "model.layers.36.block_sparse_moe.experts.99.w3", "model.layers.36.block_sparse_moe.experts.100.w3", "model.layers.36.block_sparse_moe.experts.101.w3", "model.layers.36.block_sparse_moe.experts.102.w3", "model.layers.36.block_sparse_moe.experts.103.w3", "model.layers.36.block_sparse_moe.experts.104.w3", "model.layers.36.block_sparse_moe.experts.105.w3", "model.layers.36.block_sparse_moe.experts.106.w3", "model.layers.36.block_sparse_moe.experts.107.w3", "model.layers.36.block_sparse_moe.experts.108.w3", "model.layers.36.block_sparse_moe.experts.109.w3", "model.layers.36.block_sparse_moe.experts.110.w3", "model.layers.36.block_sparse_moe.experts.111.w3", "model.layers.36.block_sparse_moe.experts.112.w3", "model.layers.36.block_sparse_moe.experts.113.w3", "model.layers.36.block_sparse_moe.experts.114.w3", "model.layers.36.block_sparse_moe.experts.115.w3", "model.layers.36.block_sparse_moe.experts.116.w3", "model.layers.36.block_sparse_moe.experts.117.w3", "model.layers.36.block_sparse_moe.experts.118.w3", "model.layers.36.block_sparse_moe.experts.119.w3", "model.layers.36.block_sparse_moe.experts.120.w3", "model.layers.36.block_sparse_moe.experts.121.w3", "model.layers.36.block_sparse_moe.experts.122.w3", "model.layers.36.block_sparse_moe.experts.123.w3", "model.layers.36.block_sparse_moe.experts.124.w3", "model.layers.36.block_sparse_moe.experts.125.w3", "model.layers.36.block_sparse_moe.experts.126.w3", "model.layers.36.block_sparse_moe.experts.127.w3", "model.layers.36.block_sparse_moe.experts.128.w3", "model.layers.36.block_sparse_moe.experts.129.w3", "model.layers.36.block_sparse_moe.experts.130.w3", "model.layers.36.block_sparse_moe.experts.131.w3", "model.layers.36.block_sparse_moe.experts.132.w3", "model.layers.36.block_sparse_moe.experts.133.w3", "model.layers.36.block_sparse_moe.experts.134.w3", "model.layers.36.block_sparse_moe.experts.135.w3", "model.layers.36.block_sparse_moe.experts.136.w3", "model.layers.36.block_sparse_moe.experts.137.w3", "model.layers.36.block_sparse_moe.experts.138.w3", "model.layers.36.block_sparse_moe.experts.139.w3", "model.layers.36.block_sparse_moe.experts.140.w3", "model.layers.36.block_sparse_moe.experts.141.w3", "model.layers.36.block_sparse_moe.experts.142.w3", "model.layers.36.block_sparse_moe.experts.143.w3", "model.layers.36.block_sparse_moe.experts.144.w3", "model.layers.36.block_sparse_moe.experts.145.w3", "model.layers.36.block_sparse_moe.experts.146.w3", "model.layers.36.block_sparse_moe.experts.147.w3", "model.layers.36.block_sparse_moe.experts.148.w3", "model.layers.36.block_sparse_moe.experts.149.w3", "model.layers.36.block_sparse_moe.experts.150.w3", "model.layers.36.block_sparse_moe.experts.151.w3", "model.layers.36.block_sparse_moe.experts.152.w3", "model.layers.36.block_sparse_moe.experts.153.w3", "model.layers.36.block_sparse_moe.experts.154.w3", "model.layers.36.block_sparse_moe.experts.155.w3", "model.layers.36.block_sparse_moe.experts.156.w3", "model.layers.36.block_sparse_moe.experts.157.w3", "model.layers.36.block_sparse_moe.experts.158.w3", "model.layers.36.block_sparse_moe.experts.159.w3", "model.layers.36.block_sparse_moe.experts.160.w3", "model.layers.36.block_sparse_moe.experts.161.w3", "model.layers.36.block_sparse_moe.experts.162.w3", "model.layers.36.block_sparse_moe.experts.163.w3", "model.layers.36.block_sparse_moe.experts.164.w3", "model.layers.36.block_sparse_moe.experts.165.w3", "model.layers.36.block_sparse_moe.experts.166.w3", "model.layers.36.block_sparse_moe.experts.167.w3", "model.layers.36.block_sparse_moe.experts.168.w3", "model.layers.36.block_sparse_moe.experts.169.w3", "model.layers.36.block_sparse_moe.experts.170.w3", "model.layers.36.block_sparse_moe.experts.171.w3", "model.layers.36.block_sparse_moe.experts.172.w3", "model.layers.36.block_sparse_moe.experts.173.w3", "model.layers.36.block_sparse_moe.experts.174.w3", "model.layers.36.block_sparse_moe.experts.175.w3", "model.layers.36.block_sparse_moe.experts.176.w3", "model.layers.36.block_sparse_moe.experts.177.w3", "model.layers.36.block_sparse_moe.experts.178.w3", "model.layers.36.block_sparse_moe.experts.179.w3", "model.layers.36.block_sparse_moe.experts.180.w3", "model.layers.36.block_sparse_moe.experts.181.w3", "model.layers.36.block_sparse_moe.experts.182.w3", "model.layers.36.block_sparse_moe.experts.183.w3", "model.layers.36.block_sparse_moe.experts.184.w3", "model.layers.36.block_sparse_moe.experts.185.w3", "model.layers.36.block_sparse_moe.experts.186.w3", "model.layers.36.block_sparse_moe.experts.187.w3", "model.layers.36.block_sparse_moe.experts.188.w3", "model.layers.36.block_sparse_moe.experts.189.w3", "model.layers.36.block_sparse_moe.experts.190.w3", "model.layers.36.block_sparse_moe.experts.191.w3", "model.layers.36.block_sparse_moe.experts.192.w3", "model.layers.36.block_sparse_moe.experts.193.w3", "model.layers.36.block_sparse_moe.experts.194.w3", "model.layers.36.block_sparse_moe.experts.195.w3", "model.layers.36.block_sparse_moe.experts.196.w3", "model.layers.36.block_sparse_moe.experts.197.w3", "model.layers.36.block_sparse_moe.experts.198.w3", "model.layers.36.block_sparse_moe.experts.199.w3", "model.layers.36.block_sparse_moe.experts.200.w3", "model.layers.36.block_sparse_moe.experts.201.w3", "model.layers.36.block_sparse_moe.experts.202.w3", "model.layers.36.block_sparse_moe.experts.203.w3", "model.layers.36.block_sparse_moe.experts.204.w3", "model.layers.36.block_sparse_moe.experts.205.w3", "model.layers.36.block_sparse_moe.experts.206.w3", "model.layers.36.block_sparse_moe.experts.207.w3", "model.layers.36.block_sparse_moe.experts.208.w3", "model.layers.36.block_sparse_moe.experts.209.w3", "model.layers.36.block_sparse_moe.experts.210.w3", "model.layers.36.block_sparse_moe.experts.211.w3", "model.layers.36.block_sparse_moe.experts.212.w3", "model.layers.36.block_sparse_moe.experts.213.w3", "model.layers.36.block_sparse_moe.experts.214.w3", "model.layers.36.block_sparse_moe.experts.215.w3", "model.layers.36.block_sparse_moe.experts.216.w3", "model.layers.36.block_sparse_moe.experts.217.w3", "model.layers.36.block_sparse_moe.experts.218.w3", "model.layers.36.block_sparse_moe.experts.219.w3", "model.layers.36.block_sparse_moe.experts.220.w3", "model.layers.36.block_sparse_moe.experts.221.w3", "model.layers.36.block_sparse_moe.experts.222.w3", "model.layers.36.block_sparse_moe.experts.223.w3", "model.layers.36.block_sparse_moe.experts.224.w3", "model.layers.36.block_sparse_moe.experts.225.w3", "model.layers.36.block_sparse_moe.experts.226.w3", "model.layers.36.block_sparse_moe.experts.227.w3", "model.layers.36.block_sparse_moe.experts.228.w3", "model.layers.36.block_sparse_moe.experts.229.w3", "model.layers.36.block_sparse_moe.experts.230.w3", "model.layers.36.block_sparse_moe.experts.231.w3", "model.layers.36.block_sparse_moe.experts.232.w3", "model.layers.36.block_sparse_moe.experts.233.w3", "model.layers.36.block_sparse_moe.experts.234.w3", "model.layers.36.block_sparse_moe.experts.235.w3", "model.layers.36.block_sparse_moe.experts.236.w3", "model.layers.36.block_sparse_moe.experts.237.w3", "model.layers.36.block_sparse_moe.experts.238.w3", "model.layers.36.block_sparse_moe.experts.239.w3", "model.layers.36.block_sparse_moe.experts.240.w3", "model.layers.36.block_sparse_moe.experts.241.w3", "model.layers.36.block_sparse_moe.experts.242.w3", "model.layers.36.block_sparse_moe.experts.243.w3", "model.layers.36.block_sparse_moe.experts.244.w3", "model.layers.36.block_sparse_moe.experts.245.w3", "model.layers.36.block_sparse_moe.experts.246.w3", "model.layers.36.block_sparse_moe.experts.247.w3", "model.layers.36.block_sparse_moe.experts.248.w3", "model.layers.36.block_sparse_moe.experts.249.w3", "model.layers.36.block_sparse_moe.experts.250.w3", "model.layers.36.block_sparse_moe.experts.251.w3", "model.layers.36.block_sparse_moe.experts.252.w3", "model.layers.36.block_sparse_moe.experts.253.w3", "model.layers.36.block_sparse_moe.experts.254.w3", "model.layers.36.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0004187863320112284, "dbits": 2415919104 } ] }, { "idx": 184, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w2", "model.layers.36.block_sparse_moe.experts.1.w2", "model.layers.36.block_sparse_moe.experts.2.w2", "model.layers.36.block_sparse_moe.experts.3.w2", "model.layers.36.block_sparse_moe.experts.4.w2", "model.layers.36.block_sparse_moe.experts.5.w2", "model.layers.36.block_sparse_moe.experts.6.w2", "model.layers.36.block_sparse_moe.experts.7.w2", "model.layers.36.block_sparse_moe.experts.8.w2", "model.layers.36.block_sparse_moe.experts.9.w2", "model.layers.36.block_sparse_moe.experts.10.w2", "model.layers.36.block_sparse_moe.experts.11.w2", "model.layers.36.block_sparse_moe.experts.12.w2", "model.layers.36.block_sparse_moe.experts.13.w2", "model.layers.36.block_sparse_moe.experts.14.w2", "model.layers.36.block_sparse_moe.experts.15.w2", "model.layers.36.block_sparse_moe.experts.16.w2", "model.layers.36.block_sparse_moe.experts.17.w2", "model.layers.36.block_sparse_moe.experts.18.w2", "model.layers.36.block_sparse_moe.experts.19.w2", "model.layers.36.block_sparse_moe.experts.20.w2", "model.layers.36.block_sparse_moe.experts.21.w2", "model.layers.36.block_sparse_moe.experts.22.w2", "model.layers.36.block_sparse_moe.experts.23.w2", "model.layers.36.block_sparse_moe.experts.24.w2", "model.layers.36.block_sparse_moe.experts.25.w2", "model.layers.36.block_sparse_moe.experts.26.w2", "model.layers.36.block_sparse_moe.experts.27.w2", "model.layers.36.block_sparse_moe.experts.28.w2", "model.layers.36.block_sparse_moe.experts.29.w2", "model.layers.36.block_sparse_moe.experts.30.w2", "model.layers.36.block_sparse_moe.experts.31.w2", "model.layers.36.block_sparse_moe.experts.32.w2", "model.layers.36.block_sparse_moe.experts.33.w2", "model.layers.36.block_sparse_moe.experts.34.w2", "model.layers.36.block_sparse_moe.experts.35.w2", "model.layers.36.block_sparse_moe.experts.36.w2", "model.layers.36.block_sparse_moe.experts.37.w2", "model.layers.36.block_sparse_moe.experts.38.w2", "model.layers.36.block_sparse_moe.experts.39.w2", "model.layers.36.block_sparse_moe.experts.40.w2", "model.layers.36.block_sparse_moe.experts.41.w2", "model.layers.36.block_sparse_moe.experts.42.w2", "model.layers.36.block_sparse_moe.experts.43.w2", "model.layers.36.block_sparse_moe.experts.44.w2", "model.layers.36.block_sparse_moe.experts.45.w2", "model.layers.36.block_sparse_moe.experts.46.w2", "model.layers.36.block_sparse_moe.experts.47.w2", "model.layers.36.block_sparse_moe.experts.48.w2", "model.layers.36.block_sparse_moe.experts.49.w2", "model.layers.36.block_sparse_moe.experts.50.w2", "model.layers.36.block_sparse_moe.experts.51.w2", "model.layers.36.block_sparse_moe.experts.52.w2", "model.layers.36.block_sparse_moe.experts.53.w2", "model.layers.36.block_sparse_moe.experts.54.w2", "model.layers.36.block_sparse_moe.experts.55.w2", "model.layers.36.block_sparse_moe.experts.56.w2", "model.layers.36.block_sparse_moe.experts.57.w2", "model.layers.36.block_sparse_moe.experts.58.w2", "model.layers.36.block_sparse_moe.experts.59.w2", "model.layers.36.block_sparse_moe.experts.60.w2", "model.layers.36.block_sparse_moe.experts.61.w2", "model.layers.36.block_sparse_moe.experts.62.w2", "model.layers.36.block_sparse_moe.experts.63.w2", "model.layers.36.block_sparse_moe.experts.64.w2", "model.layers.36.block_sparse_moe.experts.65.w2", "model.layers.36.block_sparse_moe.experts.66.w2", "model.layers.36.block_sparse_moe.experts.67.w2", "model.layers.36.block_sparse_moe.experts.68.w2", "model.layers.36.block_sparse_moe.experts.69.w2", "model.layers.36.block_sparse_moe.experts.70.w2", "model.layers.36.block_sparse_moe.experts.71.w2", "model.layers.36.block_sparse_moe.experts.72.w2", "model.layers.36.block_sparse_moe.experts.73.w2", "model.layers.36.block_sparse_moe.experts.74.w2", "model.layers.36.block_sparse_moe.experts.75.w2", "model.layers.36.block_sparse_moe.experts.76.w2", "model.layers.36.block_sparse_moe.experts.77.w2", "model.layers.36.block_sparse_moe.experts.78.w2", "model.layers.36.block_sparse_moe.experts.79.w2", "model.layers.36.block_sparse_moe.experts.80.w2", "model.layers.36.block_sparse_moe.experts.81.w2", "model.layers.36.block_sparse_moe.experts.82.w2", "model.layers.36.block_sparse_moe.experts.83.w2", "model.layers.36.block_sparse_moe.experts.84.w2", "model.layers.36.block_sparse_moe.experts.85.w2", "model.layers.36.block_sparse_moe.experts.86.w2", "model.layers.36.block_sparse_moe.experts.87.w2", "model.layers.36.block_sparse_moe.experts.88.w2", "model.layers.36.block_sparse_moe.experts.89.w2", "model.layers.36.block_sparse_moe.experts.90.w2", "model.layers.36.block_sparse_moe.experts.91.w2", "model.layers.36.block_sparse_moe.experts.92.w2", "model.layers.36.block_sparse_moe.experts.93.w2", "model.layers.36.block_sparse_moe.experts.94.w2", "model.layers.36.block_sparse_moe.experts.95.w2", "model.layers.36.block_sparse_moe.experts.96.w2", "model.layers.36.block_sparse_moe.experts.97.w2", "model.layers.36.block_sparse_moe.experts.98.w2", "model.layers.36.block_sparse_moe.experts.99.w2", "model.layers.36.block_sparse_moe.experts.100.w2", "model.layers.36.block_sparse_moe.experts.101.w2", "model.layers.36.block_sparse_moe.experts.102.w2", "model.layers.36.block_sparse_moe.experts.103.w2", "model.layers.36.block_sparse_moe.experts.104.w2", "model.layers.36.block_sparse_moe.experts.105.w2", "model.layers.36.block_sparse_moe.experts.106.w2", "model.layers.36.block_sparse_moe.experts.107.w2", "model.layers.36.block_sparse_moe.experts.108.w2", "model.layers.36.block_sparse_moe.experts.109.w2", "model.layers.36.block_sparse_moe.experts.110.w2", "model.layers.36.block_sparse_moe.experts.111.w2", "model.layers.36.block_sparse_moe.experts.112.w2", "model.layers.36.block_sparse_moe.experts.113.w2", "model.layers.36.block_sparse_moe.experts.114.w2", "model.layers.36.block_sparse_moe.experts.115.w2", "model.layers.36.block_sparse_moe.experts.116.w2", "model.layers.36.block_sparse_moe.experts.117.w2", "model.layers.36.block_sparse_moe.experts.118.w2", "model.layers.36.block_sparse_moe.experts.119.w2", "model.layers.36.block_sparse_moe.experts.120.w2", "model.layers.36.block_sparse_moe.experts.121.w2", "model.layers.36.block_sparse_moe.experts.122.w2", "model.layers.36.block_sparse_moe.experts.123.w2", "model.layers.36.block_sparse_moe.experts.124.w2", "model.layers.36.block_sparse_moe.experts.125.w2", "model.layers.36.block_sparse_moe.experts.126.w2", "model.layers.36.block_sparse_moe.experts.127.w2", "model.layers.36.block_sparse_moe.experts.128.w2", "model.layers.36.block_sparse_moe.experts.129.w2", "model.layers.36.block_sparse_moe.experts.130.w2", "model.layers.36.block_sparse_moe.experts.131.w2", "model.layers.36.block_sparse_moe.experts.132.w2", "model.layers.36.block_sparse_moe.experts.133.w2", "model.layers.36.block_sparse_moe.experts.134.w2", "model.layers.36.block_sparse_moe.experts.135.w2", "model.layers.36.block_sparse_moe.experts.136.w2", "model.layers.36.block_sparse_moe.experts.137.w2", "model.layers.36.block_sparse_moe.experts.138.w2", "model.layers.36.block_sparse_moe.experts.139.w2", "model.layers.36.block_sparse_moe.experts.140.w2", "model.layers.36.block_sparse_moe.experts.141.w2", "model.layers.36.block_sparse_moe.experts.142.w2", "model.layers.36.block_sparse_moe.experts.143.w2", "model.layers.36.block_sparse_moe.experts.144.w2", "model.layers.36.block_sparse_moe.experts.145.w2", "model.layers.36.block_sparse_moe.experts.146.w2", "model.layers.36.block_sparse_moe.experts.147.w2", "model.layers.36.block_sparse_moe.experts.148.w2", "model.layers.36.block_sparse_moe.experts.149.w2", "model.layers.36.block_sparse_moe.experts.150.w2", "model.layers.36.block_sparse_moe.experts.151.w2", "model.layers.36.block_sparse_moe.experts.152.w2", "model.layers.36.block_sparse_moe.experts.153.w2", "model.layers.36.block_sparse_moe.experts.154.w2", "model.layers.36.block_sparse_moe.experts.155.w2", "model.layers.36.block_sparse_moe.experts.156.w2", "model.layers.36.block_sparse_moe.experts.157.w2", "model.layers.36.block_sparse_moe.experts.158.w2", "model.layers.36.block_sparse_moe.experts.159.w2", "model.layers.36.block_sparse_moe.experts.160.w2", "model.layers.36.block_sparse_moe.experts.161.w2", "model.layers.36.block_sparse_moe.experts.162.w2", "model.layers.36.block_sparse_moe.experts.163.w2", "model.layers.36.block_sparse_moe.experts.164.w2", "model.layers.36.block_sparse_moe.experts.165.w2", "model.layers.36.block_sparse_moe.experts.166.w2", "model.layers.36.block_sparse_moe.experts.167.w2", "model.layers.36.block_sparse_moe.experts.168.w2", "model.layers.36.block_sparse_moe.experts.169.w2", "model.layers.36.block_sparse_moe.experts.170.w2", "model.layers.36.block_sparse_moe.experts.171.w2", "model.layers.36.block_sparse_moe.experts.172.w2", "model.layers.36.block_sparse_moe.experts.173.w2", "model.layers.36.block_sparse_moe.experts.174.w2", "model.layers.36.block_sparse_moe.experts.175.w2", "model.layers.36.block_sparse_moe.experts.176.w2", "model.layers.36.block_sparse_moe.experts.177.w2", "model.layers.36.block_sparse_moe.experts.178.w2", "model.layers.36.block_sparse_moe.experts.179.w2", "model.layers.36.block_sparse_moe.experts.180.w2", "model.layers.36.block_sparse_moe.experts.181.w2", "model.layers.36.block_sparse_moe.experts.182.w2", "model.layers.36.block_sparse_moe.experts.183.w2", "model.layers.36.block_sparse_moe.experts.184.w2", "model.layers.36.block_sparse_moe.experts.185.w2", "model.layers.36.block_sparse_moe.experts.186.w2", "model.layers.36.block_sparse_moe.experts.187.w2", "model.layers.36.block_sparse_moe.experts.188.w2", "model.layers.36.block_sparse_moe.experts.189.w2", "model.layers.36.block_sparse_moe.experts.190.w2", "model.layers.36.block_sparse_moe.experts.191.w2", "model.layers.36.block_sparse_moe.experts.192.w2", "model.layers.36.block_sparse_moe.experts.193.w2", "model.layers.36.block_sparse_moe.experts.194.w2", "model.layers.36.block_sparse_moe.experts.195.w2", "model.layers.36.block_sparse_moe.experts.196.w2", "model.layers.36.block_sparse_moe.experts.197.w2", "model.layers.36.block_sparse_moe.experts.198.w2", "model.layers.36.block_sparse_moe.experts.199.w2", "model.layers.36.block_sparse_moe.experts.200.w2", "model.layers.36.block_sparse_moe.experts.201.w2", "model.layers.36.block_sparse_moe.experts.202.w2", "model.layers.36.block_sparse_moe.experts.203.w2", "model.layers.36.block_sparse_moe.experts.204.w2", "model.layers.36.block_sparse_moe.experts.205.w2", "model.layers.36.block_sparse_moe.experts.206.w2", "model.layers.36.block_sparse_moe.experts.207.w2", "model.layers.36.block_sparse_moe.experts.208.w2", "model.layers.36.block_sparse_moe.experts.209.w2", "model.layers.36.block_sparse_moe.experts.210.w2", "model.layers.36.block_sparse_moe.experts.211.w2", "model.layers.36.block_sparse_moe.experts.212.w2", "model.layers.36.block_sparse_moe.experts.213.w2", "model.layers.36.block_sparse_moe.experts.214.w2", "model.layers.36.block_sparse_moe.experts.215.w2", "model.layers.36.block_sparse_moe.experts.216.w2", "model.layers.36.block_sparse_moe.experts.217.w2", "model.layers.36.block_sparse_moe.experts.218.w2", "model.layers.36.block_sparse_moe.experts.219.w2", "model.layers.36.block_sparse_moe.experts.220.w2", "model.layers.36.block_sparse_moe.experts.221.w2", "model.layers.36.block_sparse_moe.experts.222.w2", "model.layers.36.block_sparse_moe.experts.223.w2", "model.layers.36.block_sparse_moe.experts.224.w2", "model.layers.36.block_sparse_moe.experts.225.w2", "model.layers.36.block_sparse_moe.experts.226.w2", "model.layers.36.block_sparse_moe.experts.227.w2", "model.layers.36.block_sparse_moe.experts.228.w2", "model.layers.36.block_sparse_moe.experts.229.w2", "model.layers.36.block_sparse_moe.experts.230.w2", "model.layers.36.block_sparse_moe.experts.231.w2", "model.layers.36.block_sparse_moe.experts.232.w2", "model.layers.36.block_sparse_moe.experts.233.w2", "model.layers.36.block_sparse_moe.experts.234.w2", "model.layers.36.block_sparse_moe.experts.235.w2", "model.layers.36.block_sparse_moe.experts.236.w2", "model.layers.36.block_sparse_moe.experts.237.w2", "model.layers.36.block_sparse_moe.experts.238.w2", "model.layers.36.block_sparse_moe.experts.239.w2", "model.layers.36.block_sparse_moe.experts.240.w2", "model.layers.36.block_sparse_moe.experts.241.w2", "model.layers.36.block_sparse_moe.experts.242.w2", "model.layers.36.block_sparse_moe.experts.243.w2", "model.layers.36.block_sparse_moe.experts.244.w2", "model.layers.36.block_sparse_moe.experts.245.w2", "model.layers.36.block_sparse_moe.experts.246.w2", "model.layers.36.block_sparse_moe.experts.247.w2", "model.layers.36.block_sparse_moe.experts.248.w2", "model.layers.36.block_sparse_moe.experts.249.w2", "model.layers.36.block_sparse_moe.experts.250.w2", "model.layers.36.block_sparse_moe.experts.251.w2", "model.layers.36.block_sparse_moe.experts.252.w2", "model.layers.36.block_sparse_moe.experts.253.w2", "model.layers.36.block_sparse_moe.experts.254.w2", "model.layers.36.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.000444543734192851, "dbits": 1207959552 } ] }, { "idx": 185, "layers": [ "model.layers.37.self_attn.q_proj" ], "candidates": [ { "dkld": 0.000670348107814786, "dbits": 18874368 } ] }, { "idx": 186, "layers": [ "model.layers.37.self_attn.k_proj", "model.layers.37.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0016411339864134816, "dbits": 6291456 } ] }, { "idx": 187, "layers": [ "model.layers.37.self_attn.o_proj" ], "candidates": [ { "dkld": -0.002382532507181165, "dbits": 18874368 } ] }, { "idx": 188, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w1", "model.layers.37.block_sparse_moe.experts.1.w1", "model.layers.37.block_sparse_moe.experts.2.w1", "model.layers.37.block_sparse_moe.experts.3.w1", "model.layers.37.block_sparse_moe.experts.4.w1", "model.layers.37.block_sparse_moe.experts.5.w1", "model.layers.37.block_sparse_moe.experts.6.w1", "model.layers.37.block_sparse_moe.experts.7.w1", "model.layers.37.block_sparse_moe.experts.8.w1", "model.layers.37.block_sparse_moe.experts.9.w1", "model.layers.37.block_sparse_moe.experts.10.w1", "model.layers.37.block_sparse_moe.experts.11.w1", "model.layers.37.block_sparse_moe.experts.12.w1", "model.layers.37.block_sparse_moe.experts.13.w1", "model.layers.37.block_sparse_moe.experts.14.w1", "model.layers.37.block_sparse_moe.experts.15.w1", "model.layers.37.block_sparse_moe.experts.16.w1", "model.layers.37.block_sparse_moe.experts.17.w1", "model.layers.37.block_sparse_moe.experts.18.w1", "model.layers.37.block_sparse_moe.experts.19.w1", "model.layers.37.block_sparse_moe.experts.20.w1", "model.layers.37.block_sparse_moe.experts.21.w1", "model.layers.37.block_sparse_moe.experts.22.w1", "model.layers.37.block_sparse_moe.experts.23.w1", "model.layers.37.block_sparse_moe.experts.24.w1", "model.layers.37.block_sparse_moe.experts.25.w1", "model.layers.37.block_sparse_moe.experts.26.w1", "model.layers.37.block_sparse_moe.experts.27.w1", "model.layers.37.block_sparse_moe.experts.28.w1", "model.layers.37.block_sparse_moe.experts.29.w1", "model.layers.37.block_sparse_moe.experts.30.w1", "model.layers.37.block_sparse_moe.experts.31.w1", "model.layers.37.block_sparse_moe.experts.32.w1", "model.layers.37.block_sparse_moe.experts.33.w1", "model.layers.37.block_sparse_moe.experts.34.w1", "model.layers.37.block_sparse_moe.experts.35.w1", "model.layers.37.block_sparse_moe.experts.36.w1", "model.layers.37.block_sparse_moe.experts.37.w1", "model.layers.37.block_sparse_moe.experts.38.w1", "model.layers.37.block_sparse_moe.experts.39.w1", "model.layers.37.block_sparse_moe.experts.40.w1", "model.layers.37.block_sparse_moe.experts.41.w1", "model.layers.37.block_sparse_moe.experts.42.w1", "model.layers.37.block_sparse_moe.experts.43.w1", "model.layers.37.block_sparse_moe.experts.44.w1", "model.layers.37.block_sparse_moe.experts.45.w1", "model.layers.37.block_sparse_moe.experts.46.w1", "model.layers.37.block_sparse_moe.experts.47.w1", "model.layers.37.block_sparse_moe.experts.48.w1", "model.layers.37.block_sparse_moe.experts.49.w1", "model.layers.37.block_sparse_moe.experts.50.w1", "model.layers.37.block_sparse_moe.experts.51.w1", "model.layers.37.block_sparse_moe.experts.52.w1", "model.layers.37.block_sparse_moe.experts.53.w1", "model.layers.37.block_sparse_moe.experts.54.w1", "model.layers.37.block_sparse_moe.experts.55.w1", "model.layers.37.block_sparse_moe.experts.56.w1", "model.layers.37.block_sparse_moe.experts.57.w1", "model.layers.37.block_sparse_moe.experts.58.w1", "model.layers.37.block_sparse_moe.experts.59.w1", "model.layers.37.block_sparse_moe.experts.60.w1", "model.layers.37.block_sparse_moe.experts.61.w1", "model.layers.37.block_sparse_moe.experts.62.w1", "model.layers.37.block_sparse_moe.experts.63.w1", "model.layers.37.block_sparse_moe.experts.64.w1", "model.layers.37.block_sparse_moe.experts.65.w1", "model.layers.37.block_sparse_moe.experts.66.w1", "model.layers.37.block_sparse_moe.experts.67.w1", "model.layers.37.block_sparse_moe.experts.68.w1", "model.layers.37.block_sparse_moe.experts.69.w1", "model.layers.37.block_sparse_moe.experts.70.w1", "model.layers.37.block_sparse_moe.experts.71.w1", "model.layers.37.block_sparse_moe.experts.72.w1", "model.layers.37.block_sparse_moe.experts.73.w1", "model.layers.37.block_sparse_moe.experts.74.w1", "model.layers.37.block_sparse_moe.experts.75.w1", "model.layers.37.block_sparse_moe.experts.76.w1", "model.layers.37.block_sparse_moe.experts.77.w1", "model.layers.37.block_sparse_moe.experts.78.w1", "model.layers.37.block_sparse_moe.experts.79.w1", "model.layers.37.block_sparse_moe.experts.80.w1", "model.layers.37.block_sparse_moe.experts.81.w1", "model.layers.37.block_sparse_moe.experts.82.w1", "model.layers.37.block_sparse_moe.experts.83.w1", "model.layers.37.block_sparse_moe.experts.84.w1", "model.layers.37.block_sparse_moe.experts.85.w1", "model.layers.37.block_sparse_moe.experts.86.w1", "model.layers.37.block_sparse_moe.experts.87.w1", "model.layers.37.block_sparse_moe.experts.88.w1", "model.layers.37.block_sparse_moe.experts.89.w1", "model.layers.37.block_sparse_moe.experts.90.w1", "model.layers.37.block_sparse_moe.experts.91.w1", "model.layers.37.block_sparse_moe.experts.92.w1", "model.layers.37.block_sparse_moe.experts.93.w1", "model.layers.37.block_sparse_moe.experts.94.w1", "model.layers.37.block_sparse_moe.experts.95.w1", "model.layers.37.block_sparse_moe.experts.96.w1", "model.layers.37.block_sparse_moe.experts.97.w1", "model.layers.37.block_sparse_moe.experts.98.w1", "model.layers.37.block_sparse_moe.experts.99.w1", "model.layers.37.block_sparse_moe.experts.100.w1", "model.layers.37.block_sparse_moe.experts.101.w1", "model.layers.37.block_sparse_moe.experts.102.w1", "model.layers.37.block_sparse_moe.experts.103.w1", "model.layers.37.block_sparse_moe.experts.104.w1", "model.layers.37.block_sparse_moe.experts.105.w1", "model.layers.37.block_sparse_moe.experts.106.w1", "model.layers.37.block_sparse_moe.experts.107.w1", "model.layers.37.block_sparse_moe.experts.108.w1", "model.layers.37.block_sparse_moe.experts.109.w1", "model.layers.37.block_sparse_moe.experts.110.w1", "model.layers.37.block_sparse_moe.experts.111.w1", "model.layers.37.block_sparse_moe.experts.112.w1", "model.layers.37.block_sparse_moe.experts.113.w1", "model.layers.37.block_sparse_moe.experts.114.w1", "model.layers.37.block_sparse_moe.experts.115.w1", "model.layers.37.block_sparse_moe.experts.116.w1", "model.layers.37.block_sparse_moe.experts.117.w1", "model.layers.37.block_sparse_moe.experts.118.w1", "model.layers.37.block_sparse_moe.experts.119.w1", "model.layers.37.block_sparse_moe.experts.120.w1", "model.layers.37.block_sparse_moe.experts.121.w1", "model.layers.37.block_sparse_moe.experts.122.w1", "model.layers.37.block_sparse_moe.experts.123.w1", "model.layers.37.block_sparse_moe.experts.124.w1", "model.layers.37.block_sparse_moe.experts.125.w1", "model.layers.37.block_sparse_moe.experts.126.w1", "model.layers.37.block_sparse_moe.experts.127.w1", "model.layers.37.block_sparse_moe.experts.128.w1", "model.layers.37.block_sparse_moe.experts.129.w1", "model.layers.37.block_sparse_moe.experts.130.w1", "model.layers.37.block_sparse_moe.experts.131.w1", "model.layers.37.block_sparse_moe.experts.132.w1", "model.layers.37.block_sparse_moe.experts.133.w1", "model.layers.37.block_sparse_moe.experts.134.w1", "model.layers.37.block_sparse_moe.experts.135.w1", "model.layers.37.block_sparse_moe.experts.136.w1", "model.layers.37.block_sparse_moe.experts.137.w1", "model.layers.37.block_sparse_moe.experts.138.w1", "model.layers.37.block_sparse_moe.experts.139.w1", "model.layers.37.block_sparse_moe.experts.140.w1", "model.layers.37.block_sparse_moe.experts.141.w1", "model.layers.37.block_sparse_moe.experts.142.w1", "model.layers.37.block_sparse_moe.experts.143.w1", "model.layers.37.block_sparse_moe.experts.144.w1", "model.layers.37.block_sparse_moe.experts.145.w1", "model.layers.37.block_sparse_moe.experts.146.w1", "model.layers.37.block_sparse_moe.experts.147.w1", "model.layers.37.block_sparse_moe.experts.148.w1", "model.layers.37.block_sparse_moe.experts.149.w1", "model.layers.37.block_sparse_moe.experts.150.w1", "model.layers.37.block_sparse_moe.experts.151.w1", "model.layers.37.block_sparse_moe.experts.152.w1", "model.layers.37.block_sparse_moe.experts.153.w1", "model.layers.37.block_sparse_moe.experts.154.w1", "model.layers.37.block_sparse_moe.experts.155.w1", "model.layers.37.block_sparse_moe.experts.156.w1", "model.layers.37.block_sparse_moe.experts.157.w1", "model.layers.37.block_sparse_moe.experts.158.w1", "model.layers.37.block_sparse_moe.experts.159.w1", "model.layers.37.block_sparse_moe.experts.160.w1", "model.layers.37.block_sparse_moe.experts.161.w1", "model.layers.37.block_sparse_moe.experts.162.w1", "model.layers.37.block_sparse_moe.experts.163.w1", "model.layers.37.block_sparse_moe.experts.164.w1", "model.layers.37.block_sparse_moe.experts.165.w1", "model.layers.37.block_sparse_moe.experts.166.w1", "model.layers.37.block_sparse_moe.experts.167.w1", "model.layers.37.block_sparse_moe.experts.168.w1", "model.layers.37.block_sparse_moe.experts.169.w1", "model.layers.37.block_sparse_moe.experts.170.w1", "model.layers.37.block_sparse_moe.experts.171.w1", "model.layers.37.block_sparse_moe.experts.172.w1", "model.layers.37.block_sparse_moe.experts.173.w1", "model.layers.37.block_sparse_moe.experts.174.w1", "model.layers.37.block_sparse_moe.experts.175.w1", "model.layers.37.block_sparse_moe.experts.176.w1", "model.layers.37.block_sparse_moe.experts.177.w1", "model.layers.37.block_sparse_moe.experts.178.w1", "model.layers.37.block_sparse_moe.experts.179.w1", "model.layers.37.block_sparse_moe.experts.180.w1", "model.layers.37.block_sparse_moe.experts.181.w1", "model.layers.37.block_sparse_moe.experts.182.w1", "model.layers.37.block_sparse_moe.experts.183.w1", "model.layers.37.block_sparse_moe.experts.184.w1", "model.layers.37.block_sparse_moe.experts.185.w1", "model.layers.37.block_sparse_moe.experts.186.w1", "model.layers.37.block_sparse_moe.experts.187.w1", "model.layers.37.block_sparse_moe.experts.188.w1", "model.layers.37.block_sparse_moe.experts.189.w1", "model.layers.37.block_sparse_moe.experts.190.w1", "model.layers.37.block_sparse_moe.experts.191.w1", "model.layers.37.block_sparse_moe.experts.192.w1", "model.layers.37.block_sparse_moe.experts.193.w1", "model.layers.37.block_sparse_moe.experts.194.w1", "model.layers.37.block_sparse_moe.experts.195.w1", "model.layers.37.block_sparse_moe.experts.196.w1", "model.layers.37.block_sparse_moe.experts.197.w1", "model.layers.37.block_sparse_moe.experts.198.w1", "model.layers.37.block_sparse_moe.experts.199.w1", "model.layers.37.block_sparse_moe.experts.200.w1", "model.layers.37.block_sparse_moe.experts.201.w1", "model.layers.37.block_sparse_moe.experts.202.w1", "model.layers.37.block_sparse_moe.experts.203.w1", "model.layers.37.block_sparse_moe.experts.204.w1", "model.layers.37.block_sparse_moe.experts.205.w1", "model.layers.37.block_sparse_moe.experts.206.w1", "model.layers.37.block_sparse_moe.experts.207.w1", "model.layers.37.block_sparse_moe.experts.208.w1", "model.layers.37.block_sparse_moe.experts.209.w1", "model.layers.37.block_sparse_moe.experts.210.w1", "model.layers.37.block_sparse_moe.experts.211.w1", "model.layers.37.block_sparse_moe.experts.212.w1", "model.layers.37.block_sparse_moe.experts.213.w1", "model.layers.37.block_sparse_moe.experts.214.w1", "model.layers.37.block_sparse_moe.experts.215.w1", "model.layers.37.block_sparse_moe.experts.216.w1", "model.layers.37.block_sparse_moe.experts.217.w1", "model.layers.37.block_sparse_moe.experts.218.w1", "model.layers.37.block_sparse_moe.experts.219.w1", "model.layers.37.block_sparse_moe.experts.220.w1", "model.layers.37.block_sparse_moe.experts.221.w1", "model.layers.37.block_sparse_moe.experts.222.w1", "model.layers.37.block_sparse_moe.experts.223.w1", "model.layers.37.block_sparse_moe.experts.224.w1", "model.layers.37.block_sparse_moe.experts.225.w1", "model.layers.37.block_sparse_moe.experts.226.w1", "model.layers.37.block_sparse_moe.experts.227.w1", "model.layers.37.block_sparse_moe.experts.228.w1", "model.layers.37.block_sparse_moe.experts.229.w1", "model.layers.37.block_sparse_moe.experts.230.w1", "model.layers.37.block_sparse_moe.experts.231.w1", "model.layers.37.block_sparse_moe.experts.232.w1", "model.layers.37.block_sparse_moe.experts.233.w1", "model.layers.37.block_sparse_moe.experts.234.w1", "model.layers.37.block_sparse_moe.experts.235.w1", "model.layers.37.block_sparse_moe.experts.236.w1", "model.layers.37.block_sparse_moe.experts.237.w1", "model.layers.37.block_sparse_moe.experts.238.w1", "model.layers.37.block_sparse_moe.experts.239.w1", "model.layers.37.block_sparse_moe.experts.240.w1", "model.layers.37.block_sparse_moe.experts.241.w1", "model.layers.37.block_sparse_moe.experts.242.w1", "model.layers.37.block_sparse_moe.experts.243.w1", "model.layers.37.block_sparse_moe.experts.244.w1", "model.layers.37.block_sparse_moe.experts.245.w1", "model.layers.37.block_sparse_moe.experts.246.w1", "model.layers.37.block_sparse_moe.experts.247.w1", "model.layers.37.block_sparse_moe.experts.248.w1", "model.layers.37.block_sparse_moe.experts.249.w1", "model.layers.37.block_sparse_moe.experts.250.w1", "model.layers.37.block_sparse_moe.experts.251.w1", "model.layers.37.block_sparse_moe.experts.252.w1", "model.layers.37.block_sparse_moe.experts.253.w1", "model.layers.37.block_sparse_moe.experts.254.w1", "model.layers.37.block_sparse_moe.experts.255.w1", "model.layers.37.block_sparse_moe.experts.0.w3", "model.layers.37.block_sparse_moe.experts.1.w3", "model.layers.37.block_sparse_moe.experts.2.w3", "model.layers.37.block_sparse_moe.experts.3.w3", "model.layers.37.block_sparse_moe.experts.4.w3", "model.layers.37.block_sparse_moe.experts.5.w3", "model.layers.37.block_sparse_moe.experts.6.w3", "model.layers.37.block_sparse_moe.experts.7.w3", "model.layers.37.block_sparse_moe.experts.8.w3", "model.layers.37.block_sparse_moe.experts.9.w3", "model.layers.37.block_sparse_moe.experts.10.w3", "model.layers.37.block_sparse_moe.experts.11.w3", "model.layers.37.block_sparse_moe.experts.12.w3", "model.layers.37.block_sparse_moe.experts.13.w3", "model.layers.37.block_sparse_moe.experts.14.w3", "model.layers.37.block_sparse_moe.experts.15.w3", "model.layers.37.block_sparse_moe.experts.16.w3", "model.layers.37.block_sparse_moe.experts.17.w3", "model.layers.37.block_sparse_moe.experts.18.w3", "model.layers.37.block_sparse_moe.experts.19.w3", "model.layers.37.block_sparse_moe.experts.20.w3", "model.layers.37.block_sparse_moe.experts.21.w3", "model.layers.37.block_sparse_moe.experts.22.w3", "model.layers.37.block_sparse_moe.experts.23.w3", "model.layers.37.block_sparse_moe.experts.24.w3", "model.layers.37.block_sparse_moe.experts.25.w3", "model.layers.37.block_sparse_moe.experts.26.w3", "model.layers.37.block_sparse_moe.experts.27.w3", "model.layers.37.block_sparse_moe.experts.28.w3", "model.layers.37.block_sparse_moe.experts.29.w3", "model.layers.37.block_sparse_moe.experts.30.w3", "model.layers.37.block_sparse_moe.experts.31.w3", "model.layers.37.block_sparse_moe.experts.32.w3", "model.layers.37.block_sparse_moe.experts.33.w3", "model.layers.37.block_sparse_moe.experts.34.w3", "model.layers.37.block_sparse_moe.experts.35.w3", "model.layers.37.block_sparse_moe.experts.36.w3", "model.layers.37.block_sparse_moe.experts.37.w3", "model.layers.37.block_sparse_moe.experts.38.w3", "model.layers.37.block_sparse_moe.experts.39.w3", "model.layers.37.block_sparse_moe.experts.40.w3", "model.layers.37.block_sparse_moe.experts.41.w3", "model.layers.37.block_sparse_moe.experts.42.w3", "model.layers.37.block_sparse_moe.experts.43.w3", "model.layers.37.block_sparse_moe.experts.44.w3", "model.layers.37.block_sparse_moe.experts.45.w3", "model.layers.37.block_sparse_moe.experts.46.w3", "model.layers.37.block_sparse_moe.experts.47.w3", "model.layers.37.block_sparse_moe.experts.48.w3", "model.layers.37.block_sparse_moe.experts.49.w3", "model.layers.37.block_sparse_moe.experts.50.w3", "model.layers.37.block_sparse_moe.experts.51.w3", "model.layers.37.block_sparse_moe.experts.52.w3", "model.layers.37.block_sparse_moe.experts.53.w3", "model.layers.37.block_sparse_moe.experts.54.w3", "model.layers.37.block_sparse_moe.experts.55.w3", "model.layers.37.block_sparse_moe.experts.56.w3", "model.layers.37.block_sparse_moe.experts.57.w3", "model.layers.37.block_sparse_moe.experts.58.w3", "model.layers.37.block_sparse_moe.experts.59.w3", "model.layers.37.block_sparse_moe.experts.60.w3", "model.layers.37.block_sparse_moe.experts.61.w3", "model.layers.37.block_sparse_moe.experts.62.w3", "model.layers.37.block_sparse_moe.experts.63.w3", "model.layers.37.block_sparse_moe.experts.64.w3", "model.layers.37.block_sparse_moe.experts.65.w3", "model.layers.37.block_sparse_moe.experts.66.w3", "model.layers.37.block_sparse_moe.experts.67.w3", "model.layers.37.block_sparse_moe.experts.68.w3", "model.layers.37.block_sparse_moe.experts.69.w3", "model.layers.37.block_sparse_moe.experts.70.w3", "model.layers.37.block_sparse_moe.experts.71.w3", "model.layers.37.block_sparse_moe.experts.72.w3", "model.layers.37.block_sparse_moe.experts.73.w3", "model.layers.37.block_sparse_moe.experts.74.w3", "model.layers.37.block_sparse_moe.experts.75.w3", "model.layers.37.block_sparse_moe.experts.76.w3", "model.layers.37.block_sparse_moe.experts.77.w3", "model.layers.37.block_sparse_moe.experts.78.w3", "model.layers.37.block_sparse_moe.experts.79.w3", "model.layers.37.block_sparse_moe.experts.80.w3", "model.layers.37.block_sparse_moe.experts.81.w3", "model.layers.37.block_sparse_moe.experts.82.w3", "model.layers.37.block_sparse_moe.experts.83.w3", "model.layers.37.block_sparse_moe.experts.84.w3", "model.layers.37.block_sparse_moe.experts.85.w3", "model.layers.37.block_sparse_moe.experts.86.w3", "model.layers.37.block_sparse_moe.experts.87.w3", "model.layers.37.block_sparse_moe.experts.88.w3", "model.layers.37.block_sparse_moe.experts.89.w3", "model.layers.37.block_sparse_moe.experts.90.w3", "model.layers.37.block_sparse_moe.experts.91.w3", "model.layers.37.block_sparse_moe.experts.92.w3", "model.layers.37.block_sparse_moe.experts.93.w3", "model.layers.37.block_sparse_moe.experts.94.w3", "model.layers.37.block_sparse_moe.experts.95.w3", "model.layers.37.block_sparse_moe.experts.96.w3", "model.layers.37.block_sparse_moe.experts.97.w3", "model.layers.37.block_sparse_moe.experts.98.w3", "model.layers.37.block_sparse_moe.experts.99.w3", "model.layers.37.block_sparse_moe.experts.100.w3", "model.layers.37.block_sparse_moe.experts.101.w3", "model.layers.37.block_sparse_moe.experts.102.w3", "model.layers.37.block_sparse_moe.experts.103.w3", "model.layers.37.block_sparse_moe.experts.104.w3", "model.layers.37.block_sparse_moe.experts.105.w3", "model.layers.37.block_sparse_moe.experts.106.w3", "model.layers.37.block_sparse_moe.experts.107.w3", "model.layers.37.block_sparse_moe.experts.108.w3", "model.layers.37.block_sparse_moe.experts.109.w3", "model.layers.37.block_sparse_moe.experts.110.w3", "model.layers.37.block_sparse_moe.experts.111.w3", "model.layers.37.block_sparse_moe.experts.112.w3", "model.layers.37.block_sparse_moe.experts.113.w3", "model.layers.37.block_sparse_moe.experts.114.w3", "model.layers.37.block_sparse_moe.experts.115.w3", "model.layers.37.block_sparse_moe.experts.116.w3", "model.layers.37.block_sparse_moe.experts.117.w3", "model.layers.37.block_sparse_moe.experts.118.w3", "model.layers.37.block_sparse_moe.experts.119.w3", "model.layers.37.block_sparse_moe.experts.120.w3", "model.layers.37.block_sparse_moe.experts.121.w3", "model.layers.37.block_sparse_moe.experts.122.w3", "model.layers.37.block_sparse_moe.experts.123.w3", "model.layers.37.block_sparse_moe.experts.124.w3", "model.layers.37.block_sparse_moe.experts.125.w3", "model.layers.37.block_sparse_moe.experts.126.w3", "model.layers.37.block_sparse_moe.experts.127.w3", "model.layers.37.block_sparse_moe.experts.128.w3", "model.layers.37.block_sparse_moe.experts.129.w3", "model.layers.37.block_sparse_moe.experts.130.w3", "model.layers.37.block_sparse_moe.experts.131.w3", "model.layers.37.block_sparse_moe.experts.132.w3", "model.layers.37.block_sparse_moe.experts.133.w3", "model.layers.37.block_sparse_moe.experts.134.w3", "model.layers.37.block_sparse_moe.experts.135.w3", "model.layers.37.block_sparse_moe.experts.136.w3", "model.layers.37.block_sparse_moe.experts.137.w3", "model.layers.37.block_sparse_moe.experts.138.w3", "model.layers.37.block_sparse_moe.experts.139.w3", "model.layers.37.block_sparse_moe.experts.140.w3", "model.layers.37.block_sparse_moe.experts.141.w3", "model.layers.37.block_sparse_moe.experts.142.w3", "model.layers.37.block_sparse_moe.experts.143.w3", "model.layers.37.block_sparse_moe.experts.144.w3", "model.layers.37.block_sparse_moe.experts.145.w3", "model.layers.37.block_sparse_moe.experts.146.w3", "model.layers.37.block_sparse_moe.experts.147.w3", "model.layers.37.block_sparse_moe.experts.148.w3", "model.layers.37.block_sparse_moe.experts.149.w3", "model.layers.37.block_sparse_moe.experts.150.w3", "model.layers.37.block_sparse_moe.experts.151.w3", "model.layers.37.block_sparse_moe.experts.152.w3", "model.layers.37.block_sparse_moe.experts.153.w3", "model.layers.37.block_sparse_moe.experts.154.w3", "model.layers.37.block_sparse_moe.experts.155.w3", "model.layers.37.block_sparse_moe.experts.156.w3", "model.layers.37.block_sparse_moe.experts.157.w3", "model.layers.37.block_sparse_moe.experts.158.w3", "model.layers.37.block_sparse_moe.experts.159.w3", "model.layers.37.block_sparse_moe.experts.160.w3", "model.layers.37.block_sparse_moe.experts.161.w3", "model.layers.37.block_sparse_moe.experts.162.w3", "model.layers.37.block_sparse_moe.experts.163.w3", "model.layers.37.block_sparse_moe.experts.164.w3", "model.layers.37.block_sparse_moe.experts.165.w3", "model.layers.37.block_sparse_moe.experts.166.w3", "model.layers.37.block_sparse_moe.experts.167.w3", "model.layers.37.block_sparse_moe.experts.168.w3", "model.layers.37.block_sparse_moe.experts.169.w3", "model.layers.37.block_sparse_moe.experts.170.w3", "model.layers.37.block_sparse_moe.experts.171.w3", "model.layers.37.block_sparse_moe.experts.172.w3", "model.layers.37.block_sparse_moe.experts.173.w3", "model.layers.37.block_sparse_moe.experts.174.w3", "model.layers.37.block_sparse_moe.experts.175.w3", "model.layers.37.block_sparse_moe.experts.176.w3", "model.layers.37.block_sparse_moe.experts.177.w3", "model.layers.37.block_sparse_moe.experts.178.w3", "model.layers.37.block_sparse_moe.experts.179.w3", "model.layers.37.block_sparse_moe.experts.180.w3", "model.layers.37.block_sparse_moe.experts.181.w3", "model.layers.37.block_sparse_moe.experts.182.w3", "model.layers.37.block_sparse_moe.experts.183.w3", "model.layers.37.block_sparse_moe.experts.184.w3", "model.layers.37.block_sparse_moe.experts.185.w3", "model.layers.37.block_sparse_moe.experts.186.w3", "model.layers.37.block_sparse_moe.experts.187.w3", "model.layers.37.block_sparse_moe.experts.188.w3", "model.layers.37.block_sparse_moe.experts.189.w3", "model.layers.37.block_sparse_moe.experts.190.w3", "model.layers.37.block_sparse_moe.experts.191.w3", "model.layers.37.block_sparse_moe.experts.192.w3", "model.layers.37.block_sparse_moe.experts.193.w3", "model.layers.37.block_sparse_moe.experts.194.w3", "model.layers.37.block_sparse_moe.experts.195.w3", "model.layers.37.block_sparse_moe.experts.196.w3", "model.layers.37.block_sparse_moe.experts.197.w3", "model.layers.37.block_sparse_moe.experts.198.w3", "model.layers.37.block_sparse_moe.experts.199.w3", "model.layers.37.block_sparse_moe.experts.200.w3", "model.layers.37.block_sparse_moe.experts.201.w3", "model.layers.37.block_sparse_moe.experts.202.w3", "model.layers.37.block_sparse_moe.experts.203.w3", "model.layers.37.block_sparse_moe.experts.204.w3", "model.layers.37.block_sparse_moe.experts.205.w3", "model.layers.37.block_sparse_moe.experts.206.w3", "model.layers.37.block_sparse_moe.experts.207.w3", "model.layers.37.block_sparse_moe.experts.208.w3", "model.layers.37.block_sparse_moe.experts.209.w3", "model.layers.37.block_sparse_moe.experts.210.w3", "model.layers.37.block_sparse_moe.experts.211.w3", "model.layers.37.block_sparse_moe.experts.212.w3", "model.layers.37.block_sparse_moe.experts.213.w3", "model.layers.37.block_sparse_moe.experts.214.w3", "model.layers.37.block_sparse_moe.experts.215.w3", "model.layers.37.block_sparse_moe.experts.216.w3", "model.layers.37.block_sparse_moe.experts.217.w3", "model.layers.37.block_sparse_moe.experts.218.w3", "model.layers.37.block_sparse_moe.experts.219.w3", "model.layers.37.block_sparse_moe.experts.220.w3", "model.layers.37.block_sparse_moe.experts.221.w3", "model.layers.37.block_sparse_moe.experts.222.w3", "model.layers.37.block_sparse_moe.experts.223.w3", "model.layers.37.block_sparse_moe.experts.224.w3", "model.layers.37.block_sparse_moe.experts.225.w3", "model.layers.37.block_sparse_moe.experts.226.w3", "model.layers.37.block_sparse_moe.experts.227.w3", "model.layers.37.block_sparse_moe.experts.228.w3", "model.layers.37.block_sparse_moe.experts.229.w3", "model.layers.37.block_sparse_moe.experts.230.w3", "model.layers.37.block_sparse_moe.experts.231.w3", "model.layers.37.block_sparse_moe.experts.232.w3", "model.layers.37.block_sparse_moe.experts.233.w3", "model.layers.37.block_sparse_moe.experts.234.w3", "model.layers.37.block_sparse_moe.experts.235.w3", "model.layers.37.block_sparse_moe.experts.236.w3", "model.layers.37.block_sparse_moe.experts.237.w3", "model.layers.37.block_sparse_moe.experts.238.w3", "model.layers.37.block_sparse_moe.experts.239.w3", "model.layers.37.block_sparse_moe.experts.240.w3", "model.layers.37.block_sparse_moe.experts.241.w3", "model.layers.37.block_sparse_moe.experts.242.w3", "model.layers.37.block_sparse_moe.experts.243.w3", "model.layers.37.block_sparse_moe.experts.244.w3", "model.layers.37.block_sparse_moe.experts.245.w3", "model.layers.37.block_sparse_moe.experts.246.w3", "model.layers.37.block_sparse_moe.experts.247.w3", "model.layers.37.block_sparse_moe.experts.248.w3", "model.layers.37.block_sparse_moe.experts.249.w3", "model.layers.37.block_sparse_moe.experts.250.w3", "model.layers.37.block_sparse_moe.experts.251.w3", "model.layers.37.block_sparse_moe.experts.252.w3", "model.layers.37.block_sparse_moe.experts.253.w3", "model.layers.37.block_sparse_moe.experts.254.w3", "model.layers.37.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 3.0106119811537657e-05, "dbits": 2415919104 } ] }, { "idx": 189, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w2", "model.layers.37.block_sparse_moe.experts.1.w2", "model.layers.37.block_sparse_moe.experts.2.w2", "model.layers.37.block_sparse_moe.experts.3.w2", "model.layers.37.block_sparse_moe.experts.4.w2", "model.layers.37.block_sparse_moe.experts.5.w2", "model.layers.37.block_sparse_moe.experts.6.w2", "model.layers.37.block_sparse_moe.experts.7.w2", "model.layers.37.block_sparse_moe.experts.8.w2", "model.layers.37.block_sparse_moe.experts.9.w2", "model.layers.37.block_sparse_moe.experts.10.w2", "model.layers.37.block_sparse_moe.experts.11.w2", "model.layers.37.block_sparse_moe.experts.12.w2", "model.layers.37.block_sparse_moe.experts.13.w2", "model.layers.37.block_sparse_moe.experts.14.w2", "model.layers.37.block_sparse_moe.experts.15.w2", "model.layers.37.block_sparse_moe.experts.16.w2", "model.layers.37.block_sparse_moe.experts.17.w2", "model.layers.37.block_sparse_moe.experts.18.w2", "model.layers.37.block_sparse_moe.experts.19.w2", "model.layers.37.block_sparse_moe.experts.20.w2", "model.layers.37.block_sparse_moe.experts.21.w2", "model.layers.37.block_sparse_moe.experts.22.w2", "model.layers.37.block_sparse_moe.experts.23.w2", "model.layers.37.block_sparse_moe.experts.24.w2", "model.layers.37.block_sparse_moe.experts.25.w2", "model.layers.37.block_sparse_moe.experts.26.w2", "model.layers.37.block_sparse_moe.experts.27.w2", "model.layers.37.block_sparse_moe.experts.28.w2", "model.layers.37.block_sparse_moe.experts.29.w2", "model.layers.37.block_sparse_moe.experts.30.w2", "model.layers.37.block_sparse_moe.experts.31.w2", "model.layers.37.block_sparse_moe.experts.32.w2", "model.layers.37.block_sparse_moe.experts.33.w2", "model.layers.37.block_sparse_moe.experts.34.w2", "model.layers.37.block_sparse_moe.experts.35.w2", "model.layers.37.block_sparse_moe.experts.36.w2", "model.layers.37.block_sparse_moe.experts.37.w2", "model.layers.37.block_sparse_moe.experts.38.w2", "model.layers.37.block_sparse_moe.experts.39.w2", "model.layers.37.block_sparse_moe.experts.40.w2", "model.layers.37.block_sparse_moe.experts.41.w2", "model.layers.37.block_sparse_moe.experts.42.w2", "model.layers.37.block_sparse_moe.experts.43.w2", "model.layers.37.block_sparse_moe.experts.44.w2", "model.layers.37.block_sparse_moe.experts.45.w2", "model.layers.37.block_sparse_moe.experts.46.w2", "model.layers.37.block_sparse_moe.experts.47.w2", "model.layers.37.block_sparse_moe.experts.48.w2", "model.layers.37.block_sparse_moe.experts.49.w2", "model.layers.37.block_sparse_moe.experts.50.w2", "model.layers.37.block_sparse_moe.experts.51.w2", "model.layers.37.block_sparse_moe.experts.52.w2", "model.layers.37.block_sparse_moe.experts.53.w2", "model.layers.37.block_sparse_moe.experts.54.w2", "model.layers.37.block_sparse_moe.experts.55.w2", "model.layers.37.block_sparse_moe.experts.56.w2", "model.layers.37.block_sparse_moe.experts.57.w2", "model.layers.37.block_sparse_moe.experts.58.w2", "model.layers.37.block_sparse_moe.experts.59.w2", "model.layers.37.block_sparse_moe.experts.60.w2", "model.layers.37.block_sparse_moe.experts.61.w2", "model.layers.37.block_sparse_moe.experts.62.w2", "model.layers.37.block_sparse_moe.experts.63.w2", "model.layers.37.block_sparse_moe.experts.64.w2", "model.layers.37.block_sparse_moe.experts.65.w2", "model.layers.37.block_sparse_moe.experts.66.w2", "model.layers.37.block_sparse_moe.experts.67.w2", "model.layers.37.block_sparse_moe.experts.68.w2", "model.layers.37.block_sparse_moe.experts.69.w2", "model.layers.37.block_sparse_moe.experts.70.w2", "model.layers.37.block_sparse_moe.experts.71.w2", "model.layers.37.block_sparse_moe.experts.72.w2", "model.layers.37.block_sparse_moe.experts.73.w2", "model.layers.37.block_sparse_moe.experts.74.w2", "model.layers.37.block_sparse_moe.experts.75.w2", "model.layers.37.block_sparse_moe.experts.76.w2", "model.layers.37.block_sparse_moe.experts.77.w2", "model.layers.37.block_sparse_moe.experts.78.w2", "model.layers.37.block_sparse_moe.experts.79.w2", "model.layers.37.block_sparse_moe.experts.80.w2", "model.layers.37.block_sparse_moe.experts.81.w2", "model.layers.37.block_sparse_moe.experts.82.w2", "model.layers.37.block_sparse_moe.experts.83.w2", "model.layers.37.block_sparse_moe.experts.84.w2", "model.layers.37.block_sparse_moe.experts.85.w2", "model.layers.37.block_sparse_moe.experts.86.w2", "model.layers.37.block_sparse_moe.experts.87.w2", "model.layers.37.block_sparse_moe.experts.88.w2", "model.layers.37.block_sparse_moe.experts.89.w2", "model.layers.37.block_sparse_moe.experts.90.w2", "model.layers.37.block_sparse_moe.experts.91.w2", "model.layers.37.block_sparse_moe.experts.92.w2", "model.layers.37.block_sparse_moe.experts.93.w2", "model.layers.37.block_sparse_moe.experts.94.w2", "model.layers.37.block_sparse_moe.experts.95.w2", "model.layers.37.block_sparse_moe.experts.96.w2", "model.layers.37.block_sparse_moe.experts.97.w2", "model.layers.37.block_sparse_moe.experts.98.w2", "model.layers.37.block_sparse_moe.experts.99.w2", "model.layers.37.block_sparse_moe.experts.100.w2", "model.layers.37.block_sparse_moe.experts.101.w2", "model.layers.37.block_sparse_moe.experts.102.w2", "model.layers.37.block_sparse_moe.experts.103.w2", "model.layers.37.block_sparse_moe.experts.104.w2", "model.layers.37.block_sparse_moe.experts.105.w2", "model.layers.37.block_sparse_moe.experts.106.w2", "model.layers.37.block_sparse_moe.experts.107.w2", "model.layers.37.block_sparse_moe.experts.108.w2", "model.layers.37.block_sparse_moe.experts.109.w2", "model.layers.37.block_sparse_moe.experts.110.w2", "model.layers.37.block_sparse_moe.experts.111.w2", "model.layers.37.block_sparse_moe.experts.112.w2", "model.layers.37.block_sparse_moe.experts.113.w2", "model.layers.37.block_sparse_moe.experts.114.w2", "model.layers.37.block_sparse_moe.experts.115.w2", "model.layers.37.block_sparse_moe.experts.116.w2", "model.layers.37.block_sparse_moe.experts.117.w2", "model.layers.37.block_sparse_moe.experts.118.w2", "model.layers.37.block_sparse_moe.experts.119.w2", "model.layers.37.block_sparse_moe.experts.120.w2", "model.layers.37.block_sparse_moe.experts.121.w2", "model.layers.37.block_sparse_moe.experts.122.w2", "model.layers.37.block_sparse_moe.experts.123.w2", "model.layers.37.block_sparse_moe.experts.124.w2", "model.layers.37.block_sparse_moe.experts.125.w2", "model.layers.37.block_sparse_moe.experts.126.w2", "model.layers.37.block_sparse_moe.experts.127.w2", "model.layers.37.block_sparse_moe.experts.128.w2", "model.layers.37.block_sparse_moe.experts.129.w2", "model.layers.37.block_sparse_moe.experts.130.w2", "model.layers.37.block_sparse_moe.experts.131.w2", "model.layers.37.block_sparse_moe.experts.132.w2", "model.layers.37.block_sparse_moe.experts.133.w2", "model.layers.37.block_sparse_moe.experts.134.w2", "model.layers.37.block_sparse_moe.experts.135.w2", "model.layers.37.block_sparse_moe.experts.136.w2", "model.layers.37.block_sparse_moe.experts.137.w2", "model.layers.37.block_sparse_moe.experts.138.w2", "model.layers.37.block_sparse_moe.experts.139.w2", "model.layers.37.block_sparse_moe.experts.140.w2", "model.layers.37.block_sparse_moe.experts.141.w2", "model.layers.37.block_sparse_moe.experts.142.w2", "model.layers.37.block_sparse_moe.experts.143.w2", "model.layers.37.block_sparse_moe.experts.144.w2", "model.layers.37.block_sparse_moe.experts.145.w2", "model.layers.37.block_sparse_moe.experts.146.w2", "model.layers.37.block_sparse_moe.experts.147.w2", "model.layers.37.block_sparse_moe.experts.148.w2", "model.layers.37.block_sparse_moe.experts.149.w2", "model.layers.37.block_sparse_moe.experts.150.w2", "model.layers.37.block_sparse_moe.experts.151.w2", "model.layers.37.block_sparse_moe.experts.152.w2", "model.layers.37.block_sparse_moe.experts.153.w2", "model.layers.37.block_sparse_moe.experts.154.w2", "model.layers.37.block_sparse_moe.experts.155.w2", "model.layers.37.block_sparse_moe.experts.156.w2", "model.layers.37.block_sparse_moe.experts.157.w2", "model.layers.37.block_sparse_moe.experts.158.w2", "model.layers.37.block_sparse_moe.experts.159.w2", "model.layers.37.block_sparse_moe.experts.160.w2", "model.layers.37.block_sparse_moe.experts.161.w2", "model.layers.37.block_sparse_moe.experts.162.w2", "model.layers.37.block_sparse_moe.experts.163.w2", "model.layers.37.block_sparse_moe.experts.164.w2", "model.layers.37.block_sparse_moe.experts.165.w2", "model.layers.37.block_sparse_moe.experts.166.w2", "model.layers.37.block_sparse_moe.experts.167.w2", "model.layers.37.block_sparse_moe.experts.168.w2", "model.layers.37.block_sparse_moe.experts.169.w2", "model.layers.37.block_sparse_moe.experts.170.w2", "model.layers.37.block_sparse_moe.experts.171.w2", "model.layers.37.block_sparse_moe.experts.172.w2", "model.layers.37.block_sparse_moe.experts.173.w2", "model.layers.37.block_sparse_moe.experts.174.w2", "model.layers.37.block_sparse_moe.experts.175.w2", "model.layers.37.block_sparse_moe.experts.176.w2", "model.layers.37.block_sparse_moe.experts.177.w2", "model.layers.37.block_sparse_moe.experts.178.w2", "model.layers.37.block_sparse_moe.experts.179.w2", "model.layers.37.block_sparse_moe.experts.180.w2", "model.layers.37.block_sparse_moe.experts.181.w2", "model.layers.37.block_sparse_moe.experts.182.w2", "model.layers.37.block_sparse_moe.experts.183.w2", "model.layers.37.block_sparse_moe.experts.184.w2", "model.layers.37.block_sparse_moe.experts.185.w2", "model.layers.37.block_sparse_moe.experts.186.w2", "model.layers.37.block_sparse_moe.experts.187.w2", "model.layers.37.block_sparse_moe.experts.188.w2", "model.layers.37.block_sparse_moe.experts.189.w2", "model.layers.37.block_sparse_moe.experts.190.w2", "model.layers.37.block_sparse_moe.experts.191.w2", "model.layers.37.block_sparse_moe.experts.192.w2", "model.layers.37.block_sparse_moe.experts.193.w2", "model.layers.37.block_sparse_moe.experts.194.w2", "model.layers.37.block_sparse_moe.experts.195.w2", "model.layers.37.block_sparse_moe.experts.196.w2", "model.layers.37.block_sparse_moe.experts.197.w2", "model.layers.37.block_sparse_moe.experts.198.w2", "model.layers.37.block_sparse_moe.experts.199.w2", "model.layers.37.block_sparse_moe.experts.200.w2", "model.layers.37.block_sparse_moe.experts.201.w2", "model.layers.37.block_sparse_moe.experts.202.w2", "model.layers.37.block_sparse_moe.experts.203.w2", "model.layers.37.block_sparse_moe.experts.204.w2", "model.layers.37.block_sparse_moe.experts.205.w2", "model.layers.37.block_sparse_moe.experts.206.w2", "model.layers.37.block_sparse_moe.experts.207.w2", "model.layers.37.block_sparse_moe.experts.208.w2", "model.layers.37.block_sparse_moe.experts.209.w2", "model.layers.37.block_sparse_moe.experts.210.w2", "model.layers.37.block_sparse_moe.experts.211.w2", "model.layers.37.block_sparse_moe.experts.212.w2", "model.layers.37.block_sparse_moe.experts.213.w2", "model.layers.37.block_sparse_moe.experts.214.w2", "model.layers.37.block_sparse_moe.experts.215.w2", "model.layers.37.block_sparse_moe.experts.216.w2", "model.layers.37.block_sparse_moe.experts.217.w2", "model.layers.37.block_sparse_moe.experts.218.w2", "model.layers.37.block_sparse_moe.experts.219.w2", "model.layers.37.block_sparse_moe.experts.220.w2", "model.layers.37.block_sparse_moe.experts.221.w2", "model.layers.37.block_sparse_moe.experts.222.w2", "model.layers.37.block_sparse_moe.experts.223.w2", "model.layers.37.block_sparse_moe.experts.224.w2", "model.layers.37.block_sparse_moe.experts.225.w2", "model.layers.37.block_sparse_moe.experts.226.w2", "model.layers.37.block_sparse_moe.experts.227.w2", "model.layers.37.block_sparse_moe.experts.228.w2", "model.layers.37.block_sparse_moe.experts.229.w2", "model.layers.37.block_sparse_moe.experts.230.w2", "model.layers.37.block_sparse_moe.experts.231.w2", "model.layers.37.block_sparse_moe.experts.232.w2", "model.layers.37.block_sparse_moe.experts.233.w2", "model.layers.37.block_sparse_moe.experts.234.w2", "model.layers.37.block_sparse_moe.experts.235.w2", "model.layers.37.block_sparse_moe.experts.236.w2", "model.layers.37.block_sparse_moe.experts.237.w2", "model.layers.37.block_sparse_moe.experts.238.w2", "model.layers.37.block_sparse_moe.experts.239.w2", "model.layers.37.block_sparse_moe.experts.240.w2", "model.layers.37.block_sparse_moe.experts.241.w2", "model.layers.37.block_sparse_moe.experts.242.w2", "model.layers.37.block_sparse_moe.experts.243.w2", "model.layers.37.block_sparse_moe.experts.244.w2", "model.layers.37.block_sparse_moe.experts.245.w2", "model.layers.37.block_sparse_moe.experts.246.w2", "model.layers.37.block_sparse_moe.experts.247.w2", "model.layers.37.block_sparse_moe.experts.248.w2", "model.layers.37.block_sparse_moe.experts.249.w2", "model.layers.37.block_sparse_moe.experts.250.w2", "model.layers.37.block_sparse_moe.experts.251.w2", "model.layers.37.block_sparse_moe.experts.252.w2", "model.layers.37.block_sparse_moe.experts.253.w2", "model.layers.37.block_sparse_moe.experts.254.w2", "model.layers.37.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 2.8840266168117523e-05, "dbits": 1207959552 } ] }, { "idx": 190, "layers": [ "model.layers.38.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00040189418941735666, "dbits": 18874368 } ] }, { "idx": 191, "layers": [ "model.layers.38.self_attn.k_proj", "model.layers.38.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0015209063887596103, "dbits": 6291456 } ] }, { "idx": 192, "layers": [ "model.layers.38.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00047802422195672434, "dbits": 18874368 } ] }, { "idx": 193, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w1", "model.layers.38.block_sparse_moe.experts.1.w1", "model.layers.38.block_sparse_moe.experts.2.w1", "model.layers.38.block_sparse_moe.experts.3.w1", "model.layers.38.block_sparse_moe.experts.4.w1", "model.layers.38.block_sparse_moe.experts.5.w1", "model.layers.38.block_sparse_moe.experts.6.w1", "model.layers.38.block_sparse_moe.experts.7.w1", "model.layers.38.block_sparse_moe.experts.8.w1", "model.layers.38.block_sparse_moe.experts.9.w1", "model.layers.38.block_sparse_moe.experts.10.w1", "model.layers.38.block_sparse_moe.experts.11.w1", "model.layers.38.block_sparse_moe.experts.12.w1", "model.layers.38.block_sparse_moe.experts.13.w1", "model.layers.38.block_sparse_moe.experts.14.w1", "model.layers.38.block_sparse_moe.experts.15.w1", "model.layers.38.block_sparse_moe.experts.16.w1", "model.layers.38.block_sparse_moe.experts.17.w1", "model.layers.38.block_sparse_moe.experts.18.w1", "model.layers.38.block_sparse_moe.experts.19.w1", "model.layers.38.block_sparse_moe.experts.20.w1", "model.layers.38.block_sparse_moe.experts.21.w1", "model.layers.38.block_sparse_moe.experts.22.w1", "model.layers.38.block_sparse_moe.experts.23.w1", "model.layers.38.block_sparse_moe.experts.24.w1", "model.layers.38.block_sparse_moe.experts.25.w1", "model.layers.38.block_sparse_moe.experts.26.w1", "model.layers.38.block_sparse_moe.experts.27.w1", "model.layers.38.block_sparse_moe.experts.28.w1", "model.layers.38.block_sparse_moe.experts.29.w1", "model.layers.38.block_sparse_moe.experts.30.w1", "model.layers.38.block_sparse_moe.experts.31.w1", "model.layers.38.block_sparse_moe.experts.32.w1", "model.layers.38.block_sparse_moe.experts.33.w1", "model.layers.38.block_sparse_moe.experts.34.w1", "model.layers.38.block_sparse_moe.experts.35.w1", "model.layers.38.block_sparse_moe.experts.36.w1", "model.layers.38.block_sparse_moe.experts.37.w1", "model.layers.38.block_sparse_moe.experts.38.w1", "model.layers.38.block_sparse_moe.experts.39.w1", "model.layers.38.block_sparse_moe.experts.40.w1", "model.layers.38.block_sparse_moe.experts.41.w1", "model.layers.38.block_sparse_moe.experts.42.w1", "model.layers.38.block_sparse_moe.experts.43.w1", "model.layers.38.block_sparse_moe.experts.44.w1", "model.layers.38.block_sparse_moe.experts.45.w1", "model.layers.38.block_sparse_moe.experts.46.w1", "model.layers.38.block_sparse_moe.experts.47.w1", "model.layers.38.block_sparse_moe.experts.48.w1", "model.layers.38.block_sparse_moe.experts.49.w1", "model.layers.38.block_sparse_moe.experts.50.w1", "model.layers.38.block_sparse_moe.experts.51.w1", "model.layers.38.block_sparse_moe.experts.52.w1", "model.layers.38.block_sparse_moe.experts.53.w1", "model.layers.38.block_sparse_moe.experts.54.w1", "model.layers.38.block_sparse_moe.experts.55.w1", "model.layers.38.block_sparse_moe.experts.56.w1", "model.layers.38.block_sparse_moe.experts.57.w1", "model.layers.38.block_sparse_moe.experts.58.w1", "model.layers.38.block_sparse_moe.experts.59.w1", "model.layers.38.block_sparse_moe.experts.60.w1", "model.layers.38.block_sparse_moe.experts.61.w1", "model.layers.38.block_sparse_moe.experts.62.w1", "model.layers.38.block_sparse_moe.experts.63.w1", "model.layers.38.block_sparse_moe.experts.64.w1", "model.layers.38.block_sparse_moe.experts.65.w1", "model.layers.38.block_sparse_moe.experts.66.w1", "model.layers.38.block_sparse_moe.experts.67.w1", "model.layers.38.block_sparse_moe.experts.68.w1", "model.layers.38.block_sparse_moe.experts.69.w1", "model.layers.38.block_sparse_moe.experts.70.w1", "model.layers.38.block_sparse_moe.experts.71.w1", "model.layers.38.block_sparse_moe.experts.72.w1", "model.layers.38.block_sparse_moe.experts.73.w1", "model.layers.38.block_sparse_moe.experts.74.w1", "model.layers.38.block_sparse_moe.experts.75.w1", "model.layers.38.block_sparse_moe.experts.76.w1", "model.layers.38.block_sparse_moe.experts.77.w1", "model.layers.38.block_sparse_moe.experts.78.w1", "model.layers.38.block_sparse_moe.experts.79.w1", "model.layers.38.block_sparse_moe.experts.80.w1", "model.layers.38.block_sparse_moe.experts.81.w1", "model.layers.38.block_sparse_moe.experts.82.w1", "model.layers.38.block_sparse_moe.experts.83.w1", "model.layers.38.block_sparse_moe.experts.84.w1", "model.layers.38.block_sparse_moe.experts.85.w1", "model.layers.38.block_sparse_moe.experts.86.w1", "model.layers.38.block_sparse_moe.experts.87.w1", "model.layers.38.block_sparse_moe.experts.88.w1", "model.layers.38.block_sparse_moe.experts.89.w1", "model.layers.38.block_sparse_moe.experts.90.w1", "model.layers.38.block_sparse_moe.experts.91.w1", "model.layers.38.block_sparse_moe.experts.92.w1", "model.layers.38.block_sparse_moe.experts.93.w1", "model.layers.38.block_sparse_moe.experts.94.w1", "model.layers.38.block_sparse_moe.experts.95.w1", "model.layers.38.block_sparse_moe.experts.96.w1", "model.layers.38.block_sparse_moe.experts.97.w1", "model.layers.38.block_sparse_moe.experts.98.w1", "model.layers.38.block_sparse_moe.experts.99.w1", "model.layers.38.block_sparse_moe.experts.100.w1", "model.layers.38.block_sparse_moe.experts.101.w1", "model.layers.38.block_sparse_moe.experts.102.w1", "model.layers.38.block_sparse_moe.experts.103.w1", "model.layers.38.block_sparse_moe.experts.104.w1", "model.layers.38.block_sparse_moe.experts.105.w1", "model.layers.38.block_sparse_moe.experts.106.w1", "model.layers.38.block_sparse_moe.experts.107.w1", "model.layers.38.block_sparse_moe.experts.108.w1", "model.layers.38.block_sparse_moe.experts.109.w1", "model.layers.38.block_sparse_moe.experts.110.w1", "model.layers.38.block_sparse_moe.experts.111.w1", "model.layers.38.block_sparse_moe.experts.112.w1", "model.layers.38.block_sparse_moe.experts.113.w1", "model.layers.38.block_sparse_moe.experts.114.w1", "model.layers.38.block_sparse_moe.experts.115.w1", "model.layers.38.block_sparse_moe.experts.116.w1", "model.layers.38.block_sparse_moe.experts.117.w1", "model.layers.38.block_sparse_moe.experts.118.w1", "model.layers.38.block_sparse_moe.experts.119.w1", "model.layers.38.block_sparse_moe.experts.120.w1", "model.layers.38.block_sparse_moe.experts.121.w1", "model.layers.38.block_sparse_moe.experts.122.w1", "model.layers.38.block_sparse_moe.experts.123.w1", "model.layers.38.block_sparse_moe.experts.124.w1", "model.layers.38.block_sparse_moe.experts.125.w1", "model.layers.38.block_sparse_moe.experts.126.w1", "model.layers.38.block_sparse_moe.experts.127.w1", "model.layers.38.block_sparse_moe.experts.128.w1", "model.layers.38.block_sparse_moe.experts.129.w1", "model.layers.38.block_sparse_moe.experts.130.w1", "model.layers.38.block_sparse_moe.experts.131.w1", "model.layers.38.block_sparse_moe.experts.132.w1", "model.layers.38.block_sparse_moe.experts.133.w1", "model.layers.38.block_sparse_moe.experts.134.w1", "model.layers.38.block_sparse_moe.experts.135.w1", "model.layers.38.block_sparse_moe.experts.136.w1", "model.layers.38.block_sparse_moe.experts.137.w1", "model.layers.38.block_sparse_moe.experts.138.w1", "model.layers.38.block_sparse_moe.experts.139.w1", "model.layers.38.block_sparse_moe.experts.140.w1", "model.layers.38.block_sparse_moe.experts.141.w1", "model.layers.38.block_sparse_moe.experts.142.w1", "model.layers.38.block_sparse_moe.experts.143.w1", "model.layers.38.block_sparse_moe.experts.144.w1", "model.layers.38.block_sparse_moe.experts.145.w1", "model.layers.38.block_sparse_moe.experts.146.w1", "model.layers.38.block_sparse_moe.experts.147.w1", "model.layers.38.block_sparse_moe.experts.148.w1", "model.layers.38.block_sparse_moe.experts.149.w1", "model.layers.38.block_sparse_moe.experts.150.w1", "model.layers.38.block_sparse_moe.experts.151.w1", "model.layers.38.block_sparse_moe.experts.152.w1", "model.layers.38.block_sparse_moe.experts.153.w1", "model.layers.38.block_sparse_moe.experts.154.w1", "model.layers.38.block_sparse_moe.experts.155.w1", "model.layers.38.block_sparse_moe.experts.156.w1", "model.layers.38.block_sparse_moe.experts.157.w1", "model.layers.38.block_sparse_moe.experts.158.w1", "model.layers.38.block_sparse_moe.experts.159.w1", "model.layers.38.block_sparse_moe.experts.160.w1", "model.layers.38.block_sparse_moe.experts.161.w1", "model.layers.38.block_sparse_moe.experts.162.w1", "model.layers.38.block_sparse_moe.experts.163.w1", "model.layers.38.block_sparse_moe.experts.164.w1", "model.layers.38.block_sparse_moe.experts.165.w1", "model.layers.38.block_sparse_moe.experts.166.w1", "model.layers.38.block_sparse_moe.experts.167.w1", "model.layers.38.block_sparse_moe.experts.168.w1", "model.layers.38.block_sparse_moe.experts.169.w1", "model.layers.38.block_sparse_moe.experts.170.w1", "model.layers.38.block_sparse_moe.experts.171.w1", "model.layers.38.block_sparse_moe.experts.172.w1", "model.layers.38.block_sparse_moe.experts.173.w1", "model.layers.38.block_sparse_moe.experts.174.w1", "model.layers.38.block_sparse_moe.experts.175.w1", "model.layers.38.block_sparse_moe.experts.176.w1", "model.layers.38.block_sparse_moe.experts.177.w1", "model.layers.38.block_sparse_moe.experts.178.w1", "model.layers.38.block_sparse_moe.experts.179.w1", "model.layers.38.block_sparse_moe.experts.180.w1", "model.layers.38.block_sparse_moe.experts.181.w1", "model.layers.38.block_sparse_moe.experts.182.w1", "model.layers.38.block_sparse_moe.experts.183.w1", "model.layers.38.block_sparse_moe.experts.184.w1", "model.layers.38.block_sparse_moe.experts.185.w1", "model.layers.38.block_sparse_moe.experts.186.w1", "model.layers.38.block_sparse_moe.experts.187.w1", "model.layers.38.block_sparse_moe.experts.188.w1", "model.layers.38.block_sparse_moe.experts.189.w1", "model.layers.38.block_sparse_moe.experts.190.w1", "model.layers.38.block_sparse_moe.experts.191.w1", "model.layers.38.block_sparse_moe.experts.192.w1", "model.layers.38.block_sparse_moe.experts.193.w1", "model.layers.38.block_sparse_moe.experts.194.w1", "model.layers.38.block_sparse_moe.experts.195.w1", "model.layers.38.block_sparse_moe.experts.196.w1", "model.layers.38.block_sparse_moe.experts.197.w1", "model.layers.38.block_sparse_moe.experts.198.w1", "model.layers.38.block_sparse_moe.experts.199.w1", "model.layers.38.block_sparse_moe.experts.200.w1", "model.layers.38.block_sparse_moe.experts.201.w1", "model.layers.38.block_sparse_moe.experts.202.w1", "model.layers.38.block_sparse_moe.experts.203.w1", "model.layers.38.block_sparse_moe.experts.204.w1", "model.layers.38.block_sparse_moe.experts.205.w1", "model.layers.38.block_sparse_moe.experts.206.w1", "model.layers.38.block_sparse_moe.experts.207.w1", "model.layers.38.block_sparse_moe.experts.208.w1", "model.layers.38.block_sparse_moe.experts.209.w1", "model.layers.38.block_sparse_moe.experts.210.w1", "model.layers.38.block_sparse_moe.experts.211.w1", "model.layers.38.block_sparse_moe.experts.212.w1", "model.layers.38.block_sparse_moe.experts.213.w1", "model.layers.38.block_sparse_moe.experts.214.w1", "model.layers.38.block_sparse_moe.experts.215.w1", "model.layers.38.block_sparse_moe.experts.216.w1", "model.layers.38.block_sparse_moe.experts.217.w1", "model.layers.38.block_sparse_moe.experts.218.w1", "model.layers.38.block_sparse_moe.experts.219.w1", "model.layers.38.block_sparse_moe.experts.220.w1", "model.layers.38.block_sparse_moe.experts.221.w1", "model.layers.38.block_sparse_moe.experts.222.w1", "model.layers.38.block_sparse_moe.experts.223.w1", "model.layers.38.block_sparse_moe.experts.224.w1", "model.layers.38.block_sparse_moe.experts.225.w1", "model.layers.38.block_sparse_moe.experts.226.w1", "model.layers.38.block_sparse_moe.experts.227.w1", "model.layers.38.block_sparse_moe.experts.228.w1", "model.layers.38.block_sparse_moe.experts.229.w1", "model.layers.38.block_sparse_moe.experts.230.w1", "model.layers.38.block_sparse_moe.experts.231.w1", "model.layers.38.block_sparse_moe.experts.232.w1", "model.layers.38.block_sparse_moe.experts.233.w1", "model.layers.38.block_sparse_moe.experts.234.w1", "model.layers.38.block_sparse_moe.experts.235.w1", "model.layers.38.block_sparse_moe.experts.236.w1", "model.layers.38.block_sparse_moe.experts.237.w1", "model.layers.38.block_sparse_moe.experts.238.w1", "model.layers.38.block_sparse_moe.experts.239.w1", "model.layers.38.block_sparse_moe.experts.240.w1", "model.layers.38.block_sparse_moe.experts.241.w1", "model.layers.38.block_sparse_moe.experts.242.w1", "model.layers.38.block_sparse_moe.experts.243.w1", "model.layers.38.block_sparse_moe.experts.244.w1", "model.layers.38.block_sparse_moe.experts.245.w1", "model.layers.38.block_sparse_moe.experts.246.w1", "model.layers.38.block_sparse_moe.experts.247.w1", "model.layers.38.block_sparse_moe.experts.248.w1", "model.layers.38.block_sparse_moe.experts.249.w1", "model.layers.38.block_sparse_moe.experts.250.w1", "model.layers.38.block_sparse_moe.experts.251.w1", "model.layers.38.block_sparse_moe.experts.252.w1", "model.layers.38.block_sparse_moe.experts.253.w1", "model.layers.38.block_sparse_moe.experts.254.w1", "model.layers.38.block_sparse_moe.experts.255.w1", "model.layers.38.block_sparse_moe.experts.0.w3", "model.layers.38.block_sparse_moe.experts.1.w3", "model.layers.38.block_sparse_moe.experts.2.w3", "model.layers.38.block_sparse_moe.experts.3.w3", "model.layers.38.block_sparse_moe.experts.4.w3", "model.layers.38.block_sparse_moe.experts.5.w3", "model.layers.38.block_sparse_moe.experts.6.w3", "model.layers.38.block_sparse_moe.experts.7.w3", "model.layers.38.block_sparse_moe.experts.8.w3", "model.layers.38.block_sparse_moe.experts.9.w3", "model.layers.38.block_sparse_moe.experts.10.w3", "model.layers.38.block_sparse_moe.experts.11.w3", "model.layers.38.block_sparse_moe.experts.12.w3", "model.layers.38.block_sparse_moe.experts.13.w3", "model.layers.38.block_sparse_moe.experts.14.w3", "model.layers.38.block_sparse_moe.experts.15.w3", "model.layers.38.block_sparse_moe.experts.16.w3", "model.layers.38.block_sparse_moe.experts.17.w3", "model.layers.38.block_sparse_moe.experts.18.w3", "model.layers.38.block_sparse_moe.experts.19.w3", "model.layers.38.block_sparse_moe.experts.20.w3", "model.layers.38.block_sparse_moe.experts.21.w3", "model.layers.38.block_sparse_moe.experts.22.w3", "model.layers.38.block_sparse_moe.experts.23.w3", "model.layers.38.block_sparse_moe.experts.24.w3", "model.layers.38.block_sparse_moe.experts.25.w3", "model.layers.38.block_sparse_moe.experts.26.w3", "model.layers.38.block_sparse_moe.experts.27.w3", "model.layers.38.block_sparse_moe.experts.28.w3", "model.layers.38.block_sparse_moe.experts.29.w3", "model.layers.38.block_sparse_moe.experts.30.w3", "model.layers.38.block_sparse_moe.experts.31.w3", "model.layers.38.block_sparse_moe.experts.32.w3", "model.layers.38.block_sparse_moe.experts.33.w3", "model.layers.38.block_sparse_moe.experts.34.w3", "model.layers.38.block_sparse_moe.experts.35.w3", "model.layers.38.block_sparse_moe.experts.36.w3", "model.layers.38.block_sparse_moe.experts.37.w3", "model.layers.38.block_sparse_moe.experts.38.w3", "model.layers.38.block_sparse_moe.experts.39.w3", "model.layers.38.block_sparse_moe.experts.40.w3", "model.layers.38.block_sparse_moe.experts.41.w3", "model.layers.38.block_sparse_moe.experts.42.w3", "model.layers.38.block_sparse_moe.experts.43.w3", "model.layers.38.block_sparse_moe.experts.44.w3", "model.layers.38.block_sparse_moe.experts.45.w3", "model.layers.38.block_sparse_moe.experts.46.w3", "model.layers.38.block_sparse_moe.experts.47.w3", "model.layers.38.block_sparse_moe.experts.48.w3", "model.layers.38.block_sparse_moe.experts.49.w3", "model.layers.38.block_sparse_moe.experts.50.w3", "model.layers.38.block_sparse_moe.experts.51.w3", "model.layers.38.block_sparse_moe.experts.52.w3", "model.layers.38.block_sparse_moe.experts.53.w3", "model.layers.38.block_sparse_moe.experts.54.w3", "model.layers.38.block_sparse_moe.experts.55.w3", "model.layers.38.block_sparse_moe.experts.56.w3", "model.layers.38.block_sparse_moe.experts.57.w3", "model.layers.38.block_sparse_moe.experts.58.w3", "model.layers.38.block_sparse_moe.experts.59.w3", "model.layers.38.block_sparse_moe.experts.60.w3", "model.layers.38.block_sparse_moe.experts.61.w3", "model.layers.38.block_sparse_moe.experts.62.w3", "model.layers.38.block_sparse_moe.experts.63.w3", "model.layers.38.block_sparse_moe.experts.64.w3", "model.layers.38.block_sparse_moe.experts.65.w3", "model.layers.38.block_sparse_moe.experts.66.w3", "model.layers.38.block_sparse_moe.experts.67.w3", "model.layers.38.block_sparse_moe.experts.68.w3", "model.layers.38.block_sparse_moe.experts.69.w3", "model.layers.38.block_sparse_moe.experts.70.w3", "model.layers.38.block_sparse_moe.experts.71.w3", "model.layers.38.block_sparse_moe.experts.72.w3", "model.layers.38.block_sparse_moe.experts.73.w3", "model.layers.38.block_sparse_moe.experts.74.w3", "model.layers.38.block_sparse_moe.experts.75.w3", "model.layers.38.block_sparse_moe.experts.76.w3", "model.layers.38.block_sparse_moe.experts.77.w3", "model.layers.38.block_sparse_moe.experts.78.w3", "model.layers.38.block_sparse_moe.experts.79.w3", "model.layers.38.block_sparse_moe.experts.80.w3", "model.layers.38.block_sparse_moe.experts.81.w3", "model.layers.38.block_sparse_moe.experts.82.w3", "model.layers.38.block_sparse_moe.experts.83.w3", "model.layers.38.block_sparse_moe.experts.84.w3", "model.layers.38.block_sparse_moe.experts.85.w3", "model.layers.38.block_sparse_moe.experts.86.w3", "model.layers.38.block_sparse_moe.experts.87.w3", "model.layers.38.block_sparse_moe.experts.88.w3", "model.layers.38.block_sparse_moe.experts.89.w3", "model.layers.38.block_sparse_moe.experts.90.w3", "model.layers.38.block_sparse_moe.experts.91.w3", "model.layers.38.block_sparse_moe.experts.92.w3", "model.layers.38.block_sparse_moe.experts.93.w3", "model.layers.38.block_sparse_moe.experts.94.w3", "model.layers.38.block_sparse_moe.experts.95.w3", "model.layers.38.block_sparse_moe.experts.96.w3", "model.layers.38.block_sparse_moe.experts.97.w3", "model.layers.38.block_sparse_moe.experts.98.w3", "model.layers.38.block_sparse_moe.experts.99.w3", "model.layers.38.block_sparse_moe.experts.100.w3", "model.layers.38.block_sparse_moe.experts.101.w3", "model.layers.38.block_sparse_moe.experts.102.w3", "model.layers.38.block_sparse_moe.experts.103.w3", "model.layers.38.block_sparse_moe.experts.104.w3", "model.layers.38.block_sparse_moe.experts.105.w3", "model.layers.38.block_sparse_moe.experts.106.w3", "model.layers.38.block_sparse_moe.experts.107.w3", "model.layers.38.block_sparse_moe.experts.108.w3", "model.layers.38.block_sparse_moe.experts.109.w3", "model.layers.38.block_sparse_moe.experts.110.w3", "model.layers.38.block_sparse_moe.experts.111.w3", "model.layers.38.block_sparse_moe.experts.112.w3", "model.layers.38.block_sparse_moe.experts.113.w3", "model.layers.38.block_sparse_moe.experts.114.w3", "model.layers.38.block_sparse_moe.experts.115.w3", "model.layers.38.block_sparse_moe.experts.116.w3", "model.layers.38.block_sparse_moe.experts.117.w3", "model.layers.38.block_sparse_moe.experts.118.w3", "model.layers.38.block_sparse_moe.experts.119.w3", "model.layers.38.block_sparse_moe.experts.120.w3", "model.layers.38.block_sparse_moe.experts.121.w3", "model.layers.38.block_sparse_moe.experts.122.w3", "model.layers.38.block_sparse_moe.experts.123.w3", "model.layers.38.block_sparse_moe.experts.124.w3", "model.layers.38.block_sparse_moe.experts.125.w3", "model.layers.38.block_sparse_moe.experts.126.w3", "model.layers.38.block_sparse_moe.experts.127.w3", "model.layers.38.block_sparse_moe.experts.128.w3", "model.layers.38.block_sparse_moe.experts.129.w3", "model.layers.38.block_sparse_moe.experts.130.w3", "model.layers.38.block_sparse_moe.experts.131.w3", "model.layers.38.block_sparse_moe.experts.132.w3", "model.layers.38.block_sparse_moe.experts.133.w3", "model.layers.38.block_sparse_moe.experts.134.w3", "model.layers.38.block_sparse_moe.experts.135.w3", "model.layers.38.block_sparse_moe.experts.136.w3", "model.layers.38.block_sparse_moe.experts.137.w3", "model.layers.38.block_sparse_moe.experts.138.w3", "model.layers.38.block_sparse_moe.experts.139.w3", "model.layers.38.block_sparse_moe.experts.140.w3", "model.layers.38.block_sparse_moe.experts.141.w3", "model.layers.38.block_sparse_moe.experts.142.w3", "model.layers.38.block_sparse_moe.experts.143.w3", "model.layers.38.block_sparse_moe.experts.144.w3", "model.layers.38.block_sparse_moe.experts.145.w3", "model.layers.38.block_sparse_moe.experts.146.w3", "model.layers.38.block_sparse_moe.experts.147.w3", "model.layers.38.block_sparse_moe.experts.148.w3", "model.layers.38.block_sparse_moe.experts.149.w3", "model.layers.38.block_sparse_moe.experts.150.w3", "model.layers.38.block_sparse_moe.experts.151.w3", "model.layers.38.block_sparse_moe.experts.152.w3", "model.layers.38.block_sparse_moe.experts.153.w3", "model.layers.38.block_sparse_moe.experts.154.w3", "model.layers.38.block_sparse_moe.experts.155.w3", "model.layers.38.block_sparse_moe.experts.156.w3", "model.layers.38.block_sparse_moe.experts.157.w3", "model.layers.38.block_sparse_moe.experts.158.w3", "model.layers.38.block_sparse_moe.experts.159.w3", "model.layers.38.block_sparse_moe.experts.160.w3", "model.layers.38.block_sparse_moe.experts.161.w3", "model.layers.38.block_sparse_moe.experts.162.w3", "model.layers.38.block_sparse_moe.experts.163.w3", "model.layers.38.block_sparse_moe.experts.164.w3", "model.layers.38.block_sparse_moe.experts.165.w3", "model.layers.38.block_sparse_moe.experts.166.w3", "model.layers.38.block_sparse_moe.experts.167.w3", "model.layers.38.block_sparse_moe.experts.168.w3", "model.layers.38.block_sparse_moe.experts.169.w3", "model.layers.38.block_sparse_moe.experts.170.w3", "model.layers.38.block_sparse_moe.experts.171.w3", "model.layers.38.block_sparse_moe.experts.172.w3", "model.layers.38.block_sparse_moe.experts.173.w3", "model.layers.38.block_sparse_moe.experts.174.w3", "model.layers.38.block_sparse_moe.experts.175.w3", "model.layers.38.block_sparse_moe.experts.176.w3", "model.layers.38.block_sparse_moe.experts.177.w3", "model.layers.38.block_sparse_moe.experts.178.w3", "model.layers.38.block_sparse_moe.experts.179.w3", "model.layers.38.block_sparse_moe.experts.180.w3", "model.layers.38.block_sparse_moe.experts.181.w3", "model.layers.38.block_sparse_moe.experts.182.w3", "model.layers.38.block_sparse_moe.experts.183.w3", "model.layers.38.block_sparse_moe.experts.184.w3", "model.layers.38.block_sparse_moe.experts.185.w3", "model.layers.38.block_sparse_moe.experts.186.w3", "model.layers.38.block_sparse_moe.experts.187.w3", "model.layers.38.block_sparse_moe.experts.188.w3", "model.layers.38.block_sparse_moe.experts.189.w3", "model.layers.38.block_sparse_moe.experts.190.w3", "model.layers.38.block_sparse_moe.experts.191.w3", "model.layers.38.block_sparse_moe.experts.192.w3", "model.layers.38.block_sparse_moe.experts.193.w3", "model.layers.38.block_sparse_moe.experts.194.w3", "model.layers.38.block_sparse_moe.experts.195.w3", "model.layers.38.block_sparse_moe.experts.196.w3", "model.layers.38.block_sparse_moe.experts.197.w3", "model.layers.38.block_sparse_moe.experts.198.w3", "model.layers.38.block_sparse_moe.experts.199.w3", "model.layers.38.block_sparse_moe.experts.200.w3", "model.layers.38.block_sparse_moe.experts.201.w3", "model.layers.38.block_sparse_moe.experts.202.w3", "model.layers.38.block_sparse_moe.experts.203.w3", "model.layers.38.block_sparse_moe.experts.204.w3", "model.layers.38.block_sparse_moe.experts.205.w3", "model.layers.38.block_sparse_moe.experts.206.w3", "model.layers.38.block_sparse_moe.experts.207.w3", "model.layers.38.block_sparse_moe.experts.208.w3", "model.layers.38.block_sparse_moe.experts.209.w3", "model.layers.38.block_sparse_moe.experts.210.w3", "model.layers.38.block_sparse_moe.experts.211.w3", "model.layers.38.block_sparse_moe.experts.212.w3", "model.layers.38.block_sparse_moe.experts.213.w3", "model.layers.38.block_sparse_moe.experts.214.w3", "model.layers.38.block_sparse_moe.experts.215.w3", "model.layers.38.block_sparse_moe.experts.216.w3", "model.layers.38.block_sparse_moe.experts.217.w3", "model.layers.38.block_sparse_moe.experts.218.w3", "model.layers.38.block_sparse_moe.experts.219.w3", "model.layers.38.block_sparse_moe.experts.220.w3", "model.layers.38.block_sparse_moe.experts.221.w3", "model.layers.38.block_sparse_moe.experts.222.w3", "model.layers.38.block_sparse_moe.experts.223.w3", "model.layers.38.block_sparse_moe.experts.224.w3", "model.layers.38.block_sparse_moe.experts.225.w3", "model.layers.38.block_sparse_moe.experts.226.w3", "model.layers.38.block_sparse_moe.experts.227.w3", "model.layers.38.block_sparse_moe.experts.228.w3", "model.layers.38.block_sparse_moe.experts.229.w3", "model.layers.38.block_sparse_moe.experts.230.w3", "model.layers.38.block_sparse_moe.experts.231.w3", "model.layers.38.block_sparse_moe.experts.232.w3", "model.layers.38.block_sparse_moe.experts.233.w3", "model.layers.38.block_sparse_moe.experts.234.w3", "model.layers.38.block_sparse_moe.experts.235.w3", "model.layers.38.block_sparse_moe.experts.236.w3", "model.layers.38.block_sparse_moe.experts.237.w3", "model.layers.38.block_sparse_moe.experts.238.w3", "model.layers.38.block_sparse_moe.experts.239.w3", "model.layers.38.block_sparse_moe.experts.240.w3", "model.layers.38.block_sparse_moe.experts.241.w3", "model.layers.38.block_sparse_moe.experts.242.w3", "model.layers.38.block_sparse_moe.experts.243.w3", "model.layers.38.block_sparse_moe.experts.244.w3", "model.layers.38.block_sparse_moe.experts.245.w3", "model.layers.38.block_sparse_moe.experts.246.w3", "model.layers.38.block_sparse_moe.experts.247.w3", "model.layers.38.block_sparse_moe.experts.248.w3", "model.layers.38.block_sparse_moe.experts.249.w3", "model.layers.38.block_sparse_moe.experts.250.w3", "model.layers.38.block_sparse_moe.experts.251.w3", "model.layers.38.block_sparse_moe.experts.252.w3", "model.layers.38.block_sparse_moe.experts.253.w3", "model.layers.38.block_sparse_moe.experts.254.w3", "model.layers.38.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -8.015148341655731e-05, "dbits": 2415919104 } ] }, { "idx": 194, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w2", "model.layers.38.block_sparse_moe.experts.1.w2", "model.layers.38.block_sparse_moe.experts.2.w2", "model.layers.38.block_sparse_moe.experts.3.w2", "model.layers.38.block_sparse_moe.experts.4.w2", "model.layers.38.block_sparse_moe.experts.5.w2", "model.layers.38.block_sparse_moe.experts.6.w2", "model.layers.38.block_sparse_moe.experts.7.w2", "model.layers.38.block_sparse_moe.experts.8.w2", "model.layers.38.block_sparse_moe.experts.9.w2", "model.layers.38.block_sparse_moe.experts.10.w2", "model.layers.38.block_sparse_moe.experts.11.w2", "model.layers.38.block_sparse_moe.experts.12.w2", "model.layers.38.block_sparse_moe.experts.13.w2", "model.layers.38.block_sparse_moe.experts.14.w2", "model.layers.38.block_sparse_moe.experts.15.w2", "model.layers.38.block_sparse_moe.experts.16.w2", "model.layers.38.block_sparse_moe.experts.17.w2", "model.layers.38.block_sparse_moe.experts.18.w2", "model.layers.38.block_sparse_moe.experts.19.w2", "model.layers.38.block_sparse_moe.experts.20.w2", "model.layers.38.block_sparse_moe.experts.21.w2", "model.layers.38.block_sparse_moe.experts.22.w2", "model.layers.38.block_sparse_moe.experts.23.w2", "model.layers.38.block_sparse_moe.experts.24.w2", "model.layers.38.block_sparse_moe.experts.25.w2", "model.layers.38.block_sparse_moe.experts.26.w2", "model.layers.38.block_sparse_moe.experts.27.w2", "model.layers.38.block_sparse_moe.experts.28.w2", "model.layers.38.block_sparse_moe.experts.29.w2", "model.layers.38.block_sparse_moe.experts.30.w2", "model.layers.38.block_sparse_moe.experts.31.w2", "model.layers.38.block_sparse_moe.experts.32.w2", "model.layers.38.block_sparse_moe.experts.33.w2", "model.layers.38.block_sparse_moe.experts.34.w2", "model.layers.38.block_sparse_moe.experts.35.w2", "model.layers.38.block_sparse_moe.experts.36.w2", "model.layers.38.block_sparse_moe.experts.37.w2", "model.layers.38.block_sparse_moe.experts.38.w2", "model.layers.38.block_sparse_moe.experts.39.w2", "model.layers.38.block_sparse_moe.experts.40.w2", "model.layers.38.block_sparse_moe.experts.41.w2", "model.layers.38.block_sparse_moe.experts.42.w2", "model.layers.38.block_sparse_moe.experts.43.w2", "model.layers.38.block_sparse_moe.experts.44.w2", "model.layers.38.block_sparse_moe.experts.45.w2", "model.layers.38.block_sparse_moe.experts.46.w2", "model.layers.38.block_sparse_moe.experts.47.w2", "model.layers.38.block_sparse_moe.experts.48.w2", "model.layers.38.block_sparse_moe.experts.49.w2", "model.layers.38.block_sparse_moe.experts.50.w2", "model.layers.38.block_sparse_moe.experts.51.w2", "model.layers.38.block_sparse_moe.experts.52.w2", "model.layers.38.block_sparse_moe.experts.53.w2", "model.layers.38.block_sparse_moe.experts.54.w2", "model.layers.38.block_sparse_moe.experts.55.w2", "model.layers.38.block_sparse_moe.experts.56.w2", "model.layers.38.block_sparse_moe.experts.57.w2", "model.layers.38.block_sparse_moe.experts.58.w2", "model.layers.38.block_sparse_moe.experts.59.w2", "model.layers.38.block_sparse_moe.experts.60.w2", "model.layers.38.block_sparse_moe.experts.61.w2", "model.layers.38.block_sparse_moe.experts.62.w2", "model.layers.38.block_sparse_moe.experts.63.w2", "model.layers.38.block_sparse_moe.experts.64.w2", "model.layers.38.block_sparse_moe.experts.65.w2", "model.layers.38.block_sparse_moe.experts.66.w2", "model.layers.38.block_sparse_moe.experts.67.w2", "model.layers.38.block_sparse_moe.experts.68.w2", "model.layers.38.block_sparse_moe.experts.69.w2", "model.layers.38.block_sparse_moe.experts.70.w2", "model.layers.38.block_sparse_moe.experts.71.w2", "model.layers.38.block_sparse_moe.experts.72.w2", "model.layers.38.block_sparse_moe.experts.73.w2", "model.layers.38.block_sparse_moe.experts.74.w2", "model.layers.38.block_sparse_moe.experts.75.w2", "model.layers.38.block_sparse_moe.experts.76.w2", "model.layers.38.block_sparse_moe.experts.77.w2", "model.layers.38.block_sparse_moe.experts.78.w2", "model.layers.38.block_sparse_moe.experts.79.w2", "model.layers.38.block_sparse_moe.experts.80.w2", "model.layers.38.block_sparse_moe.experts.81.w2", "model.layers.38.block_sparse_moe.experts.82.w2", "model.layers.38.block_sparse_moe.experts.83.w2", "model.layers.38.block_sparse_moe.experts.84.w2", "model.layers.38.block_sparse_moe.experts.85.w2", "model.layers.38.block_sparse_moe.experts.86.w2", "model.layers.38.block_sparse_moe.experts.87.w2", "model.layers.38.block_sparse_moe.experts.88.w2", "model.layers.38.block_sparse_moe.experts.89.w2", "model.layers.38.block_sparse_moe.experts.90.w2", "model.layers.38.block_sparse_moe.experts.91.w2", "model.layers.38.block_sparse_moe.experts.92.w2", "model.layers.38.block_sparse_moe.experts.93.w2", "model.layers.38.block_sparse_moe.experts.94.w2", "model.layers.38.block_sparse_moe.experts.95.w2", "model.layers.38.block_sparse_moe.experts.96.w2", "model.layers.38.block_sparse_moe.experts.97.w2", "model.layers.38.block_sparse_moe.experts.98.w2", "model.layers.38.block_sparse_moe.experts.99.w2", "model.layers.38.block_sparse_moe.experts.100.w2", "model.layers.38.block_sparse_moe.experts.101.w2", "model.layers.38.block_sparse_moe.experts.102.w2", "model.layers.38.block_sparse_moe.experts.103.w2", "model.layers.38.block_sparse_moe.experts.104.w2", "model.layers.38.block_sparse_moe.experts.105.w2", "model.layers.38.block_sparse_moe.experts.106.w2", "model.layers.38.block_sparse_moe.experts.107.w2", "model.layers.38.block_sparse_moe.experts.108.w2", "model.layers.38.block_sparse_moe.experts.109.w2", "model.layers.38.block_sparse_moe.experts.110.w2", "model.layers.38.block_sparse_moe.experts.111.w2", "model.layers.38.block_sparse_moe.experts.112.w2", "model.layers.38.block_sparse_moe.experts.113.w2", "model.layers.38.block_sparse_moe.experts.114.w2", "model.layers.38.block_sparse_moe.experts.115.w2", "model.layers.38.block_sparse_moe.experts.116.w2", "model.layers.38.block_sparse_moe.experts.117.w2", "model.layers.38.block_sparse_moe.experts.118.w2", "model.layers.38.block_sparse_moe.experts.119.w2", "model.layers.38.block_sparse_moe.experts.120.w2", "model.layers.38.block_sparse_moe.experts.121.w2", "model.layers.38.block_sparse_moe.experts.122.w2", "model.layers.38.block_sparse_moe.experts.123.w2", "model.layers.38.block_sparse_moe.experts.124.w2", "model.layers.38.block_sparse_moe.experts.125.w2", "model.layers.38.block_sparse_moe.experts.126.w2", "model.layers.38.block_sparse_moe.experts.127.w2", "model.layers.38.block_sparse_moe.experts.128.w2", "model.layers.38.block_sparse_moe.experts.129.w2", "model.layers.38.block_sparse_moe.experts.130.w2", "model.layers.38.block_sparse_moe.experts.131.w2", "model.layers.38.block_sparse_moe.experts.132.w2", "model.layers.38.block_sparse_moe.experts.133.w2", "model.layers.38.block_sparse_moe.experts.134.w2", "model.layers.38.block_sparse_moe.experts.135.w2", "model.layers.38.block_sparse_moe.experts.136.w2", "model.layers.38.block_sparse_moe.experts.137.w2", "model.layers.38.block_sparse_moe.experts.138.w2", "model.layers.38.block_sparse_moe.experts.139.w2", "model.layers.38.block_sparse_moe.experts.140.w2", "model.layers.38.block_sparse_moe.experts.141.w2", "model.layers.38.block_sparse_moe.experts.142.w2", "model.layers.38.block_sparse_moe.experts.143.w2", "model.layers.38.block_sparse_moe.experts.144.w2", "model.layers.38.block_sparse_moe.experts.145.w2", "model.layers.38.block_sparse_moe.experts.146.w2", "model.layers.38.block_sparse_moe.experts.147.w2", "model.layers.38.block_sparse_moe.experts.148.w2", "model.layers.38.block_sparse_moe.experts.149.w2", "model.layers.38.block_sparse_moe.experts.150.w2", "model.layers.38.block_sparse_moe.experts.151.w2", "model.layers.38.block_sparse_moe.experts.152.w2", "model.layers.38.block_sparse_moe.experts.153.w2", "model.layers.38.block_sparse_moe.experts.154.w2", "model.layers.38.block_sparse_moe.experts.155.w2", "model.layers.38.block_sparse_moe.experts.156.w2", "model.layers.38.block_sparse_moe.experts.157.w2", "model.layers.38.block_sparse_moe.experts.158.w2", "model.layers.38.block_sparse_moe.experts.159.w2", "model.layers.38.block_sparse_moe.experts.160.w2", "model.layers.38.block_sparse_moe.experts.161.w2", "model.layers.38.block_sparse_moe.experts.162.w2", "model.layers.38.block_sparse_moe.experts.163.w2", "model.layers.38.block_sparse_moe.experts.164.w2", "model.layers.38.block_sparse_moe.experts.165.w2", "model.layers.38.block_sparse_moe.experts.166.w2", "model.layers.38.block_sparse_moe.experts.167.w2", "model.layers.38.block_sparse_moe.experts.168.w2", "model.layers.38.block_sparse_moe.experts.169.w2", "model.layers.38.block_sparse_moe.experts.170.w2", "model.layers.38.block_sparse_moe.experts.171.w2", "model.layers.38.block_sparse_moe.experts.172.w2", "model.layers.38.block_sparse_moe.experts.173.w2", "model.layers.38.block_sparse_moe.experts.174.w2", "model.layers.38.block_sparse_moe.experts.175.w2", "model.layers.38.block_sparse_moe.experts.176.w2", "model.layers.38.block_sparse_moe.experts.177.w2", "model.layers.38.block_sparse_moe.experts.178.w2", "model.layers.38.block_sparse_moe.experts.179.w2", "model.layers.38.block_sparse_moe.experts.180.w2", "model.layers.38.block_sparse_moe.experts.181.w2", "model.layers.38.block_sparse_moe.experts.182.w2", "model.layers.38.block_sparse_moe.experts.183.w2", "model.layers.38.block_sparse_moe.experts.184.w2", "model.layers.38.block_sparse_moe.experts.185.w2", "model.layers.38.block_sparse_moe.experts.186.w2", "model.layers.38.block_sparse_moe.experts.187.w2", "model.layers.38.block_sparse_moe.experts.188.w2", "model.layers.38.block_sparse_moe.experts.189.w2", "model.layers.38.block_sparse_moe.experts.190.w2", "model.layers.38.block_sparse_moe.experts.191.w2", "model.layers.38.block_sparse_moe.experts.192.w2", "model.layers.38.block_sparse_moe.experts.193.w2", "model.layers.38.block_sparse_moe.experts.194.w2", "model.layers.38.block_sparse_moe.experts.195.w2", "model.layers.38.block_sparse_moe.experts.196.w2", "model.layers.38.block_sparse_moe.experts.197.w2", "model.layers.38.block_sparse_moe.experts.198.w2", "model.layers.38.block_sparse_moe.experts.199.w2", "model.layers.38.block_sparse_moe.experts.200.w2", "model.layers.38.block_sparse_moe.experts.201.w2", "model.layers.38.block_sparse_moe.experts.202.w2", "model.layers.38.block_sparse_moe.experts.203.w2", "model.layers.38.block_sparse_moe.experts.204.w2", "model.layers.38.block_sparse_moe.experts.205.w2", "model.layers.38.block_sparse_moe.experts.206.w2", "model.layers.38.block_sparse_moe.experts.207.w2", "model.layers.38.block_sparse_moe.experts.208.w2", "model.layers.38.block_sparse_moe.experts.209.w2", "model.layers.38.block_sparse_moe.experts.210.w2", "model.layers.38.block_sparse_moe.experts.211.w2", "model.layers.38.block_sparse_moe.experts.212.w2", "model.layers.38.block_sparse_moe.experts.213.w2", "model.layers.38.block_sparse_moe.experts.214.w2", "model.layers.38.block_sparse_moe.experts.215.w2", "model.layers.38.block_sparse_moe.experts.216.w2", "model.layers.38.block_sparse_moe.experts.217.w2", "model.layers.38.block_sparse_moe.experts.218.w2", "model.layers.38.block_sparse_moe.experts.219.w2", "model.layers.38.block_sparse_moe.experts.220.w2", "model.layers.38.block_sparse_moe.experts.221.w2", "model.layers.38.block_sparse_moe.experts.222.w2", "model.layers.38.block_sparse_moe.experts.223.w2", "model.layers.38.block_sparse_moe.experts.224.w2", "model.layers.38.block_sparse_moe.experts.225.w2", "model.layers.38.block_sparse_moe.experts.226.w2", "model.layers.38.block_sparse_moe.experts.227.w2", "model.layers.38.block_sparse_moe.experts.228.w2", "model.layers.38.block_sparse_moe.experts.229.w2", "model.layers.38.block_sparse_moe.experts.230.w2", "model.layers.38.block_sparse_moe.experts.231.w2", "model.layers.38.block_sparse_moe.experts.232.w2", "model.layers.38.block_sparse_moe.experts.233.w2", "model.layers.38.block_sparse_moe.experts.234.w2", "model.layers.38.block_sparse_moe.experts.235.w2", "model.layers.38.block_sparse_moe.experts.236.w2", "model.layers.38.block_sparse_moe.experts.237.w2", "model.layers.38.block_sparse_moe.experts.238.w2", "model.layers.38.block_sparse_moe.experts.239.w2", "model.layers.38.block_sparse_moe.experts.240.w2", "model.layers.38.block_sparse_moe.experts.241.w2", "model.layers.38.block_sparse_moe.experts.242.w2", "model.layers.38.block_sparse_moe.experts.243.w2", "model.layers.38.block_sparse_moe.experts.244.w2", "model.layers.38.block_sparse_moe.experts.245.w2", "model.layers.38.block_sparse_moe.experts.246.w2", "model.layers.38.block_sparse_moe.experts.247.w2", "model.layers.38.block_sparse_moe.experts.248.w2", "model.layers.38.block_sparse_moe.experts.249.w2", "model.layers.38.block_sparse_moe.experts.250.w2", "model.layers.38.block_sparse_moe.experts.251.w2", "model.layers.38.block_sparse_moe.experts.252.w2", "model.layers.38.block_sparse_moe.experts.253.w2", "model.layers.38.block_sparse_moe.experts.254.w2", "model.layers.38.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00010077934712171555, "dbits": 1207959552 } ] }, { "idx": 195, "layers": [ "model.layers.39.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00018573515117167871, "dbits": 18874368 } ] }, { "idx": 196, "layers": [ "model.layers.39.self_attn.k_proj", "model.layers.39.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0008473016321659116, "dbits": 6291456 } ] }, { "idx": 197, "layers": [ "model.layers.39.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0003904413431882886, "dbits": 18874368 } ] }, { "idx": 198, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w1", "model.layers.39.block_sparse_moe.experts.1.w1", "model.layers.39.block_sparse_moe.experts.2.w1", "model.layers.39.block_sparse_moe.experts.3.w1", "model.layers.39.block_sparse_moe.experts.4.w1", "model.layers.39.block_sparse_moe.experts.5.w1", "model.layers.39.block_sparse_moe.experts.6.w1", "model.layers.39.block_sparse_moe.experts.7.w1", "model.layers.39.block_sparse_moe.experts.8.w1", "model.layers.39.block_sparse_moe.experts.9.w1", "model.layers.39.block_sparse_moe.experts.10.w1", "model.layers.39.block_sparse_moe.experts.11.w1", "model.layers.39.block_sparse_moe.experts.12.w1", "model.layers.39.block_sparse_moe.experts.13.w1", "model.layers.39.block_sparse_moe.experts.14.w1", "model.layers.39.block_sparse_moe.experts.15.w1", "model.layers.39.block_sparse_moe.experts.16.w1", "model.layers.39.block_sparse_moe.experts.17.w1", "model.layers.39.block_sparse_moe.experts.18.w1", "model.layers.39.block_sparse_moe.experts.19.w1", "model.layers.39.block_sparse_moe.experts.20.w1", "model.layers.39.block_sparse_moe.experts.21.w1", "model.layers.39.block_sparse_moe.experts.22.w1", "model.layers.39.block_sparse_moe.experts.23.w1", "model.layers.39.block_sparse_moe.experts.24.w1", "model.layers.39.block_sparse_moe.experts.25.w1", "model.layers.39.block_sparse_moe.experts.26.w1", "model.layers.39.block_sparse_moe.experts.27.w1", "model.layers.39.block_sparse_moe.experts.28.w1", "model.layers.39.block_sparse_moe.experts.29.w1", "model.layers.39.block_sparse_moe.experts.30.w1", "model.layers.39.block_sparse_moe.experts.31.w1", "model.layers.39.block_sparse_moe.experts.32.w1", "model.layers.39.block_sparse_moe.experts.33.w1", "model.layers.39.block_sparse_moe.experts.34.w1", "model.layers.39.block_sparse_moe.experts.35.w1", "model.layers.39.block_sparse_moe.experts.36.w1", "model.layers.39.block_sparse_moe.experts.37.w1", "model.layers.39.block_sparse_moe.experts.38.w1", "model.layers.39.block_sparse_moe.experts.39.w1", "model.layers.39.block_sparse_moe.experts.40.w1", "model.layers.39.block_sparse_moe.experts.41.w1", "model.layers.39.block_sparse_moe.experts.42.w1", "model.layers.39.block_sparse_moe.experts.43.w1", "model.layers.39.block_sparse_moe.experts.44.w1", "model.layers.39.block_sparse_moe.experts.45.w1", "model.layers.39.block_sparse_moe.experts.46.w1", "model.layers.39.block_sparse_moe.experts.47.w1", "model.layers.39.block_sparse_moe.experts.48.w1", "model.layers.39.block_sparse_moe.experts.49.w1", "model.layers.39.block_sparse_moe.experts.50.w1", "model.layers.39.block_sparse_moe.experts.51.w1", "model.layers.39.block_sparse_moe.experts.52.w1", "model.layers.39.block_sparse_moe.experts.53.w1", "model.layers.39.block_sparse_moe.experts.54.w1", "model.layers.39.block_sparse_moe.experts.55.w1", "model.layers.39.block_sparse_moe.experts.56.w1", "model.layers.39.block_sparse_moe.experts.57.w1", "model.layers.39.block_sparse_moe.experts.58.w1", "model.layers.39.block_sparse_moe.experts.59.w1", "model.layers.39.block_sparse_moe.experts.60.w1", "model.layers.39.block_sparse_moe.experts.61.w1", "model.layers.39.block_sparse_moe.experts.62.w1", "model.layers.39.block_sparse_moe.experts.63.w1", "model.layers.39.block_sparse_moe.experts.64.w1", "model.layers.39.block_sparse_moe.experts.65.w1", "model.layers.39.block_sparse_moe.experts.66.w1", "model.layers.39.block_sparse_moe.experts.67.w1", "model.layers.39.block_sparse_moe.experts.68.w1", "model.layers.39.block_sparse_moe.experts.69.w1", "model.layers.39.block_sparse_moe.experts.70.w1", "model.layers.39.block_sparse_moe.experts.71.w1", "model.layers.39.block_sparse_moe.experts.72.w1", "model.layers.39.block_sparse_moe.experts.73.w1", "model.layers.39.block_sparse_moe.experts.74.w1", "model.layers.39.block_sparse_moe.experts.75.w1", "model.layers.39.block_sparse_moe.experts.76.w1", "model.layers.39.block_sparse_moe.experts.77.w1", "model.layers.39.block_sparse_moe.experts.78.w1", "model.layers.39.block_sparse_moe.experts.79.w1", "model.layers.39.block_sparse_moe.experts.80.w1", "model.layers.39.block_sparse_moe.experts.81.w1", "model.layers.39.block_sparse_moe.experts.82.w1", "model.layers.39.block_sparse_moe.experts.83.w1", "model.layers.39.block_sparse_moe.experts.84.w1", "model.layers.39.block_sparse_moe.experts.85.w1", "model.layers.39.block_sparse_moe.experts.86.w1", "model.layers.39.block_sparse_moe.experts.87.w1", "model.layers.39.block_sparse_moe.experts.88.w1", "model.layers.39.block_sparse_moe.experts.89.w1", "model.layers.39.block_sparse_moe.experts.90.w1", "model.layers.39.block_sparse_moe.experts.91.w1", "model.layers.39.block_sparse_moe.experts.92.w1", "model.layers.39.block_sparse_moe.experts.93.w1", "model.layers.39.block_sparse_moe.experts.94.w1", "model.layers.39.block_sparse_moe.experts.95.w1", "model.layers.39.block_sparse_moe.experts.96.w1", "model.layers.39.block_sparse_moe.experts.97.w1", "model.layers.39.block_sparse_moe.experts.98.w1", "model.layers.39.block_sparse_moe.experts.99.w1", "model.layers.39.block_sparse_moe.experts.100.w1", "model.layers.39.block_sparse_moe.experts.101.w1", "model.layers.39.block_sparse_moe.experts.102.w1", "model.layers.39.block_sparse_moe.experts.103.w1", "model.layers.39.block_sparse_moe.experts.104.w1", "model.layers.39.block_sparse_moe.experts.105.w1", "model.layers.39.block_sparse_moe.experts.106.w1", "model.layers.39.block_sparse_moe.experts.107.w1", "model.layers.39.block_sparse_moe.experts.108.w1", "model.layers.39.block_sparse_moe.experts.109.w1", "model.layers.39.block_sparse_moe.experts.110.w1", "model.layers.39.block_sparse_moe.experts.111.w1", "model.layers.39.block_sparse_moe.experts.112.w1", "model.layers.39.block_sparse_moe.experts.113.w1", "model.layers.39.block_sparse_moe.experts.114.w1", "model.layers.39.block_sparse_moe.experts.115.w1", "model.layers.39.block_sparse_moe.experts.116.w1", "model.layers.39.block_sparse_moe.experts.117.w1", "model.layers.39.block_sparse_moe.experts.118.w1", "model.layers.39.block_sparse_moe.experts.119.w1", "model.layers.39.block_sparse_moe.experts.120.w1", "model.layers.39.block_sparse_moe.experts.121.w1", "model.layers.39.block_sparse_moe.experts.122.w1", "model.layers.39.block_sparse_moe.experts.123.w1", "model.layers.39.block_sparse_moe.experts.124.w1", "model.layers.39.block_sparse_moe.experts.125.w1", "model.layers.39.block_sparse_moe.experts.126.w1", "model.layers.39.block_sparse_moe.experts.127.w1", "model.layers.39.block_sparse_moe.experts.128.w1", "model.layers.39.block_sparse_moe.experts.129.w1", "model.layers.39.block_sparse_moe.experts.130.w1", "model.layers.39.block_sparse_moe.experts.131.w1", "model.layers.39.block_sparse_moe.experts.132.w1", "model.layers.39.block_sparse_moe.experts.133.w1", "model.layers.39.block_sparse_moe.experts.134.w1", "model.layers.39.block_sparse_moe.experts.135.w1", "model.layers.39.block_sparse_moe.experts.136.w1", "model.layers.39.block_sparse_moe.experts.137.w1", "model.layers.39.block_sparse_moe.experts.138.w1", "model.layers.39.block_sparse_moe.experts.139.w1", "model.layers.39.block_sparse_moe.experts.140.w1", "model.layers.39.block_sparse_moe.experts.141.w1", "model.layers.39.block_sparse_moe.experts.142.w1", "model.layers.39.block_sparse_moe.experts.143.w1", "model.layers.39.block_sparse_moe.experts.144.w1", "model.layers.39.block_sparse_moe.experts.145.w1", "model.layers.39.block_sparse_moe.experts.146.w1", "model.layers.39.block_sparse_moe.experts.147.w1", "model.layers.39.block_sparse_moe.experts.148.w1", "model.layers.39.block_sparse_moe.experts.149.w1", "model.layers.39.block_sparse_moe.experts.150.w1", "model.layers.39.block_sparse_moe.experts.151.w1", "model.layers.39.block_sparse_moe.experts.152.w1", "model.layers.39.block_sparse_moe.experts.153.w1", "model.layers.39.block_sparse_moe.experts.154.w1", "model.layers.39.block_sparse_moe.experts.155.w1", "model.layers.39.block_sparse_moe.experts.156.w1", "model.layers.39.block_sparse_moe.experts.157.w1", "model.layers.39.block_sparse_moe.experts.158.w1", "model.layers.39.block_sparse_moe.experts.159.w1", "model.layers.39.block_sparse_moe.experts.160.w1", "model.layers.39.block_sparse_moe.experts.161.w1", "model.layers.39.block_sparse_moe.experts.162.w1", "model.layers.39.block_sparse_moe.experts.163.w1", "model.layers.39.block_sparse_moe.experts.164.w1", "model.layers.39.block_sparse_moe.experts.165.w1", "model.layers.39.block_sparse_moe.experts.166.w1", "model.layers.39.block_sparse_moe.experts.167.w1", "model.layers.39.block_sparse_moe.experts.168.w1", "model.layers.39.block_sparse_moe.experts.169.w1", "model.layers.39.block_sparse_moe.experts.170.w1", "model.layers.39.block_sparse_moe.experts.171.w1", "model.layers.39.block_sparse_moe.experts.172.w1", "model.layers.39.block_sparse_moe.experts.173.w1", "model.layers.39.block_sparse_moe.experts.174.w1", "model.layers.39.block_sparse_moe.experts.175.w1", "model.layers.39.block_sparse_moe.experts.176.w1", "model.layers.39.block_sparse_moe.experts.177.w1", "model.layers.39.block_sparse_moe.experts.178.w1", "model.layers.39.block_sparse_moe.experts.179.w1", "model.layers.39.block_sparse_moe.experts.180.w1", "model.layers.39.block_sparse_moe.experts.181.w1", "model.layers.39.block_sparse_moe.experts.182.w1", "model.layers.39.block_sparse_moe.experts.183.w1", "model.layers.39.block_sparse_moe.experts.184.w1", "model.layers.39.block_sparse_moe.experts.185.w1", "model.layers.39.block_sparse_moe.experts.186.w1", "model.layers.39.block_sparse_moe.experts.187.w1", "model.layers.39.block_sparse_moe.experts.188.w1", "model.layers.39.block_sparse_moe.experts.189.w1", "model.layers.39.block_sparse_moe.experts.190.w1", "model.layers.39.block_sparse_moe.experts.191.w1", "model.layers.39.block_sparse_moe.experts.192.w1", "model.layers.39.block_sparse_moe.experts.193.w1", "model.layers.39.block_sparse_moe.experts.194.w1", "model.layers.39.block_sparse_moe.experts.195.w1", "model.layers.39.block_sparse_moe.experts.196.w1", "model.layers.39.block_sparse_moe.experts.197.w1", "model.layers.39.block_sparse_moe.experts.198.w1", "model.layers.39.block_sparse_moe.experts.199.w1", "model.layers.39.block_sparse_moe.experts.200.w1", "model.layers.39.block_sparse_moe.experts.201.w1", "model.layers.39.block_sparse_moe.experts.202.w1", "model.layers.39.block_sparse_moe.experts.203.w1", "model.layers.39.block_sparse_moe.experts.204.w1", "model.layers.39.block_sparse_moe.experts.205.w1", "model.layers.39.block_sparse_moe.experts.206.w1", "model.layers.39.block_sparse_moe.experts.207.w1", "model.layers.39.block_sparse_moe.experts.208.w1", "model.layers.39.block_sparse_moe.experts.209.w1", "model.layers.39.block_sparse_moe.experts.210.w1", "model.layers.39.block_sparse_moe.experts.211.w1", "model.layers.39.block_sparse_moe.experts.212.w1", "model.layers.39.block_sparse_moe.experts.213.w1", "model.layers.39.block_sparse_moe.experts.214.w1", "model.layers.39.block_sparse_moe.experts.215.w1", "model.layers.39.block_sparse_moe.experts.216.w1", "model.layers.39.block_sparse_moe.experts.217.w1", "model.layers.39.block_sparse_moe.experts.218.w1", "model.layers.39.block_sparse_moe.experts.219.w1", "model.layers.39.block_sparse_moe.experts.220.w1", "model.layers.39.block_sparse_moe.experts.221.w1", "model.layers.39.block_sparse_moe.experts.222.w1", "model.layers.39.block_sparse_moe.experts.223.w1", "model.layers.39.block_sparse_moe.experts.224.w1", "model.layers.39.block_sparse_moe.experts.225.w1", "model.layers.39.block_sparse_moe.experts.226.w1", "model.layers.39.block_sparse_moe.experts.227.w1", "model.layers.39.block_sparse_moe.experts.228.w1", "model.layers.39.block_sparse_moe.experts.229.w1", "model.layers.39.block_sparse_moe.experts.230.w1", "model.layers.39.block_sparse_moe.experts.231.w1", "model.layers.39.block_sparse_moe.experts.232.w1", "model.layers.39.block_sparse_moe.experts.233.w1", "model.layers.39.block_sparse_moe.experts.234.w1", "model.layers.39.block_sparse_moe.experts.235.w1", "model.layers.39.block_sparse_moe.experts.236.w1", "model.layers.39.block_sparse_moe.experts.237.w1", "model.layers.39.block_sparse_moe.experts.238.w1", "model.layers.39.block_sparse_moe.experts.239.w1", "model.layers.39.block_sparse_moe.experts.240.w1", "model.layers.39.block_sparse_moe.experts.241.w1", "model.layers.39.block_sparse_moe.experts.242.w1", "model.layers.39.block_sparse_moe.experts.243.w1", "model.layers.39.block_sparse_moe.experts.244.w1", "model.layers.39.block_sparse_moe.experts.245.w1", "model.layers.39.block_sparse_moe.experts.246.w1", "model.layers.39.block_sparse_moe.experts.247.w1", "model.layers.39.block_sparse_moe.experts.248.w1", "model.layers.39.block_sparse_moe.experts.249.w1", "model.layers.39.block_sparse_moe.experts.250.w1", "model.layers.39.block_sparse_moe.experts.251.w1", "model.layers.39.block_sparse_moe.experts.252.w1", "model.layers.39.block_sparse_moe.experts.253.w1", "model.layers.39.block_sparse_moe.experts.254.w1", "model.layers.39.block_sparse_moe.experts.255.w1", "model.layers.39.block_sparse_moe.experts.0.w3", "model.layers.39.block_sparse_moe.experts.1.w3", "model.layers.39.block_sparse_moe.experts.2.w3", "model.layers.39.block_sparse_moe.experts.3.w3", "model.layers.39.block_sparse_moe.experts.4.w3", "model.layers.39.block_sparse_moe.experts.5.w3", "model.layers.39.block_sparse_moe.experts.6.w3", "model.layers.39.block_sparse_moe.experts.7.w3", "model.layers.39.block_sparse_moe.experts.8.w3", "model.layers.39.block_sparse_moe.experts.9.w3", "model.layers.39.block_sparse_moe.experts.10.w3", "model.layers.39.block_sparse_moe.experts.11.w3", "model.layers.39.block_sparse_moe.experts.12.w3", "model.layers.39.block_sparse_moe.experts.13.w3", "model.layers.39.block_sparse_moe.experts.14.w3", "model.layers.39.block_sparse_moe.experts.15.w3", "model.layers.39.block_sparse_moe.experts.16.w3", "model.layers.39.block_sparse_moe.experts.17.w3", "model.layers.39.block_sparse_moe.experts.18.w3", "model.layers.39.block_sparse_moe.experts.19.w3", "model.layers.39.block_sparse_moe.experts.20.w3", "model.layers.39.block_sparse_moe.experts.21.w3", "model.layers.39.block_sparse_moe.experts.22.w3", "model.layers.39.block_sparse_moe.experts.23.w3", "model.layers.39.block_sparse_moe.experts.24.w3", "model.layers.39.block_sparse_moe.experts.25.w3", "model.layers.39.block_sparse_moe.experts.26.w3", "model.layers.39.block_sparse_moe.experts.27.w3", "model.layers.39.block_sparse_moe.experts.28.w3", "model.layers.39.block_sparse_moe.experts.29.w3", "model.layers.39.block_sparse_moe.experts.30.w3", "model.layers.39.block_sparse_moe.experts.31.w3", "model.layers.39.block_sparse_moe.experts.32.w3", "model.layers.39.block_sparse_moe.experts.33.w3", "model.layers.39.block_sparse_moe.experts.34.w3", "model.layers.39.block_sparse_moe.experts.35.w3", "model.layers.39.block_sparse_moe.experts.36.w3", "model.layers.39.block_sparse_moe.experts.37.w3", "model.layers.39.block_sparse_moe.experts.38.w3", "model.layers.39.block_sparse_moe.experts.39.w3", "model.layers.39.block_sparse_moe.experts.40.w3", "model.layers.39.block_sparse_moe.experts.41.w3", "model.layers.39.block_sparse_moe.experts.42.w3", "model.layers.39.block_sparse_moe.experts.43.w3", "model.layers.39.block_sparse_moe.experts.44.w3", "model.layers.39.block_sparse_moe.experts.45.w3", "model.layers.39.block_sparse_moe.experts.46.w3", "model.layers.39.block_sparse_moe.experts.47.w3", "model.layers.39.block_sparse_moe.experts.48.w3", "model.layers.39.block_sparse_moe.experts.49.w3", "model.layers.39.block_sparse_moe.experts.50.w3", "model.layers.39.block_sparse_moe.experts.51.w3", "model.layers.39.block_sparse_moe.experts.52.w3", "model.layers.39.block_sparse_moe.experts.53.w3", "model.layers.39.block_sparse_moe.experts.54.w3", "model.layers.39.block_sparse_moe.experts.55.w3", "model.layers.39.block_sparse_moe.experts.56.w3", "model.layers.39.block_sparse_moe.experts.57.w3", "model.layers.39.block_sparse_moe.experts.58.w3", "model.layers.39.block_sparse_moe.experts.59.w3", "model.layers.39.block_sparse_moe.experts.60.w3", "model.layers.39.block_sparse_moe.experts.61.w3", "model.layers.39.block_sparse_moe.experts.62.w3", "model.layers.39.block_sparse_moe.experts.63.w3", "model.layers.39.block_sparse_moe.experts.64.w3", "model.layers.39.block_sparse_moe.experts.65.w3", "model.layers.39.block_sparse_moe.experts.66.w3", "model.layers.39.block_sparse_moe.experts.67.w3", "model.layers.39.block_sparse_moe.experts.68.w3", "model.layers.39.block_sparse_moe.experts.69.w3", "model.layers.39.block_sparse_moe.experts.70.w3", "model.layers.39.block_sparse_moe.experts.71.w3", "model.layers.39.block_sparse_moe.experts.72.w3", "model.layers.39.block_sparse_moe.experts.73.w3", "model.layers.39.block_sparse_moe.experts.74.w3", "model.layers.39.block_sparse_moe.experts.75.w3", "model.layers.39.block_sparse_moe.experts.76.w3", "model.layers.39.block_sparse_moe.experts.77.w3", "model.layers.39.block_sparse_moe.experts.78.w3", "model.layers.39.block_sparse_moe.experts.79.w3", "model.layers.39.block_sparse_moe.experts.80.w3", "model.layers.39.block_sparse_moe.experts.81.w3", "model.layers.39.block_sparse_moe.experts.82.w3", "model.layers.39.block_sparse_moe.experts.83.w3", "model.layers.39.block_sparse_moe.experts.84.w3", "model.layers.39.block_sparse_moe.experts.85.w3", "model.layers.39.block_sparse_moe.experts.86.w3", "model.layers.39.block_sparse_moe.experts.87.w3", "model.layers.39.block_sparse_moe.experts.88.w3", "model.layers.39.block_sparse_moe.experts.89.w3", "model.layers.39.block_sparse_moe.experts.90.w3", "model.layers.39.block_sparse_moe.experts.91.w3", "model.layers.39.block_sparse_moe.experts.92.w3", "model.layers.39.block_sparse_moe.experts.93.w3", "model.layers.39.block_sparse_moe.experts.94.w3", "model.layers.39.block_sparse_moe.experts.95.w3", "model.layers.39.block_sparse_moe.experts.96.w3", "model.layers.39.block_sparse_moe.experts.97.w3", "model.layers.39.block_sparse_moe.experts.98.w3", "model.layers.39.block_sparse_moe.experts.99.w3", "model.layers.39.block_sparse_moe.experts.100.w3", "model.layers.39.block_sparse_moe.experts.101.w3", "model.layers.39.block_sparse_moe.experts.102.w3", "model.layers.39.block_sparse_moe.experts.103.w3", "model.layers.39.block_sparse_moe.experts.104.w3", "model.layers.39.block_sparse_moe.experts.105.w3", "model.layers.39.block_sparse_moe.experts.106.w3", "model.layers.39.block_sparse_moe.experts.107.w3", "model.layers.39.block_sparse_moe.experts.108.w3", "model.layers.39.block_sparse_moe.experts.109.w3", "model.layers.39.block_sparse_moe.experts.110.w3", "model.layers.39.block_sparse_moe.experts.111.w3", "model.layers.39.block_sparse_moe.experts.112.w3", "model.layers.39.block_sparse_moe.experts.113.w3", "model.layers.39.block_sparse_moe.experts.114.w3", "model.layers.39.block_sparse_moe.experts.115.w3", "model.layers.39.block_sparse_moe.experts.116.w3", "model.layers.39.block_sparse_moe.experts.117.w3", "model.layers.39.block_sparse_moe.experts.118.w3", "model.layers.39.block_sparse_moe.experts.119.w3", "model.layers.39.block_sparse_moe.experts.120.w3", "model.layers.39.block_sparse_moe.experts.121.w3", "model.layers.39.block_sparse_moe.experts.122.w3", "model.layers.39.block_sparse_moe.experts.123.w3", "model.layers.39.block_sparse_moe.experts.124.w3", "model.layers.39.block_sparse_moe.experts.125.w3", "model.layers.39.block_sparse_moe.experts.126.w3", "model.layers.39.block_sparse_moe.experts.127.w3", "model.layers.39.block_sparse_moe.experts.128.w3", "model.layers.39.block_sparse_moe.experts.129.w3", "model.layers.39.block_sparse_moe.experts.130.w3", "model.layers.39.block_sparse_moe.experts.131.w3", "model.layers.39.block_sparse_moe.experts.132.w3", "model.layers.39.block_sparse_moe.experts.133.w3", "model.layers.39.block_sparse_moe.experts.134.w3", "model.layers.39.block_sparse_moe.experts.135.w3", "model.layers.39.block_sparse_moe.experts.136.w3", "model.layers.39.block_sparse_moe.experts.137.w3", "model.layers.39.block_sparse_moe.experts.138.w3", "model.layers.39.block_sparse_moe.experts.139.w3", "model.layers.39.block_sparse_moe.experts.140.w3", "model.layers.39.block_sparse_moe.experts.141.w3", "model.layers.39.block_sparse_moe.experts.142.w3", "model.layers.39.block_sparse_moe.experts.143.w3", "model.layers.39.block_sparse_moe.experts.144.w3", "model.layers.39.block_sparse_moe.experts.145.w3", "model.layers.39.block_sparse_moe.experts.146.w3", "model.layers.39.block_sparse_moe.experts.147.w3", "model.layers.39.block_sparse_moe.experts.148.w3", "model.layers.39.block_sparse_moe.experts.149.w3", "model.layers.39.block_sparse_moe.experts.150.w3", "model.layers.39.block_sparse_moe.experts.151.w3", "model.layers.39.block_sparse_moe.experts.152.w3", "model.layers.39.block_sparse_moe.experts.153.w3", "model.layers.39.block_sparse_moe.experts.154.w3", "model.layers.39.block_sparse_moe.experts.155.w3", "model.layers.39.block_sparse_moe.experts.156.w3", "model.layers.39.block_sparse_moe.experts.157.w3", "model.layers.39.block_sparse_moe.experts.158.w3", "model.layers.39.block_sparse_moe.experts.159.w3", "model.layers.39.block_sparse_moe.experts.160.w3", "model.layers.39.block_sparse_moe.experts.161.w3", "model.layers.39.block_sparse_moe.experts.162.w3", "model.layers.39.block_sparse_moe.experts.163.w3", "model.layers.39.block_sparse_moe.experts.164.w3", "model.layers.39.block_sparse_moe.experts.165.w3", "model.layers.39.block_sparse_moe.experts.166.w3", "model.layers.39.block_sparse_moe.experts.167.w3", "model.layers.39.block_sparse_moe.experts.168.w3", "model.layers.39.block_sparse_moe.experts.169.w3", "model.layers.39.block_sparse_moe.experts.170.w3", "model.layers.39.block_sparse_moe.experts.171.w3", "model.layers.39.block_sparse_moe.experts.172.w3", "model.layers.39.block_sparse_moe.experts.173.w3", "model.layers.39.block_sparse_moe.experts.174.w3", "model.layers.39.block_sparse_moe.experts.175.w3", "model.layers.39.block_sparse_moe.experts.176.w3", "model.layers.39.block_sparse_moe.experts.177.w3", "model.layers.39.block_sparse_moe.experts.178.w3", "model.layers.39.block_sparse_moe.experts.179.w3", "model.layers.39.block_sparse_moe.experts.180.w3", "model.layers.39.block_sparse_moe.experts.181.w3", "model.layers.39.block_sparse_moe.experts.182.w3", "model.layers.39.block_sparse_moe.experts.183.w3", "model.layers.39.block_sparse_moe.experts.184.w3", "model.layers.39.block_sparse_moe.experts.185.w3", "model.layers.39.block_sparse_moe.experts.186.w3", "model.layers.39.block_sparse_moe.experts.187.w3", "model.layers.39.block_sparse_moe.experts.188.w3", "model.layers.39.block_sparse_moe.experts.189.w3", "model.layers.39.block_sparse_moe.experts.190.w3", "model.layers.39.block_sparse_moe.experts.191.w3", "model.layers.39.block_sparse_moe.experts.192.w3", "model.layers.39.block_sparse_moe.experts.193.w3", "model.layers.39.block_sparse_moe.experts.194.w3", "model.layers.39.block_sparse_moe.experts.195.w3", "model.layers.39.block_sparse_moe.experts.196.w3", "model.layers.39.block_sparse_moe.experts.197.w3", "model.layers.39.block_sparse_moe.experts.198.w3", "model.layers.39.block_sparse_moe.experts.199.w3", "model.layers.39.block_sparse_moe.experts.200.w3", "model.layers.39.block_sparse_moe.experts.201.w3", "model.layers.39.block_sparse_moe.experts.202.w3", "model.layers.39.block_sparse_moe.experts.203.w3", "model.layers.39.block_sparse_moe.experts.204.w3", "model.layers.39.block_sparse_moe.experts.205.w3", "model.layers.39.block_sparse_moe.experts.206.w3", "model.layers.39.block_sparse_moe.experts.207.w3", "model.layers.39.block_sparse_moe.experts.208.w3", "model.layers.39.block_sparse_moe.experts.209.w3", "model.layers.39.block_sparse_moe.experts.210.w3", "model.layers.39.block_sparse_moe.experts.211.w3", "model.layers.39.block_sparse_moe.experts.212.w3", "model.layers.39.block_sparse_moe.experts.213.w3", "model.layers.39.block_sparse_moe.experts.214.w3", "model.layers.39.block_sparse_moe.experts.215.w3", "model.layers.39.block_sparse_moe.experts.216.w3", "model.layers.39.block_sparse_moe.experts.217.w3", "model.layers.39.block_sparse_moe.experts.218.w3", "model.layers.39.block_sparse_moe.experts.219.w3", "model.layers.39.block_sparse_moe.experts.220.w3", "model.layers.39.block_sparse_moe.experts.221.w3", "model.layers.39.block_sparse_moe.experts.222.w3", "model.layers.39.block_sparse_moe.experts.223.w3", "model.layers.39.block_sparse_moe.experts.224.w3", "model.layers.39.block_sparse_moe.experts.225.w3", "model.layers.39.block_sparse_moe.experts.226.w3", "model.layers.39.block_sparse_moe.experts.227.w3", "model.layers.39.block_sparse_moe.experts.228.w3", "model.layers.39.block_sparse_moe.experts.229.w3", "model.layers.39.block_sparse_moe.experts.230.w3", "model.layers.39.block_sparse_moe.experts.231.w3", "model.layers.39.block_sparse_moe.experts.232.w3", "model.layers.39.block_sparse_moe.experts.233.w3", "model.layers.39.block_sparse_moe.experts.234.w3", "model.layers.39.block_sparse_moe.experts.235.w3", "model.layers.39.block_sparse_moe.experts.236.w3", "model.layers.39.block_sparse_moe.experts.237.w3", "model.layers.39.block_sparse_moe.experts.238.w3", "model.layers.39.block_sparse_moe.experts.239.w3", "model.layers.39.block_sparse_moe.experts.240.w3", "model.layers.39.block_sparse_moe.experts.241.w3", "model.layers.39.block_sparse_moe.experts.242.w3", "model.layers.39.block_sparse_moe.experts.243.w3", "model.layers.39.block_sparse_moe.experts.244.w3", "model.layers.39.block_sparse_moe.experts.245.w3", "model.layers.39.block_sparse_moe.experts.246.w3", "model.layers.39.block_sparse_moe.experts.247.w3", "model.layers.39.block_sparse_moe.experts.248.w3", "model.layers.39.block_sparse_moe.experts.249.w3", "model.layers.39.block_sparse_moe.experts.250.w3", "model.layers.39.block_sparse_moe.experts.251.w3", "model.layers.39.block_sparse_moe.experts.252.w3", "model.layers.39.block_sparse_moe.experts.253.w3", "model.layers.39.block_sparse_moe.experts.254.w3", "model.layers.39.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00010288096964360116, "dbits": 2415919104 } ] }, { "idx": 199, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w2", "model.layers.39.block_sparse_moe.experts.1.w2", "model.layers.39.block_sparse_moe.experts.2.w2", "model.layers.39.block_sparse_moe.experts.3.w2", "model.layers.39.block_sparse_moe.experts.4.w2", "model.layers.39.block_sparse_moe.experts.5.w2", "model.layers.39.block_sparse_moe.experts.6.w2", "model.layers.39.block_sparse_moe.experts.7.w2", "model.layers.39.block_sparse_moe.experts.8.w2", "model.layers.39.block_sparse_moe.experts.9.w2", "model.layers.39.block_sparse_moe.experts.10.w2", "model.layers.39.block_sparse_moe.experts.11.w2", "model.layers.39.block_sparse_moe.experts.12.w2", "model.layers.39.block_sparse_moe.experts.13.w2", "model.layers.39.block_sparse_moe.experts.14.w2", "model.layers.39.block_sparse_moe.experts.15.w2", "model.layers.39.block_sparse_moe.experts.16.w2", "model.layers.39.block_sparse_moe.experts.17.w2", "model.layers.39.block_sparse_moe.experts.18.w2", "model.layers.39.block_sparse_moe.experts.19.w2", "model.layers.39.block_sparse_moe.experts.20.w2", "model.layers.39.block_sparse_moe.experts.21.w2", "model.layers.39.block_sparse_moe.experts.22.w2", "model.layers.39.block_sparse_moe.experts.23.w2", "model.layers.39.block_sparse_moe.experts.24.w2", "model.layers.39.block_sparse_moe.experts.25.w2", "model.layers.39.block_sparse_moe.experts.26.w2", "model.layers.39.block_sparse_moe.experts.27.w2", "model.layers.39.block_sparse_moe.experts.28.w2", "model.layers.39.block_sparse_moe.experts.29.w2", "model.layers.39.block_sparse_moe.experts.30.w2", "model.layers.39.block_sparse_moe.experts.31.w2", "model.layers.39.block_sparse_moe.experts.32.w2", "model.layers.39.block_sparse_moe.experts.33.w2", "model.layers.39.block_sparse_moe.experts.34.w2", "model.layers.39.block_sparse_moe.experts.35.w2", "model.layers.39.block_sparse_moe.experts.36.w2", "model.layers.39.block_sparse_moe.experts.37.w2", "model.layers.39.block_sparse_moe.experts.38.w2", "model.layers.39.block_sparse_moe.experts.39.w2", "model.layers.39.block_sparse_moe.experts.40.w2", "model.layers.39.block_sparse_moe.experts.41.w2", "model.layers.39.block_sparse_moe.experts.42.w2", "model.layers.39.block_sparse_moe.experts.43.w2", "model.layers.39.block_sparse_moe.experts.44.w2", "model.layers.39.block_sparse_moe.experts.45.w2", "model.layers.39.block_sparse_moe.experts.46.w2", "model.layers.39.block_sparse_moe.experts.47.w2", "model.layers.39.block_sparse_moe.experts.48.w2", "model.layers.39.block_sparse_moe.experts.49.w2", "model.layers.39.block_sparse_moe.experts.50.w2", "model.layers.39.block_sparse_moe.experts.51.w2", "model.layers.39.block_sparse_moe.experts.52.w2", "model.layers.39.block_sparse_moe.experts.53.w2", "model.layers.39.block_sparse_moe.experts.54.w2", "model.layers.39.block_sparse_moe.experts.55.w2", "model.layers.39.block_sparse_moe.experts.56.w2", "model.layers.39.block_sparse_moe.experts.57.w2", "model.layers.39.block_sparse_moe.experts.58.w2", "model.layers.39.block_sparse_moe.experts.59.w2", "model.layers.39.block_sparse_moe.experts.60.w2", "model.layers.39.block_sparse_moe.experts.61.w2", "model.layers.39.block_sparse_moe.experts.62.w2", "model.layers.39.block_sparse_moe.experts.63.w2", "model.layers.39.block_sparse_moe.experts.64.w2", "model.layers.39.block_sparse_moe.experts.65.w2", "model.layers.39.block_sparse_moe.experts.66.w2", "model.layers.39.block_sparse_moe.experts.67.w2", "model.layers.39.block_sparse_moe.experts.68.w2", "model.layers.39.block_sparse_moe.experts.69.w2", "model.layers.39.block_sparse_moe.experts.70.w2", "model.layers.39.block_sparse_moe.experts.71.w2", "model.layers.39.block_sparse_moe.experts.72.w2", "model.layers.39.block_sparse_moe.experts.73.w2", "model.layers.39.block_sparse_moe.experts.74.w2", "model.layers.39.block_sparse_moe.experts.75.w2", "model.layers.39.block_sparse_moe.experts.76.w2", "model.layers.39.block_sparse_moe.experts.77.w2", "model.layers.39.block_sparse_moe.experts.78.w2", "model.layers.39.block_sparse_moe.experts.79.w2", "model.layers.39.block_sparse_moe.experts.80.w2", "model.layers.39.block_sparse_moe.experts.81.w2", "model.layers.39.block_sparse_moe.experts.82.w2", "model.layers.39.block_sparse_moe.experts.83.w2", "model.layers.39.block_sparse_moe.experts.84.w2", "model.layers.39.block_sparse_moe.experts.85.w2", "model.layers.39.block_sparse_moe.experts.86.w2", "model.layers.39.block_sparse_moe.experts.87.w2", "model.layers.39.block_sparse_moe.experts.88.w2", "model.layers.39.block_sparse_moe.experts.89.w2", "model.layers.39.block_sparse_moe.experts.90.w2", "model.layers.39.block_sparse_moe.experts.91.w2", "model.layers.39.block_sparse_moe.experts.92.w2", "model.layers.39.block_sparse_moe.experts.93.w2", "model.layers.39.block_sparse_moe.experts.94.w2", "model.layers.39.block_sparse_moe.experts.95.w2", "model.layers.39.block_sparse_moe.experts.96.w2", "model.layers.39.block_sparse_moe.experts.97.w2", "model.layers.39.block_sparse_moe.experts.98.w2", "model.layers.39.block_sparse_moe.experts.99.w2", "model.layers.39.block_sparse_moe.experts.100.w2", "model.layers.39.block_sparse_moe.experts.101.w2", "model.layers.39.block_sparse_moe.experts.102.w2", "model.layers.39.block_sparse_moe.experts.103.w2", "model.layers.39.block_sparse_moe.experts.104.w2", "model.layers.39.block_sparse_moe.experts.105.w2", "model.layers.39.block_sparse_moe.experts.106.w2", "model.layers.39.block_sparse_moe.experts.107.w2", "model.layers.39.block_sparse_moe.experts.108.w2", "model.layers.39.block_sparse_moe.experts.109.w2", "model.layers.39.block_sparse_moe.experts.110.w2", "model.layers.39.block_sparse_moe.experts.111.w2", "model.layers.39.block_sparse_moe.experts.112.w2", "model.layers.39.block_sparse_moe.experts.113.w2", "model.layers.39.block_sparse_moe.experts.114.w2", "model.layers.39.block_sparse_moe.experts.115.w2", "model.layers.39.block_sparse_moe.experts.116.w2", "model.layers.39.block_sparse_moe.experts.117.w2", "model.layers.39.block_sparse_moe.experts.118.w2", "model.layers.39.block_sparse_moe.experts.119.w2", "model.layers.39.block_sparse_moe.experts.120.w2", "model.layers.39.block_sparse_moe.experts.121.w2", "model.layers.39.block_sparse_moe.experts.122.w2", "model.layers.39.block_sparse_moe.experts.123.w2", "model.layers.39.block_sparse_moe.experts.124.w2", "model.layers.39.block_sparse_moe.experts.125.w2", "model.layers.39.block_sparse_moe.experts.126.w2", "model.layers.39.block_sparse_moe.experts.127.w2", "model.layers.39.block_sparse_moe.experts.128.w2", "model.layers.39.block_sparse_moe.experts.129.w2", "model.layers.39.block_sparse_moe.experts.130.w2", "model.layers.39.block_sparse_moe.experts.131.w2", "model.layers.39.block_sparse_moe.experts.132.w2", "model.layers.39.block_sparse_moe.experts.133.w2", "model.layers.39.block_sparse_moe.experts.134.w2", "model.layers.39.block_sparse_moe.experts.135.w2", "model.layers.39.block_sparse_moe.experts.136.w2", "model.layers.39.block_sparse_moe.experts.137.w2", "model.layers.39.block_sparse_moe.experts.138.w2", "model.layers.39.block_sparse_moe.experts.139.w2", "model.layers.39.block_sparse_moe.experts.140.w2", "model.layers.39.block_sparse_moe.experts.141.w2", "model.layers.39.block_sparse_moe.experts.142.w2", "model.layers.39.block_sparse_moe.experts.143.w2", "model.layers.39.block_sparse_moe.experts.144.w2", "model.layers.39.block_sparse_moe.experts.145.w2", "model.layers.39.block_sparse_moe.experts.146.w2", "model.layers.39.block_sparse_moe.experts.147.w2", "model.layers.39.block_sparse_moe.experts.148.w2", "model.layers.39.block_sparse_moe.experts.149.w2", "model.layers.39.block_sparse_moe.experts.150.w2", "model.layers.39.block_sparse_moe.experts.151.w2", "model.layers.39.block_sparse_moe.experts.152.w2", "model.layers.39.block_sparse_moe.experts.153.w2", "model.layers.39.block_sparse_moe.experts.154.w2", "model.layers.39.block_sparse_moe.experts.155.w2", "model.layers.39.block_sparse_moe.experts.156.w2", "model.layers.39.block_sparse_moe.experts.157.w2", "model.layers.39.block_sparse_moe.experts.158.w2", "model.layers.39.block_sparse_moe.experts.159.w2", "model.layers.39.block_sparse_moe.experts.160.w2", "model.layers.39.block_sparse_moe.experts.161.w2", "model.layers.39.block_sparse_moe.experts.162.w2", "model.layers.39.block_sparse_moe.experts.163.w2", "model.layers.39.block_sparse_moe.experts.164.w2", "model.layers.39.block_sparse_moe.experts.165.w2", "model.layers.39.block_sparse_moe.experts.166.w2", "model.layers.39.block_sparse_moe.experts.167.w2", "model.layers.39.block_sparse_moe.experts.168.w2", "model.layers.39.block_sparse_moe.experts.169.w2", "model.layers.39.block_sparse_moe.experts.170.w2", "model.layers.39.block_sparse_moe.experts.171.w2", "model.layers.39.block_sparse_moe.experts.172.w2", "model.layers.39.block_sparse_moe.experts.173.w2", "model.layers.39.block_sparse_moe.experts.174.w2", "model.layers.39.block_sparse_moe.experts.175.w2", "model.layers.39.block_sparse_moe.experts.176.w2", "model.layers.39.block_sparse_moe.experts.177.w2", "model.layers.39.block_sparse_moe.experts.178.w2", "model.layers.39.block_sparse_moe.experts.179.w2", "model.layers.39.block_sparse_moe.experts.180.w2", "model.layers.39.block_sparse_moe.experts.181.w2", "model.layers.39.block_sparse_moe.experts.182.w2", "model.layers.39.block_sparse_moe.experts.183.w2", "model.layers.39.block_sparse_moe.experts.184.w2", "model.layers.39.block_sparse_moe.experts.185.w2", "model.layers.39.block_sparse_moe.experts.186.w2", "model.layers.39.block_sparse_moe.experts.187.w2", "model.layers.39.block_sparse_moe.experts.188.w2", "model.layers.39.block_sparse_moe.experts.189.w2", "model.layers.39.block_sparse_moe.experts.190.w2", "model.layers.39.block_sparse_moe.experts.191.w2", "model.layers.39.block_sparse_moe.experts.192.w2", "model.layers.39.block_sparse_moe.experts.193.w2", "model.layers.39.block_sparse_moe.experts.194.w2", "model.layers.39.block_sparse_moe.experts.195.w2", "model.layers.39.block_sparse_moe.experts.196.w2", "model.layers.39.block_sparse_moe.experts.197.w2", "model.layers.39.block_sparse_moe.experts.198.w2", "model.layers.39.block_sparse_moe.experts.199.w2", "model.layers.39.block_sparse_moe.experts.200.w2", "model.layers.39.block_sparse_moe.experts.201.w2", "model.layers.39.block_sparse_moe.experts.202.w2", "model.layers.39.block_sparse_moe.experts.203.w2", "model.layers.39.block_sparse_moe.experts.204.w2", "model.layers.39.block_sparse_moe.experts.205.w2", "model.layers.39.block_sparse_moe.experts.206.w2", "model.layers.39.block_sparse_moe.experts.207.w2", "model.layers.39.block_sparse_moe.experts.208.w2", "model.layers.39.block_sparse_moe.experts.209.w2", "model.layers.39.block_sparse_moe.experts.210.w2", "model.layers.39.block_sparse_moe.experts.211.w2", "model.layers.39.block_sparse_moe.experts.212.w2", "model.layers.39.block_sparse_moe.experts.213.w2", "model.layers.39.block_sparse_moe.experts.214.w2", "model.layers.39.block_sparse_moe.experts.215.w2", "model.layers.39.block_sparse_moe.experts.216.w2", "model.layers.39.block_sparse_moe.experts.217.w2", "model.layers.39.block_sparse_moe.experts.218.w2", "model.layers.39.block_sparse_moe.experts.219.w2", "model.layers.39.block_sparse_moe.experts.220.w2", "model.layers.39.block_sparse_moe.experts.221.w2", "model.layers.39.block_sparse_moe.experts.222.w2", "model.layers.39.block_sparse_moe.experts.223.w2", "model.layers.39.block_sparse_moe.experts.224.w2", "model.layers.39.block_sparse_moe.experts.225.w2", "model.layers.39.block_sparse_moe.experts.226.w2", "model.layers.39.block_sparse_moe.experts.227.w2", "model.layers.39.block_sparse_moe.experts.228.w2", "model.layers.39.block_sparse_moe.experts.229.w2", "model.layers.39.block_sparse_moe.experts.230.w2", "model.layers.39.block_sparse_moe.experts.231.w2", "model.layers.39.block_sparse_moe.experts.232.w2", "model.layers.39.block_sparse_moe.experts.233.w2", "model.layers.39.block_sparse_moe.experts.234.w2", "model.layers.39.block_sparse_moe.experts.235.w2", "model.layers.39.block_sparse_moe.experts.236.w2", "model.layers.39.block_sparse_moe.experts.237.w2", "model.layers.39.block_sparse_moe.experts.238.w2", "model.layers.39.block_sparse_moe.experts.239.w2", "model.layers.39.block_sparse_moe.experts.240.w2", "model.layers.39.block_sparse_moe.experts.241.w2", "model.layers.39.block_sparse_moe.experts.242.w2", "model.layers.39.block_sparse_moe.experts.243.w2", "model.layers.39.block_sparse_moe.experts.244.w2", "model.layers.39.block_sparse_moe.experts.245.w2", "model.layers.39.block_sparse_moe.experts.246.w2", "model.layers.39.block_sparse_moe.experts.247.w2", "model.layers.39.block_sparse_moe.experts.248.w2", "model.layers.39.block_sparse_moe.experts.249.w2", "model.layers.39.block_sparse_moe.experts.250.w2", "model.layers.39.block_sparse_moe.experts.251.w2", "model.layers.39.block_sparse_moe.experts.252.w2", "model.layers.39.block_sparse_moe.experts.253.w2", "model.layers.39.block_sparse_moe.experts.254.w2", "model.layers.39.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00032182186841965554, "dbits": 1207959552 } ] }, { "idx": 200, "layers": [ "model.layers.40.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0005201548337936401, "dbits": 18874368 } ] }, { "idx": 201, "layers": [ "model.layers.40.self_attn.k_proj", "model.layers.40.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0008975792676210459, "dbits": 6291456 } ] }, { "idx": 202, "layers": [ "model.layers.40.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0005307190120220212, "dbits": 18874368 } ] }, { "idx": 203, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w1", "model.layers.40.block_sparse_moe.experts.1.w1", "model.layers.40.block_sparse_moe.experts.2.w1", "model.layers.40.block_sparse_moe.experts.3.w1", "model.layers.40.block_sparse_moe.experts.4.w1", "model.layers.40.block_sparse_moe.experts.5.w1", "model.layers.40.block_sparse_moe.experts.6.w1", "model.layers.40.block_sparse_moe.experts.7.w1", "model.layers.40.block_sparse_moe.experts.8.w1", "model.layers.40.block_sparse_moe.experts.9.w1", "model.layers.40.block_sparse_moe.experts.10.w1", "model.layers.40.block_sparse_moe.experts.11.w1", "model.layers.40.block_sparse_moe.experts.12.w1", "model.layers.40.block_sparse_moe.experts.13.w1", "model.layers.40.block_sparse_moe.experts.14.w1", "model.layers.40.block_sparse_moe.experts.15.w1", "model.layers.40.block_sparse_moe.experts.16.w1", "model.layers.40.block_sparse_moe.experts.17.w1", "model.layers.40.block_sparse_moe.experts.18.w1", "model.layers.40.block_sparse_moe.experts.19.w1", "model.layers.40.block_sparse_moe.experts.20.w1", "model.layers.40.block_sparse_moe.experts.21.w1", "model.layers.40.block_sparse_moe.experts.22.w1", "model.layers.40.block_sparse_moe.experts.23.w1", "model.layers.40.block_sparse_moe.experts.24.w1", "model.layers.40.block_sparse_moe.experts.25.w1", "model.layers.40.block_sparse_moe.experts.26.w1", "model.layers.40.block_sparse_moe.experts.27.w1", "model.layers.40.block_sparse_moe.experts.28.w1", "model.layers.40.block_sparse_moe.experts.29.w1", "model.layers.40.block_sparse_moe.experts.30.w1", "model.layers.40.block_sparse_moe.experts.31.w1", "model.layers.40.block_sparse_moe.experts.32.w1", "model.layers.40.block_sparse_moe.experts.33.w1", "model.layers.40.block_sparse_moe.experts.34.w1", "model.layers.40.block_sparse_moe.experts.35.w1", "model.layers.40.block_sparse_moe.experts.36.w1", "model.layers.40.block_sparse_moe.experts.37.w1", "model.layers.40.block_sparse_moe.experts.38.w1", "model.layers.40.block_sparse_moe.experts.39.w1", "model.layers.40.block_sparse_moe.experts.40.w1", "model.layers.40.block_sparse_moe.experts.41.w1", "model.layers.40.block_sparse_moe.experts.42.w1", "model.layers.40.block_sparse_moe.experts.43.w1", "model.layers.40.block_sparse_moe.experts.44.w1", "model.layers.40.block_sparse_moe.experts.45.w1", "model.layers.40.block_sparse_moe.experts.46.w1", "model.layers.40.block_sparse_moe.experts.47.w1", "model.layers.40.block_sparse_moe.experts.48.w1", "model.layers.40.block_sparse_moe.experts.49.w1", "model.layers.40.block_sparse_moe.experts.50.w1", "model.layers.40.block_sparse_moe.experts.51.w1", "model.layers.40.block_sparse_moe.experts.52.w1", "model.layers.40.block_sparse_moe.experts.53.w1", "model.layers.40.block_sparse_moe.experts.54.w1", "model.layers.40.block_sparse_moe.experts.55.w1", "model.layers.40.block_sparse_moe.experts.56.w1", "model.layers.40.block_sparse_moe.experts.57.w1", "model.layers.40.block_sparse_moe.experts.58.w1", "model.layers.40.block_sparse_moe.experts.59.w1", "model.layers.40.block_sparse_moe.experts.60.w1", "model.layers.40.block_sparse_moe.experts.61.w1", "model.layers.40.block_sparse_moe.experts.62.w1", "model.layers.40.block_sparse_moe.experts.63.w1", "model.layers.40.block_sparse_moe.experts.64.w1", "model.layers.40.block_sparse_moe.experts.65.w1", "model.layers.40.block_sparse_moe.experts.66.w1", "model.layers.40.block_sparse_moe.experts.67.w1", "model.layers.40.block_sparse_moe.experts.68.w1", "model.layers.40.block_sparse_moe.experts.69.w1", "model.layers.40.block_sparse_moe.experts.70.w1", "model.layers.40.block_sparse_moe.experts.71.w1", "model.layers.40.block_sparse_moe.experts.72.w1", "model.layers.40.block_sparse_moe.experts.73.w1", "model.layers.40.block_sparse_moe.experts.74.w1", "model.layers.40.block_sparse_moe.experts.75.w1", "model.layers.40.block_sparse_moe.experts.76.w1", "model.layers.40.block_sparse_moe.experts.77.w1", "model.layers.40.block_sparse_moe.experts.78.w1", "model.layers.40.block_sparse_moe.experts.79.w1", "model.layers.40.block_sparse_moe.experts.80.w1", "model.layers.40.block_sparse_moe.experts.81.w1", "model.layers.40.block_sparse_moe.experts.82.w1", "model.layers.40.block_sparse_moe.experts.83.w1", "model.layers.40.block_sparse_moe.experts.84.w1", "model.layers.40.block_sparse_moe.experts.85.w1", "model.layers.40.block_sparse_moe.experts.86.w1", "model.layers.40.block_sparse_moe.experts.87.w1", "model.layers.40.block_sparse_moe.experts.88.w1", "model.layers.40.block_sparse_moe.experts.89.w1", "model.layers.40.block_sparse_moe.experts.90.w1", "model.layers.40.block_sparse_moe.experts.91.w1", "model.layers.40.block_sparse_moe.experts.92.w1", "model.layers.40.block_sparse_moe.experts.93.w1", "model.layers.40.block_sparse_moe.experts.94.w1", "model.layers.40.block_sparse_moe.experts.95.w1", "model.layers.40.block_sparse_moe.experts.96.w1", "model.layers.40.block_sparse_moe.experts.97.w1", "model.layers.40.block_sparse_moe.experts.98.w1", "model.layers.40.block_sparse_moe.experts.99.w1", "model.layers.40.block_sparse_moe.experts.100.w1", "model.layers.40.block_sparse_moe.experts.101.w1", "model.layers.40.block_sparse_moe.experts.102.w1", "model.layers.40.block_sparse_moe.experts.103.w1", "model.layers.40.block_sparse_moe.experts.104.w1", "model.layers.40.block_sparse_moe.experts.105.w1", "model.layers.40.block_sparse_moe.experts.106.w1", "model.layers.40.block_sparse_moe.experts.107.w1", "model.layers.40.block_sparse_moe.experts.108.w1", "model.layers.40.block_sparse_moe.experts.109.w1", "model.layers.40.block_sparse_moe.experts.110.w1", "model.layers.40.block_sparse_moe.experts.111.w1", "model.layers.40.block_sparse_moe.experts.112.w1", "model.layers.40.block_sparse_moe.experts.113.w1", "model.layers.40.block_sparse_moe.experts.114.w1", "model.layers.40.block_sparse_moe.experts.115.w1", "model.layers.40.block_sparse_moe.experts.116.w1", "model.layers.40.block_sparse_moe.experts.117.w1", "model.layers.40.block_sparse_moe.experts.118.w1", "model.layers.40.block_sparse_moe.experts.119.w1", "model.layers.40.block_sparse_moe.experts.120.w1", "model.layers.40.block_sparse_moe.experts.121.w1", "model.layers.40.block_sparse_moe.experts.122.w1", "model.layers.40.block_sparse_moe.experts.123.w1", "model.layers.40.block_sparse_moe.experts.124.w1", "model.layers.40.block_sparse_moe.experts.125.w1", "model.layers.40.block_sparse_moe.experts.126.w1", "model.layers.40.block_sparse_moe.experts.127.w1", "model.layers.40.block_sparse_moe.experts.128.w1", "model.layers.40.block_sparse_moe.experts.129.w1", "model.layers.40.block_sparse_moe.experts.130.w1", "model.layers.40.block_sparse_moe.experts.131.w1", "model.layers.40.block_sparse_moe.experts.132.w1", "model.layers.40.block_sparse_moe.experts.133.w1", "model.layers.40.block_sparse_moe.experts.134.w1", "model.layers.40.block_sparse_moe.experts.135.w1", "model.layers.40.block_sparse_moe.experts.136.w1", "model.layers.40.block_sparse_moe.experts.137.w1", "model.layers.40.block_sparse_moe.experts.138.w1", "model.layers.40.block_sparse_moe.experts.139.w1", "model.layers.40.block_sparse_moe.experts.140.w1", "model.layers.40.block_sparse_moe.experts.141.w1", "model.layers.40.block_sparse_moe.experts.142.w1", "model.layers.40.block_sparse_moe.experts.143.w1", "model.layers.40.block_sparse_moe.experts.144.w1", "model.layers.40.block_sparse_moe.experts.145.w1", "model.layers.40.block_sparse_moe.experts.146.w1", "model.layers.40.block_sparse_moe.experts.147.w1", "model.layers.40.block_sparse_moe.experts.148.w1", "model.layers.40.block_sparse_moe.experts.149.w1", "model.layers.40.block_sparse_moe.experts.150.w1", "model.layers.40.block_sparse_moe.experts.151.w1", "model.layers.40.block_sparse_moe.experts.152.w1", "model.layers.40.block_sparse_moe.experts.153.w1", "model.layers.40.block_sparse_moe.experts.154.w1", "model.layers.40.block_sparse_moe.experts.155.w1", "model.layers.40.block_sparse_moe.experts.156.w1", "model.layers.40.block_sparse_moe.experts.157.w1", "model.layers.40.block_sparse_moe.experts.158.w1", "model.layers.40.block_sparse_moe.experts.159.w1", "model.layers.40.block_sparse_moe.experts.160.w1", "model.layers.40.block_sparse_moe.experts.161.w1", "model.layers.40.block_sparse_moe.experts.162.w1", "model.layers.40.block_sparse_moe.experts.163.w1", "model.layers.40.block_sparse_moe.experts.164.w1", "model.layers.40.block_sparse_moe.experts.165.w1", "model.layers.40.block_sparse_moe.experts.166.w1", "model.layers.40.block_sparse_moe.experts.167.w1", "model.layers.40.block_sparse_moe.experts.168.w1", "model.layers.40.block_sparse_moe.experts.169.w1", "model.layers.40.block_sparse_moe.experts.170.w1", "model.layers.40.block_sparse_moe.experts.171.w1", "model.layers.40.block_sparse_moe.experts.172.w1", "model.layers.40.block_sparse_moe.experts.173.w1", "model.layers.40.block_sparse_moe.experts.174.w1", "model.layers.40.block_sparse_moe.experts.175.w1", "model.layers.40.block_sparse_moe.experts.176.w1", "model.layers.40.block_sparse_moe.experts.177.w1", "model.layers.40.block_sparse_moe.experts.178.w1", "model.layers.40.block_sparse_moe.experts.179.w1", "model.layers.40.block_sparse_moe.experts.180.w1", "model.layers.40.block_sparse_moe.experts.181.w1", "model.layers.40.block_sparse_moe.experts.182.w1", "model.layers.40.block_sparse_moe.experts.183.w1", "model.layers.40.block_sparse_moe.experts.184.w1", "model.layers.40.block_sparse_moe.experts.185.w1", "model.layers.40.block_sparse_moe.experts.186.w1", "model.layers.40.block_sparse_moe.experts.187.w1", "model.layers.40.block_sparse_moe.experts.188.w1", "model.layers.40.block_sparse_moe.experts.189.w1", "model.layers.40.block_sparse_moe.experts.190.w1", "model.layers.40.block_sparse_moe.experts.191.w1", "model.layers.40.block_sparse_moe.experts.192.w1", "model.layers.40.block_sparse_moe.experts.193.w1", "model.layers.40.block_sparse_moe.experts.194.w1", "model.layers.40.block_sparse_moe.experts.195.w1", "model.layers.40.block_sparse_moe.experts.196.w1", "model.layers.40.block_sparse_moe.experts.197.w1", "model.layers.40.block_sparse_moe.experts.198.w1", "model.layers.40.block_sparse_moe.experts.199.w1", "model.layers.40.block_sparse_moe.experts.200.w1", "model.layers.40.block_sparse_moe.experts.201.w1", "model.layers.40.block_sparse_moe.experts.202.w1", "model.layers.40.block_sparse_moe.experts.203.w1", "model.layers.40.block_sparse_moe.experts.204.w1", "model.layers.40.block_sparse_moe.experts.205.w1", "model.layers.40.block_sparse_moe.experts.206.w1", "model.layers.40.block_sparse_moe.experts.207.w1", "model.layers.40.block_sparse_moe.experts.208.w1", "model.layers.40.block_sparse_moe.experts.209.w1", "model.layers.40.block_sparse_moe.experts.210.w1", "model.layers.40.block_sparse_moe.experts.211.w1", "model.layers.40.block_sparse_moe.experts.212.w1", "model.layers.40.block_sparse_moe.experts.213.w1", "model.layers.40.block_sparse_moe.experts.214.w1", "model.layers.40.block_sparse_moe.experts.215.w1", "model.layers.40.block_sparse_moe.experts.216.w1", "model.layers.40.block_sparse_moe.experts.217.w1", "model.layers.40.block_sparse_moe.experts.218.w1", "model.layers.40.block_sparse_moe.experts.219.w1", "model.layers.40.block_sparse_moe.experts.220.w1", "model.layers.40.block_sparse_moe.experts.221.w1", "model.layers.40.block_sparse_moe.experts.222.w1", "model.layers.40.block_sparse_moe.experts.223.w1", "model.layers.40.block_sparse_moe.experts.224.w1", "model.layers.40.block_sparse_moe.experts.225.w1", "model.layers.40.block_sparse_moe.experts.226.w1", "model.layers.40.block_sparse_moe.experts.227.w1", "model.layers.40.block_sparse_moe.experts.228.w1", "model.layers.40.block_sparse_moe.experts.229.w1", "model.layers.40.block_sparse_moe.experts.230.w1", "model.layers.40.block_sparse_moe.experts.231.w1", "model.layers.40.block_sparse_moe.experts.232.w1", "model.layers.40.block_sparse_moe.experts.233.w1", "model.layers.40.block_sparse_moe.experts.234.w1", "model.layers.40.block_sparse_moe.experts.235.w1", "model.layers.40.block_sparse_moe.experts.236.w1", "model.layers.40.block_sparse_moe.experts.237.w1", "model.layers.40.block_sparse_moe.experts.238.w1", "model.layers.40.block_sparse_moe.experts.239.w1", "model.layers.40.block_sparse_moe.experts.240.w1", "model.layers.40.block_sparse_moe.experts.241.w1", "model.layers.40.block_sparse_moe.experts.242.w1", "model.layers.40.block_sparse_moe.experts.243.w1", "model.layers.40.block_sparse_moe.experts.244.w1", "model.layers.40.block_sparse_moe.experts.245.w1", "model.layers.40.block_sparse_moe.experts.246.w1", "model.layers.40.block_sparse_moe.experts.247.w1", "model.layers.40.block_sparse_moe.experts.248.w1", "model.layers.40.block_sparse_moe.experts.249.w1", "model.layers.40.block_sparse_moe.experts.250.w1", "model.layers.40.block_sparse_moe.experts.251.w1", "model.layers.40.block_sparse_moe.experts.252.w1", "model.layers.40.block_sparse_moe.experts.253.w1", "model.layers.40.block_sparse_moe.experts.254.w1", "model.layers.40.block_sparse_moe.experts.255.w1", "model.layers.40.block_sparse_moe.experts.0.w3", "model.layers.40.block_sparse_moe.experts.1.w3", "model.layers.40.block_sparse_moe.experts.2.w3", "model.layers.40.block_sparse_moe.experts.3.w3", "model.layers.40.block_sparse_moe.experts.4.w3", "model.layers.40.block_sparse_moe.experts.5.w3", "model.layers.40.block_sparse_moe.experts.6.w3", "model.layers.40.block_sparse_moe.experts.7.w3", "model.layers.40.block_sparse_moe.experts.8.w3", "model.layers.40.block_sparse_moe.experts.9.w3", "model.layers.40.block_sparse_moe.experts.10.w3", "model.layers.40.block_sparse_moe.experts.11.w3", "model.layers.40.block_sparse_moe.experts.12.w3", "model.layers.40.block_sparse_moe.experts.13.w3", "model.layers.40.block_sparse_moe.experts.14.w3", "model.layers.40.block_sparse_moe.experts.15.w3", "model.layers.40.block_sparse_moe.experts.16.w3", "model.layers.40.block_sparse_moe.experts.17.w3", "model.layers.40.block_sparse_moe.experts.18.w3", "model.layers.40.block_sparse_moe.experts.19.w3", "model.layers.40.block_sparse_moe.experts.20.w3", "model.layers.40.block_sparse_moe.experts.21.w3", "model.layers.40.block_sparse_moe.experts.22.w3", "model.layers.40.block_sparse_moe.experts.23.w3", "model.layers.40.block_sparse_moe.experts.24.w3", "model.layers.40.block_sparse_moe.experts.25.w3", "model.layers.40.block_sparse_moe.experts.26.w3", "model.layers.40.block_sparse_moe.experts.27.w3", "model.layers.40.block_sparse_moe.experts.28.w3", "model.layers.40.block_sparse_moe.experts.29.w3", "model.layers.40.block_sparse_moe.experts.30.w3", "model.layers.40.block_sparse_moe.experts.31.w3", "model.layers.40.block_sparse_moe.experts.32.w3", "model.layers.40.block_sparse_moe.experts.33.w3", "model.layers.40.block_sparse_moe.experts.34.w3", "model.layers.40.block_sparse_moe.experts.35.w3", "model.layers.40.block_sparse_moe.experts.36.w3", "model.layers.40.block_sparse_moe.experts.37.w3", "model.layers.40.block_sparse_moe.experts.38.w3", "model.layers.40.block_sparse_moe.experts.39.w3", "model.layers.40.block_sparse_moe.experts.40.w3", "model.layers.40.block_sparse_moe.experts.41.w3", "model.layers.40.block_sparse_moe.experts.42.w3", "model.layers.40.block_sparse_moe.experts.43.w3", "model.layers.40.block_sparse_moe.experts.44.w3", "model.layers.40.block_sparse_moe.experts.45.w3", "model.layers.40.block_sparse_moe.experts.46.w3", "model.layers.40.block_sparse_moe.experts.47.w3", "model.layers.40.block_sparse_moe.experts.48.w3", "model.layers.40.block_sparse_moe.experts.49.w3", "model.layers.40.block_sparse_moe.experts.50.w3", "model.layers.40.block_sparse_moe.experts.51.w3", "model.layers.40.block_sparse_moe.experts.52.w3", "model.layers.40.block_sparse_moe.experts.53.w3", "model.layers.40.block_sparse_moe.experts.54.w3", "model.layers.40.block_sparse_moe.experts.55.w3", "model.layers.40.block_sparse_moe.experts.56.w3", "model.layers.40.block_sparse_moe.experts.57.w3", "model.layers.40.block_sparse_moe.experts.58.w3", "model.layers.40.block_sparse_moe.experts.59.w3", "model.layers.40.block_sparse_moe.experts.60.w3", "model.layers.40.block_sparse_moe.experts.61.w3", "model.layers.40.block_sparse_moe.experts.62.w3", "model.layers.40.block_sparse_moe.experts.63.w3", "model.layers.40.block_sparse_moe.experts.64.w3", "model.layers.40.block_sparse_moe.experts.65.w3", "model.layers.40.block_sparse_moe.experts.66.w3", "model.layers.40.block_sparse_moe.experts.67.w3", "model.layers.40.block_sparse_moe.experts.68.w3", "model.layers.40.block_sparse_moe.experts.69.w3", "model.layers.40.block_sparse_moe.experts.70.w3", "model.layers.40.block_sparse_moe.experts.71.w3", "model.layers.40.block_sparse_moe.experts.72.w3", "model.layers.40.block_sparse_moe.experts.73.w3", "model.layers.40.block_sparse_moe.experts.74.w3", "model.layers.40.block_sparse_moe.experts.75.w3", "model.layers.40.block_sparse_moe.experts.76.w3", "model.layers.40.block_sparse_moe.experts.77.w3", "model.layers.40.block_sparse_moe.experts.78.w3", "model.layers.40.block_sparse_moe.experts.79.w3", "model.layers.40.block_sparse_moe.experts.80.w3", "model.layers.40.block_sparse_moe.experts.81.w3", "model.layers.40.block_sparse_moe.experts.82.w3", "model.layers.40.block_sparse_moe.experts.83.w3", "model.layers.40.block_sparse_moe.experts.84.w3", "model.layers.40.block_sparse_moe.experts.85.w3", "model.layers.40.block_sparse_moe.experts.86.w3", "model.layers.40.block_sparse_moe.experts.87.w3", "model.layers.40.block_sparse_moe.experts.88.w3", "model.layers.40.block_sparse_moe.experts.89.w3", "model.layers.40.block_sparse_moe.experts.90.w3", "model.layers.40.block_sparse_moe.experts.91.w3", "model.layers.40.block_sparse_moe.experts.92.w3", "model.layers.40.block_sparse_moe.experts.93.w3", "model.layers.40.block_sparse_moe.experts.94.w3", "model.layers.40.block_sparse_moe.experts.95.w3", "model.layers.40.block_sparse_moe.experts.96.w3", "model.layers.40.block_sparse_moe.experts.97.w3", "model.layers.40.block_sparse_moe.experts.98.w3", "model.layers.40.block_sparse_moe.experts.99.w3", "model.layers.40.block_sparse_moe.experts.100.w3", "model.layers.40.block_sparse_moe.experts.101.w3", "model.layers.40.block_sparse_moe.experts.102.w3", "model.layers.40.block_sparse_moe.experts.103.w3", "model.layers.40.block_sparse_moe.experts.104.w3", "model.layers.40.block_sparse_moe.experts.105.w3", "model.layers.40.block_sparse_moe.experts.106.w3", "model.layers.40.block_sparse_moe.experts.107.w3", "model.layers.40.block_sparse_moe.experts.108.w3", "model.layers.40.block_sparse_moe.experts.109.w3", "model.layers.40.block_sparse_moe.experts.110.w3", "model.layers.40.block_sparse_moe.experts.111.w3", "model.layers.40.block_sparse_moe.experts.112.w3", "model.layers.40.block_sparse_moe.experts.113.w3", "model.layers.40.block_sparse_moe.experts.114.w3", "model.layers.40.block_sparse_moe.experts.115.w3", "model.layers.40.block_sparse_moe.experts.116.w3", "model.layers.40.block_sparse_moe.experts.117.w3", "model.layers.40.block_sparse_moe.experts.118.w3", "model.layers.40.block_sparse_moe.experts.119.w3", "model.layers.40.block_sparse_moe.experts.120.w3", "model.layers.40.block_sparse_moe.experts.121.w3", "model.layers.40.block_sparse_moe.experts.122.w3", "model.layers.40.block_sparse_moe.experts.123.w3", "model.layers.40.block_sparse_moe.experts.124.w3", "model.layers.40.block_sparse_moe.experts.125.w3", "model.layers.40.block_sparse_moe.experts.126.w3", "model.layers.40.block_sparse_moe.experts.127.w3", "model.layers.40.block_sparse_moe.experts.128.w3", "model.layers.40.block_sparse_moe.experts.129.w3", "model.layers.40.block_sparse_moe.experts.130.w3", "model.layers.40.block_sparse_moe.experts.131.w3", "model.layers.40.block_sparse_moe.experts.132.w3", "model.layers.40.block_sparse_moe.experts.133.w3", "model.layers.40.block_sparse_moe.experts.134.w3", "model.layers.40.block_sparse_moe.experts.135.w3", "model.layers.40.block_sparse_moe.experts.136.w3", "model.layers.40.block_sparse_moe.experts.137.w3", "model.layers.40.block_sparse_moe.experts.138.w3", "model.layers.40.block_sparse_moe.experts.139.w3", "model.layers.40.block_sparse_moe.experts.140.w3", "model.layers.40.block_sparse_moe.experts.141.w3", "model.layers.40.block_sparse_moe.experts.142.w3", "model.layers.40.block_sparse_moe.experts.143.w3", "model.layers.40.block_sparse_moe.experts.144.w3", "model.layers.40.block_sparse_moe.experts.145.w3", "model.layers.40.block_sparse_moe.experts.146.w3", "model.layers.40.block_sparse_moe.experts.147.w3", "model.layers.40.block_sparse_moe.experts.148.w3", "model.layers.40.block_sparse_moe.experts.149.w3", "model.layers.40.block_sparse_moe.experts.150.w3", "model.layers.40.block_sparse_moe.experts.151.w3", "model.layers.40.block_sparse_moe.experts.152.w3", "model.layers.40.block_sparse_moe.experts.153.w3", "model.layers.40.block_sparse_moe.experts.154.w3", "model.layers.40.block_sparse_moe.experts.155.w3", "model.layers.40.block_sparse_moe.experts.156.w3", "model.layers.40.block_sparse_moe.experts.157.w3", "model.layers.40.block_sparse_moe.experts.158.w3", "model.layers.40.block_sparse_moe.experts.159.w3", "model.layers.40.block_sparse_moe.experts.160.w3", "model.layers.40.block_sparse_moe.experts.161.w3", "model.layers.40.block_sparse_moe.experts.162.w3", "model.layers.40.block_sparse_moe.experts.163.w3", "model.layers.40.block_sparse_moe.experts.164.w3", "model.layers.40.block_sparse_moe.experts.165.w3", "model.layers.40.block_sparse_moe.experts.166.w3", "model.layers.40.block_sparse_moe.experts.167.w3", "model.layers.40.block_sparse_moe.experts.168.w3", "model.layers.40.block_sparse_moe.experts.169.w3", "model.layers.40.block_sparse_moe.experts.170.w3", "model.layers.40.block_sparse_moe.experts.171.w3", "model.layers.40.block_sparse_moe.experts.172.w3", "model.layers.40.block_sparse_moe.experts.173.w3", "model.layers.40.block_sparse_moe.experts.174.w3", "model.layers.40.block_sparse_moe.experts.175.w3", "model.layers.40.block_sparse_moe.experts.176.w3", "model.layers.40.block_sparse_moe.experts.177.w3", "model.layers.40.block_sparse_moe.experts.178.w3", "model.layers.40.block_sparse_moe.experts.179.w3", "model.layers.40.block_sparse_moe.experts.180.w3", "model.layers.40.block_sparse_moe.experts.181.w3", "model.layers.40.block_sparse_moe.experts.182.w3", "model.layers.40.block_sparse_moe.experts.183.w3", "model.layers.40.block_sparse_moe.experts.184.w3", "model.layers.40.block_sparse_moe.experts.185.w3", "model.layers.40.block_sparse_moe.experts.186.w3", "model.layers.40.block_sparse_moe.experts.187.w3", "model.layers.40.block_sparse_moe.experts.188.w3", "model.layers.40.block_sparse_moe.experts.189.w3", "model.layers.40.block_sparse_moe.experts.190.w3", "model.layers.40.block_sparse_moe.experts.191.w3", "model.layers.40.block_sparse_moe.experts.192.w3", "model.layers.40.block_sparse_moe.experts.193.w3", "model.layers.40.block_sparse_moe.experts.194.w3", "model.layers.40.block_sparse_moe.experts.195.w3", "model.layers.40.block_sparse_moe.experts.196.w3", "model.layers.40.block_sparse_moe.experts.197.w3", "model.layers.40.block_sparse_moe.experts.198.w3", "model.layers.40.block_sparse_moe.experts.199.w3", "model.layers.40.block_sparse_moe.experts.200.w3", "model.layers.40.block_sparse_moe.experts.201.w3", "model.layers.40.block_sparse_moe.experts.202.w3", "model.layers.40.block_sparse_moe.experts.203.w3", "model.layers.40.block_sparse_moe.experts.204.w3", "model.layers.40.block_sparse_moe.experts.205.w3", "model.layers.40.block_sparse_moe.experts.206.w3", "model.layers.40.block_sparse_moe.experts.207.w3", "model.layers.40.block_sparse_moe.experts.208.w3", "model.layers.40.block_sparse_moe.experts.209.w3", "model.layers.40.block_sparse_moe.experts.210.w3", "model.layers.40.block_sparse_moe.experts.211.w3", "model.layers.40.block_sparse_moe.experts.212.w3", "model.layers.40.block_sparse_moe.experts.213.w3", "model.layers.40.block_sparse_moe.experts.214.w3", "model.layers.40.block_sparse_moe.experts.215.w3", "model.layers.40.block_sparse_moe.experts.216.w3", "model.layers.40.block_sparse_moe.experts.217.w3", "model.layers.40.block_sparse_moe.experts.218.w3", "model.layers.40.block_sparse_moe.experts.219.w3", "model.layers.40.block_sparse_moe.experts.220.w3", "model.layers.40.block_sparse_moe.experts.221.w3", "model.layers.40.block_sparse_moe.experts.222.w3", "model.layers.40.block_sparse_moe.experts.223.w3", "model.layers.40.block_sparse_moe.experts.224.w3", "model.layers.40.block_sparse_moe.experts.225.w3", "model.layers.40.block_sparse_moe.experts.226.w3", "model.layers.40.block_sparse_moe.experts.227.w3", "model.layers.40.block_sparse_moe.experts.228.w3", "model.layers.40.block_sparse_moe.experts.229.w3", "model.layers.40.block_sparse_moe.experts.230.w3", "model.layers.40.block_sparse_moe.experts.231.w3", "model.layers.40.block_sparse_moe.experts.232.w3", "model.layers.40.block_sparse_moe.experts.233.w3", "model.layers.40.block_sparse_moe.experts.234.w3", "model.layers.40.block_sparse_moe.experts.235.w3", "model.layers.40.block_sparse_moe.experts.236.w3", "model.layers.40.block_sparse_moe.experts.237.w3", "model.layers.40.block_sparse_moe.experts.238.w3", "model.layers.40.block_sparse_moe.experts.239.w3", "model.layers.40.block_sparse_moe.experts.240.w3", "model.layers.40.block_sparse_moe.experts.241.w3", "model.layers.40.block_sparse_moe.experts.242.w3", "model.layers.40.block_sparse_moe.experts.243.w3", "model.layers.40.block_sparse_moe.experts.244.w3", "model.layers.40.block_sparse_moe.experts.245.w3", "model.layers.40.block_sparse_moe.experts.246.w3", "model.layers.40.block_sparse_moe.experts.247.w3", "model.layers.40.block_sparse_moe.experts.248.w3", "model.layers.40.block_sparse_moe.experts.249.w3", "model.layers.40.block_sparse_moe.experts.250.w3", "model.layers.40.block_sparse_moe.experts.251.w3", "model.layers.40.block_sparse_moe.experts.252.w3", "model.layers.40.block_sparse_moe.experts.253.w3", "model.layers.40.block_sparse_moe.experts.254.w3", "model.layers.40.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0002786466851830427, "dbits": 2415919104 } ] }, { "idx": 204, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w2", "model.layers.40.block_sparse_moe.experts.1.w2", "model.layers.40.block_sparse_moe.experts.2.w2", "model.layers.40.block_sparse_moe.experts.3.w2", "model.layers.40.block_sparse_moe.experts.4.w2", "model.layers.40.block_sparse_moe.experts.5.w2", "model.layers.40.block_sparse_moe.experts.6.w2", "model.layers.40.block_sparse_moe.experts.7.w2", "model.layers.40.block_sparse_moe.experts.8.w2", "model.layers.40.block_sparse_moe.experts.9.w2", "model.layers.40.block_sparse_moe.experts.10.w2", "model.layers.40.block_sparse_moe.experts.11.w2", "model.layers.40.block_sparse_moe.experts.12.w2", "model.layers.40.block_sparse_moe.experts.13.w2", "model.layers.40.block_sparse_moe.experts.14.w2", "model.layers.40.block_sparse_moe.experts.15.w2", "model.layers.40.block_sparse_moe.experts.16.w2", "model.layers.40.block_sparse_moe.experts.17.w2", "model.layers.40.block_sparse_moe.experts.18.w2", "model.layers.40.block_sparse_moe.experts.19.w2", "model.layers.40.block_sparse_moe.experts.20.w2", "model.layers.40.block_sparse_moe.experts.21.w2", "model.layers.40.block_sparse_moe.experts.22.w2", "model.layers.40.block_sparse_moe.experts.23.w2", "model.layers.40.block_sparse_moe.experts.24.w2", "model.layers.40.block_sparse_moe.experts.25.w2", "model.layers.40.block_sparse_moe.experts.26.w2", "model.layers.40.block_sparse_moe.experts.27.w2", "model.layers.40.block_sparse_moe.experts.28.w2", "model.layers.40.block_sparse_moe.experts.29.w2", "model.layers.40.block_sparse_moe.experts.30.w2", "model.layers.40.block_sparse_moe.experts.31.w2", "model.layers.40.block_sparse_moe.experts.32.w2", "model.layers.40.block_sparse_moe.experts.33.w2", "model.layers.40.block_sparse_moe.experts.34.w2", "model.layers.40.block_sparse_moe.experts.35.w2", "model.layers.40.block_sparse_moe.experts.36.w2", "model.layers.40.block_sparse_moe.experts.37.w2", "model.layers.40.block_sparse_moe.experts.38.w2", "model.layers.40.block_sparse_moe.experts.39.w2", "model.layers.40.block_sparse_moe.experts.40.w2", "model.layers.40.block_sparse_moe.experts.41.w2", "model.layers.40.block_sparse_moe.experts.42.w2", "model.layers.40.block_sparse_moe.experts.43.w2", "model.layers.40.block_sparse_moe.experts.44.w2", "model.layers.40.block_sparse_moe.experts.45.w2", "model.layers.40.block_sparse_moe.experts.46.w2", "model.layers.40.block_sparse_moe.experts.47.w2", "model.layers.40.block_sparse_moe.experts.48.w2", "model.layers.40.block_sparse_moe.experts.49.w2", "model.layers.40.block_sparse_moe.experts.50.w2", "model.layers.40.block_sparse_moe.experts.51.w2", "model.layers.40.block_sparse_moe.experts.52.w2", "model.layers.40.block_sparse_moe.experts.53.w2", "model.layers.40.block_sparse_moe.experts.54.w2", "model.layers.40.block_sparse_moe.experts.55.w2", "model.layers.40.block_sparse_moe.experts.56.w2", "model.layers.40.block_sparse_moe.experts.57.w2", "model.layers.40.block_sparse_moe.experts.58.w2", "model.layers.40.block_sparse_moe.experts.59.w2", "model.layers.40.block_sparse_moe.experts.60.w2", "model.layers.40.block_sparse_moe.experts.61.w2", "model.layers.40.block_sparse_moe.experts.62.w2", "model.layers.40.block_sparse_moe.experts.63.w2", "model.layers.40.block_sparse_moe.experts.64.w2", "model.layers.40.block_sparse_moe.experts.65.w2", "model.layers.40.block_sparse_moe.experts.66.w2", "model.layers.40.block_sparse_moe.experts.67.w2", "model.layers.40.block_sparse_moe.experts.68.w2", "model.layers.40.block_sparse_moe.experts.69.w2", "model.layers.40.block_sparse_moe.experts.70.w2", "model.layers.40.block_sparse_moe.experts.71.w2", "model.layers.40.block_sparse_moe.experts.72.w2", "model.layers.40.block_sparse_moe.experts.73.w2", "model.layers.40.block_sparse_moe.experts.74.w2", "model.layers.40.block_sparse_moe.experts.75.w2", "model.layers.40.block_sparse_moe.experts.76.w2", "model.layers.40.block_sparse_moe.experts.77.w2", "model.layers.40.block_sparse_moe.experts.78.w2", "model.layers.40.block_sparse_moe.experts.79.w2", "model.layers.40.block_sparse_moe.experts.80.w2", "model.layers.40.block_sparse_moe.experts.81.w2", "model.layers.40.block_sparse_moe.experts.82.w2", "model.layers.40.block_sparse_moe.experts.83.w2", "model.layers.40.block_sparse_moe.experts.84.w2", "model.layers.40.block_sparse_moe.experts.85.w2", "model.layers.40.block_sparse_moe.experts.86.w2", "model.layers.40.block_sparse_moe.experts.87.w2", "model.layers.40.block_sparse_moe.experts.88.w2", "model.layers.40.block_sparse_moe.experts.89.w2", "model.layers.40.block_sparse_moe.experts.90.w2", "model.layers.40.block_sparse_moe.experts.91.w2", "model.layers.40.block_sparse_moe.experts.92.w2", "model.layers.40.block_sparse_moe.experts.93.w2", "model.layers.40.block_sparse_moe.experts.94.w2", "model.layers.40.block_sparse_moe.experts.95.w2", "model.layers.40.block_sparse_moe.experts.96.w2", "model.layers.40.block_sparse_moe.experts.97.w2", "model.layers.40.block_sparse_moe.experts.98.w2", "model.layers.40.block_sparse_moe.experts.99.w2", "model.layers.40.block_sparse_moe.experts.100.w2", "model.layers.40.block_sparse_moe.experts.101.w2", "model.layers.40.block_sparse_moe.experts.102.w2", "model.layers.40.block_sparse_moe.experts.103.w2", "model.layers.40.block_sparse_moe.experts.104.w2", "model.layers.40.block_sparse_moe.experts.105.w2", "model.layers.40.block_sparse_moe.experts.106.w2", "model.layers.40.block_sparse_moe.experts.107.w2", "model.layers.40.block_sparse_moe.experts.108.w2", "model.layers.40.block_sparse_moe.experts.109.w2", "model.layers.40.block_sparse_moe.experts.110.w2", "model.layers.40.block_sparse_moe.experts.111.w2", "model.layers.40.block_sparse_moe.experts.112.w2", "model.layers.40.block_sparse_moe.experts.113.w2", "model.layers.40.block_sparse_moe.experts.114.w2", "model.layers.40.block_sparse_moe.experts.115.w2", "model.layers.40.block_sparse_moe.experts.116.w2", "model.layers.40.block_sparse_moe.experts.117.w2", "model.layers.40.block_sparse_moe.experts.118.w2", "model.layers.40.block_sparse_moe.experts.119.w2", "model.layers.40.block_sparse_moe.experts.120.w2", "model.layers.40.block_sparse_moe.experts.121.w2", "model.layers.40.block_sparse_moe.experts.122.w2", "model.layers.40.block_sparse_moe.experts.123.w2", "model.layers.40.block_sparse_moe.experts.124.w2", "model.layers.40.block_sparse_moe.experts.125.w2", "model.layers.40.block_sparse_moe.experts.126.w2", "model.layers.40.block_sparse_moe.experts.127.w2", "model.layers.40.block_sparse_moe.experts.128.w2", "model.layers.40.block_sparse_moe.experts.129.w2", "model.layers.40.block_sparse_moe.experts.130.w2", "model.layers.40.block_sparse_moe.experts.131.w2", "model.layers.40.block_sparse_moe.experts.132.w2", "model.layers.40.block_sparse_moe.experts.133.w2", "model.layers.40.block_sparse_moe.experts.134.w2", "model.layers.40.block_sparse_moe.experts.135.w2", "model.layers.40.block_sparse_moe.experts.136.w2", "model.layers.40.block_sparse_moe.experts.137.w2", "model.layers.40.block_sparse_moe.experts.138.w2", "model.layers.40.block_sparse_moe.experts.139.w2", "model.layers.40.block_sparse_moe.experts.140.w2", "model.layers.40.block_sparse_moe.experts.141.w2", "model.layers.40.block_sparse_moe.experts.142.w2", "model.layers.40.block_sparse_moe.experts.143.w2", "model.layers.40.block_sparse_moe.experts.144.w2", "model.layers.40.block_sparse_moe.experts.145.w2", "model.layers.40.block_sparse_moe.experts.146.w2", "model.layers.40.block_sparse_moe.experts.147.w2", "model.layers.40.block_sparse_moe.experts.148.w2", "model.layers.40.block_sparse_moe.experts.149.w2", "model.layers.40.block_sparse_moe.experts.150.w2", "model.layers.40.block_sparse_moe.experts.151.w2", "model.layers.40.block_sparse_moe.experts.152.w2", "model.layers.40.block_sparse_moe.experts.153.w2", "model.layers.40.block_sparse_moe.experts.154.w2", "model.layers.40.block_sparse_moe.experts.155.w2", "model.layers.40.block_sparse_moe.experts.156.w2", "model.layers.40.block_sparse_moe.experts.157.w2", "model.layers.40.block_sparse_moe.experts.158.w2", "model.layers.40.block_sparse_moe.experts.159.w2", "model.layers.40.block_sparse_moe.experts.160.w2", "model.layers.40.block_sparse_moe.experts.161.w2", "model.layers.40.block_sparse_moe.experts.162.w2", "model.layers.40.block_sparse_moe.experts.163.w2", "model.layers.40.block_sparse_moe.experts.164.w2", "model.layers.40.block_sparse_moe.experts.165.w2", "model.layers.40.block_sparse_moe.experts.166.w2", "model.layers.40.block_sparse_moe.experts.167.w2", "model.layers.40.block_sparse_moe.experts.168.w2", "model.layers.40.block_sparse_moe.experts.169.w2", "model.layers.40.block_sparse_moe.experts.170.w2", "model.layers.40.block_sparse_moe.experts.171.w2", "model.layers.40.block_sparse_moe.experts.172.w2", "model.layers.40.block_sparse_moe.experts.173.w2", "model.layers.40.block_sparse_moe.experts.174.w2", "model.layers.40.block_sparse_moe.experts.175.w2", "model.layers.40.block_sparse_moe.experts.176.w2", "model.layers.40.block_sparse_moe.experts.177.w2", "model.layers.40.block_sparse_moe.experts.178.w2", "model.layers.40.block_sparse_moe.experts.179.w2", "model.layers.40.block_sparse_moe.experts.180.w2", "model.layers.40.block_sparse_moe.experts.181.w2", "model.layers.40.block_sparse_moe.experts.182.w2", "model.layers.40.block_sparse_moe.experts.183.w2", "model.layers.40.block_sparse_moe.experts.184.w2", "model.layers.40.block_sparse_moe.experts.185.w2", "model.layers.40.block_sparse_moe.experts.186.w2", "model.layers.40.block_sparse_moe.experts.187.w2", "model.layers.40.block_sparse_moe.experts.188.w2", "model.layers.40.block_sparse_moe.experts.189.w2", "model.layers.40.block_sparse_moe.experts.190.w2", "model.layers.40.block_sparse_moe.experts.191.w2", "model.layers.40.block_sparse_moe.experts.192.w2", "model.layers.40.block_sparse_moe.experts.193.w2", "model.layers.40.block_sparse_moe.experts.194.w2", "model.layers.40.block_sparse_moe.experts.195.w2", "model.layers.40.block_sparse_moe.experts.196.w2", "model.layers.40.block_sparse_moe.experts.197.w2", "model.layers.40.block_sparse_moe.experts.198.w2", "model.layers.40.block_sparse_moe.experts.199.w2", "model.layers.40.block_sparse_moe.experts.200.w2", "model.layers.40.block_sparse_moe.experts.201.w2", "model.layers.40.block_sparse_moe.experts.202.w2", "model.layers.40.block_sparse_moe.experts.203.w2", "model.layers.40.block_sparse_moe.experts.204.w2", "model.layers.40.block_sparse_moe.experts.205.w2", "model.layers.40.block_sparse_moe.experts.206.w2", "model.layers.40.block_sparse_moe.experts.207.w2", "model.layers.40.block_sparse_moe.experts.208.w2", "model.layers.40.block_sparse_moe.experts.209.w2", "model.layers.40.block_sparse_moe.experts.210.w2", "model.layers.40.block_sparse_moe.experts.211.w2", "model.layers.40.block_sparse_moe.experts.212.w2", "model.layers.40.block_sparse_moe.experts.213.w2", "model.layers.40.block_sparse_moe.experts.214.w2", "model.layers.40.block_sparse_moe.experts.215.w2", "model.layers.40.block_sparse_moe.experts.216.w2", "model.layers.40.block_sparse_moe.experts.217.w2", "model.layers.40.block_sparse_moe.experts.218.w2", "model.layers.40.block_sparse_moe.experts.219.w2", "model.layers.40.block_sparse_moe.experts.220.w2", "model.layers.40.block_sparse_moe.experts.221.w2", "model.layers.40.block_sparse_moe.experts.222.w2", "model.layers.40.block_sparse_moe.experts.223.w2", "model.layers.40.block_sparse_moe.experts.224.w2", "model.layers.40.block_sparse_moe.experts.225.w2", "model.layers.40.block_sparse_moe.experts.226.w2", "model.layers.40.block_sparse_moe.experts.227.w2", "model.layers.40.block_sparse_moe.experts.228.w2", "model.layers.40.block_sparse_moe.experts.229.w2", "model.layers.40.block_sparse_moe.experts.230.w2", "model.layers.40.block_sparse_moe.experts.231.w2", "model.layers.40.block_sparse_moe.experts.232.w2", "model.layers.40.block_sparse_moe.experts.233.w2", "model.layers.40.block_sparse_moe.experts.234.w2", "model.layers.40.block_sparse_moe.experts.235.w2", "model.layers.40.block_sparse_moe.experts.236.w2", "model.layers.40.block_sparse_moe.experts.237.w2", "model.layers.40.block_sparse_moe.experts.238.w2", "model.layers.40.block_sparse_moe.experts.239.w2", "model.layers.40.block_sparse_moe.experts.240.w2", "model.layers.40.block_sparse_moe.experts.241.w2", "model.layers.40.block_sparse_moe.experts.242.w2", "model.layers.40.block_sparse_moe.experts.243.w2", "model.layers.40.block_sparse_moe.experts.244.w2", "model.layers.40.block_sparse_moe.experts.245.w2", "model.layers.40.block_sparse_moe.experts.246.w2", "model.layers.40.block_sparse_moe.experts.247.w2", "model.layers.40.block_sparse_moe.experts.248.w2", "model.layers.40.block_sparse_moe.experts.249.w2", "model.layers.40.block_sparse_moe.experts.250.w2", "model.layers.40.block_sparse_moe.experts.251.w2", "model.layers.40.block_sparse_moe.experts.252.w2", "model.layers.40.block_sparse_moe.experts.253.w2", "model.layers.40.block_sparse_moe.experts.254.w2", "model.layers.40.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00014900658279658197, "dbits": 1207959552 } ] }, { "idx": 205, "layers": [ "model.layers.41.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0006970807909965487, "dbits": 18874368 } ] }, { "idx": 206, "layers": [ "model.layers.41.self_attn.k_proj", "model.layers.41.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0010982910171151161, "dbits": 6291456 } ] }, { "idx": 207, "layers": [ "model.layers.41.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0008604114875197383, "dbits": 18874368 } ] }, { "idx": 208, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w1", "model.layers.41.block_sparse_moe.experts.1.w1", "model.layers.41.block_sparse_moe.experts.2.w1", "model.layers.41.block_sparse_moe.experts.3.w1", "model.layers.41.block_sparse_moe.experts.4.w1", "model.layers.41.block_sparse_moe.experts.5.w1", "model.layers.41.block_sparse_moe.experts.6.w1", "model.layers.41.block_sparse_moe.experts.7.w1", "model.layers.41.block_sparse_moe.experts.8.w1", "model.layers.41.block_sparse_moe.experts.9.w1", "model.layers.41.block_sparse_moe.experts.10.w1", "model.layers.41.block_sparse_moe.experts.11.w1", "model.layers.41.block_sparse_moe.experts.12.w1", "model.layers.41.block_sparse_moe.experts.13.w1", "model.layers.41.block_sparse_moe.experts.14.w1", "model.layers.41.block_sparse_moe.experts.15.w1", "model.layers.41.block_sparse_moe.experts.16.w1", "model.layers.41.block_sparse_moe.experts.17.w1", "model.layers.41.block_sparse_moe.experts.18.w1", "model.layers.41.block_sparse_moe.experts.19.w1", "model.layers.41.block_sparse_moe.experts.20.w1", "model.layers.41.block_sparse_moe.experts.21.w1", "model.layers.41.block_sparse_moe.experts.22.w1", "model.layers.41.block_sparse_moe.experts.23.w1", "model.layers.41.block_sparse_moe.experts.24.w1", "model.layers.41.block_sparse_moe.experts.25.w1", "model.layers.41.block_sparse_moe.experts.26.w1", "model.layers.41.block_sparse_moe.experts.27.w1", "model.layers.41.block_sparse_moe.experts.28.w1", "model.layers.41.block_sparse_moe.experts.29.w1", "model.layers.41.block_sparse_moe.experts.30.w1", "model.layers.41.block_sparse_moe.experts.31.w1", "model.layers.41.block_sparse_moe.experts.32.w1", "model.layers.41.block_sparse_moe.experts.33.w1", "model.layers.41.block_sparse_moe.experts.34.w1", "model.layers.41.block_sparse_moe.experts.35.w1", "model.layers.41.block_sparse_moe.experts.36.w1", "model.layers.41.block_sparse_moe.experts.37.w1", "model.layers.41.block_sparse_moe.experts.38.w1", "model.layers.41.block_sparse_moe.experts.39.w1", "model.layers.41.block_sparse_moe.experts.40.w1", "model.layers.41.block_sparse_moe.experts.41.w1", "model.layers.41.block_sparse_moe.experts.42.w1", "model.layers.41.block_sparse_moe.experts.43.w1", "model.layers.41.block_sparse_moe.experts.44.w1", "model.layers.41.block_sparse_moe.experts.45.w1", "model.layers.41.block_sparse_moe.experts.46.w1", "model.layers.41.block_sparse_moe.experts.47.w1", "model.layers.41.block_sparse_moe.experts.48.w1", "model.layers.41.block_sparse_moe.experts.49.w1", "model.layers.41.block_sparse_moe.experts.50.w1", "model.layers.41.block_sparse_moe.experts.51.w1", "model.layers.41.block_sparse_moe.experts.52.w1", "model.layers.41.block_sparse_moe.experts.53.w1", "model.layers.41.block_sparse_moe.experts.54.w1", "model.layers.41.block_sparse_moe.experts.55.w1", "model.layers.41.block_sparse_moe.experts.56.w1", "model.layers.41.block_sparse_moe.experts.57.w1", "model.layers.41.block_sparse_moe.experts.58.w1", "model.layers.41.block_sparse_moe.experts.59.w1", "model.layers.41.block_sparse_moe.experts.60.w1", "model.layers.41.block_sparse_moe.experts.61.w1", "model.layers.41.block_sparse_moe.experts.62.w1", "model.layers.41.block_sparse_moe.experts.63.w1", "model.layers.41.block_sparse_moe.experts.64.w1", "model.layers.41.block_sparse_moe.experts.65.w1", "model.layers.41.block_sparse_moe.experts.66.w1", "model.layers.41.block_sparse_moe.experts.67.w1", "model.layers.41.block_sparse_moe.experts.68.w1", "model.layers.41.block_sparse_moe.experts.69.w1", "model.layers.41.block_sparse_moe.experts.70.w1", "model.layers.41.block_sparse_moe.experts.71.w1", "model.layers.41.block_sparse_moe.experts.72.w1", "model.layers.41.block_sparse_moe.experts.73.w1", "model.layers.41.block_sparse_moe.experts.74.w1", "model.layers.41.block_sparse_moe.experts.75.w1", "model.layers.41.block_sparse_moe.experts.76.w1", "model.layers.41.block_sparse_moe.experts.77.w1", "model.layers.41.block_sparse_moe.experts.78.w1", "model.layers.41.block_sparse_moe.experts.79.w1", "model.layers.41.block_sparse_moe.experts.80.w1", "model.layers.41.block_sparse_moe.experts.81.w1", "model.layers.41.block_sparse_moe.experts.82.w1", "model.layers.41.block_sparse_moe.experts.83.w1", "model.layers.41.block_sparse_moe.experts.84.w1", "model.layers.41.block_sparse_moe.experts.85.w1", "model.layers.41.block_sparse_moe.experts.86.w1", "model.layers.41.block_sparse_moe.experts.87.w1", "model.layers.41.block_sparse_moe.experts.88.w1", "model.layers.41.block_sparse_moe.experts.89.w1", "model.layers.41.block_sparse_moe.experts.90.w1", "model.layers.41.block_sparse_moe.experts.91.w1", "model.layers.41.block_sparse_moe.experts.92.w1", "model.layers.41.block_sparse_moe.experts.93.w1", "model.layers.41.block_sparse_moe.experts.94.w1", "model.layers.41.block_sparse_moe.experts.95.w1", "model.layers.41.block_sparse_moe.experts.96.w1", "model.layers.41.block_sparse_moe.experts.97.w1", "model.layers.41.block_sparse_moe.experts.98.w1", "model.layers.41.block_sparse_moe.experts.99.w1", "model.layers.41.block_sparse_moe.experts.100.w1", "model.layers.41.block_sparse_moe.experts.101.w1", "model.layers.41.block_sparse_moe.experts.102.w1", "model.layers.41.block_sparse_moe.experts.103.w1", "model.layers.41.block_sparse_moe.experts.104.w1", "model.layers.41.block_sparse_moe.experts.105.w1", "model.layers.41.block_sparse_moe.experts.106.w1", "model.layers.41.block_sparse_moe.experts.107.w1", "model.layers.41.block_sparse_moe.experts.108.w1", "model.layers.41.block_sparse_moe.experts.109.w1", "model.layers.41.block_sparse_moe.experts.110.w1", "model.layers.41.block_sparse_moe.experts.111.w1", "model.layers.41.block_sparse_moe.experts.112.w1", "model.layers.41.block_sparse_moe.experts.113.w1", "model.layers.41.block_sparse_moe.experts.114.w1", "model.layers.41.block_sparse_moe.experts.115.w1", "model.layers.41.block_sparse_moe.experts.116.w1", "model.layers.41.block_sparse_moe.experts.117.w1", "model.layers.41.block_sparse_moe.experts.118.w1", "model.layers.41.block_sparse_moe.experts.119.w1", "model.layers.41.block_sparse_moe.experts.120.w1", "model.layers.41.block_sparse_moe.experts.121.w1", "model.layers.41.block_sparse_moe.experts.122.w1", "model.layers.41.block_sparse_moe.experts.123.w1", "model.layers.41.block_sparse_moe.experts.124.w1", "model.layers.41.block_sparse_moe.experts.125.w1", "model.layers.41.block_sparse_moe.experts.126.w1", "model.layers.41.block_sparse_moe.experts.127.w1", "model.layers.41.block_sparse_moe.experts.128.w1", "model.layers.41.block_sparse_moe.experts.129.w1", "model.layers.41.block_sparse_moe.experts.130.w1", "model.layers.41.block_sparse_moe.experts.131.w1", "model.layers.41.block_sparse_moe.experts.132.w1", "model.layers.41.block_sparse_moe.experts.133.w1", "model.layers.41.block_sparse_moe.experts.134.w1", "model.layers.41.block_sparse_moe.experts.135.w1", "model.layers.41.block_sparse_moe.experts.136.w1", "model.layers.41.block_sparse_moe.experts.137.w1", "model.layers.41.block_sparse_moe.experts.138.w1", "model.layers.41.block_sparse_moe.experts.139.w1", "model.layers.41.block_sparse_moe.experts.140.w1", "model.layers.41.block_sparse_moe.experts.141.w1", "model.layers.41.block_sparse_moe.experts.142.w1", "model.layers.41.block_sparse_moe.experts.143.w1", "model.layers.41.block_sparse_moe.experts.144.w1", "model.layers.41.block_sparse_moe.experts.145.w1", "model.layers.41.block_sparse_moe.experts.146.w1", "model.layers.41.block_sparse_moe.experts.147.w1", "model.layers.41.block_sparse_moe.experts.148.w1", "model.layers.41.block_sparse_moe.experts.149.w1", "model.layers.41.block_sparse_moe.experts.150.w1", "model.layers.41.block_sparse_moe.experts.151.w1", "model.layers.41.block_sparse_moe.experts.152.w1", "model.layers.41.block_sparse_moe.experts.153.w1", "model.layers.41.block_sparse_moe.experts.154.w1", "model.layers.41.block_sparse_moe.experts.155.w1", "model.layers.41.block_sparse_moe.experts.156.w1", "model.layers.41.block_sparse_moe.experts.157.w1", "model.layers.41.block_sparse_moe.experts.158.w1", "model.layers.41.block_sparse_moe.experts.159.w1", "model.layers.41.block_sparse_moe.experts.160.w1", "model.layers.41.block_sparse_moe.experts.161.w1", "model.layers.41.block_sparse_moe.experts.162.w1", "model.layers.41.block_sparse_moe.experts.163.w1", "model.layers.41.block_sparse_moe.experts.164.w1", "model.layers.41.block_sparse_moe.experts.165.w1", "model.layers.41.block_sparse_moe.experts.166.w1", "model.layers.41.block_sparse_moe.experts.167.w1", "model.layers.41.block_sparse_moe.experts.168.w1", "model.layers.41.block_sparse_moe.experts.169.w1", "model.layers.41.block_sparse_moe.experts.170.w1", "model.layers.41.block_sparse_moe.experts.171.w1", "model.layers.41.block_sparse_moe.experts.172.w1", "model.layers.41.block_sparse_moe.experts.173.w1", "model.layers.41.block_sparse_moe.experts.174.w1", "model.layers.41.block_sparse_moe.experts.175.w1", "model.layers.41.block_sparse_moe.experts.176.w1", "model.layers.41.block_sparse_moe.experts.177.w1", "model.layers.41.block_sparse_moe.experts.178.w1", "model.layers.41.block_sparse_moe.experts.179.w1", "model.layers.41.block_sparse_moe.experts.180.w1", "model.layers.41.block_sparse_moe.experts.181.w1", "model.layers.41.block_sparse_moe.experts.182.w1", "model.layers.41.block_sparse_moe.experts.183.w1", "model.layers.41.block_sparse_moe.experts.184.w1", "model.layers.41.block_sparse_moe.experts.185.w1", "model.layers.41.block_sparse_moe.experts.186.w1", "model.layers.41.block_sparse_moe.experts.187.w1", "model.layers.41.block_sparse_moe.experts.188.w1", "model.layers.41.block_sparse_moe.experts.189.w1", "model.layers.41.block_sparse_moe.experts.190.w1", "model.layers.41.block_sparse_moe.experts.191.w1", "model.layers.41.block_sparse_moe.experts.192.w1", "model.layers.41.block_sparse_moe.experts.193.w1", "model.layers.41.block_sparse_moe.experts.194.w1", "model.layers.41.block_sparse_moe.experts.195.w1", "model.layers.41.block_sparse_moe.experts.196.w1", "model.layers.41.block_sparse_moe.experts.197.w1", "model.layers.41.block_sparse_moe.experts.198.w1", "model.layers.41.block_sparse_moe.experts.199.w1", "model.layers.41.block_sparse_moe.experts.200.w1", "model.layers.41.block_sparse_moe.experts.201.w1", "model.layers.41.block_sparse_moe.experts.202.w1", "model.layers.41.block_sparse_moe.experts.203.w1", "model.layers.41.block_sparse_moe.experts.204.w1", "model.layers.41.block_sparse_moe.experts.205.w1", "model.layers.41.block_sparse_moe.experts.206.w1", "model.layers.41.block_sparse_moe.experts.207.w1", "model.layers.41.block_sparse_moe.experts.208.w1", "model.layers.41.block_sparse_moe.experts.209.w1", "model.layers.41.block_sparse_moe.experts.210.w1", "model.layers.41.block_sparse_moe.experts.211.w1", "model.layers.41.block_sparse_moe.experts.212.w1", "model.layers.41.block_sparse_moe.experts.213.w1", "model.layers.41.block_sparse_moe.experts.214.w1", "model.layers.41.block_sparse_moe.experts.215.w1", "model.layers.41.block_sparse_moe.experts.216.w1", "model.layers.41.block_sparse_moe.experts.217.w1", "model.layers.41.block_sparse_moe.experts.218.w1", "model.layers.41.block_sparse_moe.experts.219.w1", "model.layers.41.block_sparse_moe.experts.220.w1", "model.layers.41.block_sparse_moe.experts.221.w1", "model.layers.41.block_sparse_moe.experts.222.w1", "model.layers.41.block_sparse_moe.experts.223.w1", "model.layers.41.block_sparse_moe.experts.224.w1", "model.layers.41.block_sparse_moe.experts.225.w1", "model.layers.41.block_sparse_moe.experts.226.w1", "model.layers.41.block_sparse_moe.experts.227.w1", "model.layers.41.block_sparse_moe.experts.228.w1", "model.layers.41.block_sparse_moe.experts.229.w1", "model.layers.41.block_sparse_moe.experts.230.w1", "model.layers.41.block_sparse_moe.experts.231.w1", "model.layers.41.block_sparse_moe.experts.232.w1", "model.layers.41.block_sparse_moe.experts.233.w1", "model.layers.41.block_sparse_moe.experts.234.w1", "model.layers.41.block_sparse_moe.experts.235.w1", "model.layers.41.block_sparse_moe.experts.236.w1", "model.layers.41.block_sparse_moe.experts.237.w1", "model.layers.41.block_sparse_moe.experts.238.w1", "model.layers.41.block_sparse_moe.experts.239.w1", "model.layers.41.block_sparse_moe.experts.240.w1", "model.layers.41.block_sparse_moe.experts.241.w1", "model.layers.41.block_sparse_moe.experts.242.w1", "model.layers.41.block_sparse_moe.experts.243.w1", "model.layers.41.block_sparse_moe.experts.244.w1", "model.layers.41.block_sparse_moe.experts.245.w1", "model.layers.41.block_sparse_moe.experts.246.w1", "model.layers.41.block_sparse_moe.experts.247.w1", "model.layers.41.block_sparse_moe.experts.248.w1", "model.layers.41.block_sparse_moe.experts.249.w1", "model.layers.41.block_sparse_moe.experts.250.w1", "model.layers.41.block_sparse_moe.experts.251.w1", "model.layers.41.block_sparse_moe.experts.252.w1", "model.layers.41.block_sparse_moe.experts.253.w1", "model.layers.41.block_sparse_moe.experts.254.w1", "model.layers.41.block_sparse_moe.experts.255.w1", "model.layers.41.block_sparse_moe.experts.0.w3", "model.layers.41.block_sparse_moe.experts.1.w3", "model.layers.41.block_sparse_moe.experts.2.w3", "model.layers.41.block_sparse_moe.experts.3.w3", "model.layers.41.block_sparse_moe.experts.4.w3", "model.layers.41.block_sparse_moe.experts.5.w3", "model.layers.41.block_sparse_moe.experts.6.w3", "model.layers.41.block_sparse_moe.experts.7.w3", "model.layers.41.block_sparse_moe.experts.8.w3", "model.layers.41.block_sparse_moe.experts.9.w3", "model.layers.41.block_sparse_moe.experts.10.w3", "model.layers.41.block_sparse_moe.experts.11.w3", "model.layers.41.block_sparse_moe.experts.12.w3", "model.layers.41.block_sparse_moe.experts.13.w3", "model.layers.41.block_sparse_moe.experts.14.w3", "model.layers.41.block_sparse_moe.experts.15.w3", "model.layers.41.block_sparse_moe.experts.16.w3", "model.layers.41.block_sparse_moe.experts.17.w3", "model.layers.41.block_sparse_moe.experts.18.w3", "model.layers.41.block_sparse_moe.experts.19.w3", "model.layers.41.block_sparse_moe.experts.20.w3", "model.layers.41.block_sparse_moe.experts.21.w3", "model.layers.41.block_sparse_moe.experts.22.w3", "model.layers.41.block_sparse_moe.experts.23.w3", "model.layers.41.block_sparse_moe.experts.24.w3", "model.layers.41.block_sparse_moe.experts.25.w3", "model.layers.41.block_sparse_moe.experts.26.w3", "model.layers.41.block_sparse_moe.experts.27.w3", "model.layers.41.block_sparse_moe.experts.28.w3", "model.layers.41.block_sparse_moe.experts.29.w3", "model.layers.41.block_sparse_moe.experts.30.w3", "model.layers.41.block_sparse_moe.experts.31.w3", "model.layers.41.block_sparse_moe.experts.32.w3", "model.layers.41.block_sparse_moe.experts.33.w3", "model.layers.41.block_sparse_moe.experts.34.w3", "model.layers.41.block_sparse_moe.experts.35.w3", "model.layers.41.block_sparse_moe.experts.36.w3", "model.layers.41.block_sparse_moe.experts.37.w3", "model.layers.41.block_sparse_moe.experts.38.w3", "model.layers.41.block_sparse_moe.experts.39.w3", "model.layers.41.block_sparse_moe.experts.40.w3", "model.layers.41.block_sparse_moe.experts.41.w3", "model.layers.41.block_sparse_moe.experts.42.w3", "model.layers.41.block_sparse_moe.experts.43.w3", "model.layers.41.block_sparse_moe.experts.44.w3", "model.layers.41.block_sparse_moe.experts.45.w3", "model.layers.41.block_sparse_moe.experts.46.w3", "model.layers.41.block_sparse_moe.experts.47.w3", "model.layers.41.block_sparse_moe.experts.48.w3", "model.layers.41.block_sparse_moe.experts.49.w3", "model.layers.41.block_sparse_moe.experts.50.w3", "model.layers.41.block_sparse_moe.experts.51.w3", "model.layers.41.block_sparse_moe.experts.52.w3", "model.layers.41.block_sparse_moe.experts.53.w3", "model.layers.41.block_sparse_moe.experts.54.w3", "model.layers.41.block_sparse_moe.experts.55.w3", "model.layers.41.block_sparse_moe.experts.56.w3", "model.layers.41.block_sparse_moe.experts.57.w3", "model.layers.41.block_sparse_moe.experts.58.w3", "model.layers.41.block_sparse_moe.experts.59.w3", "model.layers.41.block_sparse_moe.experts.60.w3", "model.layers.41.block_sparse_moe.experts.61.w3", "model.layers.41.block_sparse_moe.experts.62.w3", "model.layers.41.block_sparse_moe.experts.63.w3", "model.layers.41.block_sparse_moe.experts.64.w3", "model.layers.41.block_sparse_moe.experts.65.w3", "model.layers.41.block_sparse_moe.experts.66.w3", "model.layers.41.block_sparse_moe.experts.67.w3", "model.layers.41.block_sparse_moe.experts.68.w3", "model.layers.41.block_sparse_moe.experts.69.w3", "model.layers.41.block_sparse_moe.experts.70.w3", "model.layers.41.block_sparse_moe.experts.71.w3", "model.layers.41.block_sparse_moe.experts.72.w3", "model.layers.41.block_sparse_moe.experts.73.w3", "model.layers.41.block_sparse_moe.experts.74.w3", "model.layers.41.block_sparse_moe.experts.75.w3", "model.layers.41.block_sparse_moe.experts.76.w3", "model.layers.41.block_sparse_moe.experts.77.w3", "model.layers.41.block_sparse_moe.experts.78.w3", "model.layers.41.block_sparse_moe.experts.79.w3", "model.layers.41.block_sparse_moe.experts.80.w3", "model.layers.41.block_sparse_moe.experts.81.w3", "model.layers.41.block_sparse_moe.experts.82.w3", "model.layers.41.block_sparse_moe.experts.83.w3", "model.layers.41.block_sparse_moe.experts.84.w3", "model.layers.41.block_sparse_moe.experts.85.w3", "model.layers.41.block_sparse_moe.experts.86.w3", "model.layers.41.block_sparse_moe.experts.87.w3", "model.layers.41.block_sparse_moe.experts.88.w3", "model.layers.41.block_sparse_moe.experts.89.w3", "model.layers.41.block_sparse_moe.experts.90.w3", "model.layers.41.block_sparse_moe.experts.91.w3", "model.layers.41.block_sparse_moe.experts.92.w3", "model.layers.41.block_sparse_moe.experts.93.w3", "model.layers.41.block_sparse_moe.experts.94.w3", "model.layers.41.block_sparse_moe.experts.95.w3", "model.layers.41.block_sparse_moe.experts.96.w3", "model.layers.41.block_sparse_moe.experts.97.w3", "model.layers.41.block_sparse_moe.experts.98.w3", "model.layers.41.block_sparse_moe.experts.99.w3", "model.layers.41.block_sparse_moe.experts.100.w3", "model.layers.41.block_sparse_moe.experts.101.w3", "model.layers.41.block_sparse_moe.experts.102.w3", "model.layers.41.block_sparse_moe.experts.103.w3", "model.layers.41.block_sparse_moe.experts.104.w3", "model.layers.41.block_sparse_moe.experts.105.w3", "model.layers.41.block_sparse_moe.experts.106.w3", "model.layers.41.block_sparse_moe.experts.107.w3", "model.layers.41.block_sparse_moe.experts.108.w3", "model.layers.41.block_sparse_moe.experts.109.w3", "model.layers.41.block_sparse_moe.experts.110.w3", "model.layers.41.block_sparse_moe.experts.111.w3", "model.layers.41.block_sparse_moe.experts.112.w3", "model.layers.41.block_sparse_moe.experts.113.w3", "model.layers.41.block_sparse_moe.experts.114.w3", "model.layers.41.block_sparse_moe.experts.115.w3", "model.layers.41.block_sparse_moe.experts.116.w3", "model.layers.41.block_sparse_moe.experts.117.w3", "model.layers.41.block_sparse_moe.experts.118.w3", "model.layers.41.block_sparse_moe.experts.119.w3", "model.layers.41.block_sparse_moe.experts.120.w3", "model.layers.41.block_sparse_moe.experts.121.w3", "model.layers.41.block_sparse_moe.experts.122.w3", "model.layers.41.block_sparse_moe.experts.123.w3", "model.layers.41.block_sparse_moe.experts.124.w3", "model.layers.41.block_sparse_moe.experts.125.w3", "model.layers.41.block_sparse_moe.experts.126.w3", "model.layers.41.block_sparse_moe.experts.127.w3", "model.layers.41.block_sparse_moe.experts.128.w3", "model.layers.41.block_sparse_moe.experts.129.w3", "model.layers.41.block_sparse_moe.experts.130.w3", "model.layers.41.block_sparse_moe.experts.131.w3", "model.layers.41.block_sparse_moe.experts.132.w3", "model.layers.41.block_sparse_moe.experts.133.w3", "model.layers.41.block_sparse_moe.experts.134.w3", "model.layers.41.block_sparse_moe.experts.135.w3", "model.layers.41.block_sparse_moe.experts.136.w3", "model.layers.41.block_sparse_moe.experts.137.w3", "model.layers.41.block_sparse_moe.experts.138.w3", "model.layers.41.block_sparse_moe.experts.139.w3", "model.layers.41.block_sparse_moe.experts.140.w3", "model.layers.41.block_sparse_moe.experts.141.w3", "model.layers.41.block_sparse_moe.experts.142.w3", "model.layers.41.block_sparse_moe.experts.143.w3", "model.layers.41.block_sparse_moe.experts.144.w3", "model.layers.41.block_sparse_moe.experts.145.w3", "model.layers.41.block_sparse_moe.experts.146.w3", "model.layers.41.block_sparse_moe.experts.147.w3", "model.layers.41.block_sparse_moe.experts.148.w3", "model.layers.41.block_sparse_moe.experts.149.w3", "model.layers.41.block_sparse_moe.experts.150.w3", "model.layers.41.block_sparse_moe.experts.151.w3", "model.layers.41.block_sparse_moe.experts.152.w3", "model.layers.41.block_sparse_moe.experts.153.w3", "model.layers.41.block_sparse_moe.experts.154.w3", "model.layers.41.block_sparse_moe.experts.155.w3", "model.layers.41.block_sparse_moe.experts.156.w3", "model.layers.41.block_sparse_moe.experts.157.w3", "model.layers.41.block_sparse_moe.experts.158.w3", "model.layers.41.block_sparse_moe.experts.159.w3", "model.layers.41.block_sparse_moe.experts.160.w3", "model.layers.41.block_sparse_moe.experts.161.w3", "model.layers.41.block_sparse_moe.experts.162.w3", "model.layers.41.block_sparse_moe.experts.163.w3", "model.layers.41.block_sparse_moe.experts.164.w3", "model.layers.41.block_sparse_moe.experts.165.w3", "model.layers.41.block_sparse_moe.experts.166.w3", "model.layers.41.block_sparse_moe.experts.167.w3", "model.layers.41.block_sparse_moe.experts.168.w3", "model.layers.41.block_sparse_moe.experts.169.w3", "model.layers.41.block_sparse_moe.experts.170.w3", "model.layers.41.block_sparse_moe.experts.171.w3", "model.layers.41.block_sparse_moe.experts.172.w3", "model.layers.41.block_sparse_moe.experts.173.w3", "model.layers.41.block_sparse_moe.experts.174.w3", "model.layers.41.block_sparse_moe.experts.175.w3", "model.layers.41.block_sparse_moe.experts.176.w3", "model.layers.41.block_sparse_moe.experts.177.w3", "model.layers.41.block_sparse_moe.experts.178.w3", "model.layers.41.block_sparse_moe.experts.179.w3", "model.layers.41.block_sparse_moe.experts.180.w3", "model.layers.41.block_sparse_moe.experts.181.w3", "model.layers.41.block_sparse_moe.experts.182.w3", "model.layers.41.block_sparse_moe.experts.183.w3", "model.layers.41.block_sparse_moe.experts.184.w3", "model.layers.41.block_sparse_moe.experts.185.w3", "model.layers.41.block_sparse_moe.experts.186.w3", "model.layers.41.block_sparse_moe.experts.187.w3", "model.layers.41.block_sparse_moe.experts.188.w3", "model.layers.41.block_sparse_moe.experts.189.w3", "model.layers.41.block_sparse_moe.experts.190.w3", "model.layers.41.block_sparse_moe.experts.191.w3", "model.layers.41.block_sparse_moe.experts.192.w3", "model.layers.41.block_sparse_moe.experts.193.w3", "model.layers.41.block_sparse_moe.experts.194.w3", "model.layers.41.block_sparse_moe.experts.195.w3", "model.layers.41.block_sparse_moe.experts.196.w3", "model.layers.41.block_sparse_moe.experts.197.w3", "model.layers.41.block_sparse_moe.experts.198.w3", "model.layers.41.block_sparse_moe.experts.199.w3", "model.layers.41.block_sparse_moe.experts.200.w3", "model.layers.41.block_sparse_moe.experts.201.w3", "model.layers.41.block_sparse_moe.experts.202.w3", "model.layers.41.block_sparse_moe.experts.203.w3", "model.layers.41.block_sparse_moe.experts.204.w3", "model.layers.41.block_sparse_moe.experts.205.w3", "model.layers.41.block_sparse_moe.experts.206.w3", "model.layers.41.block_sparse_moe.experts.207.w3", "model.layers.41.block_sparse_moe.experts.208.w3", "model.layers.41.block_sparse_moe.experts.209.w3", "model.layers.41.block_sparse_moe.experts.210.w3", "model.layers.41.block_sparse_moe.experts.211.w3", "model.layers.41.block_sparse_moe.experts.212.w3", "model.layers.41.block_sparse_moe.experts.213.w3", "model.layers.41.block_sparse_moe.experts.214.w3", "model.layers.41.block_sparse_moe.experts.215.w3", "model.layers.41.block_sparse_moe.experts.216.w3", "model.layers.41.block_sparse_moe.experts.217.w3", "model.layers.41.block_sparse_moe.experts.218.w3", "model.layers.41.block_sparse_moe.experts.219.w3", "model.layers.41.block_sparse_moe.experts.220.w3", "model.layers.41.block_sparse_moe.experts.221.w3", "model.layers.41.block_sparse_moe.experts.222.w3", "model.layers.41.block_sparse_moe.experts.223.w3", "model.layers.41.block_sparse_moe.experts.224.w3", "model.layers.41.block_sparse_moe.experts.225.w3", "model.layers.41.block_sparse_moe.experts.226.w3", "model.layers.41.block_sparse_moe.experts.227.w3", "model.layers.41.block_sparse_moe.experts.228.w3", "model.layers.41.block_sparse_moe.experts.229.w3", "model.layers.41.block_sparse_moe.experts.230.w3", "model.layers.41.block_sparse_moe.experts.231.w3", "model.layers.41.block_sparse_moe.experts.232.w3", "model.layers.41.block_sparse_moe.experts.233.w3", "model.layers.41.block_sparse_moe.experts.234.w3", "model.layers.41.block_sparse_moe.experts.235.w3", "model.layers.41.block_sparse_moe.experts.236.w3", "model.layers.41.block_sparse_moe.experts.237.w3", "model.layers.41.block_sparse_moe.experts.238.w3", "model.layers.41.block_sparse_moe.experts.239.w3", "model.layers.41.block_sparse_moe.experts.240.w3", "model.layers.41.block_sparse_moe.experts.241.w3", "model.layers.41.block_sparse_moe.experts.242.w3", "model.layers.41.block_sparse_moe.experts.243.w3", "model.layers.41.block_sparse_moe.experts.244.w3", "model.layers.41.block_sparse_moe.experts.245.w3", "model.layers.41.block_sparse_moe.experts.246.w3", "model.layers.41.block_sparse_moe.experts.247.w3", "model.layers.41.block_sparse_moe.experts.248.w3", "model.layers.41.block_sparse_moe.experts.249.w3", "model.layers.41.block_sparse_moe.experts.250.w3", "model.layers.41.block_sparse_moe.experts.251.w3", "model.layers.41.block_sparse_moe.experts.252.w3", "model.layers.41.block_sparse_moe.experts.253.w3", "model.layers.41.block_sparse_moe.experts.254.w3", "model.layers.41.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00012016221880913058, "dbits": 2415919104 } ] }, { "idx": 209, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w2", "model.layers.41.block_sparse_moe.experts.1.w2", "model.layers.41.block_sparse_moe.experts.2.w2", "model.layers.41.block_sparse_moe.experts.3.w2", "model.layers.41.block_sparse_moe.experts.4.w2", "model.layers.41.block_sparse_moe.experts.5.w2", "model.layers.41.block_sparse_moe.experts.6.w2", "model.layers.41.block_sparse_moe.experts.7.w2", "model.layers.41.block_sparse_moe.experts.8.w2", "model.layers.41.block_sparse_moe.experts.9.w2", "model.layers.41.block_sparse_moe.experts.10.w2", "model.layers.41.block_sparse_moe.experts.11.w2", "model.layers.41.block_sparse_moe.experts.12.w2", "model.layers.41.block_sparse_moe.experts.13.w2", "model.layers.41.block_sparse_moe.experts.14.w2", "model.layers.41.block_sparse_moe.experts.15.w2", "model.layers.41.block_sparse_moe.experts.16.w2", "model.layers.41.block_sparse_moe.experts.17.w2", "model.layers.41.block_sparse_moe.experts.18.w2", "model.layers.41.block_sparse_moe.experts.19.w2", "model.layers.41.block_sparse_moe.experts.20.w2", "model.layers.41.block_sparse_moe.experts.21.w2", "model.layers.41.block_sparse_moe.experts.22.w2", "model.layers.41.block_sparse_moe.experts.23.w2", "model.layers.41.block_sparse_moe.experts.24.w2", "model.layers.41.block_sparse_moe.experts.25.w2", "model.layers.41.block_sparse_moe.experts.26.w2", "model.layers.41.block_sparse_moe.experts.27.w2", "model.layers.41.block_sparse_moe.experts.28.w2", "model.layers.41.block_sparse_moe.experts.29.w2", "model.layers.41.block_sparse_moe.experts.30.w2", "model.layers.41.block_sparse_moe.experts.31.w2", "model.layers.41.block_sparse_moe.experts.32.w2", "model.layers.41.block_sparse_moe.experts.33.w2", "model.layers.41.block_sparse_moe.experts.34.w2", "model.layers.41.block_sparse_moe.experts.35.w2", "model.layers.41.block_sparse_moe.experts.36.w2", "model.layers.41.block_sparse_moe.experts.37.w2", "model.layers.41.block_sparse_moe.experts.38.w2", "model.layers.41.block_sparse_moe.experts.39.w2", "model.layers.41.block_sparse_moe.experts.40.w2", "model.layers.41.block_sparse_moe.experts.41.w2", "model.layers.41.block_sparse_moe.experts.42.w2", "model.layers.41.block_sparse_moe.experts.43.w2", "model.layers.41.block_sparse_moe.experts.44.w2", "model.layers.41.block_sparse_moe.experts.45.w2", "model.layers.41.block_sparse_moe.experts.46.w2", "model.layers.41.block_sparse_moe.experts.47.w2", "model.layers.41.block_sparse_moe.experts.48.w2", "model.layers.41.block_sparse_moe.experts.49.w2", "model.layers.41.block_sparse_moe.experts.50.w2", "model.layers.41.block_sparse_moe.experts.51.w2", "model.layers.41.block_sparse_moe.experts.52.w2", "model.layers.41.block_sparse_moe.experts.53.w2", "model.layers.41.block_sparse_moe.experts.54.w2", "model.layers.41.block_sparse_moe.experts.55.w2", "model.layers.41.block_sparse_moe.experts.56.w2", "model.layers.41.block_sparse_moe.experts.57.w2", "model.layers.41.block_sparse_moe.experts.58.w2", "model.layers.41.block_sparse_moe.experts.59.w2", "model.layers.41.block_sparse_moe.experts.60.w2", "model.layers.41.block_sparse_moe.experts.61.w2", "model.layers.41.block_sparse_moe.experts.62.w2", "model.layers.41.block_sparse_moe.experts.63.w2", "model.layers.41.block_sparse_moe.experts.64.w2", "model.layers.41.block_sparse_moe.experts.65.w2", "model.layers.41.block_sparse_moe.experts.66.w2", "model.layers.41.block_sparse_moe.experts.67.w2", "model.layers.41.block_sparse_moe.experts.68.w2", "model.layers.41.block_sparse_moe.experts.69.w2", "model.layers.41.block_sparse_moe.experts.70.w2", "model.layers.41.block_sparse_moe.experts.71.w2", "model.layers.41.block_sparse_moe.experts.72.w2", "model.layers.41.block_sparse_moe.experts.73.w2", "model.layers.41.block_sparse_moe.experts.74.w2", "model.layers.41.block_sparse_moe.experts.75.w2", "model.layers.41.block_sparse_moe.experts.76.w2", "model.layers.41.block_sparse_moe.experts.77.w2", "model.layers.41.block_sparse_moe.experts.78.w2", "model.layers.41.block_sparse_moe.experts.79.w2", "model.layers.41.block_sparse_moe.experts.80.w2", "model.layers.41.block_sparse_moe.experts.81.w2", "model.layers.41.block_sparse_moe.experts.82.w2", "model.layers.41.block_sparse_moe.experts.83.w2", "model.layers.41.block_sparse_moe.experts.84.w2", "model.layers.41.block_sparse_moe.experts.85.w2", "model.layers.41.block_sparse_moe.experts.86.w2", "model.layers.41.block_sparse_moe.experts.87.w2", "model.layers.41.block_sparse_moe.experts.88.w2", "model.layers.41.block_sparse_moe.experts.89.w2", "model.layers.41.block_sparse_moe.experts.90.w2", "model.layers.41.block_sparse_moe.experts.91.w2", "model.layers.41.block_sparse_moe.experts.92.w2", "model.layers.41.block_sparse_moe.experts.93.w2", "model.layers.41.block_sparse_moe.experts.94.w2", "model.layers.41.block_sparse_moe.experts.95.w2", "model.layers.41.block_sparse_moe.experts.96.w2", "model.layers.41.block_sparse_moe.experts.97.w2", "model.layers.41.block_sparse_moe.experts.98.w2", "model.layers.41.block_sparse_moe.experts.99.w2", "model.layers.41.block_sparse_moe.experts.100.w2", "model.layers.41.block_sparse_moe.experts.101.w2", "model.layers.41.block_sparse_moe.experts.102.w2", "model.layers.41.block_sparse_moe.experts.103.w2", "model.layers.41.block_sparse_moe.experts.104.w2", "model.layers.41.block_sparse_moe.experts.105.w2", "model.layers.41.block_sparse_moe.experts.106.w2", "model.layers.41.block_sparse_moe.experts.107.w2", "model.layers.41.block_sparse_moe.experts.108.w2", "model.layers.41.block_sparse_moe.experts.109.w2", "model.layers.41.block_sparse_moe.experts.110.w2", "model.layers.41.block_sparse_moe.experts.111.w2", "model.layers.41.block_sparse_moe.experts.112.w2", "model.layers.41.block_sparse_moe.experts.113.w2", "model.layers.41.block_sparse_moe.experts.114.w2", "model.layers.41.block_sparse_moe.experts.115.w2", "model.layers.41.block_sparse_moe.experts.116.w2", "model.layers.41.block_sparse_moe.experts.117.w2", "model.layers.41.block_sparse_moe.experts.118.w2", "model.layers.41.block_sparse_moe.experts.119.w2", "model.layers.41.block_sparse_moe.experts.120.w2", "model.layers.41.block_sparse_moe.experts.121.w2", "model.layers.41.block_sparse_moe.experts.122.w2", "model.layers.41.block_sparse_moe.experts.123.w2", "model.layers.41.block_sparse_moe.experts.124.w2", "model.layers.41.block_sparse_moe.experts.125.w2", "model.layers.41.block_sparse_moe.experts.126.w2", "model.layers.41.block_sparse_moe.experts.127.w2", "model.layers.41.block_sparse_moe.experts.128.w2", "model.layers.41.block_sparse_moe.experts.129.w2", "model.layers.41.block_sparse_moe.experts.130.w2", "model.layers.41.block_sparse_moe.experts.131.w2", "model.layers.41.block_sparse_moe.experts.132.w2", "model.layers.41.block_sparse_moe.experts.133.w2", "model.layers.41.block_sparse_moe.experts.134.w2", "model.layers.41.block_sparse_moe.experts.135.w2", "model.layers.41.block_sparse_moe.experts.136.w2", "model.layers.41.block_sparse_moe.experts.137.w2", "model.layers.41.block_sparse_moe.experts.138.w2", "model.layers.41.block_sparse_moe.experts.139.w2", "model.layers.41.block_sparse_moe.experts.140.w2", "model.layers.41.block_sparse_moe.experts.141.w2", "model.layers.41.block_sparse_moe.experts.142.w2", "model.layers.41.block_sparse_moe.experts.143.w2", "model.layers.41.block_sparse_moe.experts.144.w2", "model.layers.41.block_sparse_moe.experts.145.w2", "model.layers.41.block_sparse_moe.experts.146.w2", "model.layers.41.block_sparse_moe.experts.147.w2", "model.layers.41.block_sparse_moe.experts.148.w2", "model.layers.41.block_sparse_moe.experts.149.w2", "model.layers.41.block_sparse_moe.experts.150.w2", "model.layers.41.block_sparse_moe.experts.151.w2", "model.layers.41.block_sparse_moe.experts.152.w2", "model.layers.41.block_sparse_moe.experts.153.w2", "model.layers.41.block_sparse_moe.experts.154.w2", "model.layers.41.block_sparse_moe.experts.155.w2", "model.layers.41.block_sparse_moe.experts.156.w2", "model.layers.41.block_sparse_moe.experts.157.w2", "model.layers.41.block_sparse_moe.experts.158.w2", "model.layers.41.block_sparse_moe.experts.159.w2", "model.layers.41.block_sparse_moe.experts.160.w2", "model.layers.41.block_sparse_moe.experts.161.w2", "model.layers.41.block_sparse_moe.experts.162.w2", "model.layers.41.block_sparse_moe.experts.163.w2", "model.layers.41.block_sparse_moe.experts.164.w2", "model.layers.41.block_sparse_moe.experts.165.w2", "model.layers.41.block_sparse_moe.experts.166.w2", "model.layers.41.block_sparse_moe.experts.167.w2", "model.layers.41.block_sparse_moe.experts.168.w2", "model.layers.41.block_sparse_moe.experts.169.w2", "model.layers.41.block_sparse_moe.experts.170.w2", "model.layers.41.block_sparse_moe.experts.171.w2", "model.layers.41.block_sparse_moe.experts.172.w2", "model.layers.41.block_sparse_moe.experts.173.w2", "model.layers.41.block_sparse_moe.experts.174.w2", "model.layers.41.block_sparse_moe.experts.175.w2", "model.layers.41.block_sparse_moe.experts.176.w2", "model.layers.41.block_sparse_moe.experts.177.w2", "model.layers.41.block_sparse_moe.experts.178.w2", "model.layers.41.block_sparse_moe.experts.179.w2", "model.layers.41.block_sparse_moe.experts.180.w2", "model.layers.41.block_sparse_moe.experts.181.w2", "model.layers.41.block_sparse_moe.experts.182.w2", "model.layers.41.block_sparse_moe.experts.183.w2", "model.layers.41.block_sparse_moe.experts.184.w2", "model.layers.41.block_sparse_moe.experts.185.w2", "model.layers.41.block_sparse_moe.experts.186.w2", "model.layers.41.block_sparse_moe.experts.187.w2", "model.layers.41.block_sparse_moe.experts.188.w2", "model.layers.41.block_sparse_moe.experts.189.w2", "model.layers.41.block_sparse_moe.experts.190.w2", "model.layers.41.block_sparse_moe.experts.191.w2", "model.layers.41.block_sparse_moe.experts.192.w2", "model.layers.41.block_sparse_moe.experts.193.w2", "model.layers.41.block_sparse_moe.experts.194.w2", "model.layers.41.block_sparse_moe.experts.195.w2", "model.layers.41.block_sparse_moe.experts.196.w2", "model.layers.41.block_sparse_moe.experts.197.w2", "model.layers.41.block_sparse_moe.experts.198.w2", "model.layers.41.block_sparse_moe.experts.199.w2", "model.layers.41.block_sparse_moe.experts.200.w2", "model.layers.41.block_sparse_moe.experts.201.w2", "model.layers.41.block_sparse_moe.experts.202.w2", "model.layers.41.block_sparse_moe.experts.203.w2", "model.layers.41.block_sparse_moe.experts.204.w2", "model.layers.41.block_sparse_moe.experts.205.w2", "model.layers.41.block_sparse_moe.experts.206.w2", "model.layers.41.block_sparse_moe.experts.207.w2", "model.layers.41.block_sparse_moe.experts.208.w2", "model.layers.41.block_sparse_moe.experts.209.w2", "model.layers.41.block_sparse_moe.experts.210.w2", "model.layers.41.block_sparse_moe.experts.211.w2", "model.layers.41.block_sparse_moe.experts.212.w2", "model.layers.41.block_sparse_moe.experts.213.w2", "model.layers.41.block_sparse_moe.experts.214.w2", "model.layers.41.block_sparse_moe.experts.215.w2", "model.layers.41.block_sparse_moe.experts.216.w2", "model.layers.41.block_sparse_moe.experts.217.w2", "model.layers.41.block_sparse_moe.experts.218.w2", "model.layers.41.block_sparse_moe.experts.219.w2", "model.layers.41.block_sparse_moe.experts.220.w2", "model.layers.41.block_sparse_moe.experts.221.w2", "model.layers.41.block_sparse_moe.experts.222.w2", "model.layers.41.block_sparse_moe.experts.223.w2", "model.layers.41.block_sparse_moe.experts.224.w2", "model.layers.41.block_sparse_moe.experts.225.w2", "model.layers.41.block_sparse_moe.experts.226.w2", "model.layers.41.block_sparse_moe.experts.227.w2", "model.layers.41.block_sparse_moe.experts.228.w2", "model.layers.41.block_sparse_moe.experts.229.w2", "model.layers.41.block_sparse_moe.experts.230.w2", "model.layers.41.block_sparse_moe.experts.231.w2", "model.layers.41.block_sparse_moe.experts.232.w2", "model.layers.41.block_sparse_moe.experts.233.w2", "model.layers.41.block_sparse_moe.experts.234.w2", "model.layers.41.block_sparse_moe.experts.235.w2", "model.layers.41.block_sparse_moe.experts.236.w2", "model.layers.41.block_sparse_moe.experts.237.w2", "model.layers.41.block_sparse_moe.experts.238.w2", "model.layers.41.block_sparse_moe.experts.239.w2", "model.layers.41.block_sparse_moe.experts.240.w2", "model.layers.41.block_sparse_moe.experts.241.w2", "model.layers.41.block_sparse_moe.experts.242.w2", "model.layers.41.block_sparse_moe.experts.243.w2", "model.layers.41.block_sparse_moe.experts.244.w2", "model.layers.41.block_sparse_moe.experts.245.w2", "model.layers.41.block_sparse_moe.experts.246.w2", "model.layers.41.block_sparse_moe.experts.247.w2", "model.layers.41.block_sparse_moe.experts.248.w2", "model.layers.41.block_sparse_moe.experts.249.w2", "model.layers.41.block_sparse_moe.experts.250.w2", "model.layers.41.block_sparse_moe.experts.251.w2", "model.layers.41.block_sparse_moe.experts.252.w2", "model.layers.41.block_sparse_moe.experts.253.w2", "model.layers.41.block_sparse_moe.experts.254.w2", "model.layers.41.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -8.479002863169549e-05, "dbits": 1207959552 } ] }, { "idx": 210, "layers": [ "model.layers.42.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0002822291105985669, "dbits": 18874368 } ] }, { "idx": 211, "layers": [ "model.layers.42.self_attn.k_proj", "model.layers.42.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0026038330048322594, "dbits": 6291456 } ] }, { "idx": 212, "layers": [ "model.layers.42.self_attn.o_proj" ], "candidates": [ { "dkld": -0.001142771169543269, "dbits": 18874368 } ] }, { "idx": 213, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w1", "model.layers.42.block_sparse_moe.experts.1.w1", "model.layers.42.block_sparse_moe.experts.2.w1", "model.layers.42.block_sparse_moe.experts.3.w1", "model.layers.42.block_sparse_moe.experts.4.w1", "model.layers.42.block_sparse_moe.experts.5.w1", "model.layers.42.block_sparse_moe.experts.6.w1", "model.layers.42.block_sparse_moe.experts.7.w1", "model.layers.42.block_sparse_moe.experts.8.w1", "model.layers.42.block_sparse_moe.experts.9.w1", "model.layers.42.block_sparse_moe.experts.10.w1", "model.layers.42.block_sparse_moe.experts.11.w1", "model.layers.42.block_sparse_moe.experts.12.w1", "model.layers.42.block_sparse_moe.experts.13.w1", "model.layers.42.block_sparse_moe.experts.14.w1", "model.layers.42.block_sparse_moe.experts.15.w1", "model.layers.42.block_sparse_moe.experts.16.w1", "model.layers.42.block_sparse_moe.experts.17.w1", "model.layers.42.block_sparse_moe.experts.18.w1", "model.layers.42.block_sparse_moe.experts.19.w1", "model.layers.42.block_sparse_moe.experts.20.w1", "model.layers.42.block_sparse_moe.experts.21.w1", "model.layers.42.block_sparse_moe.experts.22.w1", "model.layers.42.block_sparse_moe.experts.23.w1", "model.layers.42.block_sparse_moe.experts.24.w1", "model.layers.42.block_sparse_moe.experts.25.w1", "model.layers.42.block_sparse_moe.experts.26.w1", "model.layers.42.block_sparse_moe.experts.27.w1", "model.layers.42.block_sparse_moe.experts.28.w1", "model.layers.42.block_sparse_moe.experts.29.w1", "model.layers.42.block_sparse_moe.experts.30.w1", "model.layers.42.block_sparse_moe.experts.31.w1", "model.layers.42.block_sparse_moe.experts.32.w1", "model.layers.42.block_sparse_moe.experts.33.w1", "model.layers.42.block_sparse_moe.experts.34.w1", "model.layers.42.block_sparse_moe.experts.35.w1", "model.layers.42.block_sparse_moe.experts.36.w1", "model.layers.42.block_sparse_moe.experts.37.w1", "model.layers.42.block_sparse_moe.experts.38.w1", "model.layers.42.block_sparse_moe.experts.39.w1", "model.layers.42.block_sparse_moe.experts.40.w1", "model.layers.42.block_sparse_moe.experts.41.w1", "model.layers.42.block_sparse_moe.experts.42.w1", "model.layers.42.block_sparse_moe.experts.43.w1", "model.layers.42.block_sparse_moe.experts.44.w1", "model.layers.42.block_sparse_moe.experts.45.w1", "model.layers.42.block_sparse_moe.experts.46.w1", "model.layers.42.block_sparse_moe.experts.47.w1", "model.layers.42.block_sparse_moe.experts.48.w1", "model.layers.42.block_sparse_moe.experts.49.w1", "model.layers.42.block_sparse_moe.experts.50.w1", "model.layers.42.block_sparse_moe.experts.51.w1", "model.layers.42.block_sparse_moe.experts.52.w1", "model.layers.42.block_sparse_moe.experts.53.w1", "model.layers.42.block_sparse_moe.experts.54.w1", "model.layers.42.block_sparse_moe.experts.55.w1", "model.layers.42.block_sparse_moe.experts.56.w1", "model.layers.42.block_sparse_moe.experts.57.w1", "model.layers.42.block_sparse_moe.experts.58.w1", "model.layers.42.block_sparse_moe.experts.59.w1", "model.layers.42.block_sparse_moe.experts.60.w1", "model.layers.42.block_sparse_moe.experts.61.w1", "model.layers.42.block_sparse_moe.experts.62.w1", "model.layers.42.block_sparse_moe.experts.63.w1", "model.layers.42.block_sparse_moe.experts.64.w1", "model.layers.42.block_sparse_moe.experts.65.w1", "model.layers.42.block_sparse_moe.experts.66.w1", "model.layers.42.block_sparse_moe.experts.67.w1", "model.layers.42.block_sparse_moe.experts.68.w1", "model.layers.42.block_sparse_moe.experts.69.w1", "model.layers.42.block_sparse_moe.experts.70.w1", "model.layers.42.block_sparse_moe.experts.71.w1", "model.layers.42.block_sparse_moe.experts.72.w1", "model.layers.42.block_sparse_moe.experts.73.w1", "model.layers.42.block_sparse_moe.experts.74.w1", "model.layers.42.block_sparse_moe.experts.75.w1", "model.layers.42.block_sparse_moe.experts.76.w1", "model.layers.42.block_sparse_moe.experts.77.w1", "model.layers.42.block_sparse_moe.experts.78.w1", "model.layers.42.block_sparse_moe.experts.79.w1", "model.layers.42.block_sparse_moe.experts.80.w1", "model.layers.42.block_sparse_moe.experts.81.w1", "model.layers.42.block_sparse_moe.experts.82.w1", "model.layers.42.block_sparse_moe.experts.83.w1", "model.layers.42.block_sparse_moe.experts.84.w1", "model.layers.42.block_sparse_moe.experts.85.w1", "model.layers.42.block_sparse_moe.experts.86.w1", "model.layers.42.block_sparse_moe.experts.87.w1", "model.layers.42.block_sparse_moe.experts.88.w1", "model.layers.42.block_sparse_moe.experts.89.w1", "model.layers.42.block_sparse_moe.experts.90.w1", "model.layers.42.block_sparse_moe.experts.91.w1", "model.layers.42.block_sparse_moe.experts.92.w1", "model.layers.42.block_sparse_moe.experts.93.w1", "model.layers.42.block_sparse_moe.experts.94.w1", "model.layers.42.block_sparse_moe.experts.95.w1", "model.layers.42.block_sparse_moe.experts.96.w1", "model.layers.42.block_sparse_moe.experts.97.w1", "model.layers.42.block_sparse_moe.experts.98.w1", "model.layers.42.block_sparse_moe.experts.99.w1", "model.layers.42.block_sparse_moe.experts.100.w1", "model.layers.42.block_sparse_moe.experts.101.w1", "model.layers.42.block_sparse_moe.experts.102.w1", "model.layers.42.block_sparse_moe.experts.103.w1", "model.layers.42.block_sparse_moe.experts.104.w1", "model.layers.42.block_sparse_moe.experts.105.w1", "model.layers.42.block_sparse_moe.experts.106.w1", "model.layers.42.block_sparse_moe.experts.107.w1", "model.layers.42.block_sparse_moe.experts.108.w1", "model.layers.42.block_sparse_moe.experts.109.w1", "model.layers.42.block_sparse_moe.experts.110.w1", "model.layers.42.block_sparse_moe.experts.111.w1", "model.layers.42.block_sparse_moe.experts.112.w1", "model.layers.42.block_sparse_moe.experts.113.w1", "model.layers.42.block_sparse_moe.experts.114.w1", "model.layers.42.block_sparse_moe.experts.115.w1", "model.layers.42.block_sparse_moe.experts.116.w1", "model.layers.42.block_sparse_moe.experts.117.w1", "model.layers.42.block_sparse_moe.experts.118.w1", "model.layers.42.block_sparse_moe.experts.119.w1", "model.layers.42.block_sparse_moe.experts.120.w1", "model.layers.42.block_sparse_moe.experts.121.w1", "model.layers.42.block_sparse_moe.experts.122.w1", "model.layers.42.block_sparse_moe.experts.123.w1", "model.layers.42.block_sparse_moe.experts.124.w1", "model.layers.42.block_sparse_moe.experts.125.w1", "model.layers.42.block_sparse_moe.experts.126.w1", "model.layers.42.block_sparse_moe.experts.127.w1", "model.layers.42.block_sparse_moe.experts.128.w1", "model.layers.42.block_sparse_moe.experts.129.w1", "model.layers.42.block_sparse_moe.experts.130.w1", "model.layers.42.block_sparse_moe.experts.131.w1", "model.layers.42.block_sparse_moe.experts.132.w1", "model.layers.42.block_sparse_moe.experts.133.w1", "model.layers.42.block_sparse_moe.experts.134.w1", "model.layers.42.block_sparse_moe.experts.135.w1", "model.layers.42.block_sparse_moe.experts.136.w1", "model.layers.42.block_sparse_moe.experts.137.w1", "model.layers.42.block_sparse_moe.experts.138.w1", "model.layers.42.block_sparse_moe.experts.139.w1", "model.layers.42.block_sparse_moe.experts.140.w1", "model.layers.42.block_sparse_moe.experts.141.w1", "model.layers.42.block_sparse_moe.experts.142.w1", "model.layers.42.block_sparse_moe.experts.143.w1", "model.layers.42.block_sparse_moe.experts.144.w1", "model.layers.42.block_sparse_moe.experts.145.w1", "model.layers.42.block_sparse_moe.experts.146.w1", "model.layers.42.block_sparse_moe.experts.147.w1", "model.layers.42.block_sparse_moe.experts.148.w1", "model.layers.42.block_sparse_moe.experts.149.w1", "model.layers.42.block_sparse_moe.experts.150.w1", "model.layers.42.block_sparse_moe.experts.151.w1", "model.layers.42.block_sparse_moe.experts.152.w1", "model.layers.42.block_sparse_moe.experts.153.w1", "model.layers.42.block_sparse_moe.experts.154.w1", "model.layers.42.block_sparse_moe.experts.155.w1", "model.layers.42.block_sparse_moe.experts.156.w1", "model.layers.42.block_sparse_moe.experts.157.w1", "model.layers.42.block_sparse_moe.experts.158.w1", "model.layers.42.block_sparse_moe.experts.159.w1", "model.layers.42.block_sparse_moe.experts.160.w1", "model.layers.42.block_sparse_moe.experts.161.w1", "model.layers.42.block_sparse_moe.experts.162.w1", "model.layers.42.block_sparse_moe.experts.163.w1", "model.layers.42.block_sparse_moe.experts.164.w1", "model.layers.42.block_sparse_moe.experts.165.w1", "model.layers.42.block_sparse_moe.experts.166.w1", "model.layers.42.block_sparse_moe.experts.167.w1", "model.layers.42.block_sparse_moe.experts.168.w1", "model.layers.42.block_sparse_moe.experts.169.w1", "model.layers.42.block_sparse_moe.experts.170.w1", "model.layers.42.block_sparse_moe.experts.171.w1", "model.layers.42.block_sparse_moe.experts.172.w1", "model.layers.42.block_sparse_moe.experts.173.w1", "model.layers.42.block_sparse_moe.experts.174.w1", "model.layers.42.block_sparse_moe.experts.175.w1", "model.layers.42.block_sparse_moe.experts.176.w1", "model.layers.42.block_sparse_moe.experts.177.w1", "model.layers.42.block_sparse_moe.experts.178.w1", "model.layers.42.block_sparse_moe.experts.179.w1", "model.layers.42.block_sparse_moe.experts.180.w1", "model.layers.42.block_sparse_moe.experts.181.w1", "model.layers.42.block_sparse_moe.experts.182.w1", "model.layers.42.block_sparse_moe.experts.183.w1", "model.layers.42.block_sparse_moe.experts.184.w1", "model.layers.42.block_sparse_moe.experts.185.w1", "model.layers.42.block_sparse_moe.experts.186.w1", "model.layers.42.block_sparse_moe.experts.187.w1", "model.layers.42.block_sparse_moe.experts.188.w1", "model.layers.42.block_sparse_moe.experts.189.w1", "model.layers.42.block_sparse_moe.experts.190.w1", "model.layers.42.block_sparse_moe.experts.191.w1", "model.layers.42.block_sparse_moe.experts.192.w1", "model.layers.42.block_sparse_moe.experts.193.w1", "model.layers.42.block_sparse_moe.experts.194.w1", "model.layers.42.block_sparse_moe.experts.195.w1", "model.layers.42.block_sparse_moe.experts.196.w1", "model.layers.42.block_sparse_moe.experts.197.w1", "model.layers.42.block_sparse_moe.experts.198.w1", "model.layers.42.block_sparse_moe.experts.199.w1", "model.layers.42.block_sparse_moe.experts.200.w1", "model.layers.42.block_sparse_moe.experts.201.w1", "model.layers.42.block_sparse_moe.experts.202.w1", "model.layers.42.block_sparse_moe.experts.203.w1", "model.layers.42.block_sparse_moe.experts.204.w1", "model.layers.42.block_sparse_moe.experts.205.w1", "model.layers.42.block_sparse_moe.experts.206.w1", "model.layers.42.block_sparse_moe.experts.207.w1", "model.layers.42.block_sparse_moe.experts.208.w1", "model.layers.42.block_sparse_moe.experts.209.w1", "model.layers.42.block_sparse_moe.experts.210.w1", "model.layers.42.block_sparse_moe.experts.211.w1", "model.layers.42.block_sparse_moe.experts.212.w1", "model.layers.42.block_sparse_moe.experts.213.w1", "model.layers.42.block_sparse_moe.experts.214.w1", "model.layers.42.block_sparse_moe.experts.215.w1", "model.layers.42.block_sparse_moe.experts.216.w1", "model.layers.42.block_sparse_moe.experts.217.w1", "model.layers.42.block_sparse_moe.experts.218.w1", "model.layers.42.block_sparse_moe.experts.219.w1", "model.layers.42.block_sparse_moe.experts.220.w1", "model.layers.42.block_sparse_moe.experts.221.w1", "model.layers.42.block_sparse_moe.experts.222.w1", "model.layers.42.block_sparse_moe.experts.223.w1", "model.layers.42.block_sparse_moe.experts.224.w1", "model.layers.42.block_sparse_moe.experts.225.w1", "model.layers.42.block_sparse_moe.experts.226.w1", "model.layers.42.block_sparse_moe.experts.227.w1", "model.layers.42.block_sparse_moe.experts.228.w1", "model.layers.42.block_sparse_moe.experts.229.w1", "model.layers.42.block_sparse_moe.experts.230.w1", "model.layers.42.block_sparse_moe.experts.231.w1", "model.layers.42.block_sparse_moe.experts.232.w1", "model.layers.42.block_sparse_moe.experts.233.w1", "model.layers.42.block_sparse_moe.experts.234.w1", "model.layers.42.block_sparse_moe.experts.235.w1", "model.layers.42.block_sparse_moe.experts.236.w1", "model.layers.42.block_sparse_moe.experts.237.w1", "model.layers.42.block_sparse_moe.experts.238.w1", "model.layers.42.block_sparse_moe.experts.239.w1", "model.layers.42.block_sparse_moe.experts.240.w1", "model.layers.42.block_sparse_moe.experts.241.w1", "model.layers.42.block_sparse_moe.experts.242.w1", "model.layers.42.block_sparse_moe.experts.243.w1", "model.layers.42.block_sparse_moe.experts.244.w1", "model.layers.42.block_sparse_moe.experts.245.w1", "model.layers.42.block_sparse_moe.experts.246.w1", "model.layers.42.block_sparse_moe.experts.247.w1", "model.layers.42.block_sparse_moe.experts.248.w1", "model.layers.42.block_sparse_moe.experts.249.w1", "model.layers.42.block_sparse_moe.experts.250.w1", "model.layers.42.block_sparse_moe.experts.251.w1", "model.layers.42.block_sparse_moe.experts.252.w1", "model.layers.42.block_sparse_moe.experts.253.w1", "model.layers.42.block_sparse_moe.experts.254.w1", "model.layers.42.block_sparse_moe.experts.255.w1", "model.layers.42.block_sparse_moe.experts.0.w3", "model.layers.42.block_sparse_moe.experts.1.w3", "model.layers.42.block_sparse_moe.experts.2.w3", "model.layers.42.block_sparse_moe.experts.3.w3", "model.layers.42.block_sparse_moe.experts.4.w3", "model.layers.42.block_sparse_moe.experts.5.w3", "model.layers.42.block_sparse_moe.experts.6.w3", "model.layers.42.block_sparse_moe.experts.7.w3", "model.layers.42.block_sparse_moe.experts.8.w3", "model.layers.42.block_sparse_moe.experts.9.w3", "model.layers.42.block_sparse_moe.experts.10.w3", "model.layers.42.block_sparse_moe.experts.11.w3", "model.layers.42.block_sparse_moe.experts.12.w3", "model.layers.42.block_sparse_moe.experts.13.w3", "model.layers.42.block_sparse_moe.experts.14.w3", "model.layers.42.block_sparse_moe.experts.15.w3", "model.layers.42.block_sparse_moe.experts.16.w3", "model.layers.42.block_sparse_moe.experts.17.w3", "model.layers.42.block_sparse_moe.experts.18.w3", "model.layers.42.block_sparse_moe.experts.19.w3", "model.layers.42.block_sparse_moe.experts.20.w3", "model.layers.42.block_sparse_moe.experts.21.w3", "model.layers.42.block_sparse_moe.experts.22.w3", "model.layers.42.block_sparse_moe.experts.23.w3", "model.layers.42.block_sparse_moe.experts.24.w3", "model.layers.42.block_sparse_moe.experts.25.w3", "model.layers.42.block_sparse_moe.experts.26.w3", "model.layers.42.block_sparse_moe.experts.27.w3", "model.layers.42.block_sparse_moe.experts.28.w3", "model.layers.42.block_sparse_moe.experts.29.w3", "model.layers.42.block_sparse_moe.experts.30.w3", "model.layers.42.block_sparse_moe.experts.31.w3", "model.layers.42.block_sparse_moe.experts.32.w3", "model.layers.42.block_sparse_moe.experts.33.w3", "model.layers.42.block_sparse_moe.experts.34.w3", "model.layers.42.block_sparse_moe.experts.35.w3", "model.layers.42.block_sparse_moe.experts.36.w3", "model.layers.42.block_sparse_moe.experts.37.w3", "model.layers.42.block_sparse_moe.experts.38.w3", "model.layers.42.block_sparse_moe.experts.39.w3", "model.layers.42.block_sparse_moe.experts.40.w3", "model.layers.42.block_sparse_moe.experts.41.w3", "model.layers.42.block_sparse_moe.experts.42.w3", "model.layers.42.block_sparse_moe.experts.43.w3", "model.layers.42.block_sparse_moe.experts.44.w3", "model.layers.42.block_sparse_moe.experts.45.w3", "model.layers.42.block_sparse_moe.experts.46.w3", "model.layers.42.block_sparse_moe.experts.47.w3", "model.layers.42.block_sparse_moe.experts.48.w3", "model.layers.42.block_sparse_moe.experts.49.w3", "model.layers.42.block_sparse_moe.experts.50.w3", "model.layers.42.block_sparse_moe.experts.51.w3", "model.layers.42.block_sparse_moe.experts.52.w3", "model.layers.42.block_sparse_moe.experts.53.w3", "model.layers.42.block_sparse_moe.experts.54.w3", "model.layers.42.block_sparse_moe.experts.55.w3", "model.layers.42.block_sparse_moe.experts.56.w3", "model.layers.42.block_sparse_moe.experts.57.w3", "model.layers.42.block_sparse_moe.experts.58.w3", "model.layers.42.block_sparse_moe.experts.59.w3", "model.layers.42.block_sparse_moe.experts.60.w3", "model.layers.42.block_sparse_moe.experts.61.w3", "model.layers.42.block_sparse_moe.experts.62.w3", "model.layers.42.block_sparse_moe.experts.63.w3", "model.layers.42.block_sparse_moe.experts.64.w3", "model.layers.42.block_sparse_moe.experts.65.w3", "model.layers.42.block_sparse_moe.experts.66.w3", "model.layers.42.block_sparse_moe.experts.67.w3", "model.layers.42.block_sparse_moe.experts.68.w3", "model.layers.42.block_sparse_moe.experts.69.w3", "model.layers.42.block_sparse_moe.experts.70.w3", "model.layers.42.block_sparse_moe.experts.71.w3", "model.layers.42.block_sparse_moe.experts.72.w3", "model.layers.42.block_sparse_moe.experts.73.w3", "model.layers.42.block_sparse_moe.experts.74.w3", "model.layers.42.block_sparse_moe.experts.75.w3", "model.layers.42.block_sparse_moe.experts.76.w3", "model.layers.42.block_sparse_moe.experts.77.w3", "model.layers.42.block_sparse_moe.experts.78.w3", "model.layers.42.block_sparse_moe.experts.79.w3", "model.layers.42.block_sparse_moe.experts.80.w3", "model.layers.42.block_sparse_moe.experts.81.w3", "model.layers.42.block_sparse_moe.experts.82.w3", "model.layers.42.block_sparse_moe.experts.83.w3", "model.layers.42.block_sparse_moe.experts.84.w3", "model.layers.42.block_sparse_moe.experts.85.w3", "model.layers.42.block_sparse_moe.experts.86.w3", "model.layers.42.block_sparse_moe.experts.87.w3", "model.layers.42.block_sparse_moe.experts.88.w3", "model.layers.42.block_sparse_moe.experts.89.w3", "model.layers.42.block_sparse_moe.experts.90.w3", "model.layers.42.block_sparse_moe.experts.91.w3", "model.layers.42.block_sparse_moe.experts.92.w3", "model.layers.42.block_sparse_moe.experts.93.w3", "model.layers.42.block_sparse_moe.experts.94.w3", "model.layers.42.block_sparse_moe.experts.95.w3", "model.layers.42.block_sparse_moe.experts.96.w3", "model.layers.42.block_sparse_moe.experts.97.w3", "model.layers.42.block_sparse_moe.experts.98.w3", "model.layers.42.block_sparse_moe.experts.99.w3", "model.layers.42.block_sparse_moe.experts.100.w3", "model.layers.42.block_sparse_moe.experts.101.w3", "model.layers.42.block_sparse_moe.experts.102.w3", "model.layers.42.block_sparse_moe.experts.103.w3", "model.layers.42.block_sparse_moe.experts.104.w3", "model.layers.42.block_sparse_moe.experts.105.w3", "model.layers.42.block_sparse_moe.experts.106.w3", "model.layers.42.block_sparse_moe.experts.107.w3", "model.layers.42.block_sparse_moe.experts.108.w3", "model.layers.42.block_sparse_moe.experts.109.w3", "model.layers.42.block_sparse_moe.experts.110.w3", "model.layers.42.block_sparse_moe.experts.111.w3", "model.layers.42.block_sparse_moe.experts.112.w3", "model.layers.42.block_sparse_moe.experts.113.w3", "model.layers.42.block_sparse_moe.experts.114.w3", "model.layers.42.block_sparse_moe.experts.115.w3", "model.layers.42.block_sparse_moe.experts.116.w3", "model.layers.42.block_sparse_moe.experts.117.w3", "model.layers.42.block_sparse_moe.experts.118.w3", "model.layers.42.block_sparse_moe.experts.119.w3", "model.layers.42.block_sparse_moe.experts.120.w3", "model.layers.42.block_sparse_moe.experts.121.w3", "model.layers.42.block_sparse_moe.experts.122.w3", "model.layers.42.block_sparse_moe.experts.123.w3", "model.layers.42.block_sparse_moe.experts.124.w3", "model.layers.42.block_sparse_moe.experts.125.w3", "model.layers.42.block_sparse_moe.experts.126.w3", "model.layers.42.block_sparse_moe.experts.127.w3", "model.layers.42.block_sparse_moe.experts.128.w3", "model.layers.42.block_sparse_moe.experts.129.w3", "model.layers.42.block_sparse_moe.experts.130.w3", "model.layers.42.block_sparse_moe.experts.131.w3", "model.layers.42.block_sparse_moe.experts.132.w3", "model.layers.42.block_sparse_moe.experts.133.w3", "model.layers.42.block_sparse_moe.experts.134.w3", "model.layers.42.block_sparse_moe.experts.135.w3", "model.layers.42.block_sparse_moe.experts.136.w3", "model.layers.42.block_sparse_moe.experts.137.w3", "model.layers.42.block_sparse_moe.experts.138.w3", "model.layers.42.block_sparse_moe.experts.139.w3", "model.layers.42.block_sparse_moe.experts.140.w3", "model.layers.42.block_sparse_moe.experts.141.w3", "model.layers.42.block_sparse_moe.experts.142.w3", "model.layers.42.block_sparse_moe.experts.143.w3", "model.layers.42.block_sparse_moe.experts.144.w3", "model.layers.42.block_sparse_moe.experts.145.w3", "model.layers.42.block_sparse_moe.experts.146.w3", "model.layers.42.block_sparse_moe.experts.147.w3", "model.layers.42.block_sparse_moe.experts.148.w3", "model.layers.42.block_sparse_moe.experts.149.w3", "model.layers.42.block_sparse_moe.experts.150.w3", "model.layers.42.block_sparse_moe.experts.151.w3", "model.layers.42.block_sparse_moe.experts.152.w3", "model.layers.42.block_sparse_moe.experts.153.w3", "model.layers.42.block_sparse_moe.experts.154.w3", "model.layers.42.block_sparse_moe.experts.155.w3", "model.layers.42.block_sparse_moe.experts.156.w3", "model.layers.42.block_sparse_moe.experts.157.w3", "model.layers.42.block_sparse_moe.experts.158.w3", "model.layers.42.block_sparse_moe.experts.159.w3", "model.layers.42.block_sparse_moe.experts.160.w3", "model.layers.42.block_sparse_moe.experts.161.w3", "model.layers.42.block_sparse_moe.experts.162.w3", "model.layers.42.block_sparse_moe.experts.163.w3", "model.layers.42.block_sparse_moe.experts.164.w3", "model.layers.42.block_sparse_moe.experts.165.w3", "model.layers.42.block_sparse_moe.experts.166.w3", "model.layers.42.block_sparse_moe.experts.167.w3", "model.layers.42.block_sparse_moe.experts.168.w3", "model.layers.42.block_sparse_moe.experts.169.w3", "model.layers.42.block_sparse_moe.experts.170.w3", "model.layers.42.block_sparse_moe.experts.171.w3", "model.layers.42.block_sparse_moe.experts.172.w3", "model.layers.42.block_sparse_moe.experts.173.w3", "model.layers.42.block_sparse_moe.experts.174.w3", "model.layers.42.block_sparse_moe.experts.175.w3", "model.layers.42.block_sparse_moe.experts.176.w3", "model.layers.42.block_sparse_moe.experts.177.w3", "model.layers.42.block_sparse_moe.experts.178.w3", "model.layers.42.block_sparse_moe.experts.179.w3", "model.layers.42.block_sparse_moe.experts.180.w3", "model.layers.42.block_sparse_moe.experts.181.w3", "model.layers.42.block_sparse_moe.experts.182.w3", "model.layers.42.block_sparse_moe.experts.183.w3", "model.layers.42.block_sparse_moe.experts.184.w3", "model.layers.42.block_sparse_moe.experts.185.w3", "model.layers.42.block_sparse_moe.experts.186.w3", "model.layers.42.block_sparse_moe.experts.187.w3", "model.layers.42.block_sparse_moe.experts.188.w3", "model.layers.42.block_sparse_moe.experts.189.w3", "model.layers.42.block_sparse_moe.experts.190.w3", "model.layers.42.block_sparse_moe.experts.191.w3", "model.layers.42.block_sparse_moe.experts.192.w3", "model.layers.42.block_sparse_moe.experts.193.w3", "model.layers.42.block_sparse_moe.experts.194.w3", "model.layers.42.block_sparse_moe.experts.195.w3", "model.layers.42.block_sparse_moe.experts.196.w3", "model.layers.42.block_sparse_moe.experts.197.w3", "model.layers.42.block_sparse_moe.experts.198.w3", "model.layers.42.block_sparse_moe.experts.199.w3", "model.layers.42.block_sparse_moe.experts.200.w3", "model.layers.42.block_sparse_moe.experts.201.w3", "model.layers.42.block_sparse_moe.experts.202.w3", "model.layers.42.block_sparse_moe.experts.203.w3", "model.layers.42.block_sparse_moe.experts.204.w3", "model.layers.42.block_sparse_moe.experts.205.w3", "model.layers.42.block_sparse_moe.experts.206.w3", "model.layers.42.block_sparse_moe.experts.207.w3", "model.layers.42.block_sparse_moe.experts.208.w3", "model.layers.42.block_sparse_moe.experts.209.w3", "model.layers.42.block_sparse_moe.experts.210.w3", "model.layers.42.block_sparse_moe.experts.211.w3", "model.layers.42.block_sparse_moe.experts.212.w3", "model.layers.42.block_sparse_moe.experts.213.w3", "model.layers.42.block_sparse_moe.experts.214.w3", "model.layers.42.block_sparse_moe.experts.215.w3", "model.layers.42.block_sparse_moe.experts.216.w3", "model.layers.42.block_sparse_moe.experts.217.w3", "model.layers.42.block_sparse_moe.experts.218.w3", "model.layers.42.block_sparse_moe.experts.219.w3", "model.layers.42.block_sparse_moe.experts.220.w3", "model.layers.42.block_sparse_moe.experts.221.w3", "model.layers.42.block_sparse_moe.experts.222.w3", "model.layers.42.block_sparse_moe.experts.223.w3", "model.layers.42.block_sparse_moe.experts.224.w3", "model.layers.42.block_sparse_moe.experts.225.w3", "model.layers.42.block_sparse_moe.experts.226.w3", "model.layers.42.block_sparse_moe.experts.227.w3", "model.layers.42.block_sparse_moe.experts.228.w3", "model.layers.42.block_sparse_moe.experts.229.w3", "model.layers.42.block_sparse_moe.experts.230.w3", "model.layers.42.block_sparse_moe.experts.231.w3", "model.layers.42.block_sparse_moe.experts.232.w3", "model.layers.42.block_sparse_moe.experts.233.w3", "model.layers.42.block_sparse_moe.experts.234.w3", "model.layers.42.block_sparse_moe.experts.235.w3", "model.layers.42.block_sparse_moe.experts.236.w3", "model.layers.42.block_sparse_moe.experts.237.w3", "model.layers.42.block_sparse_moe.experts.238.w3", "model.layers.42.block_sparse_moe.experts.239.w3", "model.layers.42.block_sparse_moe.experts.240.w3", "model.layers.42.block_sparse_moe.experts.241.w3", "model.layers.42.block_sparse_moe.experts.242.w3", "model.layers.42.block_sparse_moe.experts.243.w3", "model.layers.42.block_sparse_moe.experts.244.w3", "model.layers.42.block_sparse_moe.experts.245.w3", "model.layers.42.block_sparse_moe.experts.246.w3", "model.layers.42.block_sparse_moe.experts.247.w3", "model.layers.42.block_sparse_moe.experts.248.w3", "model.layers.42.block_sparse_moe.experts.249.w3", "model.layers.42.block_sparse_moe.experts.250.w3", "model.layers.42.block_sparse_moe.experts.251.w3", "model.layers.42.block_sparse_moe.experts.252.w3", "model.layers.42.block_sparse_moe.experts.253.w3", "model.layers.42.block_sparse_moe.experts.254.w3", "model.layers.42.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 2.6525557041162662e-05, "dbits": 2415919104 } ] }, { "idx": 214, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w2", "model.layers.42.block_sparse_moe.experts.1.w2", "model.layers.42.block_sparse_moe.experts.2.w2", "model.layers.42.block_sparse_moe.experts.3.w2", "model.layers.42.block_sparse_moe.experts.4.w2", "model.layers.42.block_sparse_moe.experts.5.w2", "model.layers.42.block_sparse_moe.experts.6.w2", "model.layers.42.block_sparse_moe.experts.7.w2", "model.layers.42.block_sparse_moe.experts.8.w2", "model.layers.42.block_sparse_moe.experts.9.w2", "model.layers.42.block_sparse_moe.experts.10.w2", "model.layers.42.block_sparse_moe.experts.11.w2", "model.layers.42.block_sparse_moe.experts.12.w2", "model.layers.42.block_sparse_moe.experts.13.w2", "model.layers.42.block_sparse_moe.experts.14.w2", "model.layers.42.block_sparse_moe.experts.15.w2", "model.layers.42.block_sparse_moe.experts.16.w2", "model.layers.42.block_sparse_moe.experts.17.w2", "model.layers.42.block_sparse_moe.experts.18.w2", "model.layers.42.block_sparse_moe.experts.19.w2", "model.layers.42.block_sparse_moe.experts.20.w2", "model.layers.42.block_sparse_moe.experts.21.w2", "model.layers.42.block_sparse_moe.experts.22.w2", "model.layers.42.block_sparse_moe.experts.23.w2", "model.layers.42.block_sparse_moe.experts.24.w2", "model.layers.42.block_sparse_moe.experts.25.w2", "model.layers.42.block_sparse_moe.experts.26.w2", "model.layers.42.block_sparse_moe.experts.27.w2", "model.layers.42.block_sparse_moe.experts.28.w2", "model.layers.42.block_sparse_moe.experts.29.w2", "model.layers.42.block_sparse_moe.experts.30.w2", "model.layers.42.block_sparse_moe.experts.31.w2", "model.layers.42.block_sparse_moe.experts.32.w2", "model.layers.42.block_sparse_moe.experts.33.w2", "model.layers.42.block_sparse_moe.experts.34.w2", "model.layers.42.block_sparse_moe.experts.35.w2", "model.layers.42.block_sparse_moe.experts.36.w2", "model.layers.42.block_sparse_moe.experts.37.w2", "model.layers.42.block_sparse_moe.experts.38.w2", "model.layers.42.block_sparse_moe.experts.39.w2", "model.layers.42.block_sparse_moe.experts.40.w2", "model.layers.42.block_sparse_moe.experts.41.w2", "model.layers.42.block_sparse_moe.experts.42.w2", "model.layers.42.block_sparse_moe.experts.43.w2", "model.layers.42.block_sparse_moe.experts.44.w2", "model.layers.42.block_sparse_moe.experts.45.w2", "model.layers.42.block_sparse_moe.experts.46.w2", "model.layers.42.block_sparse_moe.experts.47.w2", "model.layers.42.block_sparse_moe.experts.48.w2", "model.layers.42.block_sparse_moe.experts.49.w2", "model.layers.42.block_sparse_moe.experts.50.w2", "model.layers.42.block_sparse_moe.experts.51.w2", "model.layers.42.block_sparse_moe.experts.52.w2", "model.layers.42.block_sparse_moe.experts.53.w2", "model.layers.42.block_sparse_moe.experts.54.w2", "model.layers.42.block_sparse_moe.experts.55.w2", "model.layers.42.block_sparse_moe.experts.56.w2", "model.layers.42.block_sparse_moe.experts.57.w2", "model.layers.42.block_sparse_moe.experts.58.w2", "model.layers.42.block_sparse_moe.experts.59.w2", "model.layers.42.block_sparse_moe.experts.60.w2", "model.layers.42.block_sparse_moe.experts.61.w2", "model.layers.42.block_sparse_moe.experts.62.w2", "model.layers.42.block_sparse_moe.experts.63.w2", "model.layers.42.block_sparse_moe.experts.64.w2", "model.layers.42.block_sparse_moe.experts.65.w2", "model.layers.42.block_sparse_moe.experts.66.w2", "model.layers.42.block_sparse_moe.experts.67.w2", "model.layers.42.block_sparse_moe.experts.68.w2", "model.layers.42.block_sparse_moe.experts.69.w2", "model.layers.42.block_sparse_moe.experts.70.w2", "model.layers.42.block_sparse_moe.experts.71.w2", "model.layers.42.block_sparse_moe.experts.72.w2", "model.layers.42.block_sparse_moe.experts.73.w2", "model.layers.42.block_sparse_moe.experts.74.w2", "model.layers.42.block_sparse_moe.experts.75.w2", "model.layers.42.block_sparse_moe.experts.76.w2", "model.layers.42.block_sparse_moe.experts.77.w2", "model.layers.42.block_sparse_moe.experts.78.w2", "model.layers.42.block_sparse_moe.experts.79.w2", "model.layers.42.block_sparse_moe.experts.80.w2", "model.layers.42.block_sparse_moe.experts.81.w2", "model.layers.42.block_sparse_moe.experts.82.w2", "model.layers.42.block_sparse_moe.experts.83.w2", "model.layers.42.block_sparse_moe.experts.84.w2", "model.layers.42.block_sparse_moe.experts.85.w2", "model.layers.42.block_sparse_moe.experts.86.w2", "model.layers.42.block_sparse_moe.experts.87.w2", "model.layers.42.block_sparse_moe.experts.88.w2", "model.layers.42.block_sparse_moe.experts.89.w2", "model.layers.42.block_sparse_moe.experts.90.w2", "model.layers.42.block_sparse_moe.experts.91.w2", "model.layers.42.block_sparse_moe.experts.92.w2", "model.layers.42.block_sparse_moe.experts.93.w2", "model.layers.42.block_sparse_moe.experts.94.w2", "model.layers.42.block_sparse_moe.experts.95.w2", "model.layers.42.block_sparse_moe.experts.96.w2", "model.layers.42.block_sparse_moe.experts.97.w2", "model.layers.42.block_sparse_moe.experts.98.w2", "model.layers.42.block_sparse_moe.experts.99.w2", "model.layers.42.block_sparse_moe.experts.100.w2", "model.layers.42.block_sparse_moe.experts.101.w2", "model.layers.42.block_sparse_moe.experts.102.w2", "model.layers.42.block_sparse_moe.experts.103.w2", "model.layers.42.block_sparse_moe.experts.104.w2", "model.layers.42.block_sparse_moe.experts.105.w2", "model.layers.42.block_sparse_moe.experts.106.w2", "model.layers.42.block_sparse_moe.experts.107.w2", "model.layers.42.block_sparse_moe.experts.108.w2", "model.layers.42.block_sparse_moe.experts.109.w2", "model.layers.42.block_sparse_moe.experts.110.w2", "model.layers.42.block_sparse_moe.experts.111.w2", "model.layers.42.block_sparse_moe.experts.112.w2", "model.layers.42.block_sparse_moe.experts.113.w2", "model.layers.42.block_sparse_moe.experts.114.w2", "model.layers.42.block_sparse_moe.experts.115.w2", "model.layers.42.block_sparse_moe.experts.116.w2", "model.layers.42.block_sparse_moe.experts.117.w2", "model.layers.42.block_sparse_moe.experts.118.w2", "model.layers.42.block_sparse_moe.experts.119.w2", "model.layers.42.block_sparse_moe.experts.120.w2", "model.layers.42.block_sparse_moe.experts.121.w2", "model.layers.42.block_sparse_moe.experts.122.w2", "model.layers.42.block_sparse_moe.experts.123.w2", "model.layers.42.block_sparse_moe.experts.124.w2", "model.layers.42.block_sparse_moe.experts.125.w2", "model.layers.42.block_sparse_moe.experts.126.w2", "model.layers.42.block_sparse_moe.experts.127.w2", "model.layers.42.block_sparse_moe.experts.128.w2", "model.layers.42.block_sparse_moe.experts.129.w2", "model.layers.42.block_sparse_moe.experts.130.w2", "model.layers.42.block_sparse_moe.experts.131.w2", "model.layers.42.block_sparse_moe.experts.132.w2", "model.layers.42.block_sparse_moe.experts.133.w2", "model.layers.42.block_sparse_moe.experts.134.w2", "model.layers.42.block_sparse_moe.experts.135.w2", "model.layers.42.block_sparse_moe.experts.136.w2", "model.layers.42.block_sparse_moe.experts.137.w2", "model.layers.42.block_sparse_moe.experts.138.w2", "model.layers.42.block_sparse_moe.experts.139.w2", "model.layers.42.block_sparse_moe.experts.140.w2", "model.layers.42.block_sparse_moe.experts.141.w2", "model.layers.42.block_sparse_moe.experts.142.w2", "model.layers.42.block_sparse_moe.experts.143.w2", "model.layers.42.block_sparse_moe.experts.144.w2", "model.layers.42.block_sparse_moe.experts.145.w2", "model.layers.42.block_sparse_moe.experts.146.w2", "model.layers.42.block_sparse_moe.experts.147.w2", "model.layers.42.block_sparse_moe.experts.148.w2", "model.layers.42.block_sparse_moe.experts.149.w2", "model.layers.42.block_sparse_moe.experts.150.w2", "model.layers.42.block_sparse_moe.experts.151.w2", "model.layers.42.block_sparse_moe.experts.152.w2", "model.layers.42.block_sparse_moe.experts.153.w2", "model.layers.42.block_sparse_moe.experts.154.w2", "model.layers.42.block_sparse_moe.experts.155.w2", "model.layers.42.block_sparse_moe.experts.156.w2", "model.layers.42.block_sparse_moe.experts.157.w2", "model.layers.42.block_sparse_moe.experts.158.w2", "model.layers.42.block_sparse_moe.experts.159.w2", "model.layers.42.block_sparse_moe.experts.160.w2", "model.layers.42.block_sparse_moe.experts.161.w2", "model.layers.42.block_sparse_moe.experts.162.w2", "model.layers.42.block_sparse_moe.experts.163.w2", "model.layers.42.block_sparse_moe.experts.164.w2", "model.layers.42.block_sparse_moe.experts.165.w2", "model.layers.42.block_sparse_moe.experts.166.w2", "model.layers.42.block_sparse_moe.experts.167.w2", "model.layers.42.block_sparse_moe.experts.168.w2", "model.layers.42.block_sparse_moe.experts.169.w2", "model.layers.42.block_sparse_moe.experts.170.w2", "model.layers.42.block_sparse_moe.experts.171.w2", "model.layers.42.block_sparse_moe.experts.172.w2", "model.layers.42.block_sparse_moe.experts.173.w2", "model.layers.42.block_sparse_moe.experts.174.w2", "model.layers.42.block_sparse_moe.experts.175.w2", "model.layers.42.block_sparse_moe.experts.176.w2", "model.layers.42.block_sparse_moe.experts.177.w2", "model.layers.42.block_sparse_moe.experts.178.w2", "model.layers.42.block_sparse_moe.experts.179.w2", "model.layers.42.block_sparse_moe.experts.180.w2", "model.layers.42.block_sparse_moe.experts.181.w2", "model.layers.42.block_sparse_moe.experts.182.w2", "model.layers.42.block_sparse_moe.experts.183.w2", "model.layers.42.block_sparse_moe.experts.184.w2", "model.layers.42.block_sparse_moe.experts.185.w2", "model.layers.42.block_sparse_moe.experts.186.w2", "model.layers.42.block_sparse_moe.experts.187.w2", "model.layers.42.block_sparse_moe.experts.188.w2", "model.layers.42.block_sparse_moe.experts.189.w2", "model.layers.42.block_sparse_moe.experts.190.w2", "model.layers.42.block_sparse_moe.experts.191.w2", "model.layers.42.block_sparse_moe.experts.192.w2", "model.layers.42.block_sparse_moe.experts.193.w2", "model.layers.42.block_sparse_moe.experts.194.w2", "model.layers.42.block_sparse_moe.experts.195.w2", "model.layers.42.block_sparse_moe.experts.196.w2", "model.layers.42.block_sparse_moe.experts.197.w2", "model.layers.42.block_sparse_moe.experts.198.w2", "model.layers.42.block_sparse_moe.experts.199.w2", "model.layers.42.block_sparse_moe.experts.200.w2", "model.layers.42.block_sparse_moe.experts.201.w2", "model.layers.42.block_sparse_moe.experts.202.w2", "model.layers.42.block_sparse_moe.experts.203.w2", "model.layers.42.block_sparse_moe.experts.204.w2", "model.layers.42.block_sparse_moe.experts.205.w2", "model.layers.42.block_sparse_moe.experts.206.w2", "model.layers.42.block_sparse_moe.experts.207.w2", "model.layers.42.block_sparse_moe.experts.208.w2", "model.layers.42.block_sparse_moe.experts.209.w2", "model.layers.42.block_sparse_moe.experts.210.w2", "model.layers.42.block_sparse_moe.experts.211.w2", "model.layers.42.block_sparse_moe.experts.212.w2", "model.layers.42.block_sparse_moe.experts.213.w2", "model.layers.42.block_sparse_moe.experts.214.w2", "model.layers.42.block_sparse_moe.experts.215.w2", "model.layers.42.block_sparse_moe.experts.216.w2", "model.layers.42.block_sparse_moe.experts.217.w2", "model.layers.42.block_sparse_moe.experts.218.w2", "model.layers.42.block_sparse_moe.experts.219.w2", "model.layers.42.block_sparse_moe.experts.220.w2", "model.layers.42.block_sparse_moe.experts.221.w2", "model.layers.42.block_sparse_moe.experts.222.w2", "model.layers.42.block_sparse_moe.experts.223.w2", "model.layers.42.block_sparse_moe.experts.224.w2", "model.layers.42.block_sparse_moe.experts.225.w2", "model.layers.42.block_sparse_moe.experts.226.w2", "model.layers.42.block_sparse_moe.experts.227.w2", "model.layers.42.block_sparse_moe.experts.228.w2", "model.layers.42.block_sparse_moe.experts.229.w2", "model.layers.42.block_sparse_moe.experts.230.w2", "model.layers.42.block_sparse_moe.experts.231.w2", "model.layers.42.block_sparse_moe.experts.232.w2", "model.layers.42.block_sparse_moe.experts.233.w2", "model.layers.42.block_sparse_moe.experts.234.w2", "model.layers.42.block_sparse_moe.experts.235.w2", "model.layers.42.block_sparse_moe.experts.236.w2", "model.layers.42.block_sparse_moe.experts.237.w2", "model.layers.42.block_sparse_moe.experts.238.w2", "model.layers.42.block_sparse_moe.experts.239.w2", "model.layers.42.block_sparse_moe.experts.240.w2", "model.layers.42.block_sparse_moe.experts.241.w2", "model.layers.42.block_sparse_moe.experts.242.w2", "model.layers.42.block_sparse_moe.experts.243.w2", "model.layers.42.block_sparse_moe.experts.244.w2", "model.layers.42.block_sparse_moe.experts.245.w2", "model.layers.42.block_sparse_moe.experts.246.w2", "model.layers.42.block_sparse_moe.experts.247.w2", "model.layers.42.block_sparse_moe.experts.248.w2", "model.layers.42.block_sparse_moe.experts.249.w2", "model.layers.42.block_sparse_moe.experts.250.w2", "model.layers.42.block_sparse_moe.experts.251.w2", "model.layers.42.block_sparse_moe.experts.252.w2", "model.layers.42.block_sparse_moe.experts.253.w2", "model.layers.42.block_sparse_moe.experts.254.w2", "model.layers.42.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -8.619017899044734e-06, "dbits": 1207959552 } ] }, { "idx": 215, "layers": [ "model.layers.43.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00030375216156243445, "dbits": 18874368 } ] }, { "idx": 216, "layers": [ "model.layers.43.self_attn.k_proj", "model.layers.43.self_attn.v_proj" ], "candidates": [ { "dkld": 0.002658901922404769, "dbits": 6291456 } ] }, { "idx": 217, "layers": [ "model.layers.43.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0006272619590163148, "dbits": 18874368 } ] }, { "idx": 218, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w1", "model.layers.43.block_sparse_moe.experts.1.w1", "model.layers.43.block_sparse_moe.experts.2.w1", "model.layers.43.block_sparse_moe.experts.3.w1", "model.layers.43.block_sparse_moe.experts.4.w1", "model.layers.43.block_sparse_moe.experts.5.w1", "model.layers.43.block_sparse_moe.experts.6.w1", "model.layers.43.block_sparse_moe.experts.7.w1", "model.layers.43.block_sparse_moe.experts.8.w1", "model.layers.43.block_sparse_moe.experts.9.w1", "model.layers.43.block_sparse_moe.experts.10.w1", "model.layers.43.block_sparse_moe.experts.11.w1", "model.layers.43.block_sparse_moe.experts.12.w1", "model.layers.43.block_sparse_moe.experts.13.w1", "model.layers.43.block_sparse_moe.experts.14.w1", "model.layers.43.block_sparse_moe.experts.15.w1", "model.layers.43.block_sparse_moe.experts.16.w1", "model.layers.43.block_sparse_moe.experts.17.w1", "model.layers.43.block_sparse_moe.experts.18.w1", "model.layers.43.block_sparse_moe.experts.19.w1", "model.layers.43.block_sparse_moe.experts.20.w1", "model.layers.43.block_sparse_moe.experts.21.w1", "model.layers.43.block_sparse_moe.experts.22.w1", "model.layers.43.block_sparse_moe.experts.23.w1", "model.layers.43.block_sparse_moe.experts.24.w1", "model.layers.43.block_sparse_moe.experts.25.w1", "model.layers.43.block_sparse_moe.experts.26.w1", "model.layers.43.block_sparse_moe.experts.27.w1", "model.layers.43.block_sparse_moe.experts.28.w1", "model.layers.43.block_sparse_moe.experts.29.w1", "model.layers.43.block_sparse_moe.experts.30.w1", "model.layers.43.block_sparse_moe.experts.31.w1", "model.layers.43.block_sparse_moe.experts.32.w1", "model.layers.43.block_sparse_moe.experts.33.w1", "model.layers.43.block_sparse_moe.experts.34.w1", "model.layers.43.block_sparse_moe.experts.35.w1", "model.layers.43.block_sparse_moe.experts.36.w1", "model.layers.43.block_sparse_moe.experts.37.w1", "model.layers.43.block_sparse_moe.experts.38.w1", "model.layers.43.block_sparse_moe.experts.39.w1", "model.layers.43.block_sparse_moe.experts.40.w1", "model.layers.43.block_sparse_moe.experts.41.w1", "model.layers.43.block_sparse_moe.experts.42.w1", "model.layers.43.block_sparse_moe.experts.43.w1", "model.layers.43.block_sparse_moe.experts.44.w1", "model.layers.43.block_sparse_moe.experts.45.w1", "model.layers.43.block_sparse_moe.experts.46.w1", "model.layers.43.block_sparse_moe.experts.47.w1", "model.layers.43.block_sparse_moe.experts.48.w1", "model.layers.43.block_sparse_moe.experts.49.w1", "model.layers.43.block_sparse_moe.experts.50.w1", "model.layers.43.block_sparse_moe.experts.51.w1", "model.layers.43.block_sparse_moe.experts.52.w1", "model.layers.43.block_sparse_moe.experts.53.w1", "model.layers.43.block_sparse_moe.experts.54.w1", "model.layers.43.block_sparse_moe.experts.55.w1", "model.layers.43.block_sparse_moe.experts.56.w1", "model.layers.43.block_sparse_moe.experts.57.w1", "model.layers.43.block_sparse_moe.experts.58.w1", "model.layers.43.block_sparse_moe.experts.59.w1", "model.layers.43.block_sparse_moe.experts.60.w1", "model.layers.43.block_sparse_moe.experts.61.w1", "model.layers.43.block_sparse_moe.experts.62.w1", "model.layers.43.block_sparse_moe.experts.63.w1", "model.layers.43.block_sparse_moe.experts.64.w1", "model.layers.43.block_sparse_moe.experts.65.w1", "model.layers.43.block_sparse_moe.experts.66.w1", "model.layers.43.block_sparse_moe.experts.67.w1", "model.layers.43.block_sparse_moe.experts.68.w1", "model.layers.43.block_sparse_moe.experts.69.w1", "model.layers.43.block_sparse_moe.experts.70.w1", "model.layers.43.block_sparse_moe.experts.71.w1", "model.layers.43.block_sparse_moe.experts.72.w1", "model.layers.43.block_sparse_moe.experts.73.w1", "model.layers.43.block_sparse_moe.experts.74.w1", "model.layers.43.block_sparse_moe.experts.75.w1", "model.layers.43.block_sparse_moe.experts.76.w1", "model.layers.43.block_sparse_moe.experts.77.w1", "model.layers.43.block_sparse_moe.experts.78.w1", "model.layers.43.block_sparse_moe.experts.79.w1", "model.layers.43.block_sparse_moe.experts.80.w1", "model.layers.43.block_sparse_moe.experts.81.w1", "model.layers.43.block_sparse_moe.experts.82.w1", "model.layers.43.block_sparse_moe.experts.83.w1", "model.layers.43.block_sparse_moe.experts.84.w1", "model.layers.43.block_sparse_moe.experts.85.w1", "model.layers.43.block_sparse_moe.experts.86.w1", "model.layers.43.block_sparse_moe.experts.87.w1", "model.layers.43.block_sparse_moe.experts.88.w1", "model.layers.43.block_sparse_moe.experts.89.w1", "model.layers.43.block_sparse_moe.experts.90.w1", "model.layers.43.block_sparse_moe.experts.91.w1", "model.layers.43.block_sparse_moe.experts.92.w1", "model.layers.43.block_sparse_moe.experts.93.w1", "model.layers.43.block_sparse_moe.experts.94.w1", "model.layers.43.block_sparse_moe.experts.95.w1", "model.layers.43.block_sparse_moe.experts.96.w1", "model.layers.43.block_sparse_moe.experts.97.w1", "model.layers.43.block_sparse_moe.experts.98.w1", "model.layers.43.block_sparse_moe.experts.99.w1", "model.layers.43.block_sparse_moe.experts.100.w1", "model.layers.43.block_sparse_moe.experts.101.w1", "model.layers.43.block_sparse_moe.experts.102.w1", "model.layers.43.block_sparse_moe.experts.103.w1", "model.layers.43.block_sparse_moe.experts.104.w1", "model.layers.43.block_sparse_moe.experts.105.w1", "model.layers.43.block_sparse_moe.experts.106.w1", "model.layers.43.block_sparse_moe.experts.107.w1", "model.layers.43.block_sparse_moe.experts.108.w1", "model.layers.43.block_sparse_moe.experts.109.w1", "model.layers.43.block_sparse_moe.experts.110.w1", "model.layers.43.block_sparse_moe.experts.111.w1", "model.layers.43.block_sparse_moe.experts.112.w1", "model.layers.43.block_sparse_moe.experts.113.w1", "model.layers.43.block_sparse_moe.experts.114.w1", "model.layers.43.block_sparse_moe.experts.115.w1", "model.layers.43.block_sparse_moe.experts.116.w1", "model.layers.43.block_sparse_moe.experts.117.w1", "model.layers.43.block_sparse_moe.experts.118.w1", "model.layers.43.block_sparse_moe.experts.119.w1", "model.layers.43.block_sparse_moe.experts.120.w1", "model.layers.43.block_sparse_moe.experts.121.w1", "model.layers.43.block_sparse_moe.experts.122.w1", "model.layers.43.block_sparse_moe.experts.123.w1", "model.layers.43.block_sparse_moe.experts.124.w1", "model.layers.43.block_sparse_moe.experts.125.w1", "model.layers.43.block_sparse_moe.experts.126.w1", "model.layers.43.block_sparse_moe.experts.127.w1", "model.layers.43.block_sparse_moe.experts.128.w1", "model.layers.43.block_sparse_moe.experts.129.w1", "model.layers.43.block_sparse_moe.experts.130.w1", "model.layers.43.block_sparse_moe.experts.131.w1", "model.layers.43.block_sparse_moe.experts.132.w1", "model.layers.43.block_sparse_moe.experts.133.w1", "model.layers.43.block_sparse_moe.experts.134.w1", "model.layers.43.block_sparse_moe.experts.135.w1", "model.layers.43.block_sparse_moe.experts.136.w1", "model.layers.43.block_sparse_moe.experts.137.w1", "model.layers.43.block_sparse_moe.experts.138.w1", "model.layers.43.block_sparse_moe.experts.139.w1", "model.layers.43.block_sparse_moe.experts.140.w1", "model.layers.43.block_sparse_moe.experts.141.w1", "model.layers.43.block_sparse_moe.experts.142.w1", "model.layers.43.block_sparse_moe.experts.143.w1", "model.layers.43.block_sparse_moe.experts.144.w1", "model.layers.43.block_sparse_moe.experts.145.w1", "model.layers.43.block_sparse_moe.experts.146.w1", "model.layers.43.block_sparse_moe.experts.147.w1", "model.layers.43.block_sparse_moe.experts.148.w1", "model.layers.43.block_sparse_moe.experts.149.w1", "model.layers.43.block_sparse_moe.experts.150.w1", "model.layers.43.block_sparse_moe.experts.151.w1", "model.layers.43.block_sparse_moe.experts.152.w1", "model.layers.43.block_sparse_moe.experts.153.w1", "model.layers.43.block_sparse_moe.experts.154.w1", "model.layers.43.block_sparse_moe.experts.155.w1", "model.layers.43.block_sparse_moe.experts.156.w1", "model.layers.43.block_sparse_moe.experts.157.w1", "model.layers.43.block_sparse_moe.experts.158.w1", "model.layers.43.block_sparse_moe.experts.159.w1", "model.layers.43.block_sparse_moe.experts.160.w1", "model.layers.43.block_sparse_moe.experts.161.w1", "model.layers.43.block_sparse_moe.experts.162.w1", "model.layers.43.block_sparse_moe.experts.163.w1", "model.layers.43.block_sparse_moe.experts.164.w1", "model.layers.43.block_sparse_moe.experts.165.w1", "model.layers.43.block_sparse_moe.experts.166.w1", "model.layers.43.block_sparse_moe.experts.167.w1", "model.layers.43.block_sparse_moe.experts.168.w1", "model.layers.43.block_sparse_moe.experts.169.w1", "model.layers.43.block_sparse_moe.experts.170.w1", "model.layers.43.block_sparse_moe.experts.171.w1", "model.layers.43.block_sparse_moe.experts.172.w1", "model.layers.43.block_sparse_moe.experts.173.w1", "model.layers.43.block_sparse_moe.experts.174.w1", "model.layers.43.block_sparse_moe.experts.175.w1", "model.layers.43.block_sparse_moe.experts.176.w1", "model.layers.43.block_sparse_moe.experts.177.w1", "model.layers.43.block_sparse_moe.experts.178.w1", "model.layers.43.block_sparse_moe.experts.179.w1", "model.layers.43.block_sparse_moe.experts.180.w1", "model.layers.43.block_sparse_moe.experts.181.w1", "model.layers.43.block_sparse_moe.experts.182.w1", "model.layers.43.block_sparse_moe.experts.183.w1", "model.layers.43.block_sparse_moe.experts.184.w1", "model.layers.43.block_sparse_moe.experts.185.w1", "model.layers.43.block_sparse_moe.experts.186.w1", "model.layers.43.block_sparse_moe.experts.187.w1", "model.layers.43.block_sparse_moe.experts.188.w1", "model.layers.43.block_sparse_moe.experts.189.w1", "model.layers.43.block_sparse_moe.experts.190.w1", "model.layers.43.block_sparse_moe.experts.191.w1", "model.layers.43.block_sparse_moe.experts.192.w1", "model.layers.43.block_sparse_moe.experts.193.w1", "model.layers.43.block_sparse_moe.experts.194.w1", "model.layers.43.block_sparse_moe.experts.195.w1", "model.layers.43.block_sparse_moe.experts.196.w1", "model.layers.43.block_sparse_moe.experts.197.w1", "model.layers.43.block_sparse_moe.experts.198.w1", "model.layers.43.block_sparse_moe.experts.199.w1", "model.layers.43.block_sparse_moe.experts.200.w1", "model.layers.43.block_sparse_moe.experts.201.w1", "model.layers.43.block_sparse_moe.experts.202.w1", "model.layers.43.block_sparse_moe.experts.203.w1", "model.layers.43.block_sparse_moe.experts.204.w1", "model.layers.43.block_sparse_moe.experts.205.w1", "model.layers.43.block_sparse_moe.experts.206.w1", "model.layers.43.block_sparse_moe.experts.207.w1", "model.layers.43.block_sparse_moe.experts.208.w1", "model.layers.43.block_sparse_moe.experts.209.w1", "model.layers.43.block_sparse_moe.experts.210.w1", "model.layers.43.block_sparse_moe.experts.211.w1", "model.layers.43.block_sparse_moe.experts.212.w1", "model.layers.43.block_sparse_moe.experts.213.w1", "model.layers.43.block_sparse_moe.experts.214.w1", "model.layers.43.block_sparse_moe.experts.215.w1", "model.layers.43.block_sparse_moe.experts.216.w1", "model.layers.43.block_sparse_moe.experts.217.w1", "model.layers.43.block_sparse_moe.experts.218.w1", "model.layers.43.block_sparse_moe.experts.219.w1", "model.layers.43.block_sparse_moe.experts.220.w1", "model.layers.43.block_sparse_moe.experts.221.w1", "model.layers.43.block_sparse_moe.experts.222.w1", "model.layers.43.block_sparse_moe.experts.223.w1", "model.layers.43.block_sparse_moe.experts.224.w1", "model.layers.43.block_sparse_moe.experts.225.w1", "model.layers.43.block_sparse_moe.experts.226.w1", "model.layers.43.block_sparse_moe.experts.227.w1", "model.layers.43.block_sparse_moe.experts.228.w1", "model.layers.43.block_sparse_moe.experts.229.w1", "model.layers.43.block_sparse_moe.experts.230.w1", "model.layers.43.block_sparse_moe.experts.231.w1", "model.layers.43.block_sparse_moe.experts.232.w1", "model.layers.43.block_sparse_moe.experts.233.w1", "model.layers.43.block_sparse_moe.experts.234.w1", "model.layers.43.block_sparse_moe.experts.235.w1", "model.layers.43.block_sparse_moe.experts.236.w1", "model.layers.43.block_sparse_moe.experts.237.w1", "model.layers.43.block_sparse_moe.experts.238.w1", "model.layers.43.block_sparse_moe.experts.239.w1", "model.layers.43.block_sparse_moe.experts.240.w1", "model.layers.43.block_sparse_moe.experts.241.w1", "model.layers.43.block_sparse_moe.experts.242.w1", "model.layers.43.block_sparse_moe.experts.243.w1", "model.layers.43.block_sparse_moe.experts.244.w1", "model.layers.43.block_sparse_moe.experts.245.w1", "model.layers.43.block_sparse_moe.experts.246.w1", "model.layers.43.block_sparse_moe.experts.247.w1", "model.layers.43.block_sparse_moe.experts.248.w1", "model.layers.43.block_sparse_moe.experts.249.w1", "model.layers.43.block_sparse_moe.experts.250.w1", "model.layers.43.block_sparse_moe.experts.251.w1", "model.layers.43.block_sparse_moe.experts.252.w1", "model.layers.43.block_sparse_moe.experts.253.w1", "model.layers.43.block_sparse_moe.experts.254.w1", "model.layers.43.block_sparse_moe.experts.255.w1", "model.layers.43.block_sparse_moe.experts.0.w3", "model.layers.43.block_sparse_moe.experts.1.w3", "model.layers.43.block_sparse_moe.experts.2.w3", "model.layers.43.block_sparse_moe.experts.3.w3", "model.layers.43.block_sparse_moe.experts.4.w3", "model.layers.43.block_sparse_moe.experts.5.w3", "model.layers.43.block_sparse_moe.experts.6.w3", "model.layers.43.block_sparse_moe.experts.7.w3", "model.layers.43.block_sparse_moe.experts.8.w3", "model.layers.43.block_sparse_moe.experts.9.w3", "model.layers.43.block_sparse_moe.experts.10.w3", "model.layers.43.block_sparse_moe.experts.11.w3", "model.layers.43.block_sparse_moe.experts.12.w3", "model.layers.43.block_sparse_moe.experts.13.w3", "model.layers.43.block_sparse_moe.experts.14.w3", "model.layers.43.block_sparse_moe.experts.15.w3", "model.layers.43.block_sparse_moe.experts.16.w3", "model.layers.43.block_sparse_moe.experts.17.w3", "model.layers.43.block_sparse_moe.experts.18.w3", "model.layers.43.block_sparse_moe.experts.19.w3", "model.layers.43.block_sparse_moe.experts.20.w3", "model.layers.43.block_sparse_moe.experts.21.w3", "model.layers.43.block_sparse_moe.experts.22.w3", "model.layers.43.block_sparse_moe.experts.23.w3", "model.layers.43.block_sparse_moe.experts.24.w3", "model.layers.43.block_sparse_moe.experts.25.w3", "model.layers.43.block_sparse_moe.experts.26.w3", "model.layers.43.block_sparse_moe.experts.27.w3", "model.layers.43.block_sparse_moe.experts.28.w3", "model.layers.43.block_sparse_moe.experts.29.w3", "model.layers.43.block_sparse_moe.experts.30.w3", "model.layers.43.block_sparse_moe.experts.31.w3", "model.layers.43.block_sparse_moe.experts.32.w3", "model.layers.43.block_sparse_moe.experts.33.w3", "model.layers.43.block_sparse_moe.experts.34.w3", "model.layers.43.block_sparse_moe.experts.35.w3", "model.layers.43.block_sparse_moe.experts.36.w3", "model.layers.43.block_sparse_moe.experts.37.w3", "model.layers.43.block_sparse_moe.experts.38.w3", "model.layers.43.block_sparse_moe.experts.39.w3", "model.layers.43.block_sparse_moe.experts.40.w3", "model.layers.43.block_sparse_moe.experts.41.w3", "model.layers.43.block_sparse_moe.experts.42.w3", "model.layers.43.block_sparse_moe.experts.43.w3", "model.layers.43.block_sparse_moe.experts.44.w3", "model.layers.43.block_sparse_moe.experts.45.w3", "model.layers.43.block_sparse_moe.experts.46.w3", "model.layers.43.block_sparse_moe.experts.47.w3", "model.layers.43.block_sparse_moe.experts.48.w3", "model.layers.43.block_sparse_moe.experts.49.w3", "model.layers.43.block_sparse_moe.experts.50.w3", "model.layers.43.block_sparse_moe.experts.51.w3", "model.layers.43.block_sparse_moe.experts.52.w3", "model.layers.43.block_sparse_moe.experts.53.w3", "model.layers.43.block_sparse_moe.experts.54.w3", "model.layers.43.block_sparse_moe.experts.55.w3", "model.layers.43.block_sparse_moe.experts.56.w3", "model.layers.43.block_sparse_moe.experts.57.w3", "model.layers.43.block_sparse_moe.experts.58.w3", "model.layers.43.block_sparse_moe.experts.59.w3", "model.layers.43.block_sparse_moe.experts.60.w3", "model.layers.43.block_sparse_moe.experts.61.w3", "model.layers.43.block_sparse_moe.experts.62.w3", "model.layers.43.block_sparse_moe.experts.63.w3", "model.layers.43.block_sparse_moe.experts.64.w3", "model.layers.43.block_sparse_moe.experts.65.w3", "model.layers.43.block_sparse_moe.experts.66.w3", "model.layers.43.block_sparse_moe.experts.67.w3", "model.layers.43.block_sparse_moe.experts.68.w3", "model.layers.43.block_sparse_moe.experts.69.w3", "model.layers.43.block_sparse_moe.experts.70.w3", "model.layers.43.block_sparse_moe.experts.71.w3", "model.layers.43.block_sparse_moe.experts.72.w3", "model.layers.43.block_sparse_moe.experts.73.w3", "model.layers.43.block_sparse_moe.experts.74.w3", "model.layers.43.block_sparse_moe.experts.75.w3", "model.layers.43.block_sparse_moe.experts.76.w3", "model.layers.43.block_sparse_moe.experts.77.w3", "model.layers.43.block_sparse_moe.experts.78.w3", "model.layers.43.block_sparse_moe.experts.79.w3", "model.layers.43.block_sparse_moe.experts.80.w3", "model.layers.43.block_sparse_moe.experts.81.w3", "model.layers.43.block_sparse_moe.experts.82.w3", "model.layers.43.block_sparse_moe.experts.83.w3", "model.layers.43.block_sparse_moe.experts.84.w3", "model.layers.43.block_sparse_moe.experts.85.w3", "model.layers.43.block_sparse_moe.experts.86.w3", "model.layers.43.block_sparse_moe.experts.87.w3", "model.layers.43.block_sparse_moe.experts.88.w3", "model.layers.43.block_sparse_moe.experts.89.w3", "model.layers.43.block_sparse_moe.experts.90.w3", "model.layers.43.block_sparse_moe.experts.91.w3", "model.layers.43.block_sparse_moe.experts.92.w3", "model.layers.43.block_sparse_moe.experts.93.w3", "model.layers.43.block_sparse_moe.experts.94.w3", "model.layers.43.block_sparse_moe.experts.95.w3", "model.layers.43.block_sparse_moe.experts.96.w3", "model.layers.43.block_sparse_moe.experts.97.w3", "model.layers.43.block_sparse_moe.experts.98.w3", "model.layers.43.block_sparse_moe.experts.99.w3", "model.layers.43.block_sparse_moe.experts.100.w3", "model.layers.43.block_sparse_moe.experts.101.w3", "model.layers.43.block_sparse_moe.experts.102.w3", "model.layers.43.block_sparse_moe.experts.103.w3", "model.layers.43.block_sparse_moe.experts.104.w3", "model.layers.43.block_sparse_moe.experts.105.w3", "model.layers.43.block_sparse_moe.experts.106.w3", "model.layers.43.block_sparse_moe.experts.107.w3", "model.layers.43.block_sparse_moe.experts.108.w3", "model.layers.43.block_sparse_moe.experts.109.w3", "model.layers.43.block_sparse_moe.experts.110.w3", "model.layers.43.block_sparse_moe.experts.111.w3", "model.layers.43.block_sparse_moe.experts.112.w3", "model.layers.43.block_sparse_moe.experts.113.w3", "model.layers.43.block_sparse_moe.experts.114.w3", "model.layers.43.block_sparse_moe.experts.115.w3", "model.layers.43.block_sparse_moe.experts.116.w3", "model.layers.43.block_sparse_moe.experts.117.w3", "model.layers.43.block_sparse_moe.experts.118.w3", "model.layers.43.block_sparse_moe.experts.119.w3", "model.layers.43.block_sparse_moe.experts.120.w3", "model.layers.43.block_sparse_moe.experts.121.w3", "model.layers.43.block_sparse_moe.experts.122.w3", "model.layers.43.block_sparse_moe.experts.123.w3", "model.layers.43.block_sparse_moe.experts.124.w3", "model.layers.43.block_sparse_moe.experts.125.w3", "model.layers.43.block_sparse_moe.experts.126.w3", "model.layers.43.block_sparse_moe.experts.127.w3", "model.layers.43.block_sparse_moe.experts.128.w3", "model.layers.43.block_sparse_moe.experts.129.w3", "model.layers.43.block_sparse_moe.experts.130.w3", "model.layers.43.block_sparse_moe.experts.131.w3", "model.layers.43.block_sparse_moe.experts.132.w3", "model.layers.43.block_sparse_moe.experts.133.w3", "model.layers.43.block_sparse_moe.experts.134.w3", "model.layers.43.block_sparse_moe.experts.135.w3", "model.layers.43.block_sparse_moe.experts.136.w3", "model.layers.43.block_sparse_moe.experts.137.w3", "model.layers.43.block_sparse_moe.experts.138.w3", "model.layers.43.block_sparse_moe.experts.139.w3", "model.layers.43.block_sparse_moe.experts.140.w3", "model.layers.43.block_sparse_moe.experts.141.w3", "model.layers.43.block_sparse_moe.experts.142.w3", "model.layers.43.block_sparse_moe.experts.143.w3", "model.layers.43.block_sparse_moe.experts.144.w3", "model.layers.43.block_sparse_moe.experts.145.w3", "model.layers.43.block_sparse_moe.experts.146.w3", "model.layers.43.block_sparse_moe.experts.147.w3", "model.layers.43.block_sparse_moe.experts.148.w3", "model.layers.43.block_sparse_moe.experts.149.w3", "model.layers.43.block_sparse_moe.experts.150.w3", "model.layers.43.block_sparse_moe.experts.151.w3", "model.layers.43.block_sparse_moe.experts.152.w3", "model.layers.43.block_sparse_moe.experts.153.w3", "model.layers.43.block_sparse_moe.experts.154.w3", "model.layers.43.block_sparse_moe.experts.155.w3", "model.layers.43.block_sparse_moe.experts.156.w3", "model.layers.43.block_sparse_moe.experts.157.w3", "model.layers.43.block_sparse_moe.experts.158.w3", "model.layers.43.block_sparse_moe.experts.159.w3", "model.layers.43.block_sparse_moe.experts.160.w3", "model.layers.43.block_sparse_moe.experts.161.w3", "model.layers.43.block_sparse_moe.experts.162.w3", "model.layers.43.block_sparse_moe.experts.163.w3", "model.layers.43.block_sparse_moe.experts.164.w3", "model.layers.43.block_sparse_moe.experts.165.w3", "model.layers.43.block_sparse_moe.experts.166.w3", "model.layers.43.block_sparse_moe.experts.167.w3", "model.layers.43.block_sparse_moe.experts.168.w3", "model.layers.43.block_sparse_moe.experts.169.w3", "model.layers.43.block_sparse_moe.experts.170.w3", "model.layers.43.block_sparse_moe.experts.171.w3", "model.layers.43.block_sparse_moe.experts.172.w3", "model.layers.43.block_sparse_moe.experts.173.w3", "model.layers.43.block_sparse_moe.experts.174.w3", "model.layers.43.block_sparse_moe.experts.175.w3", "model.layers.43.block_sparse_moe.experts.176.w3", "model.layers.43.block_sparse_moe.experts.177.w3", "model.layers.43.block_sparse_moe.experts.178.w3", "model.layers.43.block_sparse_moe.experts.179.w3", "model.layers.43.block_sparse_moe.experts.180.w3", "model.layers.43.block_sparse_moe.experts.181.w3", "model.layers.43.block_sparse_moe.experts.182.w3", "model.layers.43.block_sparse_moe.experts.183.w3", "model.layers.43.block_sparse_moe.experts.184.w3", "model.layers.43.block_sparse_moe.experts.185.w3", "model.layers.43.block_sparse_moe.experts.186.w3", "model.layers.43.block_sparse_moe.experts.187.w3", "model.layers.43.block_sparse_moe.experts.188.w3", "model.layers.43.block_sparse_moe.experts.189.w3", "model.layers.43.block_sparse_moe.experts.190.w3", "model.layers.43.block_sparse_moe.experts.191.w3", "model.layers.43.block_sparse_moe.experts.192.w3", "model.layers.43.block_sparse_moe.experts.193.w3", "model.layers.43.block_sparse_moe.experts.194.w3", "model.layers.43.block_sparse_moe.experts.195.w3", "model.layers.43.block_sparse_moe.experts.196.w3", "model.layers.43.block_sparse_moe.experts.197.w3", "model.layers.43.block_sparse_moe.experts.198.w3", "model.layers.43.block_sparse_moe.experts.199.w3", "model.layers.43.block_sparse_moe.experts.200.w3", "model.layers.43.block_sparse_moe.experts.201.w3", "model.layers.43.block_sparse_moe.experts.202.w3", "model.layers.43.block_sparse_moe.experts.203.w3", "model.layers.43.block_sparse_moe.experts.204.w3", "model.layers.43.block_sparse_moe.experts.205.w3", "model.layers.43.block_sparse_moe.experts.206.w3", "model.layers.43.block_sparse_moe.experts.207.w3", "model.layers.43.block_sparse_moe.experts.208.w3", "model.layers.43.block_sparse_moe.experts.209.w3", "model.layers.43.block_sparse_moe.experts.210.w3", "model.layers.43.block_sparse_moe.experts.211.w3", "model.layers.43.block_sparse_moe.experts.212.w3", "model.layers.43.block_sparse_moe.experts.213.w3", "model.layers.43.block_sparse_moe.experts.214.w3", "model.layers.43.block_sparse_moe.experts.215.w3", "model.layers.43.block_sparse_moe.experts.216.w3", "model.layers.43.block_sparse_moe.experts.217.w3", "model.layers.43.block_sparse_moe.experts.218.w3", "model.layers.43.block_sparse_moe.experts.219.w3", "model.layers.43.block_sparse_moe.experts.220.w3", "model.layers.43.block_sparse_moe.experts.221.w3", "model.layers.43.block_sparse_moe.experts.222.w3", "model.layers.43.block_sparse_moe.experts.223.w3", "model.layers.43.block_sparse_moe.experts.224.w3", "model.layers.43.block_sparse_moe.experts.225.w3", "model.layers.43.block_sparse_moe.experts.226.w3", "model.layers.43.block_sparse_moe.experts.227.w3", "model.layers.43.block_sparse_moe.experts.228.w3", "model.layers.43.block_sparse_moe.experts.229.w3", "model.layers.43.block_sparse_moe.experts.230.w3", "model.layers.43.block_sparse_moe.experts.231.w3", "model.layers.43.block_sparse_moe.experts.232.w3", "model.layers.43.block_sparse_moe.experts.233.w3", "model.layers.43.block_sparse_moe.experts.234.w3", "model.layers.43.block_sparse_moe.experts.235.w3", "model.layers.43.block_sparse_moe.experts.236.w3", "model.layers.43.block_sparse_moe.experts.237.w3", "model.layers.43.block_sparse_moe.experts.238.w3", "model.layers.43.block_sparse_moe.experts.239.w3", "model.layers.43.block_sparse_moe.experts.240.w3", "model.layers.43.block_sparse_moe.experts.241.w3", "model.layers.43.block_sparse_moe.experts.242.w3", "model.layers.43.block_sparse_moe.experts.243.w3", "model.layers.43.block_sparse_moe.experts.244.w3", "model.layers.43.block_sparse_moe.experts.245.w3", "model.layers.43.block_sparse_moe.experts.246.w3", "model.layers.43.block_sparse_moe.experts.247.w3", "model.layers.43.block_sparse_moe.experts.248.w3", "model.layers.43.block_sparse_moe.experts.249.w3", "model.layers.43.block_sparse_moe.experts.250.w3", "model.layers.43.block_sparse_moe.experts.251.w3", "model.layers.43.block_sparse_moe.experts.252.w3", "model.layers.43.block_sparse_moe.experts.253.w3", "model.layers.43.block_sparse_moe.experts.254.w3", "model.layers.43.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -2.5573372840884123e-05, "dbits": 2415919104 } ] }, { "idx": 219, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w2", "model.layers.43.block_sparse_moe.experts.1.w2", "model.layers.43.block_sparse_moe.experts.2.w2", "model.layers.43.block_sparse_moe.experts.3.w2", "model.layers.43.block_sparse_moe.experts.4.w2", "model.layers.43.block_sparse_moe.experts.5.w2", "model.layers.43.block_sparse_moe.experts.6.w2", "model.layers.43.block_sparse_moe.experts.7.w2", "model.layers.43.block_sparse_moe.experts.8.w2", "model.layers.43.block_sparse_moe.experts.9.w2", "model.layers.43.block_sparse_moe.experts.10.w2", "model.layers.43.block_sparse_moe.experts.11.w2", "model.layers.43.block_sparse_moe.experts.12.w2", "model.layers.43.block_sparse_moe.experts.13.w2", "model.layers.43.block_sparse_moe.experts.14.w2", "model.layers.43.block_sparse_moe.experts.15.w2", "model.layers.43.block_sparse_moe.experts.16.w2", "model.layers.43.block_sparse_moe.experts.17.w2", "model.layers.43.block_sparse_moe.experts.18.w2", "model.layers.43.block_sparse_moe.experts.19.w2", "model.layers.43.block_sparse_moe.experts.20.w2", "model.layers.43.block_sparse_moe.experts.21.w2", "model.layers.43.block_sparse_moe.experts.22.w2", "model.layers.43.block_sparse_moe.experts.23.w2", "model.layers.43.block_sparse_moe.experts.24.w2", "model.layers.43.block_sparse_moe.experts.25.w2", "model.layers.43.block_sparse_moe.experts.26.w2", "model.layers.43.block_sparse_moe.experts.27.w2", "model.layers.43.block_sparse_moe.experts.28.w2", "model.layers.43.block_sparse_moe.experts.29.w2", "model.layers.43.block_sparse_moe.experts.30.w2", "model.layers.43.block_sparse_moe.experts.31.w2", "model.layers.43.block_sparse_moe.experts.32.w2", "model.layers.43.block_sparse_moe.experts.33.w2", "model.layers.43.block_sparse_moe.experts.34.w2", "model.layers.43.block_sparse_moe.experts.35.w2", "model.layers.43.block_sparse_moe.experts.36.w2", "model.layers.43.block_sparse_moe.experts.37.w2", "model.layers.43.block_sparse_moe.experts.38.w2", "model.layers.43.block_sparse_moe.experts.39.w2", "model.layers.43.block_sparse_moe.experts.40.w2", "model.layers.43.block_sparse_moe.experts.41.w2", "model.layers.43.block_sparse_moe.experts.42.w2", "model.layers.43.block_sparse_moe.experts.43.w2", "model.layers.43.block_sparse_moe.experts.44.w2", "model.layers.43.block_sparse_moe.experts.45.w2", "model.layers.43.block_sparse_moe.experts.46.w2", "model.layers.43.block_sparse_moe.experts.47.w2", "model.layers.43.block_sparse_moe.experts.48.w2", "model.layers.43.block_sparse_moe.experts.49.w2", "model.layers.43.block_sparse_moe.experts.50.w2", "model.layers.43.block_sparse_moe.experts.51.w2", "model.layers.43.block_sparse_moe.experts.52.w2", "model.layers.43.block_sparse_moe.experts.53.w2", "model.layers.43.block_sparse_moe.experts.54.w2", "model.layers.43.block_sparse_moe.experts.55.w2", "model.layers.43.block_sparse_moe.experts.56.w2", "model.layers.43.block_sparse_moe.experts.57.w2", "model.layers.43.block_sparse_moe.experts.58.w2", "model.layers.43.block_sparse_moe.experts.59.w2", "model.layers.43.block_sparse_moe.experts.60.w2", "model.layers.43.block_sparse_moe.experts.61.w2", "model.layers.43.block_sparse_moe.experts.62.w2", "model.layers.43.block_sparse_moe.experts.63.w2", "model.layers.43.block_sparse_moe.experts.64.w2", "model.layers.43.block_sparse_moe.experts.65.w2", "model.layers.43.block_sparse_moe.experts.66.w2", "model.layers.43.block_sparse_moe.experts.67.w2", "model.layers.43.block_sparse_moe.experts.68.w2", "model.layers.43.block_sparse_moe.experts.69.w2", "model.layers.43.block_sparse_moe.experts.70.w2", "model.layers.43.block_sparse_moe.experts.71.w2", "model.layers.43.block_sparse_moe.experts.72.w2", "model.layers.43.block_sparse_moe.experts.73.w2", "model.layers.43.block_sparse_moe.experts.74.w2", "model.layers.43.block_sparse_moe.experts.75.w2", "model.layers.43.block_sparse_moe.experts.76.w2", "model.layers.43.block_sparse_moe.experts.77.w2", "model.layers.43.block_sparse_moe.experts.78.w2", "model.layers.43.block_sparse_moe.experts.79.w2", "model.layers.43.block_sparse_moe.experts.80.w2", "model.layers.43.block_sparse_moe.experts.81.w2", "model.layers.43.block_sparse_moe.experts.82.w2", "model.layers.43.block_sparse_moe.experts.83.w2", "model.layers.43.block_sparse_moe.experts.84.w2", "model.layers.43.block_sparse_moe.experts.85.w2", "model.layers.43.block_sparse_moe.experts.86.w2", "model.layers.43.block_sparse_moe.experts.87.w2", "model.layers.43.block_sparse_moe.experts.88.w2", "model.layers.43.block_sparse_moe.experts.89.w2", "model.layers.43.block_sparse_moe.experts.90.w2", "model.layers.43.block_sparse_moe.experts.91.w2", "model.layers.43.block_sparse_moe.experts.92.w2", "model.layers.43.block_sparse_moe.experts.93.w2", "model.layers.43.block_sparse_moe.experts.94.w2", "model.layers.43.block_sparse_moe.experts.95.w2", "model.layers.43.block_sparse_moe.experts.96.w2", "model.layers.43.block_sparse_moe.experts.97.w2", "model.layers.43.block_sparse_moe.experts.98.w2", "model.layers.43.block_sparse_moe.experts.99.w2", "model.layers.43.block_sparse_moe.experts.100.w2", "model.layers.43.block_sparse_moe.experts.101.w2", "model.layers.43.block_sparse_moe.experts.102.w2", "model.layers.43.block_sparse_moe.experts.103.w2", "model.layers.43.block_sparse_moe.experts.104.w2", "model.layers.43.block_sparse_moe.experts.105.w2", "model.layers.43.block_sparse_moe.experts.106.w2", "model.layers.43.block_sparse_moe.experts.107.w2", "model.layers.43.block_sparse_moe.experts.108.w2", "model.layers.43.block_sparse_moe.experts.109.w2", "model.layers.43.block_sparse_moe.experts.110.w2", "model.layers.43.block_sparse_moe.experts.111.w2", "model.layers.43.block_sparse_moe.experts.112.w2", "model.layers.43.block_sparse_moe.experts.113.w2", "model.layers.43.block_sparse_moe.experts.114.w2", "model.layers.43.block_sparse_moe.experts.115.w2", "model.layers.43.block_sparse_moe.experts.116.w2", "model.layers.43.block_sparse_moe.experts.117.w2", "model.layers.43.block_sparse_moe.experts.118.w2", "model.layers.43.block_sparse_moe.experts.119.w2", "model.layers.43.block_sparse_moe.experts.120.w2", "model.layers.43.block_sparse_moe.experts.121.w2", "model.layers.43.block_sparse_moe.experts.122.w2", "model.layers.43.block_sparse_moe.experts.123.w2", "model.layers.43.block_sparse_moe.experts.124.w2", "model.layers.43.block_sparse_moe.experts.125.w2", "model.layers.43.block_sparse_moe.experts.126.w2", "model.layers.43.block_sparse_moe.experts.127.w2", "model.layers.43.block_sparse_moe.experts.128.w2", "model.layers.43.block_sparse_moe.experts.129.w2", "model.layers.43.block_sparse_moe.experts.130.w2", "model.layers.43.block_sparse_moe.experts.131.w2", "model.layers.43.block_sparse_moe.experts.132.w2", "model.layers.43.block_sparse_moe.experts.133.w2", "model.layers.43.block_sparse_moe.experts.134.w2", "model.layers.43.block_sparse_moe.experts.135.w2", "model.layers.43.block_sparse_moe.experts.136.w2", "model.layers.43.block_sparse_moe.experts.137.w2", "model.layers.43.block_sparse_moe.experts.138.w2", "model.layers.43.block_sparse_moe.experts.139.w2", "model.layers.43.block_sparse_moe.experts.140.w2", "model.layers.43.block_sparse_moe.experts.141.w2", "model.layers.43.block_sparse_moe.experts.142.w2", "model.layers.43.block_sparse_moe.experts.143.w2", "model.layers.43.block_sparse_moe.experts.144.w2", "model.layers.43.block_sparse_moe.experts.145.w2", "model.layers.43.block_sparse_moe.experts.146.w2", "model.layers.43.block_sparse_moe.experts.147.w2", "model.layers.43.block_sparse_moe.experts.148.w2", "model.layers.43.block_sparse_moe.experts.149.w2", "model.layers.43.block_sparse_moe.experts.150.w2", "model.layers.43.block_sparse_moe.experts.151.w2", "model.layers.43.block_sparse_moe.experts.152.w2", "model.layers.43.block_sparse_moe.experts.153.w2", "model.layers.43.block_sparse_moe.experts.154.w2", "model.layers.43.block_sparse_moe.experts.155.w2", "model.layers.43.block_sparse_moe.experts.156.w2", "model.layers.43.block_sparse_moe.experts.157.w2", "model.layers.43.block_sparse_moe.experts.158.w2", "model.layers.43.block_sparse_moe.experts.159.w2", "model.layers.43.block_sparse_moe.experts.160.w2", "model.layers.43.block_sparse_moe.experts.161.w2", "model.layers.43.block_sparse_moe.experts.162.w2", "model.layers.43.block_sparse_moe.experts.163.w2", "model.layers.43.block_sparse_moe.experts.164.w2", "model.layers.43.block_sparse_moe.experts.165.w2", "model.layers.43.block_sparse_moe.experts.166.w2", "model.layers.43.block_sparse_moe.experts.167.w2", "model.layers.43.block_sparse_moe.experts.168.w2", "model.layers.43.block_sparse_moe.experts.169.w2", "model.layers.43.block_sparse_moe.experts.170.w2", "model.layers.43.block_sparse_moe.experts.171.w2", "model.layers.43.block_sparse_moe.experts.172.w2", "model.layers.43.block_sparse_moe.experts.173.w2", "model.layers.43.block_sparse_moe.experts.174.w2", "model.layers.43.block_sparse_moe.experts.175.w2", "model.layers.43.block_sparse_moe.experts.176.w2", "model.layers.43.block_sparse_moe.experts.177.w2", "model.layers.43.block_sparse_moe.experts.178.w2", "model.layers.43.block_sparse_moe.experts.179.w2", "model.layers.43.block_sparse_moe.experts.180.w2", "model.layers.43.block_sparse_moe.experts.181.w2", "model.layers.43.block_sparse_moe.experts.182.w2", "model.layers.43.block_sparse_moe.experts.183.w2", "model.layers.43.block_sparse_moe.experts.184.w2", "model.layers.43.block_sparse_moe.experts.185.w2", "model.layers.43.block_sparse_moe.experts.186.w2", "model.layers.43.block_sparse_moe.experts.187.w2", "model.layers.43.block_sparse_moe.experts.188.w2", "model.layers.43.block_sparse_moe.experts.189.w2", "model.layers.43.block_sparse_moe.experts.190.w2", "model.layers.43.block_sparse_moe.experts.191.w2", "model.layers.43.block_sparse_moe.experts.192.w2", "model.layers.43.block_sparse_moe.experts.193.w2", "model.layers.43.block_sparse_moe.experts.194.w2", "model.layers.43.block_sparse_moe.experts.195.w2", "model.layers.43.block_sparse_moe.experts.196.w2", "model.layers.43.block_sparse_moe.experts.197.w2", "model.layers.43.block_sparse_moe.experts.198.w2", "model.layers.43.block_sparse_moe.experts.199.w2", "model.layers.43.block_sparse_moe.experts.200.w2", "model.layers.43.block_sparse_moe.experts.201.w2", "model.layers.43.block_sparse_moe.experts.202.w2", "model.layers.43.block_sparse_moe.experts.203.w2", "model.layers.43.block_sparse_moe.experts.204.w2", "model.layers.43.block_sparse_moe.experts.205.w2", "model.layers.43.block_sparse_moe.experts.206.w2", "model.layers.43.block_sparse_moe.experts.207.w2", "model.layers.43.block_sparse_moe.experts.208.w2", "model.layers.43.block_sparse_moe.experts.209.w2", "model.layers.43.block_sparse_moe.experts.210.w2", "model.layers.43.block_sparse_moe.experts.211.w2", "model.layers.43.block_sparse_moe.experts.212.w2", "model.layers.43.block_sparse_moe.experts.213.w2", "model.layers.43.block_sparse_moe.experts.214.w2", "model.layers.43.block_sparse_moe.experts.215.w2", "model.layers.43.block_sparse_moe.experts.216.w2", "model.layers.43.block_sparse_moe.experts.217.w2", "model.layers.43.block_sparse_moe.experts.218.w2", "model.layers.43.block_sparse_moe.experts.219.w2", "model.layers.43.block_sparse_moe.experts.220.w2", "model.layers.43.block_sparse_moe.experts.221.w2", "model.layers.43.block_sparse_moe.experts.222.w2", "model.layers.43.block_sparse_moe.experts.223.w2", "model.layers.43.block_sparse_moe.experts.224.w2", "model.layers.43.block_sparse_moe.experts.225.w2", "model.layers.43.block_sparse_moe.experts.226.w2", "model.layers.43.block_sparse_moe.experts.227.w2", "model.layers.43.block_sparse_moe.experts.228.w2", "model.layers.43.block_sparse_moe.experts.229.w2", "model.layers.43.block_sparse_moe.experts.230.w2", "model.layers.43.block_sparse_moe.experts.231.w2", "model.layers.43.block_sparse_moe.experts.232.w2", "model.layers.43.block_sparse_moe.experts.233.w2", "model.layers.43.block_sparse_moe.experts.234.w2", "model.layers.43.block_sparse_moe.experts.235.w2", "model.layers.43.block_sparse_moe.experts.236.w2", "model.layers.43.block_sparse_moe.experts.237.w2", "model.layers.43.block_sparse_moe.experts.238.w2", "model.layers.43.block_sparse_moe.experts.239.w2", "model.layers.43.block_sparse_moe.experts.240.w2", "model.layers.43.block_sparse_moe.experts.241.w2", "model.layers.43.block_sparse_moe.experts.242.w2", "model.layers.43.block_sparse_moe.experts.243.w2", "model.layers.43.block_sparse_moe.experts.244.w2", "model.layers.43.block_sparse_moe.experts.245.w2", "model.layers.43.block_sparse_moe.experts.246.w2", "model.layers.43.block_sparse_moe.experts.247.w2", "model.layers.43.block_sparse_moe.experts.248.w2", "model.layers.43.block_sparse_moe.experts.249.w2", "model.layers.43.block_sparse_moe.experts.250.w2", "model.layers.43.block_sparse_moe.experts.251.w2", "model.layers.43.block_sparse_moe.experts.252.w2", "model.layers.43.block_sparse_moe.experts.253.w2", "model.layers.43.block_sparse_moe.experts.254.w2", "model.layers.43.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -2.524033188820718e-05, "dbits": 1207959552 } ] }, { "idx": 220, "layers": [ "model.layers.44.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00010496657341718674, "dbits": 18874368 } ] }, { "idx": 221, "layers": [ "model.layers.44.self_attn.k_proj", "model.layers.44.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0005314391106367028, "dbits": 6291456 } ] }, { "idx": 222, "layers": [ "model.layers.44.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00018254816532135842, "dbits": 18874368 } ] }, { "idx": 223, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w1", "model.layers.44.block_sparse_moe.experts.1.w1", "model.layers.44.block_sparse_moe.experts.2.w1", "model.layers.44.block_sparse_moe.experts.3.w1", "model.layers.44.block_sparse_moe.experts.4.w1", "model.layers.44.block_sparse_moe.experts.5.w1", "model.layers.44.block_sparse_moe.experts.6.w1", "model.layers.44.block_sparse_moe.experts.7.w1", "model.layers.44.block_sparse_moe.experts.8.w1", "model.layers.44.block_sparse_moe.experts.9.w1", "model.layers.44.block_sparse_moe.experts.10.w1", "model.layers.44.block_sparse_moe.experts.11.w1", "model.layers.44.block_sparse_moe.experts.12.w1", "model.layers.44.block_sparse_moe.experts.13.w1", "model.layers.44.block_sparse_moe.experts.14.w1", "model.layers.44.block_sparse_moe.experts.15.w1", "model.layers.44.block_sparse_moe.experts.16.w1", "model.layers.44.block_sparse_moe.experts.17.w1", "model.layers.44.block_sparse_moe.experts.18.w1", "model.layers.44.block_sparse_moe.experts.19.w1", "model.layers.44.block_sparse_moe.experts.20.w1", "model.layers.44.block_sparse_moe.experts.21.w1", "model.layers.44.block_sparse_moe.experts.22.w1", "model.layers.44.block_sparse_moe.experts.23.w1", "model.layers.44.block_sparse_moe.experts.24.w1", "model.layers.44.block_sparse_moe.experts.25.w1", "model.layers.44.block_sparse_moe.experts.26.w1", "model.layers.44.block_sparse_moe.experts.27.w1", "model.layers.44.block_sparse_moe.experts.28.w1", "model.layers.44.block_sparse_moe.experts.29.w1", "model.layers.44.block_sparse_moe.experts.30.w1", "model.layers.44.block_sparse_moe.experts.31.w1", "model.layers.44.block_sparse_moe.experts.32.w1", "model.layers.44.block_sparse_moe.experts.33.w1", "model.layers.44.block_sparse_moe.experts.34.w1", "model.layers.44.block_sparse_moe.experts.35.w1", "model.layers.44.block_sparse_moe.experts.36.w1", "model.layers.44.block_sparse_moe.experts.37.w1", "model.layers.44.block_sparse_moe.experts.38.w1", "model.layers.44.block_sparse_moe.experts.39.w1", "model.layers.44.block_sparse_moe.experts.40.w1", "model.layers.44.block_sparse_moe.experts.41.w1", "model.layers.44.block_sparse_moe.experts.42.w1", "model.layers.44.block_sparse_moe.experts.43.w1", "model.layers.44.block_sparse_moe.experts.44.w1", "model.layers.44.block_sparse_moe.experts.45.w1", "model.layers.44.block_sparse_moe.experts.46.w1", "model.layers.44.block_sparse_moe.experts.47.w1", "model.layers.44.block_sparse_moe.experts.48.w1", "model.layers.44.block_sparse_moe.experts.49.w1", "model.layers.44.block_sparse_moe.experts.50.w1", "model.layers.44.block_sparse_moe.experts.51.w1", "model.layers.44.block_sparse_moe.experts.52.w1", "model.layers.44.block_sparse_moe.experts.53.w1", "model.layers.44.block_sparse_moe.experts.54.w1", "model.layers.44.block_sparse_moe.experts.55.w1", "model.layers.44.block_sparse_moe.experts.56.w1", "model.layers.44.block_sparse_moe.experts.57.w1", "model.layers.44.block_sparse_moe.experts.58.w1", "model.layers.44.block_sparse_moe.experts.59.w1", "model.layers.44.block_sparse_moe.experts.60.w1", "model.layers.44.block_sparse_moe.experts.61.w1", "model.layers.44.block_sparse_moe.experts.62.w1", "model.layers.44.block_sparse_moe.experts.63.w1", "model.layers.44.block_sparse_moe.experts.64.w1", "model.layers.44.block_sparse_moe.experts.65.w1", "model.layers.44.block_sparse_moe.experts.66.w1", "model.layers.44.block_sparse_moe.experts.67.w1", "model.layers.44.block_sparse_moe.experts.68.w1", "model.layers.44.block_sparse_moe.experts.69.w1", "model.layers.44.block_sparse_moe.experts.70.w1", "model.layers.44.block_sparse_moe.experts.71.w1", "model.layers.44.block_sparse_moe.experts.72.w1", "model.layers.44.block_sparse_moe.experts.73.w1", "model.layers.44.block_sparse_moe.experts.74.w1", "model.layers.44.block_sparse_moe.experts.75.w1", "model.layers.44.block_sparse_moe.experts.76.w1", "model.layers.44.block_sparse_moe.experts.77.w1", "model.layers.44.block_sparse_moe.experts.78.w1", "model.layers.44.block_sparse_moe.experts.79.w1", "model.layers.44.block_sparse_moe.experts.80.w1", "model.layers.44.block_sparse_moe.experts.81.w1", "model.layers.44.block_sparse_moe.experts.82.w1", "model.layers.44.block_sparse_moe.experts.83.w1", "model.layers.44.block_sparse_moe.experts.84.w1", "model.layers.44.block_sparse_moe.experts.85.w1", "model.layers.44.block_sparse_moe.experts.86.w1", "model.layers.44.block_sparse_moe.experts.87.w1", "model.layers.44.block_sparse_moe.experts.88.w1", "model.layers.44.block_sparse_moe.experts.89.w1", "model.layers.44.block_sparse_moe.experts.90.w1", "model.layers.44.block_sparse_moe.experts.91.w1", "model.layers.44.block_sparse_moe.experts.92.w1", "model.layers.44.block_sparse_moe.experts.93.w1", "model.layers.44.block_sparse_moe.experts.94.w1", "model.layers.44.block_sparse_moe.experts.95.w1", "model.layers.44.block_sparse_moe.experts.96.w1", "model.layers.44.block_sparse_moe.experts.97.w1", "model.layers.44.block_sparse_moe.experts.98.w1", "model.layers.44.block_sparse_moe.experts.99.w1", "model.layers.44.block_sparse_moe.experts.100.w1", "model.layers.44.block_sparse_moe.experts.101.w1", "model.layers.44.block_sparse_moe.experts.102.w1", "model.layers.44.block_sparse_moe.experts.103.w1", "model.layers.44.block_sparse_moe.experts.104.w1", "model.layers.44.block_sparse_moe.experts.105.w1", "model.layers.44.block_sparse_moe.experts.106.w1", "model.layers.44.block_sparse_moe.experts.107.w1", "model.layers.44.block_sparse_moe.experts.108.w1", "model.layers.44.block_sparse_moe.experts.109.w1", "model.layers.44.block_sparse_moe.experts.110.w1", "model.layers.44.block_sparse_moe.experts.111.w1", "model.layers.44.block_sparse_moe.experts.112.w1", "model.layers.44.block_sparse_moe.experts.113.w1", "model.layers.44.block_sparse_moe.experts.114.w1", "model.layers.44.block_sparse_moe.experts.115.w1", "model.layers.44.block_sparse_moe.experts.116.w1", "model.layers.44.block_sparse_moe.experts.117.w1", "model.layers.44.block_sparse_moe.experts.118.w1", "model.layers.44.block_sparse_moe.experts.119.w1", "model.layers.44.block_sparse_moe.experts.120.w1", "model.layers.44.block_sparse_moe.experts.121.w1", "model.layers.44.block_sparse_moe.experts.122.w1", "model.layers.44.block_sparse_moe.experts.123.w1", "model.layers.44.block_sparse_moe.experts.124.w1", "model.layers.44.block_sparse_moe.experts.125.w1", "model.layers.44.block_sparse_moe.experts.126.w1", "model.layers.44.block_sparse_moe.experts.127.w1", "model.layers.44.block_sparse_moe.experts.128.w1", "model.layers.44.block_sparse_moe.experts.129.w1", "model.layers.44.block_sparse_moe.experts.130.w1", "model.layers.44.block_sparse_moe.experts.131.w1", "model.layers.44.block_sparse_moe.experts.132.w1", "model.layers.44.block_sparse_moe.experts.133.w1", "model.layers.44.block_sparse_moe.experts.134.w1", "model.layers.44.block_sparse_moe.experts.135.w1", "model.layers.44.block_sparse_moe.experts.136.w1", "model.layers.44.block_sparse_moe.experts.137.w1", "model.layers.44.block_sparse_moe.experts.138.w1", "model.layers.44.block_sparse_moe.experts.139.w1", "model.layers.44.block_sparse_moe.experts.140.w1", "model.layers.44.block_sparse_moe.experts.141.w1", "model.layers.44.block_sparse_moe.experts.142.w1", "model.layers.44.block_sparse_moe.experts.143.w1", "model.layers.44.block_sparse_moe.experts.144.w1", "model.layers.44.block_sparse_moe.experts.145.w1", "model.layers.44.block_sparse_moe.experts.146.w1", "model.layers.44.block_sparse_moe.experts.147.w1", "model.layers.44.block_sparse_moe.experts.148.w1", "model.layers.44.block_sparse_moe.experts.149.w1", "model.layers.44.block_sparse_moe.experts.150.w1", "model.layers.44.block_sparse_moe.experts.151.w1", "model.layers.44.block_sparse_moe.experts.152.w1", "model.layers.44.block_sparse_moe.experts.153.w1", "model.layers.44.block_sparse_moe.experts.154.w1", "model.layers.44.block_sparse_moe.experts.155.w1", "model.layers.44.block_sparse_moe.experts.156.w1", "model.layers.44.block_sparse_moe.experts.157.w1", "model.layers.44.block_sparse_moe.experts.158.w1", "model.layers.44.block_sparse_moe.experts.159.w1", "model.layers.44.block_sparse_moe.experts.160.w1", "model.layers.44.block_sparse_moe.experts.161.w1", "model.layers.44.block_sparse_moe.experts.162.w1", "model.layers.44.block_sparse_moe.experts.163.w1", "model.layers.44.block_sparse_moe.experts.164.w1", "model.layers.44.block_sparse_moe.experts.165.w1", "model.layers.44.block_sparse_moe.experts.166.w1", "model.layers.44.block_sparse_moe.experts.167.w1", "model.layers.44.block_sparse_moe.experts.168.w1", "model.layers.44.block_sparse_moe.experts.169.w1", "model.layers.44.block_sparse_moe.experts.170.w1", "model.layers.44.block_sparse_moe.experts.171.w1", "model.layers.44.block_sparse_moe.experts.172.w1", "model.layers.44.block_sparse_moe.experts.173.w1", "model.layers.44.block_sparse_moe.experts.174.w1", "model.layers.44.block_sparse_moe.experts.175.w1", "model.layers.44.block_sparse_moe.experts.176.w1", "model.layers.44.block_sparse_moe.experts.177.w1", "model.layers.44.block_sparse_moe.experts.178.w1", "model.layers.44.block_sparse_moe.experts.179.w1", "model.layers.44.block_sparse_moe.experts.180.w1", "model.layers.44.block_sparse_moe.experts.181.w1", "model.layers.44.block_sparse_moe.experts.182.w1", "model.layers.44.block_sparse_moe.experts.183.w1", "model.layers.44.block_sparse_moe.experts.184.w1", "model.layers.44.block_sparse_moe.experts.185.w1", "model.layers.44.block_sparse_moe.experts.186.w1", "model.layers.44.block_sparse_moe.experts.187.w1", "model.layers.44.block_sparse_moe.experts.188.w1", "model.layers.44.block_sparse_moe.experts.189.w1", "model.layers.44.block_sparse_moe.experts.190.w1", "model.layers.44.block_sparse_moe.experts.191.w1", "model.layers.44.block_sparse_moe.experts.192.w1", "model.layers.44.block_sparse_moe.experts.193.w1", "model.layers.44.block_sparse_moe.experts.194.w1", "model.layers.44.block_sparse_moe.experts.195.w1", "model.layers.44.block_sparse_moe.experts.196.w1", "model.layers.44.block_sparse_moe.experts.197.w1", "model.layers.44.block_sparse_moe.experts.198.w1", "model.layers.44.block_sparse_moe.experts.199.w1", "model.layers.44.block_sparse_moe.experts.200.w1", "model.layers.44.block_sparse_moe.experts.201.w1", "model.layers.44.block_sparse_moe.experts.202.w1", "model.layers.44.block_sparse_moe.experts.203.w1", "model.layers.44.block_sparse_moe.experts.204.w1", "model.layers.44.block_sparse_moe.experts.205.w1", "model.layers.44.block_sparse_moe.experts.206.w1", "model.layers.44.block_sparse_moe.experts.207.w1", "model.layers.44.block_sparse_moe.experts.208.w1", "model.layers.44.block_sparse_moe.experts.209.w1", "model.layers.44.block_sparse_moe.experts.210.w1", "model.layers.44.block_sparse_moe.experts.211.w1", "model.layers.44.block_sparse_moe.experts.212.w1", "model.layers.44.block_sparse_moe.experts.213.w1", "model.layers.44.block_sparse_moe.experts.214.w1", "model.layers.44.block_sparse_moe.experts.215.w1", "model.layers.44.block_sparse_moe.experts.216.w1", "model.layers.44.block_sparse_moe.experts.217.w1", "model.layers.44.block_sparse_moe.experts.218.w1", "model.layers.44.block_sparse_moe.experts.219.w1", "model.layers.44.block_sparse_moe.experts.220.w1", "model.layers.44.block_sparse_moe.experts.221.w1", "model.layers.44.block_sparse_moe.experts.222.w1", "model.layers.44.block_sparse_moe.experts.223.w1", "model.layers.44.block_sparse_moe.experts.224.w1", "model.layers.44.block_sparse_moe.experts.225.w1", "model.layers.44.block_sparse_moe.experts.226.w1", "model.layers.44.block_sparse_moe.experts.227.w1", "model.layers.44.block_sparse_moe.experts.228.w1", "model.layers.44.block_sparse_moe.experts.229.w1", "model.layers.44.block_sparse_moe.experts.230.w1", "model.layers.44.block_sparse_moe.experts.231.w1", "model.layers.44.block_sparse_moe.experts.232.w1", "model.layers.44.block_sparse_moe.experts.233.w1", "model.layers.44.block_sparse_moe.experts.234.w1", "model.layers.44.block_sparse_moe.experts.235.w1", "model.layers.44.block_sparse_moe.experts.236.w1", "model.layers.44.block_sparse_moe.experts.237.w1", "model.layers.44.block_sparse_moe.experts.238.w1", "model.layers.44.block_sparse_moe.experts.239.w1", "model.layers.44.block_sparse_moe.experts.240.w1", "model.layers.44.block_sparse_moe.experts.241.w1", "model.layers.44.block_sparse_moe.experts.242.w1", "model.layers.44.block_sparse_moe.experts.243.w1", "model.layers.44.block_sparse_moe.experts.244.w1", "model.layers.44.block_sparse_moe.experts.245.w1", "model.layers.44.block_sparse_moe.experts.246.w1", "model.layers.44.block_sparse_moe.experts.247.w1", "model.layers.44.block_sparse_moe.experts.248.w1", "model.layers.44.block_sparse_moe.experts.249.w1", "model.layers.44.block_sparse_moe.experts.250.w1", "model.layers.44.block_sparse_moe.experts.251.w1", "model.layers.44.block_sparse_moe.experts.252.w1", "model.layers.44.block_sparse_moe.experts.253.w1", "model.layers.44.block_sparse_moe.experts.254.w1", "model.layers.44.block_sparse_moe.experts.255.w1", "model.layers.44.block_sparse_moe.experts.0.w3", "model.layers.44.block_sparse_moe.experts.1.w3", "model.layers.44.block_sparse_moe.experts.2.w3", "model.layers.44.block_sparse_moe.experts.3.w3", "model.layers.44.block_sparse_moe.experts.4.w3", "model.layers.44.block_sparse_moe.experts.5.w3", "model.layers.44.block_sparse_moe.experts.6.w3", "model.layers.44.block_sparse_moe.experts.7.w3", "model.layers.44.block_sparse_moe.experts.8.w3", "model.layers.44.block_sparse_moe.experts.9.w3", "model.layers.44.block_sparse_moe.experts.10.w3", "model.layers.44.block_sparse_moe.experts.11.w3", "model.layers.44.block_sparse_moe.experts.12.w3", "model.layers.44.block_sparse_moe.experts.13.w3", "model.layers.44.block_sparse_moe.experts.14.w3", "model.layers.44.block_sparse_moe.experts.15.w3", "model.layers.44.block_sparse_moe.experts.16.w3", "model.layers.44.block_sparse_moe.experts.17.w3", "model.layers.44.block_sparse_moe.experts.18.w3", "model.layers.44.block_sparse_moe.experts.19.w3", "model.layers.44.block_sparse_moe.experts.20.w3", "model.layers.44.block_sparse_moe.experts.21.w3", "model.layers.44.block_sparse_moe.experts.22.w3", "model.layers.44.block_sparse_moe.experts.23.w3", "model.layers.44.block_sparse_moe.experts.24.w3", "model.layers.44.block_sparse_moe.experts.25.w3", "model.layers.44.block_sparse_moe.experts.26.w3", "model.layers.44.block_sparse_moe.experts.27.w3", "model.layers.44.block_sparse_moe.experts.28.w3", "model.layers.44.block_sparse_moe.experts.29.w3", "model.layers.44.block_sparse_moe.experts.30.w3", "model.layers.44.block_sparse_moe.experts.31.w3", "model.layers.44.block_sparse_moe.experts.32.w3", "model.layers.44.block_sparse_moe.experts.33.w3", "model.layers.44.block_sparse_moe.experts.34.w3", "model.layers.44.block_sparse_moe.experts.35.w3", "model.layers.44.block_sparse_moe.experts.36.w3", "model.layers.44.block_sparse_moe.experts.37.w3", "model.layers.44.block_sparse_moe.experts.38.w3", "model.layers.44.block_sparse_moe.experts.39.w3", "model.layers.44.block_sparse_moe.experts.40.w3", "model.layers.44.block_sparse_moe.experts.41.w3", "model.layers.44.block_sparse_moe.experts.42.w3", "model.layers.44.block_sparse_moe.experts.43.w3", "model.layers.44.block_sparse_moe.experts.44.w3", "model.layers.44.block_sparse_moe.experts.45.w3", "model.layers.44.block_sparse_moe.experts.46.w3", "model.layers.44.block_sparse_moe.experts.47.w3", "model.layers.44.block_sparse_moe.experts.48.w3", "model.layers.44.block_sparse_moe.experts.49.w3", "model.layers.44.block_sparse_moe.experts.50.w3", "model.layers.44.block_sparse_moe.experts.51.w3", "model.layers.44.block_sparse_moe.experts.52.w3", "model.layers.44.block_sparse_moe.experts.53.w3", "model.layers.44.block_sparse_moe.experts.54.w3", "model.layers.44.block_sparse_moe.experts.55.w3", "model.layers.44.block_sparse_moe.experts.56.w3", "model.layers.44.block_sparse_moe.experts.57.w3", "model.layers.44.block_sparse_moe.experts.58.w3", "model.layers.44.block_sparse_moe.experts.59.w3", "model.layers.44.block_sparse_moe.experts.60.w3", "model.layers.44.block_sparse_moe.experts.61.w3", "model.layers.44.block_sparse_moe.experts.62.w3", "model.layers.44.block_sparse_moe.experts.63.w3", "model.layers.44.block_sparse_moe.experts.64.w3", "model.layers.44.block_sparse_moe.experts.65.w3", "model.layers.44.block_sparse_moe.experts.66.w3", "model.layers.44.block_sparse_moe.experts.67.w3", "model.layers.44.block_sparse_moe.experts.68.w3", "model.layers.44.block_sparse_moe.experts.69.w3", "model.layers.44.block_sparse_moe.experts.70.w3", "model.layers.44.block_sparse_moe.experts.71.w3", "model.layers.44.block_sparse_moe.experts.72.w3", "model.layers.44.block_sparse_moe.experts.73.w3", "model.layers.44.block_sparse_moe.experts.74.w3", "model.layers.44.block_sparse_moe.experts.75.w3", "model.layers.44.block_sparse_moe.experts.76.w3", "model.layers.44.block_sparse_moe.experts.77.w3", "model.layers.44.block_sparse_moe.experts.78.w3", "model.layers.44.block_sparse_moe.experts.79.w3", "model.layers.44.block_sparse_moe.experts.80.w3", "model.layers.44.block_sparse_moe.experts.81.w3", "model.layers.44.block_sparse_moe.experts.82.w3", "model.layers.44.block_sparse_moe.experts.83.w3", "model.layers.44.block_sparse_moe.experts.84.w3", "model.layers.44.block_sparse_moe.experts.85.w3", "model.layers.44.block_sparse_moe.experts.86.w3", "model.layers.44.block_sparse_moe.experts.87.w3", "model.layers.44.block_sparse_moe.experts.88.w3", "model.layers.44.block_sparse_moe.experts.89.w3", "model.layers.44.block_sparse_moe.experts.90.w3", "model.layers.44.block_sparse_moe.experts.91.w3", "model.layers.44.block_sparse_moe.experts.92.w3", "model.layers.44.block_sparse_moe.experts.93.w3", "model.layers.44.block_sparse_moe.experts.94.w3", "model.layers.44.block_sparse_moe.experts.95.w3", "model.layers.44.block_sparse_moe.experts.96.w3", "model.layers.44.block_sparse_moe.experts.97.w3", "model.layers.44.block_sparse_moe.experts.98.w3", "model.layers.44.block_sparse_moe.experts.99.w3", "model.layers.44.block_sparse_moe.experts.100.w3", "model.layers.44.block_sparse_moe.experts.101.w3", "model.layers.44.block_sparse_moe.experts.102.w3", "model.layers.44.block_sparse_moe.experts.103.w3", "model.layers.44.block_sparse_moe.experts.104.w3", "model.layers.44.block_sparse_moe.experts.105.w3", "model.layers.44.block_sparse_moe.experts.106.w3", "model.layers.44.block_sparse_moe.experts.107.w3", "model.layers.44.block_sparse_moe.experts.108.w3", "model.layers.44.block_sparse_moe.experts.109.w3", "model.layers.44.block_sparse_moe.experts.110.w3", "model.layers.44.block_sparse_moe.experts.111.w3", "model.layers.44.block_sparse_moe.experts.112.w3", "model.layers.44.block_sparse_moe.experts.113.w3", "model.layers.44.block_sparse_moe.experts.114.w3", "model.layers.44.block_sparse_moe.experts.115.w3", "model.layers.44.block_sparse_moe.experts.116.w3", "model.layers.44.block_sparse_moe.experts.117.w3", "model.layers.44.block_sparse_moe.experts.118.w3", "model.layers.44.block_sparse_moe.experts.119.w3", "model.layers.44.block_sparse_moe.experts.120.w3", "model.layers.44.block_sparse_moe.experts.121.w3", "model.layers.44.block_sparse_moe.experts.122.w3", "model.layers.44.block_sparse_moe.experts.123.w3", "model.layers.44.block_sparse_moe.experts.124.w3", "model.layers.44.block_sparse_moe.experts.125.w3", "model.layers.44.block_sparse_moe.experts.126.w3", "model.layers.44.block_sparse_moe.experts.127.w3", "model.layers.44.block_sparse_moe.experts.128.w3", "model.layers.44.block_sparse_moe.experts.129.w3", "model.layers.44.block_sparse_moe.experts.130.w3", "model.layers.44.block_sparse_moe.experts.131.w3", "model.layers.44.block_sparse_moe.experts.132.w3", "model.layers.44.block_sparse_moe.experts.133.w3", "model.layers.44.block_sparse_moe.experts.134.w3", "model.layers.44.block_sparse_moe.experts.135.w3", "model.layers.44.block_sparse_moe.experts.136.w3", "model.layers.44.block_sparse_moe.experts.137.w3", "model.layers.44.block_sparse_moe.experts.138.w3", "model.layers.44.block_sparse_moe.experts.139.w3", "model.layers.44.block_sparse_moe.experts.140.w3", "model.layers.44.block_sparse_moe.experts.141.w3", "model.layers.44.block_sparse_moe.experts.142.w3", "model.layers.44.block_sparse_moe.experts.143.w3", "model.layers.44.block_sparse_moe.experts.144.w3", "model.layers.44.block_sparse_moe.experts.145.w3", "model.layers.44.block_sparse_moe.experts.146.w3", "model.layers.44.block_sparse_moe.experts.147.w3", "model.layers.44.block_sparse_moe.experts.148.w3", "model.layers.44.block_sparse_moe.experts.149.w3", "model.layers.44.block_sparse_moe.experts.150.w3", "model.layers.44.block_sparse_moe.experts.151.w3", "model.layers.44.block_sparse_moe.experts.152.w3", "model.layers.44.block_sparse_moe.experts.153.w3", "model.layers.44.block_sparse_moe.experts.154.w3", "model.layers.44.block_sparse_moe.experts.155.w3", "model.layers.44.block_sparse_moe.experts.156.w3", "model.layers.44.block_sparse_moe.experts.157.w3", "model.layers.44.block_sparse_moe.experts.158.w3", "model.layers.44.block_sparse_moe.experts.159.w3", "model.layers.44.block_sparse_moe.experts.160.w3", "model.layers.44.block_sparse_moe.experts.161.w3", "model.layers.44.block_sparse_moe.experts.162.w3", "model.layers.44.block_sparse_moe.experts.163.w3", "model.layers.44.block_sparse_moe.experts.164.w3", "model.layers.44.block_sparse_moe.experts.165.w3", "model.layers.44.block_sparse_moe.experts.166.w3", "model.layers.44.block_sparse_moe.experts.167.w3", "model.layers.44.block_sparse_moe.experts.168.w3", "model.layers.44.block_sparse_moe.experts.169.w3", "model.layers.44.block_sparse_moe.experts.170.w3", "model.layers.44.block_sparse_moe.experts.171.w3", "model.layers.44.block_sparse_moe.experts.172.w3", "model.layers.44.block_sparse_moe.experts.173.w3", "model.layers.44.block_sparse_moe.experts.174.w3", "model.layers.44.block_sparse_moe.experts.175.w3", "model.layers.44.block_sparse_moe.experts.176.w3", "model.layers.44.block_sparse_moe.experts.177.w3", "model.layers.44.block_sparse_moe.experts.178.w3", "model.layers.44.block_sparse_moe.experts.179.w3", "model.layers.44.block_sparse_moe.experts.180.w3", "model.layers.44.block_sparse_moe.experts.181.w3", "model.layers.44.block_sparse_moe.experts.182.w3", "model.layers.44.block_sparse_moe.experts.183.w3", "model.layers.44.block_sparse_moe.experts.184.w3", "model.layers.44.block_sparse_moe.experts.185.w3", "model.layers.44.block_sparse_moe.experts.186.w3", "model.layers.44.block_sparse_moe.experts.187.w3", "model.layers.44.block_sparse_moe.experts.188.w3", "model.layers.44.block_sparse_moe.experts.189.w3", "model.layers.44.block_sparse_moe.experts.190.w3", "model.layers.44.block_sparse_moe.experts.191.w3", "model.layers.44.block_sparse_moe.experts.192.w3", "model.layers.44.block_sparse_moe.experts.193.w3", "model.layers.44.block_sparse_moe.experts.194.w3", "model.layers.44.block_sparse_moe.experts.195.w3", "model.layers.44.block_sparse_moe.experts.196.w3", "model.layers.44.block_sparse_moe.experts.197.w3", "model.layers.44.block_sparse_moe.experts.198.w3", "model.layers.44.block_sparse_moe.experts.199.w3", "model.layers.44.block_sparse_moe.experts.200.w3", "model.layers.44.block_sparse_moe.experts.201.w3", "model.layers.44.block_sparse_moe.experts.202.w3", "model.layers.44.block_sparse_moe.experts.203.w3", "model.layers.44.block_sparse_moe.experts.204.w3", "model.layers.44.block_sparse_moe.experts.205.w3", "model.layers.44.block_sparse_moe.experts.206.w3", "model.layers.44.block_sparse_moe.experts.207.w3", "model.layers.44.block_sparse_moe.experts.208.w3", "model.layers.44.block_sparse_moe.experts.209.w3", "model.layers.44.block_sparse_moe.experts.210.w3", "model.layers.44.block_sparse_moe.experts.211.w3", "model.layers.44.block_sparse_moe.experts.212.w3", "model.layers.44.block_sparse_moe.experts.213.w3", "model.layers.44.block_sparse_moe.experts.214.w3", "model.layers.44.block_sparse_moe.experts.215.w3", "model.layers.44.block_sparse_moe.experts.216.w3", "model.layers.44.block_sparse_moe.experts.217.w3", "model.layers.44.block_sparse_moe.experts.218.w3", "model.layers.44.block_sparse_moe.experts.219.w3", "model.layers.44.block_sparse_moe.experts.220.w3", "model.layers.44.block_sparse_moe.experts.221.w3", "model.layers.44.block_sparse_moe.experts.222.w3", "model.layers.44.block_sparse_moe.experts.223.w3", "model.layers.44.block_sparse_moe.experts.224.w3", "model.layers.44.block_sparse_moe.experts.225.w3", "model.layers.44.block_sparse_moe.experts.226.w3", "model.layers.44.block_sparse_moe.experts.227.w3", "model.layers.44.block_sparse_moe.experts.228.w3", "model.layers.44.block_sparse_moe.experts.229.w3", "model.layers.44.block_sparse_moe.experts.230.w3", "model.layers.44.block_sparse_moe.experts.231.w3", "model.layers.44.block_sparse_moe.experts.232.w3", "model.layers.44.block_sparse_moe.experts.233.w3", "model.layers.44.block_sparse_moe.experts.234.w3", "model.layers.44.block_sparse_moe.experts.235.w3", "model.layers.44.block_sparse_moe.experts.236.w3", "model.layers.44.block_sparse_moe.experts.237.w3", "model.layers.44.block_sparse_moe.experts.238.w3", "model.layers.44.block_sparse_moe.experts.239.w3", "model.layers.44.block_sparse_moe.experts.240.w3", "model.layers.44.block_sparse_moe.experts.241.w3", "model.layers.44.block_sparse_moe.experts.242.w3", "model.layers.44.block_sparse_moe.experts.243.w3", "model.layers.44.block_sparse_moe.experts.244.w3", "model.layers.44.block_sparse_moe.experts.245.w3", "model.layers.44.block_sparse_moe.experts.246.w3", "model.layers.44.block_sparse_moe.experts.247.w3", "model.layers.44.block_sparse_moe.experts.248.w3", "model.layers.44.block_sparse_moe.experts.249.w3", "model.layers.44.block_sparse_moe.experts.250.w3", "model.layers.44.block_sparse_moe.experts.251.w3", "model.layers.44.block_sparse_moe.experts.252.w3", "model.layers.44.block_sparse_moe.experts.253.w3", "model.layers.44.block_sparse_moe.experts.254.w3", "model.layers.44.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -2.2310018539431486e-05, "dbits": 2415919104 } ] }, { "idx": 224, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w2", "model.layers.44.block_sparse_moe.experts.1.w2", "model.layers.44.block_sparse_moe.experts.2.w2", "model.layers.44.block_sparse_moe.experts.3.w2", "model.layers.44.block_sparse_moe.experts.4.w2", "model.layers.44.block_sparse_moe.experts.5.w2", "model.layers.44.block_sparse_moe.experts.6.w2", "model.layers.44.block_sparse_moe.experts.7.w2", "model.layers.44.block_sparse_moe.experts.8.w2", "model.layers.44.block_sparse_moe.experts.9.w2", "model.layers.44.block_sparse_moe.experts.10.w2", "model.layers.44.block_sparse_moe.experts.11.w2", "model.layers.44.block_sparse_moe.experts.12.w2", "model.layers.44.block_sparse_moe.experts.13.w2", "model.layers.44.block_sparse_moe.experts.14.w2", "model.layers.44.block_sparse_moe.experts.15.w2", "model.layers.44.block_sparse_moe.experts.16.w2", "model.layers.44.block_sparse_moe.experts.17.w2", "model.layers.44.block_sparse_moe.experts.18.w2", "model.layers.44.block_sparse_moe.experts.19.w2", "model.layers.44.block_sparse_moe.experts.20.w2", "model.layers.44.block_sparse_moe.experts.21.w2", "model.layers.44.block_sparse_moe.experts.22.w2", "model.layers.44.block_sparse_moe.experts.23.w2", "model.layers.44.block_sparse_moe.experts.24.w2", "model.layers.44.block_sparse_moe.experts.25.w2", "model.layers.44.block_sparse_moe.experts.26.w2", "model.layers.44.block_sparse_moe.experts.27.w2", "model.layers.44.block_sparse_moe.experts.28.w2", "model.layers.44.block_sparse_moe.experts.29.w2", "model.layers.44.block_sparse_moe.experts.30.w2", "model.layers.44.block_sparse_moe.experts.31.w2", "model.layers.44.block_sparse_moe.experts.32.w2", "model.layers.44.block_sparse_moe.experts.33.w2", "model.layers.44.block_sparse_moe.experts.34.w2", "model.layers.44.block_sparse_moe.experts.35.w2", "model.layers.44.block_sparse_moe.experts.36.w2", "model.layers.44.block_sparse_moe.experts.37.w2", "model.layers.44.block_sparse_moe.experts.38.w2", "model.layers.44.block_sparse_moe.experts.39.w2", "model.layers.44.block_sparse_moe.experts.40.w2", "model.layers.44.block_sparse_moe.experts.41.w2", "model.layers.44.block_sparse_moe.experts.42.w2", "model.layers.44.block_sparse_moe.experts.43.w2", "model.layers.44.block_sparse_moe.experts.44.w2", "model.layers.44.block_sparse_moe.experts.45.w2", "model.layers.44.block_sparse_moe.experts.46.w2", "model.layers.44.block_sparse_moe.experts.47.w2", "model.layers.44.block_sparse_moe.experts.48.w2", "model.layers.44.block_sparse_moe.experts.49.w2", "model.layers.44.block_sparse_moe.experts.50.w2", "model.layers.44.block_sparse_moe.experts.51.w2", "model.layers.44.block_sparse_moe.experts.52.w2", "model.layers.44.block_sparse_moe.experts.53.w2", "model.layers.44.block_sparse_moe.experts.54.w2", "model.layers.44.block_sparse_moe.experts.55.w2", "model.layers.44.block_sparse_moe.experts.56.w2", "model.layers.44.block_sparse_moe.experts.57.w2", "model.layers.44.block_sparse_moe.experts.58.w2", "model.layers.44.block_sparse_moe.experts.59.w2", "model.layers.44.block_sparse_moe.experts.60.w2", "model.layers.44.block_sparse_moe.experts.61.w2", "model.layers.44.block_sparse_moe.experts.62.w2", "model.layers.44.block_sparse_moe.experts.63.w2", "model.layers.44.block_sparse_moe.experts.64.w2", "model.layers.44.block_sparse_moe.experts.65.w2", "model.layers.44.block_sparse_moe.experts.66.w2", "model.layers.44.block_sparse_moe.experts.67.w2", "model.layers.44.block_sparse_moe.experts.68.w2", "model.layers.44.block_sparse_moe.experts.69.w2", "model.layers.44.block_sparse_moe.experts.70.w2", "model.layers.44.block_sparse_moe.experts.71.w2", "model.layers.44.block_sparse_moe.experts.72.w2", "model.layers.44.block_sparse_moe.experts.73.w2", "model.layers.44.block_sparse_moe.experts.74.w2", "model.layers.44.block_sparse_moe.experts.75.w2", "model.layers.44.block_sparse_moe.experts.76.w2", "model.layers.44.block_sparse_moe.experts.77.w2", "model.layers.44.block_sparse_moe.experts.78.w2", "model.layers.44.block_sparse_moe.experts.79.w2", "model.layers.44.block_sparse_moe.experts.80.w2", "model.layers.44.block_sparse_moe.experts.81.w2", "model.layers.44.block_sparse_moe.experts.82.w2", "model.layers.44.block_sparse_moe.experts.83.w2", "model.layers.44.block_sparse_moe.experts.84.w2", "model.layers.44.block_sparse_moe.experts.85.w2", "model.layers.44.block_sparse_moe.experts.86.w2", "model.layers.44.block_sparse_moe.experts.87.w2", "model.layers.44.block_sparse_moe.experts.88.w2", "model.layers.44.block_sparse_moe.experts.89.w2", "model.layers.44.block_sparse_moe.experts.90.w2", "model.layers.44.block_sparse_moe.experts.91.w2", "model.layers.44.block_sparse_moe.experts.92.w2", "model.layers.44.block_sparse_moe.experts.93.w2", "model.layers.44.block_sparse_moe.experts.94.w2", "model.layers.44.block_sparse_moe.experts.95.w2", "model.layers.44.block_sparse_moe.experts.96.w2", "model.layers.44.block_sparse_moe.experts.97.w2", "model.layers.44.block_sparse_moe.experts.98.w2", "model.layers.44.block_sparse_moe.experts.99.w2", "model.layers.44.block_sparse_moe.experts.100.w2", "model.layers.44.block_sparse_moe.experts.101.w2", "model.layers.44.block_sparse_moe.experts.102.w2", "model.layers.44.block_sparse_moe.experts.103.w2", "model.layers.44.block_sparse_moe.experts.104.w2", "model.layers.44.block_sparse_moe.experts.105.w2", "model.layers.44.block_sparse_moe.experts.106.w2", "model.layers.44.block_sparse_moe.experts.107.w2", "model.layers.44.block_sparse_moe.experts.108.w2", "model.layers.44.block_sparse_moe.experts.109.w2", "model.layers.44.block_sparse_moe.experts.110.w2", "model.layers.44.block_sparse_moe.experts.111.w2", "model.layers.44.block_sparse_moe.experts.112.w2", "model.layers.44.block_sparse_moe.experts.113.w2", "model.layers.44.block_sparse_moe.experts.114.w2", "model.layers.44.block_sparse_moe.experts.115.w2", "model.layers.44.block_sparse_moe.experts.116.w2", "model.layers.44.block_sparse_moe.experts.117.w2", "model.layers.44.block_sparse_moe.experts.118.w2", "model.layers.44.block_sparse_moe.experts.119.w2", "model.layers.44.block_sparse_moe.experts.120.w2", "model.layers.44.block_sparse_moe.experts.121.w2", "model.layers.44.block_sparse_moe.experts.122.w2", "model.layers.44.block_sparse_moe.experts.123.w2", "model.layers.44.block_sparse_moe.experts.124.w2", "model.layers.44.block_sparse_moe.experts.125.w2", "model.layers.44.block_sparse_moe.experts.126.w2", "model.layers.44.block_sparse_moe.experts.127.w2", "model.layers.44.block_sparse_moe.experts.128.w2", "model.layers.44.block_sparse_moe.experts.129.w2", "model.layers.44.block_sparse_moe.experts.130.w2", "model.layers.44.block_sparse_moe.experts.131.w2", "model.layers.44.block_sparse_moe.experts.132.w2", "model.layers.44.block_sparse_moe.experts.133.w2", "model.layers.44.block_sparse_moe.experts.134.w2", "model.layers.44.block_sparse_moe.experts.135.w2", "model.layers.44.block_sparse_moe.experts.136.w2", "model.layers.44.block_sparse_moe.experts.137.w2", "model.layers.44.block_sparse_moe.experts.138.w2", "model.layers.44.block_sparse_moe.experts.139.w2", "model.layers.44.block_sparse_moe.experts.140.w2", "model.layers.44.block_sparse_moe.experts.141.w2", "model.layers.44.block_sparse_moe.experts.142.w2", "model.layers.44.block_sparse_moe.experts.143.w2", "model.layers.44.block_sparse_moe.experts.144.w2", "model.layers.44.block_sparse_moe.experts.145.w2", "model.layers.44.block_sparse_moe.experts.146.w2", "model.layers.44.block_sparse_moe.experts.147.w2", "model.layers.44.block_sparse_moe.experts.148.w2", "model.layers.44.block_sparse_moe.experts.149.w2", "model.layers.44.block_sparse_moe.experts.150.w2", "model.layers.44.block_sparse_moe.experts.151.w2", "model.layers.44.block_sparse_moe.experts.152.w2", "model.layers.44.block_sparse_moe.experts.153.w2", "model.layers.44.block_sparse_moe.experts.154.w2", "model.layers.44.block_sparse_moe.experts.155.w2", "model.layers.44.block_sparse_moe.experts.156.w2", "model.layers.44.block_sparse_moe.experts.157.w2", "model.layers.44.block_sparse_moe.experts.158.w2", "model.layers.44.block_sparse_moe.experts.159.w2", "model.layers.44.block_sparse_moe.experts.160.w2", "model.layers.44.block_sparse_moe.experts.161.w2", "model.layers.44.block_sparse_moe.experts.162.w2", "model.layers.44.block_sparse_moe.experts.163.w2", "model.layers.44.block_sparse_moe.experts.164.w2", "model.layers.44.block_sparse_moe.experts.165.w2", "model.layers.44.block_sparse_moe.experts.166.w2", "model.layers.44.block_sparse_moe.experts.167.w2", "model.layers.44.block_sparse_moe.experts.168.w2", "model.layers.44.block_sparse_moe.experts.169.w2", "model.layers.44.block_sparse_moe.experts.170.w2", "model.layers.44.block_sparse_moe.experts.171.w2", "model.layers.44.block_sparse_moe.experts.172.w2", "model.layers.44.block_sparse_moe.experts.173.w2", "model.layers.44.block_sparse_moe.experts.174.w2", "model.layers.44.block_sparse_moe.experts.175.w2", "model.layers.44.block_sparse_moe.experts.176.w2", "model.layers.44.block_sparse_moe.experts.177.w2", "model.layers.44.block_sparse_moe.experts.178.w2", "model.layers.44.block_sparse_moe.experts.179.w2", "model.layers.44.block_sparse_moe.experts.180.w2", "model.layers.44.block_sparse_moe.experts.181.w2", "model.layers.44.block_sparse_moe.experts.182.w2", "model.layers.44.block_sparse_moe.experts.183.w2", "model.layers.44.block_sparse_moe.experts.184.w2", "model.layers.44.block_sparse_moe.experts.185.w2", "model.layers.44.block_sparse_moe.experts.186.w2", "model.layers.44.block_sparse_moe.experts.187.w2", "model.layers.44.block_sparse_moe.experts.188.w2", "model.layers.44.block_sparse_moe.experts.189.w2", "model.layers.44.block_sparse_moe.experts.190.w2", "model.layers.44.block_sparse_moe.experts.191.w2", "model.layers.44.block_sparse_moe.experts.192.w2", "model.layers.44.block_sparse_moe.experts.193.w2", "model.layers.44.block_sparse_moe.experts.194.w2", "model.layers.44.block_sparse_moe.experts.195.w2", "model.layers.44.block_sparse_moe.experts.196.w2", "model.layers.44.block_sparse_moe.experts.197.w2", "model.layers.44.block_sparse_moe.experts.198.w2", "model.layers.44.block_sparse_moe.experts.199.w2", "model.layers.44.block_sparse_moe.experts.200.w2", "model.layers.44.block_sparse_moe.experts.201.w2", "model.layers.44.block_sparse_moe.experts.202.w2", "model.layers.44.block_sparse_moe.experts.203.w2", "model.layers.44.block_sparse_moe.experts.204.w2", "model.layers.44.block_sparse_moe.experts.205.w2", "model.layers.44.block_sparse_moe.experts.206.w2", "model.layers.44.block_sparse_moe.experts.207.w2", "model.layers.44.block_sparse_moe.experts.208.w2", "model.layers.44.block_sparse_moe.experts.209.w2", "model.layers.44.block_sparse_moe.experts.210.w2", "model.layers.44.block_sparse_moe.experts.211.w2", "model.layers.44.block_sparse_moe.experts.212.w2", "model.layers.44.block_sparse_moe.experts.213.w2", "model.layers.44.block_sparse_moe.experts.214.w2", "model.layers.44.block_sparse_moe.experts.215.w2", "model.layers.44.block_sparse_moe.experts.216.w2", "model.layers.44.block_sparse_moe.experts.217.w2", "model.layers.44.block_sparse_moe.experts.218.w2", "model.layers.44.block_sparse_moe.experts.219.w2", "model.layers.44.block_sparse_moe.experts.220.w2", "model.layers.44.block_sparse_moe.experts.221.w2", "model.layers.44.block_sparse_moe.experts.222.w2", "model.layers.44.block_sparse_moe.experts.223.w2", "model.layers.44.block_sparse_moe.experts.224.w2", "model.layers.44.block_sparse_moe.experts.225.w2", "model.layers.44.block_sparse_moe.experts.226.w2", "model.layers.44.block_sparse_moe.experts.227.w2", "model.layers.44.block_sparse_moe.experts.228.w2", "model.layers.44.block_sparse_moe.experts.229.w2", "model.layers.44.block_sparse_moe.experts.230.w2", "model.layers.44.block_sparse_moe.experts.231.w2", "model.layers.44.block_sparse_moe.experts.232.w2", "model.layers.44.block_sparse_moe.experts.233.w2", "model.layers.44.block_sparse_moe.experts.234.w2", "model.layers.44.block_sparse_moe.experts.235.w2", "model.layers.44.block_sparse_moe.experts.236.w2", "model.layers.44.block_sparse_moe.experts.237.w2", "model.layers.44.block_sparse_moe.experts.238.w2", "model.layers.44.block_sparse_moe.experts.239.w2", "model.layers.44.block_sparse_moe.experts.240.w2", "model.layers.44.block_sparse_moe.experts.241.w2", "model.layers.44.block_sparse_moe.experts.242.w2", "model.layers.44.block_sparse_moe.experts.243.w2", "model.layers.44.block_sparse_moe.experts.244.w2", "model.layers.44.block_sparse_moe.experts.245.w2", "model.layers.44.block_sparse_moe.experts.246.w2", "model.layers.44.block_sparse_moe.experts.247.w2", "model.layers.44.block_sparse_moe.experts.248.w2", "model.layers.44.block_sparse_moe.experts.249.w2", "model.layers.44.block_sparse_moe.experts.250.w2", "model.layers.44.block_sparse_moe.experts.251.w2", "model.layers.44.block_sparse_moe.experts.252.w2", "model.layers.44.block_sparse_moe.experts.253.w2", "model.layers.44.block_sparse_moe.experts.254.w2", "model.layers.44.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.530115544787663e-06, "dbits": 1207959552 } ] }, { "idx": 225, "layers": [ "model.layers.45.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00027957204729318896, "dbits": 18874368 } ] }, { "idx": 226, "layers": [ "model.layers.45.self_attn.k_proj", "model.layers.45.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0005576372146606501, "dbits": 6291456 } ] }, { "idx": 227, "layers": [ "model.layers.45.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0015850728377699852, "dbits": 18874368 } ] }, { "idx": 228, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w1", "model.layers.45.block_sparse_moe.experts.1.w1", "model.layers.45.block_sparse_moe.experts.2.w1", "model.layers.45.block_sparse_moe.experts.3.w1", "model.layers.45.block_sparse_moe.experts.4.w1", "model.layers.45.block_sparse_moe.experts.5.w1", "model.layers.45.block_sparse_moe.experts.6.w1", "model.layers.45.block_sparse_moe.experts.7.w1", "model.layers.45.block_sparse_moe.experts.8.w1", "model.layers.45.block_sparse_moe.experts.9.w1", "model.layers.45.block_sparse_moe.experts.10.w1", "model.layers.45.block_sparse_moe.experts.11.w1", "model.layers.45.block_sparse_moe.experts.12.w1", "model.layers.45.block_sparse_moe.experts.13.w1", "model.layers.45.block_sparse_moe.experts.14.w1", "model.layers.45.block_sparse_moe.experts.15.w1", "model.layers.45.block_sparse_moe.experts.16.w1", "model.layers.45.block_sparse_moe.experts.17.w1", "model.layers.45.block_sparse_moe.experts.18.w1", "model.layers.45.block_sparse_moe.experts.19.w1", "model.layers.45.block_sparse_moe.experts.20.w1", "model.layers.45.block_sparse_moe.experts.21.w1", "model.layers.45.block_sparse_moe.experts.22.w1", "model.layers.45.block_sparse_moe.experts.23.w1", "model.layers.45.block_sparse_moe.experts.24.w1", "model.layers.45.block_sparse_moe.experts.25.w1", "model.layers.45.block_sparse_moe.experts.26.w1", "model.layers.45.block_sparse_moe.experts.27.w1", "model.layers.45.block_sparse_moe.experts.28.w1", "model.layers.45.block_sparse_moe.experts.29.w1", "model.layers.45.block_sparse_moe.experts.30.w1", "model.layers.45.block_sparse_moe.experts.31.w1", "model.layers.45.block_sparse_moe.experts.32.w1", "model.layers.45.block_sparse_moe.experts.33.w1", "model.layers.45.block_sparse_moe.experts.34.w1", "model.layers.45.block_sparse_moe.experts.35.w1", "model.layers.45.block_sparse_moe.experts.36.w1", "model.layers.45.block_sparse_moe.experts.37.w1", "model.layers.45.block_sparse_moe.experts.38.w1", "model.layers.45.block_sparse_moe.experts.39.w1", "model.layers.45.block_sparse_moe.experts.40.w1", "model.layers.45.block_sparse_moe.experts.41.w1", "model.layers.45.block_sparse_moe.experts.42.w1", "model.layers.45.block_sparse_moe.experts.43.w1", "model.layers.45.block_sparse_moe.experts.44.w1", "model.layers.45.block_sparse_moe.experts.45.w1", "model.layers.45.block_sparse_moe.experts.46.w1", "model.layers.45.block_sparse_moe.experts.47.w1", "model.layers.45.block_sparse_moe.experts.48.w1", "model.layers.45.block_sparse_moe.experts.49.w1", "model.layers.45.block_sparse_moe.experts.50.w1", "model.layers.45.block_sparse_moe.experts.51.w1", "model.layers.45.block_sparse_moe.experts.52.w1", "model.layers.45.block_sparse_moe.experts.53.w1", "model.layers.45.block_sparse_moe.experts.54.w1", "model.layers.45.block_sparse_moe.experts.55.w1", "model.layers.45.block_sparse_moe.experts.56.w1", "model.layers.45.block_sparse_moe.experts.57.w1", "model.layers.45.block_sparse_moe.experts.58.w1", "model.layers.45.block_sparse_moe.experts.59.w1", "model.layers.45.block_sparse_moe.experts.60.w1", "model.layers.45.block_sparse_moe.experts.61.w1", "model.layers.45.block_sparse_moe.experts.62.w1", "model.layers.45.block_sparse_moe.experts.63.w1", "model.layers.45.block_sparse_moe.experts.64.w1", "model.layers.45.block_sparse_moe.experts.65.w1", "model.layers.45.block_sparse_moe.experts.66.w1", "model.layers.45.block_sparse_moe.experts.67.w1", "model.layers.45.block_sparse_moe.experts.68.w1", "model.layers.45.block_sparse_moe.experts.69.w1", "model.layers.45.block_sparse_moe.experts.70.w1", "model.layers.45.block_sparse_moe.experts.71.w1", "model.layers.45.block_sparse_moe.experts.72.w1", "model.layers.45.block_sparse_moe.experts.73.w1", "model.layers.45.block_sparse_moe.experts.74.w1", "model.layers.45.block_sparse_moe.experts.75.w1", "model.layers.45.block_sparse_moe.experts.76.w1", "model.layers.45.block_sparse_moe.experts.77.w1", "model.layers.45.block_sparse_moe.experts.78.w1", "model.layers.45.block_sparse_moe.experts.79.w1", "model.layers.45.block_sparse_moe.experts.80.w1", "model.layers.45.block_sparse_moe.experts.81.w1", "model.layers.45.block_sparse_moe.experts.82.w1", "model.layers.45.block_sparse_moe.experts.83.w1", "model.layers.45.block_sparse_moe.experts.84.w1", "model.layers.45.block_sparse_moe.experts.85.w1", "model.layers.45.block_sparse_moe.experts.86.w1", "model.layers.45.block_sparse_moe.experts.87.w1", "model.layers.45.block_sparse_moe.experts.88.w1", "model.layers.45.block_sparse_moe.experts.89.w1", "model.layers.45.block_sparse_moe.experts.90.w1", "model.layers.45.block_sparse_moe.experts.91.w1", "model.layers.45.block_sparse_moe.experts.92.w1", "model.layers.45.block_sparse_moe.experts.93.w1", "model.layers.45.block_sparse_moe.experts.94.w1", "model.layers.45.block_sparse_moe.experts.95.w1", "model.layers.45.block_sparse_moe.experts.96.w1", "model.layers.45.block_sparse_moe.experts.97.w1", "model.layers.45.block_sparse_moe.experts.98.w1", "model.layers.45.block_sparse_moe.experts.99.w1", "model.layers.45.block_sparse_moe.experts.100.w1", "model.layers.45.block_sparse_moe.experts.101.w1", "model.layers.45.block_sparse_moe.experts.102.w1", "model.layers.45.block_sparse_moe.experts.103.w1", "model.layers.45.block_sparse_moe.experts.104.w1", "model.layers.45.block_sparse_moe.experts.105.w1", "model.layers.45.block_sparse_moe.experts.106.w1", "model.layers.45.block_sparse_moe.experts.107.w1", "model.layers.45.block_sparse_moe.experts.108.w1", "model.layers.45.block_sparse_moe.experts.109.w1", "model.layers.45.block_sparse_moe.experts.110.w1", "model.layers.45.block_sparse_moe.experts.111.w1", "model.layers.45.block_sparse_moe.experts.112.w1", "model.layers.45.block_sparse_moe.experts.113.w1", "model.layers.45.block_sparse_moe.experts.114.w1", "model.layers.45.block_sparse_moe.experts.115.w1", "model.layers.45.block_sparse_moe.experts.116.w1", "model.layers.45.block_sparse_moe.experts.117.w1", "model.layers.45.block_sparse_moe.experts.118.w1", "model.layers.45.block_sparse_moe.experts.119.w1", "model.layers.45.block_sparse_moe.experts.120.w1", "model.layers.45.block_sparse_moe.experts.121.w1", "model.layers.45.block_sparse_moe.experts.122.w1", "model.layers.45.block_sparse_moe.experts.123.w1", "model.layers.45.block_sparse_moe.experts.124.w1", "model.layers.45.block_sparse_moe.experts.125.w1", "model.layers.45.block_sparse_moe.experts.126.w1", "model.layers.45.block_sparse_moe.experts.127.w1", "model.layers.45.block_sparse_moe.experts.128.w1", "model.layers.45.block_sparse_moe.experts.129.w1", "model.layers.45.block_sparse_moe.experts.130.w1", "model.layers.45.block_sparse_moe.experts.131.w1", "model.layers.45.block_sparse_moe.experts.132.w1", "model.layers.45.block_sparse_moe.experts.133.w1", "model.layers.45.block_sparse_moe.experts.134.w1", "model.layers.45.block_sparse_moe.experts.135.w1", "model.layers.45.block_sparse_moe.experts.136.w1", "model.layers.45.block_sparse_moe.experts.137.w1", "model.layers.45.block_sparse_moe.experts.138.w1", "model.layers.45.block_sparse_moe.experts.139.w1", "model.layers.45.block_sparse_moe.experts.140.w1", "model.layers.45.block_sparse_moe.experts.141.w1", "model.layers.45.block_sparse_moe.experts.142.w1", "model.layers.45.block_sparse_moe.experts.143.w1", "model.layers.45.block_sparse_moe.experts.144.w1", "model.layers.45.block_sparse_moe.experts.145.w1", "model.layers.45.block_sparse_moe.experts.146.w1", "model.layers.45.block_sparse_moe.experts.147.w1", "model.layers.45.block_sparse_moe.experts.148.w1", "model.layers.45.block_sparse_moe.experts.149.w1", "model.layers.45.block_sparse_moe.experts.150.w1", "model.layers.45.block_sparse_moe.experts.151.w1", "model.layers.45.block_sparse_moe.experts.152.w1", "model.layers.45.block_sparse_moe.experts.153.w1", "model.layers.45.block_sparse_moe.experts.154.w1", "model.layers.45.block_sparse_moe.experts.155.w1", "model.layers.45.block_sparse_moe.experts.156.w1", "model.layers.45.block_sparse_moe.experts.157.w1", "model.layers.45.block_sparse_moe.experts.158.w1", "model.layers.45.block_sparse_moe.experts.159.w1", "model.layers.45.block_sparse_moe.experts.160.w1", "model.layers.45.block_sparse_moe.experts.161.w1", "model.layers.45.block_sparse_moe.experts.162.w1", "model.layers.45.block_sparse_moe.experts.163.w1", "model.layers.45.block_sparse_moe.experts.164.w1", "model.layers.45.block_sparse_moe.experts.165.w1", "model.layers.45.block_sparse_moe.experts.166.w1", "model.layers.45.block_sparse_moe.experts.167.w1", "model.layers.45.block_sparse_moe.experts.168.w1", "model.layers.45.block_sparse_moe.experts.169.w1", "model.layers.45.block_sparse_moe.experts.170.w1", "model.layers.45.block_sparse_moe.experts.171.w1", "model.layers.45.block_sparse_moe.experts.172.w1", "model.layers.45.block_sparse_moe.experts.173.w1", "model.layers.45.block_sparse_moe.experts.174.w1", "model.layers.45.block_sparse_moe.experts.175.w1", "model.layers.45.block_sparse_moe.experts.176.w1", "model.layers.45.block_sparse_moe.experts.177.w1", "model.layers.45.block_sparse_moe.experts.178.w1", "model.layers.45.block_sparse_moe.experts.179.w1", "model.layers.45.block_sparse_moe.experts.180.w1", "model.layers.45.block_sparse_moe.experts.181.w1", "model.layers.45.block_sparse_moe.experts.182.w1", "model.layers.45.block_sparse_moe.experts.183.w1", "model.layers.45.block_sparse_moe.experts.184.w1", "model.layers.45.block_sparse_moe.experts.185.w1", "model.layers.45.block_sparse_moe.experts.186.w1", "model.layers.45.block_sparse_moe.experts.187.w1", "model.layers.45.block_sparse_moe.experts.188.w1", "model.layers.45.block_sparse_moe.experts.189.w1", "model.layers.45.block_sparse_moe.experts.190.w1", "model.layers.45.block_sparse_moe.experts.191.w1", "model.layers.45.block_sparse_moe.experts.192.w1", "model.layers.45.block_sparse_moe.experts.193.w1", "model.layers.45.block_sparse_moe.experts.194.w1", "model.layers.45.block_sparse_moe.experts.195.w1", "model.layers.45.block_sparse_moe.experts.196.w1", "model.layers.45.block_sparse_moe.experts.197.w1", "model.layers.45.block_sparse_moe.experts.198.w1", "model.layers.45.block_sparse_moe.experts.199.w1", "model.layers.45.block_sparse_moe.experts.200.w1", "model.layers.45.block_sparse_moe.experts.201.w1", "model.layers.45.block_sparse_moe.experts.202.w1", "model.layers.45.block_sparse_moe.experts.203.w1", "model.layers.45.block_sparse_moe.experts.204.w1", "model.layers.45.block_sparse_moe.experts.205.w1", "model.layers.45.block_sparse_moe.experts.206.w1", "model.layers.45.block_sparse_moe.experts.207.w1", "model.layers.45.block_sparse_moe.experts.208.w1", "model.layers.45.block_sparse_moe.experts.209.w1", "model.layers.45.block_sparse_moe.experts.210.w1", "model.layers.45.block_sparse_moe.experts.211.w1", "model.layers.45.block_sparse_moe.experts.212.w1", "model.layers.45.block_sparse_moe.experts.213.w1", "model.layers.45.block_sparse_moe.experts.214.w1", "model.layers.45.block_sparse_moe.experts.215.w1", "model.layers.45.block_sparse_moe.experts.216.w1", "model.layers.45.block_sparse_moe.experts.217.w1", "model.layers.45.block_sparse_moe.experts.218.w1", "model.layers.45.block_sparse_moe.experts.219.w1", "model.layers.45.block_sparse_moe.experts.220.w1", "model.layers.45.block_sparse_moe.experts.221.w1", "model.layers.45.block_sparse_moe.experts.222.w1", "model.layers.45.block_sparse_moe.experts.223.w1", "model.layers.45.block_sparse_moe.experts.224.w1", "model.layers.45.block_sparse_moe.experts.225.w1", "model.layers.45.block_sparse_moe.experts.226.w1", "model.layers.45.block_sparse_moe.experts.227.w1", "model.layers.45.block_sparse_moe.experts.228.w1", "model.layers.45.block_sparse_moe.experts.229.w1", "model.layers.45.block_sparse_moe.experts.230.w1", "model.layers.45.block_sparse_moe.experts.231.w1", "model.layers.45.block_sparse_moe.experts.232.w1", "model.layers.45.block_sparse_moe.experts.233.w1", "model.layers.45.block_sparse_moe.experts.234.w1", "model.layers.45.block_sparse_moe.experts.235.w1", "model.layers.45.block_sparse_moe.experts.236.w1", "model.layers.45.block_sparse_moe.experts.237.w1", "model.layers.45.block_sparse_moe.experts.238.w1", "model.layers.45.block_sparse_moe.experts.239.w1", "model.layers.45.block_sparse_moe.experts.240.w1", "model.layers.45.block_sparse_moe.experts.241.w1", "model.layers.45.block_sparse_moe.experts.242.w1", "model.layers.45.block_sparse_moe.experts.243.w1", "model.layers.45.block_sparse_moe.experts.244.w1", "model.layers.45.block_sparse_moe.experts.245.w1", "model.layers.45.block_sparse_moe.experts.246.w1", "model.layers.45.block_sparse_moe.experts.247.w1", "model.layers.45.block_sparse_moe.experts.248.w1", "model.layers.45.block_sparse_moe.experts.249.w1", "model.layers.45.block_sparse_moe.experts.250.w1", "model.layers.45.block_sparse_moe.experts.251.w1", "model.layers.45.block_sparse_moe.experts.252.w1", "model.layers.45.block_sparse_moe.experts.253.w1", "model.layers.45.block_sparse_moe.experts.254.w1", "model.layers.45.block_sparse_moe.experts.255.w1", "model.layers.45.block_sparse_moe.experts.0.w3", "model.layers.45.block_sparse_moe.experts.1.w3", "model.layers.45.block_sparse_moe.experts.2.w3", "model.layers.45.block_sparse_moe.experts.3.w3", "model.layers.45.block_sparse_moe.experts.4.w3", "model.layers.45.block_sparse_moe.experts.5.w3", "model.layers.45.block_sparse_moe.experts.6.w3", "model.layers.45.block_sparse_moe.experts.7.w3", "model.layers.45.block_sparse_moe.experts.8.w3", "model.layers.45.block_sparse_moe.experts.9.w3", "model.layers.45.block_sparse_moe.experts.10.w3", "model.layers.45.block_sparse_moe.experts.11.w3", "model.layers.45.block_sparse_moe.experts.12.w3", "model.layers.45.block_sparse_moe.experts.13.w3", "model.layers.45.block_sparse_moe.experts.14.w3", "model.layers.45.block_sparse_moe.experts.15.w3", "model.layers.45.block_sparse_moe.experts.16.w3", "model.layers.45.block_sparse_moe.experts.17.w3", "model.layers.45.block_sparse_moe.experts.18.w3", "model.layers.45.block_sparse_moe.experts.19.w3", "model.layers.45.block_sparse_moe.experts.20.w3", "model.layers.45.block_sparse_moe.experts.21.w3", "model.layers.45.block_sparse_moe.experts.22.w3", "model.layers.45.block_sparse_moe.experts.23.w3", "model.layers.45.block_sparse_moe.experts.24.w3", "model.layers.45.block_sparse_moe.experts.25.w3", "model.layers.45.block_sparse_moe.experts.26.w3", "model.layers.45.block_sparse_moe.experts.27.w3", "model.layers.45.block_sparse_moe.experts.28.w3", "model.layers.45.block_sparse_moe.experts.29.w3", "model.layers.45.block_sparse_moe.experts.30.w3", "model.layers.45.block_sparse_moe.experts.31.w3", "model.layers.45.block_sparse_moe.experts.32.w3", "model.layers.45.block_sparse_moe.experts.33.w3", "model.layers.45.block_sparse_moe.experts.34.w3", "model.layers.45.block_sparse_moe.experts.35.w3", "model.layers.45.block_sparse_moe.experts.36.w3", "model.layers.45.block_sparse_moe.experts.37.w3", "model.layers.45.block_sparse_moe.experts.38.w3", "model.layers.45.block_sparse_moe.experts.39.w3", "model.layers.45.block_sparse_moe.experts.40.w3", "model.layers.45.block_sparse_moe.experts.41.w3", "model.layers.45.block_sparse_moe.experts.42.w3", "model.layers.45.block_sparse_moe.experts.43.w3", "model.layers.45.block_sparse_moe.experts.44.w3", "model.layers.45.block_sparse_moe.experts.45.w3", "model.layers.45.block_sparse_moe.experts.46.w3", "model.layers.45.block_sparse_moe.experts.47.w3", "model.layers.45.block_sparse_moe.experts.48.w3", "model.layers.45.block_sparse_moe.experts.49.w3", "model.layers.45.block_sparse_moe.experts.50.w3", "model.layers.45.block_sparse_moe.experts.51.w3", "model.layers.45.block_sparse_moe.experts.52.w3", "model.layers.45.block_sparse_moe.experts.53.w3", "model.layers.45.block_sparse_moe.experts.54.w3", "model.layers.45.block_sparse_moe.experts.55.w3", "model.layers.45.block_sparse_moe.experts.56.w3", "model.layers.45.block_sparse_moe.experts.57.w3", "model.layers.45.block_sparse_moe.experts.58.w3", "model.layers.45.block_sparse_moe.experts.59.w3", "model.layers.45.block_sparse_moe.experts.60.w3", "model.layers.45.block_sparse_moe.experts.61.w3", "model.layers.45.block_sparse_moe.experts.62.w3", "model.layers.45.block_sparse_moe.experts.63.w3", "model.layers.45.block_sparse_moe.experts.64.w3", "model.layers.45.block_sparse_moe.experts.65.w3", "model.layers.45.block_sparse_moe.experts.66.w3", "model.layers.45.block_sparse_moe.experts.67.w3", "model.layers.45.block_sparse_moe.experts.68.w3", "model.layers.45.block_sparse_moe.experts.69.w3", "model.layers.45.block_sparse_moe.experts.70.w3", "model.layers.45.block_sparse_moe.experts.71.w3", "model.layers.45.block_sparse_moe.experts.72.w3", "model.layers.45.block_sparse_moe.experts.73.w3", "model.layers.45.block_sparse_moe.experts.74.w3", "model.layers.45.block_sparse_moe.experts.75.w3", "model.layers.45.block_sparse_moe.experts.76.w3", "model.layers.45.block_sparse_moe.experts.77.w3", "model.layers.45.block_sparse_moe.experts.78.w3", "model.layers.45.block_sparse_moe.experts.79.w3", "model.layers.45.block_sparse_moe.experts.80.w3", "model.layers.45.block_sparse_moe.experts.81.w3", "model.layers.45.block_sparse_moe.experts.82.w3", "model.layers.45.block_sparse_moe.experts.83.w3", "model.layers.45.block_sparse_moe.experts.84.w3", "model.layers.45.block_sparse_moe.experts.85.w3", "model.layers.45.block_sparse_moe.experts.86.w3", "model.layers.45.block_sparse_moe.experts.87.w3", "model.layers.45.block_sparse_moe.experts.88.w3", "model.layers.45.block_sparse_moe.experts.89.w3", "model.layers.45.block_sparse_moe.experts.90.w3", "model.layers.45.block_sparse_moe.experts.91.w3", "model.layers.45.block_sparse_moe.experts.92.w3", "model.layers.45.block_sparse_moe.experts.93.w3", "model.layers.45.block_sparse_moe.experts.94.w3", "model.layers.45.block_sparse_moe.experts.95.w3", "model.layers.45.block_sparse_moe.experts.96.w3", "model.layers.45.block_sparse_moe.experts.97.w3", "model.layers.45.block_sparse_moe.experts.98.w3", "model.layers.45.block_sparse_moe.experts.99.w3", "model.layers.45.block_sparse_moe.experts.100.w3", "model.layers.45.block_sparse_moe.experts.101.w3", "model.layers.45.block_sparse_moe.experts.102.w3", "model.layers.45.block_sparse_moe.experts.103.w3", "model.layers.45.block_sparse_moe.experts.104.w3", "model.layers.45.block_sparse_moe.experts.105.w3", "model.layers.45.block_sparse_moe.experts.106.w3", "model.layers.45.block_sparse_moe.experts.107.w3", "model.layers.45.block_sparse_moe.experts.108.w3", "model.layers.45.block_sparse_moe.experts.109.w3", "model.layers.45.block_sparse_moe.experts.110.w3", "model.layers.45.block_sparse_moe.experts.111.w3", "model.layers.45.block_sparse_moe.experts.112.w3", "model.layers.45.block_sparse_moe.experts.113.w3", "model.layers.45.block_sparse_moe.experts.114.w3", "model.layers.45.block_sparse_moe.experts.115.w3", "model.layers.45.block_sparse_moe.experts.116.w3", "model.layers.45.block_sparse_moe.experts.117.w3", "model.layers.45.block_sparse_moe.experts.118.w3", "model.layers.45.block_sparse_moe.experts.119.w3", "model.layers.45.block_sparse_moe.experts.120.w3", "model.layers.45.block_sparse_moe.experts.121.w3", "model.layers.45.block_sparse_moe.experts.122.w3", "model.layers.45.block_sparse_moe.experts.123.w3", "model.layers.45.block_sparse_moe.experts.124.w3", "model.layers.45.block_sparse_moe.experts.125.w3", "model.layers.45.block_sparse_moe.experts.126.w3", "model.layers.45.block_sparse_moe.experts.127.w3", "model.layers.45.block_sparse_moe.experts.128.w3", "model.layers.45.block_sparse_moe.experts.129.w3", "model.layers.45.block_sparse_moe.experts.130.w3", "model.layers.45.block_sparse_moe.experts.131.w3", "model.layers.45.block_sparse_moe.experts.132.w3", "model.layers.45.block_sparse_moe.experts.133.w3", "model.layers.45.block_sparse_moe.experts.134.w3", "model.layers.45.block_sparse_moe.experts.135.w3", "model.layers.45.block_sparse_moe.experts.136.w3", "model.layers.45.block_sparse_moe.experts.137.w3", "model.layers.45.block_sparse_moe.experts.138.w3", "model.layers.45.block_sparse_moe.experts.139.w3", "model.layers.45.block_sparse_moe.experts.140.w3", "model.layers.45.block_sparse_moe.experts.141.w3", "model.layers.45.block_sparse_moe.experts.142.w3", "model.layers.45.block_sparse_moe.experts.143.w3", "model.layers.45.block_sparse_moe.experts.144.w3", "model.layers.45.block_sparse_moe.experts.145.w3", "model.layers.45.block_sparse_moe.experts.146.w3", "model.layers.45.block_sparse_moe.experts.147.w3", "model.layers.45.block_sparse_moe.experts.148.w3", "model.layers.45.block_sparse_moe.experts.149.w3", "model.layers.45.block_sparse_moe.experts.150.w3", "model.layers.45.block_sparse_moe.experts.151.w3", "model.layers.45.block_sparse_moe.experts.152.w3", "model.layers.45.block_sparse_moe.experts.153.w3", "model.layers.45.block_sparse_moe.experts.154.w3", "model.layers.45.block_sparse_moe.experts.155.w3", "model.layers.45.block_sparse_moe.experts.156.w3", "model.layers.45.block_sparse_moe.experts.157.w3", "model.layers.45.block_sparse_moe.experts.158.w3", "model.layers.45.block_sparse_moe.experts.159.w3", "model.layers.45.block_sparse_moe.experts.160.w3", "model.layers.45.block_sparse_moe.experts.161.w3", "model.layers.45.block_sparse_moe.experts.162.w3", "model.layers.45.block_sparse_moe.experts.163.w3", "model.layers.45.block_sparse_moe.experts.164.w3", "model.layers.45.block_sparse_moe.experts.165.w3", "model.layers.45.block_sparse_moe.experts.166.w3", "model.layers.45.block_sparse_moe.experts.167.w3", "model.layers.45.block_sparse_moe.experts.168.w3", "model.layers.45.block_sparse_moe.experts.169.w3", "model.layers.45.block_sparse_moe.experts.170.w3", "model.layers.45.block_sparse_moe.experts.171.w3", "model.layers.45.block_sparse_moe.experts.172.w3", "model.layers.45.block_sparse_moe.experts.173.w3", "model.layers.45.block_sparse_moe.experts.174.w3", "model.layers.45.block_sparse_moe.experts.175.w3", "model.layers.45.block_sparse_moe.experts.176.w3", "model.layers.45.block_sparse_moe.experts.177.w3", "model.layers.45.block_sparse_moe.experts.178.w3", "model.layers.45.block_sparse_moe.experts.179.w3", "model.layers.45.block_sparse_moe.experts.180.w3", "model.layers.45.block_sparse_moe.experts.181.w3", "model.layers.45.block_sparse_moe.experts.182.w3", "model.layers.45.block_sparse_moe.experts.183.w3", "model.layers.45.block_sparse_moe.experts.184.w3", "model.layers.45.block_sparse_moe.experts.185.w3", "model.layers.45.block_sparse_moe.experts.186.w3", "model.layers.45.block_sparse_moe.experts.187.w3", "model.layers.45.block_sparse_moe.experts.188.w3", "model.layers.45.block_sparse_moe.experts.189.w3", "model.layers.45.block_sparse_moe.experts.190.w3", "model.layers.45.block_sparse_moe.experts.191.w3", "model.layers.45.block_sparse_moe.experts.192.w3", "model.layers.45.block_sparse_moe.experts.193.w3", "model.layers.45.block_sparse_moe.experts.194.w3", "model.layers.45.block_sparse_moe.experts.195.w3", "model.layers.45.block_sparse_moe.experts.196.w3", "model.layers.45.block_sparse_moe.experts.197.w3", "model.layers.45.block_sparse_moe.experts.198.w3", "model.layers.45.block_sparse_moe.experts.199.w3", "model.layers.45.block_sparse_moe.experts.200.w3", "model.layers.45.block_sparse_moe.experts.201.w3", "model.layers.45.block_sparse_moe.experts.202.w3", "model.layers.45.block_sparse_moe.experts.203.w3", "model.layers.45.block_sparse_moe.experts.204.w3", "model.layers.45.block_sparse_moe.experts.205.w3", "model.layers.45.block_sparse_moe.experts.206.w3", "model.layers.45.block_sparse_moe.experts.207.w3", "model.layers.45.block_sparse_moe.experts.208.w3", "model.layers.45.block_sparse_moe.experts.209.w3", "model.layers.45.block_sparse_moe.experts.210.w3", "model.layers.45.block_sparse_moe.experts.211.w3", "model.layers.45.block_sparse_moe.experts.212.w3", "model.layers.45.block_sparse_moe.experts.213.w3", "model.layers.45.block_sparse_moe.experts.214.w3", "model.layers.45.block_sparse_moe.experts.215.w3", "model.layers.45.block_sparse_moe.experts.216.w3", "model.layers.45.block_sparse_moe.experts.217.w3", "model.layers.45.block_sparse_moe.experts.218.w3", "model.layers.45.block_sparse_moe.experts.219.w3", "model.layers.45.block_sparse_moe.experts.220.w3", "model.layers.45.block_sparse_moe.experts.221.w3", "model.layers.45.block_sparse_moe.experts.222.w3", "model.layers.45.block_sparse_moe.experts.223.w3", "model.layers.45.block_sparse_moe.experts.224.w3", "model.layers.45.block_sparse_moe.experts.225.w3", "model.layers.45.block_sparse_moe.experts.226.w3", "model.layers.45.block_sparse_moe.experts.227.w3", "model.layers.45.block_sparse_moe.experts.228.w3", "model.layers.45.block_sparse_moe.experts.229.w3", "model.layers.45.block_sparse_moe.experts.230.w3", "model.layers.45.block_sparse_moe.experts.231.w3", "model.layers.45.block_sparse_moe.experts.232.w3", "model.layers.45.block_sparse_moe.experts.233.w3", "model.layers.45.block_sparse_moe.experts.234.w3", "model.layers.45.block_sparse_moe.experts.235.w3", "model.layers.45.block_sparse_moe.experts.236.w3", "model.layers.45.block_sparse_moe.experts.237.w3", "model.layers.45.block_sparse_moe.experts.238.w3", "model.layers.45.block_sparse_moe.experts.239.w3", "model.layers.45.block_sparse_moe.experts.240.w3", "model.layers.45.block_sparse_moe.experts.241.w3", "model.layers.45.block_sparse_moe.experts.242.w3", "model.layers.45.block_sparse_moe.experts.243.w3", "model.layers.45.block_sparse_moe.experts.244.w3", "model.layers.45.block_sparse_moe.experts.245.w3", "model.layers.45.block_sparse_moe.experts.246.w3", "model.layers.45.block_sparse_moe.experts.247.w3", "model.layers.45.block_sparse_moe.experts.248.w3", "model.layers.45.block_sparse_moe.experts.249.w3", "model.layers.45.block_sparse_moe.experts.250.w3", "model.layers.45.block_sparse_moe.experts.251.w3", "model.layers.45.block_sparse_moe.experts.252.w3", "model.layers.45.block_sparse_moe.experts.253.w3", "model.layers.45.block_sparse_moe.experts.254.w3", "model.layers.45.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0003395441919565173, "dbits": 2415919104 } ] }, { "idx": 229, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w2", "model.layers.45.block_sparse_moe.experts.1.w2", "model.layers.45.block_sparse_moe.experts.2.w2", "model.layers.45.block_sparse_moe.experts.3.w2", "model.layers.45.block_sparse_moe.experts.4.w2", "model.layers.45.block_sparse_moe.experts.5.w2", "model.layers.45.block_sparse_moe.experts.6.w2", "model.layers.45.block_sparse_moe.experts.7.w2", "model.layers.45.block_sparse_moe.experts.8.w2", "model.layers.45.block_sparse_moe.experts.9.w2", "model.layers.45.block_sparse_moe.experts.10.w2", "model.layers.45.block_sparse_moe.experts.11.w2", "model.layers.45.block_sparse_moe.experts.12.w2", "model.layers.45.block_sparse_moe.experts.13.w2", "model.layers.45.block_sparse_moe.experts.14.w2", "model.layers.45.block_sparse_moe.experts.15.w2", "model.layers.45.block_sparse_moe.experts.16.w2", "model.layers.45.block_sparse_moe.experts.17.w2", "model.layers.45.block_sparse_moe.experts.18.w2", "model.layers.45.block_sparse_moe.experts.19.w2", "model.layers.45.block_sparse_moe.experts.20.w2", "model.layers.45.block_sparse_moe.experts.21.w2", "model.layers.45.block_sparse_moe.experts.22.w2", "model.layers.45.block_sparse_moe.experts.23.w2", "model.layers.45.block_sparse_moe.experts.24.w2", "model.layers.45.block_sparse_moe.experts.25.w2", "model.layers.45.block_sparse_moe.experts.26.w2", "model.layers.45.block_sparse_moe.experts.27.w2", "model.layers.45.block_sparse_moe.experts.28.w2", "model.layers.45.block_sparse_moe.experts.29.w2", "model.layers.45.block_sparse_moe.experts.30.w2", "model.layers.45.block_sparse_moe.experts.31.w2", "model.layers.45.block_sparse_moe.experts.32.w2", "model.layers.45.block_sparse_moe.experts.33.w2", "model.layers.45.block_sparse_moe.experts.34.w2", "model.layers.45.block_sparse_moe.experts.35.w2", "model.layers.45.block_sparse_moe.experts.36.w2", "model.layers.45.block_sparse_moe.experts.37.w2", "model.layers.45.block_sparse_moe.experts.38.w2", "model.layers.45.block_sparse_moe.experts.39.w2", "model.layers.45.block_sparse_moe.experts.40.w2", "model.layers.45.block_sparse_moe.experts.41.w2", "model.layers.45.block_sparse_moe.experts.42.w2", "model.layers.45.block_sparse_moe.experts.43.w2", "model.layers.45.block_sparse_moe.experts.44.w2", "model.layers.45.block_sparse_moe.experts.45.w2", "model.layers.45.block_sparse_moe.experts.46.w2", "model.layers.45.block_sparse_moe.experts.47.w2", "model.layers.45.block_sparse_moe.experts.48.w2", "model.layers.45.block_sparse_moe.experts.49.w2", "model.layers.45.block_sparse_moe.experts.50.w2", "model.layers.45.block_sparse_moe.experts.51.w2", "model.layers.45.block_sparse_moe.experts.52.w2", "model.layers.45.block_sparse_moe.experts.53.w2", "model.layers.45.block_sparse_moe.experts.54.w2", "model.layers.45.block_sparse_moe.experts.55.w2", "model.layers.45.block_sparse_moe.experts.56.w2", "model.layers.45.block_sparse_moe.experts.57.w2", "model.layers.45.block_sparse_moe.experts.58.w2", "model.layers.45.block_sparse_moe.experts.59.w2", "model.layers.45.block_sparse_moe.experts.60.w2", "model.layers.45.block_sparse_moe.experts.61.w2", "model.layers.45.block_sparse_moe.experts.62.w2", "model.layers.45.block_sparse_moe.experts.63.w2", "model.layers.45.block_sparse_moe.experts.64.w2", "model.layers.45.block_sparse_moe.experts.65.w2", "model.layers.45.block_sparse_moe.experts.66.w2", "model.layers.45.block_sparse_moe.experts.67.w2", "model.layers.45.block_sparse_moe.experts.68.w2", "model.layers.45.block_sparse_moe.experts.69.w2", "model.layers.45.block_sparse_moe.experts.70.w2", "model.layers.45.block_sparse_moe.experts.71.w2", "model.layers.45.block_sparse_moe.experts.72.w2", "model.layers.45.block_sparse_moe.experts.73.w2", "model.layers.45.block_sparse_moe.experts.74.w2", "model.layers.45.block_sparse_moe.experts.75.w2", "model.layers.45.block_sparse_moe.experts.76.w2", "model.layers.45.block_sparse_moe.experts.77.w2", "model.layers.45.block_sparse_moe.experts.78.w2", "model.layers.45.block_sparse_moe.experts.79.w2", "model.layers.45.block_sparse_moe.experts.80.w2", "model.layers.45.block_sparse_moe.experts.81.w2", "model.layers.45.block_sparse_moe.experts.82.w2", "model.layers.45.block_sparse_moe.experts.83.w2", "model.layers.45.block_sparse_moe.experts.84.w2", "model.layers.45.block_sparse_moe.experts.85.w2", "model.layers.45.block_sparse_moe.experts.86.w2", "model.layers.45.block_sparse_moe.experts.87.w2", "model.layers.45.block_sparse_moe.experts.88.w2", "model.layers.45.block_sparse_moe.experts.89.w2", "model.layers.45.block_sparse_moe.experts.90.w2", "model.layers.45.block_sparse_moe.experts.91.w2", "model.layers.45.block_sparse_moe.experts.92.w2", "model.layers.45.block_sparse_moe.experts.93.w2", "model.layers.45.block_sparse_moe.experts.94.w2", "model.layers.45.block_sparse_moe.experts.95.w2", "model.layers.45.block_sparse_moe.experts.96.w2", "model.layers.45.block_sparse_moe.experts.97.w2", "model.layers.45.block_sparse_moe.experts.98.w2", "model.layers.45.block_sparse_moe.experts.99.w2", "model.layers.45.block_sparse_moe.experts.100.w2", "model.layers.45.block_sparse_moe.experts.101.w2", "model.layers.45.block_sparse_moe.experts.102.w2", "model.layers.45.block_sparse_moe.experts.103.w2", "model.layers.45.block_sparse_moe.experts.104.w2", "model.layers.45.block_sparse_moe.experts.105.w2", "model.layers.45.block_sparse_moe.experts.106.w2", "model.layers.45.block_sparse_moe.experts.107.w2", "model.layers.45.block_sparse_moe.experts.108.w2", "model.layers.45.block_sparse_moe.experts.109.w2", "model.layers.45.block_sparse_moe.experts.110.w2", "model.layers.45.block_sparse_moe.experts.111.w2", "model.layers.45.block_sparse_moe.experts.112.w2", "model.layers.45.block_sparse_moe.experts.113.w2", "model.layers.45.block_sparse_moe.experts.114.w2", "model.layers.45.block_sparse_moe.experts.115.w2", "model.layers.45.block_sparse_moe.experts.116.w2", "model.layers.45.block_sparse_moe.experts.117.w2", "model.layers.45.block_sparse_moe.experts.118.w2", "model.layers.45.block_sparse_moe.experts.119.w2", "model.layers.45.block_sparse_moe.experts.120.w2", "model.layers.45.block_sparse_moe.experts.121.w2", "model.layers.45.block_sparse_moe.experts.122.w2", "model.layers.45.block_sparse_moe.experts.123.w2", "model.layers.45.block_sparse_moe.experts.124.w2", "model.layers.45.block_sparse_moe.experts.125.w2", "model.layers.45.block_sparse_moe.experts.126.w2", "model.layers.45.block_sparse_moe.experts.127.w2", "model.layers.45.block_sparse_moe.experts.128.w2", "model.layers.45.block_sparse_moe.experts.129.w2", "model.layers.45.block_sparse_moe.experts.130.w2", "model.layers.45.block_sparse_moe.experts.131.w2", "model.layers.45.block_sparse_moe.experts.132.w2", "model.layers.45.block_sparse_moe.experts.133.w2", "model.layers.45.block_sparse_moe.experts.134.w2", "model.layers.45.block_sparse_moe.experts.135.w2", "model.layers.45.block_sparse_moe.experts.136.w2", "model.layers.45.block_sparse_moe.experts.137.w2", "model.layers.45.block_sparse_moe.experts.138.w2", "model.layers.45.block_sparse_moe.experts.139.w2", "model.layers.45.block_sparse_moe.experts.140.w2", "model.layers.45.block_sparse_moe.experts.141.w2", "model.layers.45.block_sparse_moe.experts.142.w2", "model.layers.45.block_sparse_moe.experts.143.w2", "model.layers.45.block_sparse_moe.experts.144.w2", "model.layers.45.block_sparse_moe.experts.145.w2", "model.layers.45.block_sparse_moe.experts.146.w2", "model.layers.45.block_sparse_moe.experts.147.w2", "model.layers.45.block_sparse_moe.experts.148.w2", "model.layers.45.block_sparse_moe.experts.149.w2", "model.layers.45.block_sparse_moe.experts.150.w2", "model.layers.45.block_sparse_moe.experts.151.w2", "model.layers.45.block_sparse_moe.experts.152.w2", "model.layers.45.block_sparse_moe.experts.153.w2", "model.layers.45.block_sparse_moe.experts.154.w2", "model.layers.45.block_sparse_moe.experts.155.w2", "model.layers.45.block_sparse_moe.experts.156.w2", "model.layers.45.block_sparse_moe.experts.157.w2", "model.layers.45.block_sparse_moe.experts.158.w2", "model.layers.45.block_sparse_moe.experts.159.w2", "model.layers.45.block_sparse_moe.experts.160.w2", "model.layers.45.block_sparse_moe.experts.161.w2", "model.layers.45.block_sparse_moe.experts.162.w2", "model.layers.45.block_sparse_moe.experts.163.w2", "model.layers.45.block_sparse_moe.experts.164.w2", "model.layers.45.block_sparse_moe.experts.165.w2", "model.layers.45.block_sparse_moe.experts.166.w2", "model.layers.45.block_sparse_moe.experts.167.w2", "model.layers.45.block_sparse_moe.experts.168.w2", "model.layers.45.block_sparse_moe.experts.169.w2", "model.layers.45.block_sparse_moe.experts.170.w2", "model.layers.45.block_sparse_moe.experts.171.w2", "model.layers.45.block_sparse_moe.experts.172.w2", "model.layers.45.block_sparse_moe.experts.173.w2", "model.layers.45.block_sparse_moe.experts.174.w2", "model.layers.45.block_sparse_moe.experts.175.w2", "model.layers.45.block_sparse_moe.experts.176.w2", "model.layers.45.block_sparse_moe.experts.177.w2", "model.layers.45.block_sparse_moe.experts.178.w2", "model.layers.45.block_sparse_moe.experts.179.w2", "model.layers.45.block_sparse_moe.experts.180.w2", "model.layers.45.block_sparse_moe.experts.181.w2", "model.layers.45.block_sparse_moe.experts.182.w2", "model.layers.45.block_sparse_moe.experts.183.w2", "model.layers.45.block_sparse_moe.experts.184.w2", "model.layers.45.block_sparse_moe.experts.185.w2", "model.layers.45.block_sparse_moe.experts.186.w2", "model.layers.45.block_sparse_moe.experts.187.w2", "model.layers.45.block_sparse_moe.experts.188.w2", "model.layers.45.block_sparse_moe.experts.189.w2", "model.layers.45.block_sparse_moe.experts.190.w2", "model.layers.45.block_sparse_moe.experts.191.w2", "model.layers.45.block_sparse_moe.experts.192.w2", "model.layers.45.block_sparse_moe.experts.193.w2", "model.layers.45.block_sparse_moe.experts.194.w2", "model.layers.45.block_sparse_moe.experts.195.w2", "model.layers.45.block_sparse_moe.experts.196.w2", "model.layers.45.block_sparse_moe.experts.197.w2", "model.layers.45.block_sparse_moe.experts.198.w2", "model.layers.45.block_sparse_moe.experts.199.w2", "model.layers.45.block_sparse_moe.experts.200.w2", "model.layers.45.block_sparse_moe.experts.201.w2", "model.layers.45.block_sparse_moe.experts.202.w2", "model.layers.45.block_sparse_moe.experts.203.w2", "model.layers.45.block_sparse_moe.experts.204.w2", "model.layers.45.block_sparse_moe.experts.205.w2", "model.layers.45.block_sparse_moe.experts.206.w2", "model.layers.45.block_sparse_moe.experts.207.w2", "model.layers.45.block_sparse_moe.experts.208.w2", "model.layers.45.block_sparse_moe.experts.209.w2", "model.layers.45.block_sparse_moe.experts.210.w2", "model.layers.45.block_sparse_moe.experts.211.w2", "model.layers.45.block_sparse_moe.experts.212.w2", "model.layers.45.block_sparse_moe.experts.213.w2", "model.layers.45.block_sparse_moe.experts.214.w2", "model.layers.45.block_sparse_moe.experts.215.w2", "model.layers.45.block_sparse_moe.experts.216.w2", "model.layers.45.block_sparse_moe.experts.217.w2", "model.layers.45.block_sparse_moe.experts.218.w2", "model.layers.45.block_sparse_moe.experts.219.w2", "model.layers.45.block_sparse_moe.experts.220.w2", "model.layers.45.block_sparse_moe.experts.221.w2", "model.layers.45.block_sparse_moe.experts.222.w2", "model.layers.45.block_sparse_moe.experts.223.w2", "model.layers.45.block_sparse_moe.experts.224.w2", "model.layers.45.block_sparse_moe.experts.225.w2", "model.layers.45.block_sparse_moe.experts.226.w2", "model.layers.45.block_sparse_moe.experts.227.w2", "model.layers.45.block_sparse_moe.experts.228.w2", "model.layers.45.block_sparse_moe.experts.229.w2", "model.layers.45.block_sparse_moe.experts.230.w2", "model.layers.45.block_sparse_moe.experts.231.w2", "model.layers.45.block_sparse_moe.experts.232.w2", "model.layers.45.block_sparse_moe.experts.233.w2", "model.layers.45.block_sparse_moe.experts.234.w2", "model.layers.45.block_sparse_moe.experts.235.w2", "model.layers.45.block_sparse_moe.experts.236.w2", "model.layers.45.block_sparse_moe.experts.237.w2", "model.layers.45.block_sparse_moe.experts.238.w2", "model.layers.45.block_sparse_moe.experts.239.w2", "model.layers.45.block_sparse_moe.experts.240.w2", "model.layers.45.block_sparse_moe.experts.241.w2", "model.layers.45.block_sparse_moe.experts.242.w2", "model.layers.45.block_sparse_moe.experts.243.w2", "model.layers.45.block_sparse_moe.experts.244.w2", "model.layers.45.block_sparse_moe.experts.245.w2", "model.layers.45.block_sparse_moe.experts.246.w2", "model.layers.45.block_sparse_moe.experts.247.w2", "model.layers.45.block_sparse_moe.experts.248.w2", "model.layers.45.block_sparse_moe.experts.249.w2", "model.layers.45.block_sparse_moe.experts.250.w2", "model.layers.45.block_sparse_moe.experts.251.w2", "model.layers.45.block_sparse_moe.experts.252.w2", "model.layers.45.block_sparse_moe.experts.253.w2", "model.layers.45.block_sparse_moe.experts.254.w2", "model.layers.45.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00011261422187090753, "dbits": 1207959552 } ] }, { "idx": 230, "layers": [ "model.layers.46.self_attn.q_proj" ], "candidates": [ { "dkld": 4.0393322706222534e-05, "dbits": 18874368 } ] }, { "idx": 231, "layers": [ "model.layers.46.self_attn.k_proj", "model.layers.46.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00012177973985671997, "dbits": 6291456 } ] }, { "idx": 232, "layers": [ "model.layers.46.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00024379342794418613, "dbits": 18874368 } ] }, { "idx": 233, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w1", "model.layers.46.block_sparse_moe.experts.1.w1", "model.layers.46.block_sparse_moe.experts.2.w1", "model.layers.46.block_sparse_moe.experts.3.w1", "model.layers.46.block_sparse_moe.experts.4.w1", "model.layers.46.block_sparse_moe.experts.5.w1", "model.layers.46.block_sparse_moe.experts.6.w1", "model.layers.46.block_sparse_moe.experts.7.w1", "model.layers.46.block_sparse_moe.experts.8.w1", "model.layers.46.block_sparse_moe.experts.9.w1", "model.layers.46.block_sparse_moe.experts.10.w1", "model.layers.46.block_sparse_moe.experts.11.w1", "model.layers.46.block_sparse_moe.experts.12.w1", "model.layers.46.block_sparse_moe.experts.13.w1", "model.layers.46.block_sparse_moe.experts.14.w1", "model.layers.46.block_sparse_moe.experts.15.w1", "model.layers.46.block_sparse_moe.experts.16.w1", "model.layers.46.block_sparse_moe.experts.17.w1", "model.layers.46.block_sparse_moe.experts.18.w1", "model.layers.46.block_sparse_moe.experts.19.w1", "model.layers.46.block_sparse_moe.experts.20.w1", "model.layers.46.block_sparse_moe.experts.21.w1", "model.layers.46.block_sparse_moe.experts.22.w1", "model.layers.46.block_sparse_moe.experts.23.w1", "model.layers.46.block_sparse_moe.experts.24.w1", "model.layers.46.block_sparse_moe.experts.25.w1", "model.layers.46.block_sparse_moe.experts.26.w1", "model.layers.46.block_sparse_moe.experts.27.w1", "model.layers.46.block_sparse_moe.experts.28.w1", "model.layers.46.block_sparse_moe.experts.29.w1", "model.layers.46.block_sparse_moe.experts.30.w1", "model.layers.46.block_sparse_moe.experts.31.w1", "model.layers.46.block_sparse_moe.experts.32.w1", "model.layers.46.block_sparse_moe.experts.33.w1", "model.layers.46.block_sparse_moe.experts.34.w1", "model.layers.46.block_sparse_moe.experts.35.w1", "model.layers.46.block_sparse_moe.experts.36.w1", "model.layers.46.block_sparse_moe.experts.37.w1", "model.layers.46.block_sparse_moe.experts.38.w1", "model.layers.46.block_sparse_moe.experts.39.w1", "model.layers.46.block_sparse_moe.experts.40.w1", "model.layers.46.block_sparse_moe.experts.41.w1", "model.layers.46.block_sparse_moe.experts.42.w1", "model.layers.46.block_sparse_moe.experts.43.w1", "model.layers.46.block_sparse_moe.experts.44.w1", "model.layers.46.block_sparse_moe.experts.45.w1", "model.layers.46.block_sparse_moe.experts.46.w1", "model.layers.46.block_sparse_moe.experts.47.w1", "model.layers.46.block_sparse_moe.experts.48.w1", "model.layers.46.block_sparse_moe.experts.49.w1", "model.layers.46.block_sparse_moe.experts.50.w1", "model.layers.46.block_sparse_moe.experts.51.w1", "model.layers.46.block_sparse_moe.experts.52.w1", "model.layers.46.block_sparse_moe.experts.53.w1", "model.layers.46.block_sparse_moe.experts.54.w1", "model.layers.46.block_sparse_moe.experts.55.w1", "model.layers.46.block_sparse_moe.experts.56.w1", "model.layers.46.block_sparse_moe.experts.57.w1", "model.layers.46.block_sparse_moe.experts.58.w1", "model.layers.46.block_sparse_moe.experts.59.w1", "model.layers.46.block_sparse_moe.experts.60.w1", "model.layers.46.block_sparse_moe.experts.61.w1", "model.layers.46.block_sparse_moe.experts.62.w1", "model.layers.46.block_sparse_moe.experts.63.w1", "model.layers.46.block_sparse_moe.experts.64.w1", "model.layers.46.block_sparse_moe.experts.65.w1", "model.layers.46.block_sparse_moe.experts.66.w1", "model.layers.46.block_sparse_moe.experts.67.w1", "model.layers.46.block_sparse_moe.experts.68.w1", "model.layers.46.block_sparse_moe.experts.69.w1", "model.layers.46.block_sparse_moe.experts.70.w1", "model.layers.46.block_sparse_moe.experts.71.w1", "model.layers.46.block_sparse_moe.experts.72.w1", "model.layers.46.block_sparse_moe.experts.73.w1", "model.layers.46.block_sparse_moe.experts.74.w1", "model.layers.46.block_sparse_moe.experts.75.w1", "model.layers.46.block_sparse_moe.experts.76.w1", "model.layers.46.block_sparse_moe.experts.77.w1", "model.layers.46.block_sparse_moe.experts.78.w1", "model.layers.46.block_sparse_moe.experts.79.w1", "model.layers.46.block_sparse_moe.experts.80.w1", "model.layers.46.block_sparse_moe.experts.81.w1", "model.layers.46.block_sparse_moe.experts.82.w1", "model.layers.46.block_sparse_moe.experts.83.w1", "model.layers.46.block_sparse_moe.experts.84.w1", "model.layers.46.block_sparse_moe.experts.85.w1", "model.layers.46.block_sparse_moe.experts.86.w1", "model.layers.46.block_sparse_moe.experts.87.w1", "model.layers.46.block_sparse_moe.experts.88.w1", "model.layers.46.block_sparse_moe.experts.89.w1", "model.layers.46.block_sparse_moe.experts.90.w1", "model.layers.46.block_sparse_moe.experts.91.w1", "model.layers.46.block_sparse_moe.experts.92.w1", "model.layers.46.block_sparse_moe.experts.93.w1", "model.layers.46.block_sparse_moe.experts.94.w1", "model.layers.46.block_sparse_moe.experts.95.w1", "model.layers.46.block_sparse_moe.experts.96.w1", "model.layers.46.block_sparse_moe.experts.97.w1", "model.layers.46.block_sparse_moe.experts.98.w1", "model.layers.46.block_sparse_moe.experts.99.w1", "model.layers.46.block_sparse_moe.experts.100.w1", "model.layers.46.block_sparse_moe.experts.101.w1", "model.layers.46.block_sparse_moe.experts.102.w1", "model.layers.46.block_sparse_moe.experts.103.w1", "model.layers.46.block_sparse_moe.experts.104.w1", "model.layers.46.block_sparse_moe.experts.105.w1", "model.layers.46.block_sparse_moe.experts.106.w1", "model.layers.46.block_sparse_moe.experts.107.w1", "model.layers.46.block_sparse_moe.experts.108.w1", "model.layers.46.block_sparse_moe.experts.109.w1", "model.layers.46.block_sparse_moe.experts.110.w1", "model.layers.46.block_sparse_moe.experts.111.w1", "model.layers.46.block_sparse_moe.experts.112.w1", "model.layers.46.block_sparse_moe.experts.113.w1", "model.layers.46.block_sparse_moe.experts.114.w1", "model.layers.46.block_sparse_moe.experts.115.w1", "model.layers.46.block_sparse_moe.experts.116.w1", "model.layers.46.block_sparse_moe.experts.117.w1", "model.layers.46.block_sparse_moe.experts.118.w1", "model.layers.46.block_sparse_moe.experts.119.w1", "model.layers.46.block_sparse_moe.experts.120.w1", "model.layers.46.block_sparse_moe.experts.121.w1", "model.layers.46.block_sparse_moe.experts.122.w1", "model.layers.46.block_sparse_moe.experts.123.w1", "model.layers.46.block_sparse_moe.experts.124.w1", "model.layers.46.block_sparse_moe.experts.125.w1", "model.layers.46.block_sparse_moe.experts.126.w1", "model.layers.46.block_sparse_moe.experts.127.w1", "model.layers.46.block_sparse_moe.experts.128.w1", "model.layers.46.block_sparse_moe.experts.129.w1", "model.layers.46.block_sparse_moe.experts.130.w1", "model.layers.46.block_sparse_moe.experts.131.w1", "model.layers.46.block_sparse_moe.experts.132.w1", "model.layers.46.block_sparse_moe.experts.133.w1", "model.layers.46.block_sparse_moe.experts.134.w1", "model.layers.46.block_sparse_moe.experts.135.w1", "model.layers.46.block_sparse_moe.experts.136.w1", "model.layers.46.block_sparse_moe.experts.137.w1", "model.layers.46.block_sparse_moe.experts.138.w1", "model.layers.46.block_sparse_moe.experts.139.w1", "model.layers.46.block_sparse_moe.experts.140.w1", "model.layers.46.block_sparse_moe.experts.141.w1", "model.layers.46.block_sparse_moe.experts.142.w1", "model.layers.46.block_sparse_moe.experts.143.w1", "model.layers.46.block_sparse_moe.experts.144.w1", "model.layers.46.block_sparse_moe.experts.145.w1", "model.layers.46.block_sparse_moe.experts.146.w1", "model.layers.46.block_sparse_moe.experts.147.w1", "model.layers.46.block_sparse_moe.experts.148.w1", "model.layers.46.block_sparse_moe.experts.149.w1", "model.layers.46.block_sparse_moe.experts.150.w1", "model.layers.46.block_sparse_moe.experts.151.w1", "model.layers.46.block_sparse_moe.experts.152.w1", "model.layers.46.block_sparse_moe.experts.153.w1", "model.layers.46.block_sparse_moe.experts.154.w1", "model.layers.46.block_sparse_moe.experts.155.w1", "model.layers.46.block_sparse_moe.experts.156.w1", "model.layers.46.block_sparse_moe.experts.157.w1", "model.layers.46.block_sparse_moe.experts.158.w1", "model.layers.46.block_sparse_moe.experts.159.w1", "model.layers.46.block_sparse_moe.experts.160.w1", "model.layers.46.block_sparse_moe.experts.161.w1", "model.layers.46.block_sparse_moe.experts.162.w1", "model.layers.46.block_sparse_moe.experts.163.w1", "model.layers.46.block_sparse_moe.experts.164.w1", "model.layers.46.block_sparse_moe.experts.165.w1", "model.layers.46.block_sparse_moe.experts.166.w1", "model.layers.46.block_sparse_moe.experts.167.w1", "model.layers.46.block_sparse_moe.experts.168.w1", "model.layers.46.block_sparse_moe.experts.169.w1", "model.layers.46.block_sparse_moe.experts.170.w1", "model.layers.46.block_sparse_moe.experts.171.w1", "model.layers.46.block_sparse_moe.experts.172.w1", "model.layers.46.block_sparse_moe.experts.173.w1", "model.layers.46.block_sparse_moe.experts.174.w1", "model.layers.46.block_sparse_moe.experts.175.w1", "model.layers.46.block_sparse_moe.experts.176.w1", "model.layers.46.block_sparse_moe.experts.177.w1", "model.layers.46.block_sparse_moe.experts.178.w1", "model.layers.46.block_sparse_moe.experts.179.w1", "model.layers.46.block_sparse_moe.experts.180.w1", "model.layers.46.block_sparse_moe.experts.181.w1", "model.layers.46.block_sparse_moe.experts.182.w1", "model.layers.46.block_sparse_moe.experts.183.w1", "model.layers.46.block_sparse_moe.experts.184.w1", "model.layers.46.block_sparse_moe.experts.185.w1", "model.layers.46.block_sparse_moe.experts.186.w1", "model.layers.46.block_sparse_moe.experts.187.w1", "model.layers.46.block_sparse_moe.experts.188.w1", "model.layers.46.block_sparse_moe.experts.189.w1", "model.layers.46.block_sparse_moe.experts.190.w1", "model.layers.46.block_sparse_moe.experts.191.w1", "model.layers.46.block_sparse_moe.experts.192.w1", "model.layers.46.block_sparse_moe.experts.193.w1", "model.layers.46.block_sparse_moe.experts.194.w1", "model.layers.46.block_sparse_moe.experts.195.w1", "model.layers.46.block_sparse_moe.experts.196.w1", "model.layers.46.block_sparse_moe.experts.197.w1", "model.layers.46.block_sparse_moe.experts.198.w1", "model.layers.46.block_sparse_moe.experts.199.w1", "model.layers.46.block_sparse_moe.experts.200.w1", "model.layers.46.block_sparse_moe.experts.201.w1", "model.layers.46.block_sparse_moe.experts.202.w1", "model.layers.46.block_sparse_moe.experts.203.w1", "model.layers.46.block_sparse_moe.experts.204.w1", "model.layers.46.block_sparse_moe.experts.205.w1", "model.layers.46.block_sparse_moe.experts.206.w1", "model.layers.46.block_sparse_moe.experts.207.w1", "model.layers.46.block_sparse_moe.experts.208.w1", "model.layers.46.block_sparse_moe.experts.209.w1", "model.layers.46.block_sparse_moe.experts.210.w1", "model.layers.46.block_sparse_moe.experts.211.w1", "model.layers.46.block_sparse_moe.experts.212.w1", "model.layers.46.block_sparse_moe.experts.213.w1", "model.layers.46.block_sparse_moe.experts.214.w1", "model.layers.46.block_sparse_moe.experts.215.w1", "model.layers.46.block_sparse_moe.experts.216.w1", "model.layers.46.block_sparse_moe.experts.217.w1", "model.layers.46.block_sparse_moe.experts.218.w1", "model.layers.46.block_sparse_moe.experts.219.w1", "model.layers.46.block_sparse_moe.experts.220.w1", "model.layers.46.block_sparse_moe.experts.221.w1", "model.layers.46.block_sparse_moe.experts.222.w1", "model.layers.46.block_sparse_moe.experts.223.w1", "model.layers.46.block_sparse_moe.experts.224.w1", "model.layers.46.block_sparse_moe.experts.225.w1", "model.layers.46.block_sparse_moe.experts.226.w1", "model.layers.46.block_sparse_moe.experts.227.w1", "model.layers.46.block_sparse_moe.experts.228.w1", "model.layers.46.block_sparse_moe.experts.229.w1", "model.layers.46.block_sparse_moe.experts.230.w1", "model.layers.46.block_sparse_moe.experts.231.w1", "model.layers.46.block_sparse_moe.experts.232.w1", "model.layers.46.block_sparse_moe.experts.233.w1", "model.layers.46.block_sparse_moe.experts.234.w1", "model.layers.46.block_sparse_moe.experts.235.w1", "model.layers.46.block_sparse_moe.experts.236.w1", "model.layers.46.block_sparse_moe.experts.237.w1", "model.layers.46.block_sparse_moe.experts.238.w1", "model.layers.46.block_sparse_moe.experts.239.w1", "model.layers.46.block_sparse_moe.experts.240.w1", "model.layers.46.block_sparse_moe.experts.241.w1", "model.layers.46.block_sparse_moe.experts.242.w1", "model.layers.46.block_sparse_moe.experts.243.w1", "model.layers.46.block_sparse_moe.experts.244.w1", "model.layers.46.block_sparse_moe.experts.245.w1", "model.layers.46.block_sparse_moe.experts.246.w1", "model.layers.46.block_sparse_moe.experts.247.w1", "model.layers.46.block_sparse_moe.experts.248.w1", "model.layers.46.block_sparse_moe.experts.249.w1", "model.layers.46.block_sparse_moe.experts.250.w1", "model.layers.46.block_sparse_moe.experts.251.w1", "model.layers.46.block_sparse_moe.experts.252.w1", "model.layers.46.block_sparse_moe.experts.253.w1", "model.layers.46.block_sparse_moe.experts.254.w1", "model.layers.46.block_sparse_moe.experts.255.w1", "model.layers.46.block_sparse_moe.experts.0.w3", "model.layers.46.block_sparse_moe.experts.1.w3", "model.layers.46.block_sparse_moe.experts.2.w3", "model.layers.46.block_sparse_moe.experts.3.w3", "model.layers.46.block_sparse_moe.experts.4.w3", "model.layers.46.block_sparse_moe.experts.5.w3", "model.layers.46.block_sparse_moe.experts.6.w3", "model.layers.46.block_sparse_moe.experts.7.w3", "model.layers.46.block_sparse_moe.experts.8.w3", "model.layers.46.block_sparse_moe.experts.9.w3", "model.layers.46.block_sparse_moe.experts.10.w3", "model.layers.46.block_sparse_moe.experts.11.w3", "model.layers.46.block_sparse_moe.experts.12.w3", "model.layers.46.block_sparse_moe.experts.13.w3", "model.layers.46.block_sparse_moe.experts.14.w3", "model.layers.46.block_sparse_moe.experts.15.w3", "model.layers.46.block_sparse_moe.experts.16.w3", "model.layers.46.block_sparse_moe.experts.17.w3", "model.layers.46.block_sparse_moe.experts.18.w3", "model.layers.46.block_sparse_moe.experts.19.w3", "model.layers.46.block_sparse_moe.experts.20.w3", "model.layers.46.block_sparse_moe.experts.21.w3", "model.layers.46.block_sparse_moe.experts.22.w3", "model.layers.46.block_sparse_moe.experts.23.w3", "model.layers.46.block_sparse_moe.experts.24.w3", "model.layers.46.block_sparse_moe.experts.25.w3", "model.layers.46.block_sparse_moe.experts.26.w3", "model.layers.46.block_sparse_moe.experts.27.w3", "model.layers.46.block_sparse_moe.experts.28.w3", "model.layers.46.block_sparse_moe.experts.29.w3", "model.layers.46.block_sparse_moe.experts.30.w3", "model.layers.46.block_sparse_moe.experts.31.w3", "model.layers.46.block_sparse_moe.experts.32.w3", "model.layers.46.block_sparse_moe.experts.33.w3", "model.layers.46.block_sparse_moe.experts.34.w3", "model.layers.46.block_sparse_moe.experts.35.w3", "model.layers.46.block_sparse_moe.experts.36.w3", "model.layers.46.block_sparse_moe.experts.37.w3", "model.layers.46.block_sparse_moe.experts.38.w3", "model.layers.46.block_sparse_moe.experts.39.w3", "model.layers.46.block_sparse_moe.experts.40.w3", "model.layers.46.block_sparse_moe.experts.41.w3", "model.layers.46.block_sparse_moe.experts.42.w3", "model.layers.46.block_sparse_moe.experts.43.w3", "model.layers.46.block_sparse_moe.experts.44.w3", "model.layers.46.block_sparse_moe.experts.45.w3", "model.layers.46.block_sparse_moe.experts.46.w3", "model.layers.46.block_sparse_moe.experts.47.w3", "model.layers.46.block_sparse_moe.experts.48.w3", "model.layers.46.block_sparse_moe.experts.49.w3", "model.layers.46.block_sparse_moe.experts.50.w3", "model.layers.46.block_sparse_moe.experts.51.w3", "model.layers.46.block_sparse_moe.experts.52.w3", "model.layers.46.block_sparse_moe.experts.53.w3", "model.layers.46.block_sparse_moe.experts.54.w3", "model.layers.46.block_sparse_moe.experts.55.w3", "model.layers.46.block_sparse_moe.experts.56.w3", "model.layers.46.block_sparse_moe.experts.57.w3", "model.layers.46.block_sparse_moe.experts.58.w3", "model.layers.46.block_sparse_moe.experts.59.w3", "model.layers.46.block_sparse_moe.experts.60.w3", "model.layers.46.block_sparse_moe.experts.61.w3", "model.layers.46.block_sparse_moe.experts.62.w3", "model.layers.46.block_sparse_moe.experts.63.w3", "model.layers.46.block_sparse_moe.experts.64.w3", "model.layers.46.block_sparse_moe.experts.65.w3", "model.layers.46.block_sparse_moe.experts.66.w3", "model.layers.46.block_sparse_moe.experts.67.w3", "model.layers.46.block_sparse_moe.experts.68.w3", "model.layers.46.block_sparse_moe.experts.69.w3", "model.layers.46.block_sparse_moe.experts.70.w3", "model.layers.46.block_sparse_moe.experts.71.w3", "model.layers.46.block_sparse_moe.experts.72.w3", "model.layers.46.block_sparse_moe.experts.73.w3", "model.layers.46.block_sparse_moe.experts.74.w3", "model.layers.46.block_sparse_moe.experts.75.w3", "model.layers.46.block_sparse_moe.experts.76.w3", "model.layers.46.block_sparse_moe.experts.77.w3", "model.layers.46.block_sparse_moe.experts.78.w3", "model.layers.46.block_sparse_moe.experts.79.w3", "model.layers.46.block_sparse_moe.experts.80.w3", "model.layers.46.block_sparse_moe.experts.81.w3", "model.layers.46.block_sparse_moe.experts.82.w3", "model.layers.46.block_sparse_moe.experts.83.w3", "model.layers.46.block_sparse_moe.experts.84.w3", "model.layers.46.block_sparse_moe.experts.85.w3", "model.layers.46.block_sparse_moe.experts.86.w3", "model.layers.46.block_sparse_moe.experts.87.w3", "model.layers.46.block_sparse_moe.experts.88.w3", "model.layers.46.block_sparse_moe.experts.89.w3", "model.layers.46.block_sparse_moe.experts.90.w3", "model.layers.46.block_sparse_moe.experts.91.w3", "model.layers.46.block_sparse_moe.experts.92.w3", "model.layers.46.block_sparse_moe.experts.93.w3", "model.layers.46.block_sparse_moe.experts.94.w3", "model.layers.46.block_sparse_moe.experts.95.w3", "model.layers.46.block_sparse_moe.experts.96.w3", "model.layers.46.block_sparse_moe.experts.97.w3", "model.layers.46.block_sparse_moe.experts.98.w3", "model.layers.46.block_sparse_moe.experts.99.w3", "model.layers.46.block_sparse_moe.experts.100.w3", "model.layers.46.block_sparse_moe.experts.101.w3", "model.layers.46.block_sparse_moe.experts.102.w3", "model.layers.46.block_sparse_moe.experts.103.w3", "model.layers.46.block_sparse_moe.experts.104.w3", "model.layers.46.block_sparse_moe.experts.105.w3", "model.layers.46.block_sparse_moe.experts.106.w3", "model.layers.46.block_sparse_moe.experts.107.w3", "model.layers.46.block_sparse_moe.experts.108.w3", "model.layers.46.block_sparse_moe.experts.109.w3", "model.layers.46.block_sparse_moe.experts.110.w3", "model.layers.46.block_sparse_moe.experts.111.w3", "model.layers.46.block_sparse_moe.experts.112.w3", "model.layers.46.block_sparse_moe.experts.113.w3", "model.layers.46.block_sparse_moe.experts.114.w3", "model.layers.46.block_sparse_moe.experts.115.w3", "model.layers.46.block_sparse_moe.experts.116.w3", "model.layers.46.block_sparse_moe.experts.117.w3", "model.layers.46.block_sparse_moe.experts.118.w3", "model.layers.46.block_sparse_moe.experts.119.w3", "model.layers.46.block_sparse_moe.experts.120.w3", "model.layers.46.block_sparse_moe.experts.121.w3", "model.layers.46.block_sparse_moe.experts.122.w3", "model.layers.46.block_sparse_moe.experts.123.w3", "model.layers.46.block_sparse_moe.experts.124.w3", "model.layers.46.block_sparse_moe.experts.125.w3", "model.layers.46.block_sparse_moe.experts.126.w3", "model.layers.46.block_sparse_moe.experts.127.w3", "model.layers.46.block_sparse_moe.experts.128.w3", "model.layers.46.block_sparse_moe.experts.129.w3", "model.layers.46.block_sparse_moe.experts.130.w3", "model.layers.46.block_sparse_moe.experts.131.w3", "model.layers.46.block_sparse_moe.experts.132.w3", "model.layers.46.block_sparse_moe.experts.133.w3", "model.layers.46.block_sparse_moe.experts.134.w3", "model.layers.46.block_sparse_moe.experts.135.w3", "model.layers.46.block_sparse_moe.experts.136.w3", "model.layers.46.block_sparse_moe.experts.137.w3", "model.layers.46.block_sparse_moe.experts.138.w3", "model.layers.46.block_sparse_moe.experts.139.w3", "model.layers.46.block_sparse_moe.experts.140.w3", "model.layers.46.block_sparse_moe.experts.141.w3", "model.layers.46.block_sparse_moe.experts.142.w3", "model.layers.46.block_sparse_moe.experts.143.w3", "model.layers.46.block_sparse_moe.experts.144.w3", "model.layers.46.block_sparse_moe.experts.145.w3", "model.layers.46.block_sparse_moe.experts.146.w3", "model.layers.46.block_sparse_moe.experts.147.w3", "model.layers.46.block_sparse_moe.experts.148.w3", "model.layers.46.block_sparse_moe.experts.149.w3", "model.layers.46.block_sparse_moe.experts.150.w3", "model.layers.46.block_sparse_moe.experts.151.w3", "model.layers.46.block_sparse_moe.experts.152.w3", "model.layers.46.block_sparse_moe.experts.153.w3", "model.layers.46.block_sparse_moe.experts.154.w3", "model.layers.46.block_sparse_moe.experts.155.w3", "model.layers.46.block_sparse_moe.experts.156.w3", "model.layers.46.block_sparse_moe.experts.157.w3", "model.layers.46.block_sparse_moe.experts.158.w3", "model.layers.46.block_sparse_moe.experts.159.w3", "model.layers.46.block_sparse_moe.experts.160.w3", "model.layers.46.block_sparse_moe.experts.161.w3", "model.layers.46.block_sparse_moe.experts.162.w3", "model.layers.46.block_sparse_moe.experts.163.w3", "model.layers.46.block_sparse_moe.experts.164.w3", "model.layers.46.block_sparse_moe.experts.165.w3", "model.layers.46.block_sparse_moe.experts.166.w3", "model.layers.46.block_sparse_moe.experts.167.w3", "model.layers.46.block_sparse_moe.experts.168.w3", "model.layers.46.block_sparse_moe.experts.169.w3", "model.layers.46.block_sparse_moe.experts.170.w3", "model.layers.46.block_sparse_moe.experts.171.w3", "model.layers.46.block_sparse_moe.experts.172.w3", "model.layers.46.block_sparse_moe.experts.173.w3", "model.layers.46.block_sparse_moe.experts.174.w3", "model.layers.46.block_sparse_moe.experts.175.w3", "model.layers.46.block_sparse_moe.experts.176.w3", "model.layers.46.block_sparse_moe.experts.177.w3", "model.layers.46.block_sparse_moe.experts.178.w3", "model.layers.46.block_sparse_moe.experts.179.w3", "model.layers.46.block_sparse_moe.experts.180.w3", "model.layers.46.block_sparse_moe.experts.181.w3", "model.layers.46.block_sparse_moe.experts.182.w3", "model.layers.46.block_sparse_moe.experts.183.w3", "model.layers.46.block_sparse_moe.experts.184.w3", "model.layers.46.block_sparse_moe.experts.185.w3", "model.layers.46.block_sparse_moe.experts.186.w3", "model.layers.46.block_sparse_moe.experts.187.w3", "model.layers.46.block_sparse_moe.experts.188.w3", "model.layers.46.block_sparse_moe.experts.189.w3", "model.layers.46.block_sparse_moe.experts.190.w3", "model.layers.46.block_sparse_moe.experts.191.w3", "model.layers.46.block_sparse_moe.experts.192.w3", "model.layers.46.block_sparse_moe.experts.193.w3", "model.layers.46.block_sparse_moe.experts.194.w3", "model.layers.46.block_sparse_moe.experts.195.w3", "model.layers.46.block_sparse_moe.experts.196.w3", "model.layers.46.block_sparse_moe.experts.197.w3", "model.layers.46.block_sparse_moe.experts.198.w3", "model.layers.46.block_sparse_moe.experts.199.w3", "model.layers.46.block_sparse_moe.experts.200.w3", "model.layers.46.block_sparse_moe.experts.201.w3", "model.layers.46.block_sparse_moe.experts.202.w3", "model.layers.46.block_sparse_moe.experts.203.w3", "model.layers.46.block_sparse_moe.experts.204.w3", "model.layers.46.block_sparse_moe.experts.205.w3", "model.layers.46.block_sparse_moe.experts.206.w3", "model.layers.46.block_sparse_moe.experts.207.w3", "model.layers.46.block_sparse_moe.experts.208.w3", "model.layers.46.block_sparse_moe.experts.209.w3", "model.layers.46.block_sparse_moe.experts.210.w3", "model.layers.46.block_sparse_moe.experts.211.w3", "model.layers.46.block_sparse_moe.experts.212.w3", "model.layers.46.block_sparse_moe.experts.213.w3", "model.layers.46.block_sparse_moe.experts.214.w3", "model.layers.46.block_sparse_moe.experts.215.w3", "model.layers.46.block_sparse_moe.experts.216.w3", "model.layers.46.block_sparse_moe.experts.217.w3", "model.layers.46.block_sparse_moe.experts.218.w3", "model.layers.46.block_sparse_moe.experts.219.w3", "model.layers.46.block_sparse_moe.experts.220.w3", "model.layers.46.block_sparse_moe.experts.221.w3", "model.layers.46.block_sparse_moe.experts.222.w3", "model.layers.46.block_sparse_moe.experts.223.w3", "model.layers.46.block_sparse_moe.experts.224.w3", "model.layers.46.block_sparse_moe.experts.225.w3", "model.layers.46.block_sparse_moe.experts.226.w3", "model.layers.46.block_sparse_moe.experts.227.w3", "model.layers.46.block_sparse_moe.experts.228.w3", "model.layers.46.block_sparse_moe.experts.229.w3", "model.layers.46.block_sparse_moe.experts.230.w3", "model.layers.46.block_sparse_moe.experts.231.w3", "model.layers.46.block_sparse_moe.experts.232.w3", "model.layers.46.block_sparse_moe.experts.233.w3", "model.layers.46.block_sparse_moe.experts.234.w3", "model.layers.46.block_sparse_moe.experts.235.w3", "model.layers.46.block_sparse_moe.experts.236.w3", "model.layers.46.block_sparse_moe.experts.237.w3", "model.layers.46.block_sparse_moe.experts.238.w3", "model.layers.46.block_sparse_moe.experts.239.w3", "model.layers.46.block_sparse_moe.experts.240.w3", "model.layers.46.block_sparse_moe.experts.241.w3", "model.layers.46.block_sparse_moe.experts.242.w3", "model.layers.46.block_sparse_moe.experts.243.w3", "model.layers.46.block_sparse_moe.experts.244.w3", "model.layers.46.block_sparse_moe.experts.245.w3", "model.layers.46.block_sparse_moe.experts.246.w3", "model.layers.46.block_sparse_moe.experts.247.w3", "model.layers.46.block_sparse_moe.experts.248.w3", "model.layers.46.block_sparse_moe.experts.249.w3", "model.layers.46.block_sparse_moe.experts.250.w3", "model.layers.46.block_sparse_moe.experts.251.w3", "model.layers.46.block_sparse_moe.experts.252.w3", "model.layers.46.block_sparse_moe.experts.253.w3", "model.layers.46.block_sparse_moe.experts.254.w3", "model.layers.46.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00015871245414018909, "dbits": 2415919104 } ] }, { "idx": 234, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w2", "model.layers.46.block_sparse_moe.experts.1.w2", "model.layers.46.block_sparse_moe.experts.2.w2", "model.layers.46.block_sparse_moe.experts.3.w2", "model.layers.46.block_sparse_moe.experts.4.w2", "model.layers.46.block_sparse_moe.experts.5.w2", "model.layers.46.block_sparse_moe.experts.6.w2", "model.layers.46.block_sparse_moe.experts.7.w2", "model.layers.46.block_sparse_moe.experts.8.w2", "model.layers.46.block_sparse_moe.experts.9.w2", "model.layers.46.block_sparse_moe.experts.10.w2", "model.layers.46.block_sparse_moe.experts.11.w2", "model.layers.46.block_sparse_moe.experts.12.w2", "model.layers.46.block_sparse_moe.experts.13.w2", "model.layers.46.block_sparse_moe.experts.14.w2", "model.layers.46.block_sparse_moe.experts.15.w2", "model.layers.46.block_sparse_moe.experts.16.w2", "model.layers.46.block_sparse_moe.experts.17.w2", "model.layers.46.block_sparse_moe.experts.18.w2", "model.layers.46.block_sparse_moe.experts.19.w2", "model.layers.46.block_sparse_moe.experts.20.w2", "model.layers.46.block_sparse_moe.experts.21.w2", "model.layers.46.block_sparse_moe.experts.22.w2", "model.layers.46.block_sparse_moe.experts.23.w2", "model.layers.46.block_sparse_moe.experts.24.w2", "model.layers.46.block_sparse_moe.experts.25.w2", "model.layers.46.block_sparse_moe.experts.26.w2", "model.layers.46.block_sparse_moe.experts.27.w2", "model.layers.46.block_sparse_moe.experts.28.w2", "model.layers.46.block_sparse_moe.experts.29.w2", "model.layers.46.block_sparse_moe.experts.30.w2", "model.layers.46.block_sparse_moe.experts.31.w2", "model.layers.46.block_sparse_moe.experts.32.w2", "model.layers.46.block_sparse_moe.experts.33.w2", "model.layers.46.block_sparse_moe.experts.34.w2", "model.layers.46.block_sparse_moe.experts.35.w2", "model.layers.46.block_sparse_moe.experts.36.w2", "model.layers.46.block_sparse_moe.experts.37.w2", "model.layers.46.block_sparse_moe.experts.38.w2", "model.layers.46.block_sparse_moe.experts.39.w2", "model.layers.46.block_sparse_moe.experts.40.w2", "model.layers.46.block_sparse_moe.experts.41.w2", "model.layers.46.block_sparse_moe.experts.42.w2", "model.layers.46.block_sparse_moe.experts.43.w2", "model.layers.46.block_sparse_moe.experts.44.w2", "model.layers.46.block_sparse_moe.experts.45.w2", "model.layers.46.block_sparse_moe.experts.46.w2", "model.layers.46.block_sparse_moe.experts.47.w2", "model.layers.46.block_sparse_moe.experts.48.w2", "model.layers.46.block_sparse_moe.experts.49.w2", "model.layers.46.block_sparse_moe.experts.50.w2", "model.layers.46.block_sparse_moe.experts.51.w2", "model.layers.46.block_sparse_moe.experts.52.w2", "model.layers.46.block_sparse_moe.experts.53.w2", "model.layers.46.block_sparse_moe.experts.54.w2", "model.layers.46.block_sparse_moe.experts.55.w2", "model.layers.46.block_sparse_moe.experts.56.w2", "model.layers.46.block_sparse_moe.experts.57.w2", "model.layers.46.block_sparse_moe.experts.58.w2", "model.layers.46.block_sparse_moe.experts.59.w2", "model.layers.46.block_sparse_moe.experts.60.w2", "model.layers.46.block_sparse_moe.experts.61.w2", "model.layers.46.block_sparse_moe.experts.62.w2", "model.layers.46.block_sparse_moe.experts.63.w2", "model.layers.46.block_sparse_moe.experts.64.w2", "model.layers.46.block_sparse_moe.experts.65.w2", "model.layers.46.block_sparse_moe.experts.66.w2", "model.layers.46.block_sparse_moe.experts.67.w2", "model.layers.46.block_sparse_moe.experts.68.w2", "model.layers.46.block_sparse_moe.experts.69.w2", "model.layers.46.block_sparse_moe.experts.70.w2", "model.layers.46.block_sparse_moe.experts.71.w2", "model.layers.46.block_sparse_moe.experts.72.w2", "model.layers.46.block_sparse_moe.experts.73.w2", "model.layers.46.block_sparse_moe.experts.74.w2", "model.layers.46.block_sparse_moe.experts.75.w2", "model.layers.46.block_sparse_moe.experts.76.w2", "model.layers.46.block_sparse_moe.experts.77.w2", "model.layers.46.block_sparse_moe.experts.78.w2", "model.layers.46.block_sparse_moe.experts.79.w2", "model.layers.46.block_sparse_moe.experts.80.w2", "model.layers.46.block_sparse_moe.experts.81.w2", "model.layers.46.block_sparse_moe.experts.82.w2", "model.layers.46.block_sparse_moe.experts.83.w2", "model.layers.46.block_sparse_moe.experts.84.w2", "model.layers.46.block_sparse_moe.experts.85.w2", "model.layers.46.block_sparse_moe.experts.86.w2", "model.layers.46.block_sparse_moe.experts.87.w2", "model.layers.46.block_sparse_moe.experts.88.w2", "model.layers.46.block_sparse_moe.experts.89.w2", "model.layers.46.block_sparse_moe.experts.90.w2", "model.layers.46.block_sparse_moe.experts.91.w2", "model.layers.46.block_sparse_moe.experts.92.w2", "model.layers.46.block_sparse_moe.experts.93.w2", "model.layers.46.block_sparse_moe.experts.94.w2", "model.layers.46.block_sparse_moe.experts.95.w2", "model.layers.46.block_sparse_moe.experts.96.w2", "model.layers.46.block_sparse_moe.experts.97.w2", "model.layers.46.block_sparse_moe.experts.98.w2", "model.layers.46.block_sparse_moe.experts.99.w2", "model.layers.46.block_sparse_moe.experts.100.w2", "model.layers.46.block_sparse_moe.experts.101.w2", "model.layers.46.block_sparse_moe.experts.102.w2", "model.layers.46.block_sparse_moe.experts.103.w2", "model.layers.46.block_sparse_moe.experts.104.w2", "model.layers.46.block_sparse_moe.experts.105.w2", "model.layers.46.block_sparse_moe.experts.106.w2", "model.layers.46.block_sparse_moe.experts.107.w2", "model.layers.46.block_sparse_moe.experts.108.w2", "model.layers.46.block_sparse_moe.experts.109.w2", "model.layers.46.block_sparse_moe.experts.110.w2", "model.layers.46.block_sparse_moe.experts.111.w2", "model.layers.46.block_sparse_moe.experts.112.w2", "model.layers.46.block_sparse_moe.experts.113.w2", "model.layers.46.block_sparse_moe.experts.114.w2", "model.layers.46.block_sparse_moe.experts.115.w2", "model.layers.46.block_sparse_moe.experts.116.w2", "model.layers.46.block_sparse_moe.experts.117.w2", "model.layers.46.block_sparse_moe.experts.118.w2", "model.layers.46.block_sparse_moe.experts.119.w2", "model.layers.46.block_sparse_moe.experts.120.w2", "model.layers.46.block_sparse_moe.experts.121.w2", "model.layers.46.block_sparse_moe.experts.122.w2", "model.layers.46.block_sparse_moe.experts.123.w2", "model.layers.46.block_sparse_moe.experts.124.w2", "model.layers.46.block_sparse_moe.experts.125.w2", "model.layers.46.block_sparse_moe.experts.126.w2", "model.layers.46.block_sparse_moe.experts.127.w2", "model.layers.46.block_sparse_moe.experts.128.w2", "model.layers.46.block_sparse_moe.experts.129.w2", "model.layers.46.block_sparse_moe.experts.130.w2", "model.layers.46.block_sparse_moe.experts.131.w2", "model.layers.46.block_sparse_moe.experts.132.w2", "model.layers.46.block_sparse_moe.experts.133.w2", "model.layers.46.block_sparse_moe.experts.134.w2", "model.layers.46.block_sparse_moe.experts.135.w2", "model.layers.46.block_sparse_moe.experts.136.w2", "model.layers.46.block_sparse_moe.experts.137.w2", "model.layers.46.block_sparse_moe.experts.138.w2", "model.layers.46.block_sparse_moe.experts.139.w2", "model.layers.46.block_sparse_moe.experts.140.w2", "model.layers.46.block_sparse_moe.experts.141.w2", "model.layers.46.block_sparse_moe.experts.142.w2", "model.layers.46.block_sparse_moe.experts.143.w2", "model.layers.46.block_sparse_moe.experts.144.w2", "model.layers.46.block_sparse_moe.experts.145.w2", "model.layers.46.block_sparse_moe.experts.146.w2", "model.layers.46.block_sparse_moe.experts.147.w2", "model.layers.46.block_sparse_moe.experts.148.w2", "model.layers.46.block_sparse_moe.experts.149.w2", "model.layers.46.block_sparse_moe.experts.150.w2", "model.layers.46.block_sparse_moe.experts.151.w2", "model.layers.46.block_sparse_moe.experts.152.w2", "model.layers.46.block_sparse_moe.experts.153.w2", "model.layers.46.block_sparse_moe.experts.154.w2", "model.layers.46.block_sparse_moe.experts.155.w2", "model.layers.46.block_sparse_moe.experts.156.w2", "model.layers.46.block_sparse_moe.experts.157.w2", "model.layers.46.block_sparse_moe.experts.158.w2", "model.layers.46.block_sparse_moe.experts.159.w2", "model.layers.46.block_sparse_moe.experts.160.w2", "model.layers.46.block_sparse_moe.experts.161.w2", "model.layers.46.block_sparse_moe.experts.162.w2", "model.layers.46.block_sparse_moe.experts.163.w2", "model.layers.46.block_sparse_moe.experts.164.w2", "model.layers.46.block_sparse_moe.experts.165.w2", "model.layers.46.block_sparse_moe.experts.166.w2", "model.layers.46.block_sparse_moe.experts.167.w2", "model.layers.46.block_sparse_moe.experts.168.w2", "model.layers.46.block_sparse_moe.experts.169.w2", "model.layers.46.block_sparse_moe.experts.170.w2", "model.layers.46.block_sparse_moe.experts.171.w2", "model.layers.46.block_sparse_moe.experts.172.w2", "model.layers.46.block_sparse_moe.experts.173.w2", "model.layers.46.block_sparse_moe.experts.174.w2", "model.layers.46.block_sparse_moe.experts.175.w2", "model.layers.46.block_sparse_moe.experts.176.w2", "model.layers.46.block_sparse_moe.experts.177.w2", "model.layers.46.block_sparse_moe.experts.178.w2", "model.layers.46.block_sparse_moe.experts.179.w2", "model.layers.46.block_sparse_moe.experts.180.w2", "model.layers.46.block_sparse_moe.experts.181.w2", "model.layers.46.block_sparse_moe.experts.182.w2", "model.layers.46.block_sparse_moe.experts.183.w2", "model.layers.46.block_sparse_moe.experts.184.w2", "model.layers.46.block_sparse_moe.experts.185.w2", "model.layers.46.block_sparse_moe.experts.186.w2", "model.layers.46.block_sparse_moe.experts.187.w2", "model.layers.46.block_sparse_moe.experts.188.w2", "model.layers.46.block_sparse_moe.experts.189.w2", "model.layers.46.block_sparse_moe.experts.190.w2", "model.layers.46.block_sparse_moe.experts.191.w2", "model.layers.46.block_sparse_moe.experts.192.w2", "model.layers.46.block_sparse_moe.experts.193.w2", "model.layers.46.block_sparse_moe.experts.194.w2", "model.layers.46.block_sparse_moe.experts.195.w2", "model.layers.46.block_sparse_moe.experts.196.w2", "model.layers.46.block_sparse_moe.experts.197.w2", "model.layers.46.block_sparse_moe.experts.198.w2", "model.layers.46.block_sparse_moe.experts.199.w2", "model.layers.46.block_sparse_moe.experts.200.w2", "model.layers.46.block_sparse_moe.experts.201.w2", "model.layers.46.block_sparse_moe.experts.202.w2", "model.layers.46.block_sparse_moe.experts.203.w2", "model.layers.46.block_sparse_moe.experts.204.w2", "model.layers.46.block_sparse_moe.experts.205.w2", "model.layers.46.block_sparse_moe.experts.206.w2", "model.layers.46.block_sparse_moe.experts.207.w2", "model.layers.46.block_sparse_moe.experts.208.w2", "model.layers.46.block_sparse_moe.experts.209.w2", "model.layers.46.block_sparse_moe.experts.210.w2", "model.layers.46.block_sparse_moe.experts.211.w2", "model.layers.46.block_sparse_moe.experts.212.w2", "model.layers.46.block_sparse_moe.experts.213.w2", "model.layers.46.block_sparse_moe.experts.214.w2", "model.layers.46.block_sparse_moe.experts.215.w2", "model.layers.46.block_sparse_moe.experts.216.w2", "model.layers.46.block_sparse_moe.experts.217.w2", "model.layers.46.block_sparse_moe.experts.218.w2", "model.layers.46.block_sparse_moe.experts.219.w2", "model.layers.46.block_sparse_moe.experts.220.w2", "model.layers.46.block_sparse_moe.experts.221.w2", "model.layers.46.block_sparse_moe.experts.222.w2", "model.layers.46.block_sparse_moe.experts.223.w2", "model.layers.46.block_sparse_moe.experts.224.w2", "model.layers.46.block_sparse_moe.experts.225.w2", "model.layers.46.block_sparse_moe.experts.226.w2", "model.layers.46.block_sparse_moe.experts.227.w2", "model.layers.46.block_sparse_moe.experts.228.w2", "model.layers.46.block_sparse_moe.experts.229.w2", "model.layers.46.block_sparse_moe.experts.230.w2", "model.layers.46.block_sparse_moe.experts.231.w2", "model.layers.46.block_sparse_moe.experts.232.w2", "model.layers.46.block_sparse_moe.experts.233.w2", "model.layers.46.block_sparse_moe.experts.234.w2", "model.layers.46.block_sparse_moe.experts.235.w2", "model.layers.46.block_sparse_moe.experts.236.w2", "model.layers.46.block_sparse_moe.experts.237.w2", "model.layers.46.block_sparse_moe.experts.238.w2", "model.layers.46.block_sparse_moe.experts.239.w2", "model.layers.46.block_sparse_moe.experts.240.w2", "model.layers.46.block_sparse_moe.experts.241.w2", "model.layers.46.block_sparse_moe.experts.242.w2", "model.layers.46.block_sparse_moe.experts.243.w2", "model.layers.46.block_sparse_moe.experts.244.w2", "model.layers.46.block_sparse_moe.experts.245.w2", "model.layers.46.block_sparse_moe.experts.246.w2", "model.layers.46.block_sparse_moe.experts.247.w2", "model.layers.46.block_sparse_moe.experts.248.w2", "model.layers.46.block_sparse_moe.experts.249.w2", "model.layers.46.block_sparse_moe.experts.250.w2", "model.layers.46.block_sparse_moe.experts.251.w2", "model.layers.46.block_sparse_moe.experts.252.w2", "model.layers.46.block_sparse_moe.experts.253.w2", "model.layers.46.block_sparse_moe.experts.254.w2", "model.layers.46.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -5.5740401148793306e-05, "dbits": 1207959552 } ] }, { "idx": 235, "layers": [ "model.layers.47.self_attn.q_proj" ], "candidates": [ { "dkld": 4.5547075569626894e-05, "dbits": 18874368 } ] }, { "idx": 236, "layers": [ "model.layers.47.self_attn.k_proj", "model.layers.47.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0011304242536425563, "dbits": 6291456 } ] }, { "idx": 237, "layers": [ "model.layers.47.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0004173193126916913, "dbits": 18874368 } ] }, { "idx": 238, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w1", "model.layers.47.block_sparse_moe.experts.1.w1", "model.layers.47.block_sparse_moe.experts.2.w1", "model.layers.47.block_sparse_moe.experts.3.w1", "model.layers.47.block_sparse_moe.experts.4.w1", "model.layers.47.block_sparse_moe.experts.5.w1", "model.layers.47.block_sparse_moe.experts.6.w1", "model.layers.47.block_sparse_moe.experts.7.w1", "model.layers.47.block_sparse_moe.experts.8.w1", "model.layers.47.block_sparse_moe.experts.9.w1", "model.layers.47.block_sparse_moe.experts.10.w1", "model.layers.47.block_sparse_moe.experts.11.w1", "model.layers.47.block_sparse_moe.experts.12.w1", "model.layers.47.block_sparse_moe.experts.13.w1", "model.layers.47.block_sparse_moe.experts.14.w1", "model.layers.47.block_sparse_moe.experts.15.w1", "model.layers.47.block_sparse_moe.experts.16.w1", "model.layers.47.block_sparse_moe.experts.17.w1", "model.layers.47.block_sparse_moe.experts.18.w1", "model.layers.47.block_sparse_moe.experts.19.w1", "model.layers.47.block_sparse_moe.experts.20.w1", "model.layers.47.block_sparse_moe.experts.21.w1", "model.layers.47.block_sparse_moe.experts.22.w1", "model.layers.47.block_sparse_moe.experts.23.w1", "model.layers.47.block_sparse_moe.experts.24.w1", "model.layers.47.block_sparse_moe.experts.25.w1", "model.layers.47.block_sparse_moe.experts.26.w1", "model.layers.47.block_sparse_moe.experts.27.w1", "model.layers.47.block_sparse_moe.experts.28.w1", "model.layers.47.block_sparse_moe.experts.29.w1", "model.layers.47.block_sparse_moe.experts.30.w1", "model.layers.47.block_sparse_moe.experts.31.w1", "model.layers.47.block_sparse_moe.experts.32.w1", "model.layers.47.block_sparse_moe.experts.33.w1", "model.layers.47.block_sparse_moe.experts.34.w1", "model.layers.47.block_sparse_moe.experts.35.w1", "model.layers.47.block_sparse_moe.experts.36.w1", "model.layers.47.block_sparse_moe.experts.37.w1", "model.layers.47.block_sparse_moe.experts.38.w1", "model.layers.47.block_sparse_moe.experts.39.w1", "model.layers.47.block_sparse_moe.experts.40.w1", "model.layers.47.block_sparse_moe.experts.41.w1", "model.layers.47.block_sparse_moe.experts.42.w1", "model.layers.47.block_sparse_moe.experts.43.w1", "model.layers.47.block_sparse_moe.experts.44.w1", "model.layers.47.block_sparse_moe.experts.45.w1", "model.layers.47.block_sparse_moe.experts.46.w1", "model.layers.47.block_sparse_moe.experts.47.w1", "model.layers.47.block_sparse_moe.experts.48.w1", "model.layers.47.block_sparse_moe.experts.49.w1", "model.layers.47.block_sparse_moe.experts.50.w1", "model.layers.47.block_sparse_moe.experts.51.w1", "model.layers.47.block_sparse_moe.experts.52.w1", "model.layers.47.block_sparse_moe.experts.53.w1", "model.layers.47.block_sparse_moe.experts.54.w1", "model.layers.47.block_sparse_moe.experts.55.w1", "model.layers.47.block_sparse_moe.experts.56.w1", "model.layers.47.block_sparse_moe.experts.57.w1", "model.layers.47.block_sparse_moe.experts.58.w1", "model.layers.47.block_sparse_moe.experts.59.w1", "model.layers.47.block_sparse_moe.experts.60.w1", "model.layers.47.block_sparse_moe.experts.61.w1", "model.layers.47.block_sparse_moe.experts.62.w1", "model.layers.47.block_sparse_moe.experts.63.w1", "model.layers.47.block_sparse_moe.experts.64.w1", "model.layers.47.block_sparse_moe.experts.65.w1", "model.layers.47.block_sparse_moe.experts.66.w1", "model.layers.47.block_sparse_moe.experts.67.w1", "model.layers.47.block_sparse_moe.experts.68.w1", "model.layers.47.block_sparse_moe.experts.69.w1", "model.layers.47.block_sparse_moe.experts.70.w1", "model.layers.47.block_sparse_moe.experts.71.w1", "model.layers.47.block_sparse_moe.experts.72.w1", "model.layers.47.block_sparse_moe.experts.73.w1", "model.layers.47.block_sparse_moe.experts.74.w1", "model.layers.47.block_sparse_moe.experts.75.w1", "model.layers.47.block_sparse_moe.experts.76.w1", "model.layers.47.block_sparse_moe.experts.77.w1", "model.layers.47.block_sparse_moe.experts.78.w1", "model.layers.47.block_sparse_moe.experts.79.w1", "model.layers.47.block_sparse_moe.experts.80.w1", "model.layers.47.block_sparse_moe.experts.81.w1", "model.layers.47.block_sparse_moe.experts.82.w1", "model.layers.47.block_sparse_moe.experts.83.w1", "model.layers.47.block_sparse_moe.experts.84.w1", "model.layers.47.block_sparse_moe.experts.85.w1", "model.layers.47.block_sparse_moe.experts.86.w1", "model.layers.47.block_sparse_moe.experts.87.w1", "model.layers.47.block_sparse_moe.experts.88.w1", "model.layers.47.block_sparse_moe.experts.89.w1", "model.layers.47.block_sparse_moe.experts.90.w1", "model.layers.47.block_sparse_moe.experts.91.w1", "model.layers.47.block_sparse_moe.experts.92.w1", "model.layers.47.block_sparse_moe.experts.93.w1", "model.layers.47.block_sparse_moe.experts.94.w1", "model.layers.47.block_sparse_moe.experts.95.w1", "model.layers.47.block_sparse_moe.experts.96.w1", "model.layers.47.block_sparse_moe.experts.97.w1", "model.layers.47.block_sparse_moe.experts.98.w1", "model.layers.47.block_sparse_moe.experts.99.w1", "model.layers.47.block_sparse_moe.experts.100.w1", "model.layers.47.block_sparse_moe.experts.101.w1", "model.layers.47.block_sparse_moe.experts.102.w1", "model.layers.47.block_sparse_moe.experts.103.w1", "model.layers.47.block_sparse_moe.experts.104.w1", "model.layers.47.block_sparse_moe.experts.105.w1", "model.layers.47.block_sparse_moe.experts.106.w1", "model.layers.47.block_sparse_moe.experts.107.w1", "model.layers.47.block_sparse_moe.experts.108.w1", "model.layers.47.block_sparse_moe.experts.109.w1", "model.layers.47.block_sparse_moe.experts.110.w1", "model.layers.47.block_sparse_moe.experts.111.w1", "model.layers.47.block_sparse_moe.experts.112.w1", "model.layers.47.block_sparse_moe.experts.113.w1", "model.layers.47.block_sparse_moe.experts.114.w1", "model.layers.47.block_sparse_moe.experts.115.w1", "model.layers.47.block_sparse_moe.experts.116.w1", "model.layers.47.block_sparse_moe.experts.117.w1", "model.layers.47.block_sparse_moe.experts.118.w1", "model.layers.47.block_sparse_moe.experts.119.w1", "model.layers.47.block_sparse_moe.experts.120.w1", "model.layers.47.block_sparse_moe.experts.121.w1", "model.layers.47.block_sparse_moe.experts.122.w1", "model.layers.47.block_sparse_moe.experts.123.w1", "model.layers.47.block_sparse_moe.experts.124.w1", "model.layers.47.block_sparse_moe.experts.125.w1", "model.layers.47.block_sparse_moe.experts.126.w1", "model.layers.47.block_sparse_moe.experts.127.w1", "model.layers.47.block_sparse_moe.experts.128.w1", "model.layers.47.block_sparse_moe.experts.129.w1", "model.layers.47.block_sparse_moe.experts.130.w1", "model.layers.47.block_sparse_moe.experts.131.w1", "model.layers.47.block_sparse_moe.experts.132.w1", "model.layers.47.block_sparse_moe.experts.133.w1", "model.layers.47.block_sparse_moe.experts.134.w1", "model.layers.47.block_sparse_moe.experts.135.w1", "model.layers.47.block_sparse_moe.experts.136.w1", "model.layers.47.block_sparse_moe.experts.137.w1", "model.layers.47.block_sparse_moe.experts.138.w1", "model.layers.47.block_sparse_moe.experts.139.w1", "model.layers.47.block_sparse_moe.experts.140.w1", "model.layers.47.block_sparse_moe.experts.141.w1", "model.layers.47.block_sparse_moe.experts.142.w1", "model.layers.47.block_sparse_moe.experts.143.w1", "model.layers.47.block_sparse_moe.experts.144.w1", "model.layers.47.block_sparse_moe.experts.145.w1", "model.layers.47.block_sparse_moe.experts.146.w1", "model.layers.47.block_sparse_moe.experts.147.w1", "model.layers.47.block_sparse_moe.experts.148.w1", "model.layers.47.block_sparse_moe.experts.149.w1", "model.layers.47.block_sparse_moe.experts.150.w1", "model.layers.47.block_sparse_moe.experts.151.w1", "model.layers.47.block_sparse_moe.experts.152.w1", "model.layers.47.block_sparse_moe.experts.153.w1", "model.layers.47.block_sparse_moe.experts.154.w1", "model.layers.47.block_sparse_moe.experts.155.w1", "model.layers.47.block_sparse_moe.experts.156.w1", "model.layers.47.block_sparse_moe.experts.157.w1", "model.layers.47.block_sparse_moe.experts.158.w1", "model.layers.47.block_sparse_moe.experts.159.w1", "model.layers.47.block_sparse_moe.experts.160.w1", "model.layers.47.block_sparse_moe.experts.161.w1", "model.layers.47.block_sparse_moe.experts.162.w1", "model.layers.47.block_sparse_moe.experts.163.w1", "model.layers.47.block_sparse_moe.experts.164.w1", "model.layers.47.block_sparse_moe.experts.165.w1", "model.layers.47.block_sparse_moe.experts.166.w1", "model.layers.47.block_sparse_moe.experts.167.w1", "model.layers.47.block_sparse_moe.experts.168.w1", "model.layers.47.block_sparse_moe.experts.169.w1", "model.layers.47.block_sparse_moe.experts.170.w1", "model.layers.47.block_sparse_moe.experts.171.w1", "model.layers.47.block_sparse_moe.experts.172.w1", "model.layers.47.block_sparse_moe.experts.173.w1", "model.layers.47.block_sparse_moe.experts.174.w1", "model.layers.47.block_sparse_moe.experts.175.w1", "model.layers.47.block_sparse_moe.experts.176.w1", "model.layers.47.block_sparse_moe.experts.177.w1", "model.layers.47.block_sparse_moe.experts.178.w1", "model.layers.47.block_sparse_moe.experts.179.w1", "model.layers.47.block_sparse_moe.experts.180.w1", "model.layers.47.block_sparse_moe.experts.181.w1", "model.layers.47.block_sparse_moe.experts.182.w1", "model.layers.47.block_sparse_moe.experts.183.w1", "model.layers.47.block_sparse_moe.experts.184.w1", "model.layers.47.block_sparse_moe.experts.185.w1", "model.layers.47.block_sparse_moe.experts.186.w1", "model.layers.47.block_sparse_moe.experts.187.w1", "model.layers.47.block_sparse_moe.experts.188.w1", "model.layers.47.block_sparse_moe.experts.189.w1", "model.layers.47.block_sparse_moe.experts.190.w1", "model.layers.47.block_sparse_moe.experts.191.w1", "model.layers.47.block_sparse_moe.experts.192.w1", "model.layers.47.block_sparse_moe.experts.193.w1", "model.layers.47.block_sparse_moe.experts.194.w1", "model.layers.47.block_sparse_moe.experts.195.w1", "model.layers.47.block_sparse_moe.experts.196.w1", "model.layers.47.block_sparse_moe.experts.197.w1", "model.layers.47.block_sparse_moe.experts.198.w1", "model.layers.47.block_sparse_moe.experts.199.w1", "model.layers.47.block_sparse_moe.experts.200.w1", "model.layers.47.block_sparse_moe.experts.201.w1", "model.layers.47.block_sparse_moe.experts.202.w1", "model.layers.47.block_sparse_moe.experts.203.w1", "model.layers.47.block_sparse_moe.experts.204.w1", "model.layers.47.block_sparse_moe.experts.205.w1", "model.layers.47.block_sparse_moe.experts.206.w1", "model.layers.47.block_sparse_moe.experts.207.w1", "model.layers.47.block_sparse_moe.experts.208.w1", "model.layers.47.block_sparse_moe.experts.209.w1", "model.layers.47.block_sparse_moe.experts.210.w1", "model.layers.47.block_sparse_moe.experts.211.w1", "model.layers.47.block_sparse_moe.experts.212.w1", "model.layers.47.block_sparse_moe.experts.213.w1", "model.layers.47.block_sparse_moe.experts.214.w1", "model.layers.47.block_sparse_moe.experts.215.w1", "model.layers.47.block_sparse_moe.experts.216.w1", "model.layers.47.block_sparse_moe.experts.217.w1", "model.layers.47.block_sparse_moe.experts.218.w1", "model.layers.47.block_sparse_moe.experts.219.w1", "model.layers.47.block_sparse_moe.experts.220.w1", "model.layers.47.block_sparse_moe.experts.221.w1", "model.layers.47.block_sparse_moe.experts.222.w1", "model.layers.47.block_sparse_moe.experts.223.w1", "model.layers.47.block_sparse_moe.experts.224.w1", "model.layers.47.block_sparse_moe.experts.225.w1", "model.layers.47.block_sparse_moe.experts.226.w1", "model.layers.47.block_sparse_moe.experts.227.w1", "model.layers.47.block_sparse_moe.experts.228.w1", "model.layers.47.block_sparse_moe.experts.229.w1", "model.layers.47.block_sparse_moe.experts.230.w1", "model.layers.47.block_sparse_moe.experts.231.w1", "model.layers.47.block_sparse_moe.experts.232.w1", "model.layers.47.block_sparse_moe.experts.233.w1", "model.layers.47.block_sparse_moe.experts.234.w1", "model.layers.47.block_sparse_moe.experts.235.w1", "model.layers.47.block_sparse_moe.experts.236.w1", "model.layers.47.block_sparse_moe.experts.237.w1", "model.layers.47.block_sparse_moe.experts.238.w1", "model.layers.47.block_sparse_moe.experts.239.w1", "model.layers.47.block_sparse_moe.experts.240.w1", "model.layers.47.block_sparse_moe.experts.241.w1", "model.layers.47.block_sparse_moe.experts.242.w1", "model.layers.47.block_sparse_moe.experts.243.w1", "model.layers.47.block_sparse_moe.experts.244.w1", "model.layers.47.block_sparse_moe.experts.245.w1", "model.layers.47.block_sparse_moe.experts.246.w1", "model.layers.47.block_sparse_moe.experts.247.w1", "model.layers.47.block_sparse_moe.experts.248.w1", "model.layers.47.block_sparse_moe.experts.249.w1", "model.layers.47.block_sparse_moe.experts.250.w1", "model.layers.47.block_sparse_moe.experts.251.w1", "model.layers.47.block_sparse_moe.experts.252.w1", "model.layers.47.block_sparse_moe.experts.253.w1", "model.layers.47.block_sparse_moe.experts.254.w1", "model.layers.47.block_sparse_moe.experts.255.w1", "model.layers.47.block_sparse_moe.experts.0.w3", "model.layers.47.block_sparse_moe.experts.1.w3", "model.layers.47.block_sparse_moe.experts.2.w3", "model.layers.47.block_sparse_moe.experts.3.w3", "model.layers.47.block_sparse_moe.experts.4.w3", "model.layers.47.block_sparse_moe.experts.5.w3", "model.layers.47.block_sparse_moe.experts.6.w3", "model.layers.47.block_sparse_moe.experts.7.w3", "model.layers.47.block_sparse_moe.experts.8.w3", "model.layers.47.block_sparse_moe.experts.9.w3", "model.layers.47.block_sparse_moe.experts.10.w3", "model.layers.47.block_sparse_moe.experts.11.w3", "model.layers.47.block_sparse_moe.experts.12.w3", "model.layers.47.block_sparse_moe.experts.13.w3", "model.layers.47.block_sparse_moe.experts.14.w3", "model.layers.47.block_sparse_moe.experts.15.w3", "model.layers.47.block_sparse_moe.experts.16.w3", "model.layers.47.block_sparse_moe.experts.17.w3", "model.layers.47.block_sparse_moe.experts.18.w3", "model.layers.47.block_sparse_moe.experts.19.w3", "model.layers.47.block_sparse_moe.experts.20.w3", "model.layers.47.block_sparse_moe.experts.21.w3", "model.layers.47.block_sparse_moe.experts.22.w3", "model.layers.47.block_sparse_moe.experts.23.w3", "model.layers.47.block_sparse_moe.experts.24.w3", "model.layers.47.block_sparse_moe.experts.25.w3", "model.layers.47.block_sparse_moe.experts.26.w3", "model.layers.47.block_sparse_moe.experts.27.w3", "model.layers.47.block_sparse_moe.experts.28.w3", "model.layers.47.block_sparse_moe.experts.29.w3", "model.layers.47.block_sparse_moe.experts.30.w3", "model.layers.47.block_sparse_moe.experts.31.w3", "model.layers.47.block_sparse_moe.experts.32.w3", "model.layers.47.block_sparse_moe.experts.33.w3", "model.layers.47.block_sparse_moe.experts.34.w3", "model.layers.47.block_sparse_moe.experts.35.w3", "model.layers.47.block_sparse_moe.experts.36.w3", "model.layers.47.block_sparse_moe.experts.37.w3", "model.layers.47.block_sparse_moe.experts.38.w3", "model.layers.47.block_sparse_moe.experts.39.w3", "model.layers.47.block_sparse_moe.experts.40.w3", "model.layers.47.block_sparse_moe.experts.41.w3", "model.layers.47.block_sparse_moe.experts.42.w3", "model.layers.47.block_sparse_moe.experts.43.w3", "model.layers.47.block_sparse_moe.experts.44.w3", "model.layers.47.block_sparse_moe.experts.45.w3", "model.layers.47.block_sparse_moe.experts.46.w3", "model.layers.47.block_sparse_moe.experts.47.w3", "model.layers.47.block_sparse_moe.experts.48.w3", "model.layers.47.block_sparse_moe.experts.49.w3", "model.layers.47.block_sparse_moe.experts.50.w3", "model.layers.47.block_sparse_moe.experts.51.w3", "model.layers.47.block_sparse_moe.experts.52.w3", "model.layers.47.block_sparse_moe.experts.53.w3", "model.layers.47.block_sparse_moe.experts.54.w3", "model.layers.47.block_sparse_moe.experts.55.w3", "model.layers.47.block_sparse_moe.experts.56.w3", "model.layers.47.block_sparse_moe.experts.57.w3", "model.layers.47.block_sparse_moe.experts.58.w3", "model.layers.47.block_sparse_moe.experts.59.w3", "model.layers.47.block_sparse_moe.experts.60.w3", "model.layers.47.block_sparse_moe.experts.61.w3", "model.layers.47.block_sparse_moe.experts.62.w3", "model.layers.47.block_sparse_moe.experts.63.w3", "model.layers.47.block_sparse_moe.experts.64.w3", "model.layers.47.block_sparse_moe.experts.65.w3", "model.layers.47.block_sparse_moe.experts.66.w3", "model.layers.47.block_sparse_moe.experts.67.w3", "model.layers.47.block_sparse_moe.experts.68.w3", "model.layers.47.block_sparse_moe.experts.69.w3", "model.layers.47.block_sparse_moe.experts.70.w3", "model.layers.47.block_sparse_moe.experts.71.w3", "model.layers.47.block_sparse_moe.experts.72.w3", "model.layers.47.block_sparse_moe.experts.73.w3", "model.layers.47.block_sparse_moe.experts.74.w3", "model.layers.47.block_sparse_moe.experts.75.w3", "model.layers.47.block_sparse_moe.experts.76.w3", "model.layers.47.block_sparse_moe.experts.77.w3", "model.layers.47.block_sparse_moe.experts.78.w3", "model.layers.47.block_sparse_moe.experts.79.w3", "model.layers.47.block_sparse_moe.experts.80.w3", "model.layers.47.block_sparse_moe.experts.81.w3", "model.layers.47.block_sparse_moe.experts.82.w3", "model.layers.47.block_sparse_moe.experts.83.w3", "model.layers.47.block_sparse_moe.experts.84.w3", "model.layers.47.block_sparse_moe.experts.85.w3", "model.layers.47.block_sparse_moe.experts.86.w3", "model.layers.47.block_sparse_moe.experts.87.w3", "model.layers.47.block_sparse_moe.experts.88.w3", "model.layers.47.block_sparse_moe.experts.89.w3", "model.layers.47.block_sparse_moe.experts.90.w3", "model.layers.47.block_sparse_moe.experts.91.w3", "model.layers.47.block_sparse_moe.experts.92.w3", "model.layers.47.block_sparse_moe.experts.93.w3", "model.layers.47.block_sparse_moe.experts.94.w3", "model.layers.47.block_sparse_moe.experts.95.w3", "model.layers.47.block_sparse_moe.experts.96.w3", "model.layers.47.block_sparse_moe.experts.97.w3", "model.layers.47.block_sparse_moe.experts.98.w3", "model.layers.47.block_sparse_moe.experts.99.w3", "model.layers.47.block_sparse_moe.experts.100.w3", "model.layers.47.block_sparse_moe.experts.101.w3", "model.layers.47.block_sparse_moe.experts.102.w3", "model.layers.47.block_sparse_moe.experts.103.w3", "model.layers.47.block_sparse_moe.experts.104.w3", "model.layers.47.block_sparse_moe.experts.105.w3", "model.layers.47.block_sparse_moe.experts.106.w3", "model.layers.47.block_sparse_moe.experts.107.w3", "model.layers.47.block_sparse_moe.experts.108.w3", "model.layers.47.block_sparse_moe.experts.109.w3", "model.layers.47.block_sparse_moe.experts.110.w3", "model.layers.47.block_sparse_moe.experts.111.w3", "model.layers.47.block_sparse_moe.experts.112.w3", "model.layers.47.block_sparse_moe.experts.113.w3", "model.layers.47.block_sparse_moe.experts.114.w3", "model.layers.47.block_sparse_moe.experts.115.w3", "model.layers.47.block_sparse_moe.experts.116.w3", "model.layers.47.block_sparse_moe.experts.117.w3", "model.layers.47.block_sparse_moe.experts.118.w3", "model.layers.47.block_sparse_moe.experts.119.w3", "model.layers.47.block_sparse_moe.experts.120.w3", "model.layers.47.block_sparse_moe.experts.121.w3", "model.layers.47.block_sparse_moe.experts.122.w3", "model.layers.47.block_sparse_moe.experts.123.w3", "model.layers.47.block_sparse_moe.experts.124.w3", "model.layers.47.block_sparse_moe.experts.125.w3", "model.layers.47.block_sparse_moe.experts.126.w3", "model.layers.47.block_sparse_moe.experts.127.w3", "model.layers.47.block_sparse_moe.experts.128.w3", "model.layers.47.block_sparse_moe.experts.129.w3", "model.layers.47.block_sparse_moe.experts.130.w3", "model.layers.47.block_sparse_moe.experts.131.w3", "model.layers.47.block_sparse_moe.experts.132.w3", "model.layers.47.block_sparse_moe.experts.133.w3", "model.layers.47.block_sparse_moe.experts.134.w3", "model.layers.47.block_sparse_moe.experts.135.w3", "model.layers.47.block_sparse_moe.experts.136.w3", "model.layers.47.block_sparse_moe.experts.137.w3", "model.layers.47.block_sparse_moe.experts.138.w3", "model.layers.47.block_sparse_moe.experts.139.w3", "model.layers.47.block_sparse_moe.experts.140.w3", "model.layers.47.block_sparse_moe.experts.141.w3", "model.layers.47.block_sparse_moe.experts.142.w3", "model.layers.47.block_sparse_moe.experts.143.w3", "model.layers.47.block_sparse_moe.experts.144.w3", "model.layers.47.block_sparse_moe.experts.145.w3", "model.layers.47.block_sparse_moe.experts.146.w3", "model.layers.47.block_sparse_moe.experts.147.w3", "model.layers.47.block_sparse_moe.experts.148.w3", "model.layers.47.block_sparse_moe.experts.149.w3", "model.layers.47.block_sparse_moe.experts.150.w3", "model.layers.47.block_sparse_moe.experts.151.w3", "model.layers.47.block_sparse_moe.experts.152.w3", "model.layers.47.block_sparse_moe.experts.153.w3", "model.layers.47.block_sparse_moe.experts.154.w3", "model.layers.47.block_sparse_moe.experts.155.w3", "model.layers.47.block_sparse_moe.experts.156.w3", "model.layers.47.block_sparse_moe.experts.157.w3", "model.layers.47.block_sparse_moe.experts.158.w3", "model.layers.47.block_sparse_moe.experts.159.w3", "model.layers.47.block_sparse_moe.experts.160.w3", "model.layers.47.block_sparse_moe.experts.161.w3", "model.layers.47.block_sparse_moe.experts.162.w3", "model.layers.47.block_sparse_moe.experts.163.w3", "model.layers.47.block_sparse_moe.experts.164.w3", "model.layers.47.block_sparse_moe.experts.165.w3", "model.layers.47.block_sparse_moe.experts.166.w3", "model.layers.47.block_sparse_moe.experts.167.w3", "model.layers.47.block_sparse_moe.experts.168.w3", "model.layers.47.block_sparse_moe.experts.169.w3", "model.layers.47.block_sparse_moe.experts.170.w3", "model.layers.47.block_sparse_moe.experts.171.w3", "model.layers.47.block_sparse_moe.experts.172.w3", "model.layers.47.block_sparse_moe.experts.173.w3", "model.layers.47.block_sparse_moe.experts.174.w3", "model.layers.47.block_sparse_moe.experts.175.w3", "model.layers.47.block_sparse_moe.experts.176.w3", "model.layers.47.block_sparse_moe.experts.177.w3", "model.layers.47.block_sparse_moe.experts.178.w3", "model.layers.47.block_sparse_moe.experts.179.w3", "model.layers.47.block_sparse_moe.experts.180.w3", "model.layers.47.block_sparse_moe.experts.181.w3", "model.layers.47.block_sparse_moe.experts.182.w3", "model.layers.47.block_sparse_moe.experts.183.w3", "model.layers.47.block_sparse_moe.experts.184.w3", "model.layers.47.block_sparse_moe.experts.185.w3", "model.layers.47.block_sparse_moe.experts.186.w3", "model.layers.47.block_sparse_moe.experts.187.w3", "model.layers.47.block_sparse_moe.experts.188.w3", "model.layers.47.block_sparse_moe.experts.189.w3", "model.layers.47.block_sparse_moe.experts.190.w3", "model.layers.47.block_sparse_moe.experts.191.w3", "model.layers.47.block_sparse_moe.experts.192.w3", "model.layers.47.block_sparse_moe.experts.193.w3", "model.layers.47.block_sparse_moe.experts.194.w3", "model.layers.47.block_sparse_moe.experts.195.w3", "model.layers.47.block_sparse_moe.experts.196.w3", "model.layers.47.block_sparse_moe.experts.197.w3", "model.layers.47.block_sparse_moe.experts.198.w3", "model.layers.47.block_sparse_moe.experts.199.w3", "model.layers.47.block_sparse_moe.experts.200.w3", "model.layers.47.block_sparse_moe.experts.201.w3", "model.layers.47.block_sparse_moe.experts.202.w3", "model.layers.47.block_sparse_moe.experts.203.w3", "model.layers.47.block_sparse_moe.experts.204.w3", "model.layers.47.block_sparse_moe.experts.205.w3", "model.layers.47.block_sparse_moe.experts.206.w3", "model.layers.47.block_sparse_moe.experts.207.w3", "model.layers.47.block_sparse_moe.experts.208.w3", "model.layers.47.block_sparse_moe.experts.209.w3", "model.layers.47.block_sparse_moe.experts.210.w3", "model.layers.47.block_sparse_moe.experts.211.w3", "model.layers.47.block_sparse_moe.experts.212.w3", "model.layers.47.block_sparse_moe.experts.213.w3", "model.layers.47.block_sparse_moe.experts.214.w3", "model.layers.47.block_sparse_moe.experts.215.w3", "model.layers.47.block_sparse_moe.experts.216.w3", "model.layers.47.block_sparse_moe.experts.217.w3", "model.layers.47.block_sparse_moe.experts.218.w3", "model.layers.47.block_sparse_moe.experts.219.w3", "model.layers.47.block_sparse_moe.experts.220.w3", "model.layers.47.block_sparse_moe.experts.221.w3", "model.layers.47.block_sparse_moe.experts.222.w3", "model.layers.47.block_sparse_moe.experts.223.w3", "model.layers.47.block_sparse_moe.experts.224.w3", "model.layers.47.block_sparse_moe.experts.225.w3", "model.layers.47.block_sparse_moe.experts.226.w3", "model.layers.47.block_sparse_moe.experts.227.w3", "model.layers.47.block_sparse_moe.experts.228.w3", "model.layers.47.block_sparse_moe.experts.229.w3", "model.layers.47.block_sparse_moe.experts.230.w3", "model.layers.47.block_sparse_moe.experts.231.w3", "model.layers.47.block_sparse_moe.experts.232.w3", "model.layers.47.block_sparse_moe.experts.233.w3", "model.layers.47.block_sparse_moe.experts.234.w3", "model.layers.47.block_sparse_moe.experts.235.w3", "model.layers.47.block_sparse_moe.experts.236.w3", "model.layers.47.block_sparse_moe.experts.237.w3", "model.layers.47.block_sparse_moe.experts.238.w3", "model.layers.47.block_sparse_moe.experts.239.w3", "model.layers.47.block_sparse_moe.experts.240.w3", "model.layers.47.block_sparse_moe.experts.241.w3", "model.layers.47.block_sparse_moe.experts.242.w3", "model.layers.47.block_sparse_moe.experts.243.w3", "model.layers.47.block_sparse_moe.experts.244.w3", "model.layers.47.block_sparse_moe.experts.245.w3", "model.layers.47.block_sparse_moe.experts.246.w3", "model.layers.47.block_sparse_moe.experts.247.w3", "model.layers.47.block_sparse_moe.experts.248.w3", "model.layers.47.block_sparse_moe.experts.249.w3", "model.layers.47.block_sparse_moe.experts.250.w3", "model.layers.47.block_sparse_moe.experts.251.w3", "model.layers.47.block_sparse_moe.experts.252.w3", "model.layers.47.block_sparse_moe.experts.253.w3", "model.layers.47.block_sparse_moe.experts.254.w3", "model.layers.47.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 8.389744907617847e-05, "dbits": 2415919104 } ] }, { "idx": 239, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w2", "model.layers.47.block_sparse_moe.experts.1.w2", "model.layers.47.block_sparse_moe.experts.2.w2", "model.layers.47.block_sparse_moe.experts.3.w2", "model.layers.47.block_sparse_moe.experts.4.w2", "model.layers.47.block_sparse_moe.experts.5.w2", "model.layers.47.block_sparse_moe.experts.6.w2", "model.layers.47.block_sparse_moe.experts.7.w2", "model.layers.47.block_sparse_moe.experts.8.w2", "model.layers.47.block_sparse_moe.experts.9.w2", "model.layers.47.block_sparse_moe.experts.10.w2", "model.layers.47.block_sparse_moe.experts.11.w2", "model.layers.47.block_sparse_moe.experts.12.w2", "model.layers.47.block_sparse_moe.experts.13.w2", "model.layers.47.block_sparse_moe.experts.14.w2", "model.layers.47.block_sparse_moe.experts.15.w2", "model.layers.47.block_sparse_moe.experts.16.w2", "model.layers.47.block_sparse_moe.experts.17.w2", "model.layers.47.block_sparse_moe.experts.18.w2", "model.layers.47.block_sparse_moe.experts.19.w2", "model.layers.47.block_sparse_moe.experts.20.w2", "model.layers.47.block_sparse_moe.experts.21.w2", "model.layers.47.block_sparse_moe.experts.22.w2", "model.layers.47.block_sparse_moe.experts.23.w2", "model.layers.47.block_sparse_moe.experts.24.w2", "model.layers.47.block_sparse_moe.experts.25.w2", "model.layers.47.block_sparse_moe.experts.26.w2", "model.layers.47.block_sparse_moe.experts.27.w2", "model.layers.47.block_sparse_moe.experts.28.w2", "model.layers.47.block_sparse_moe.experts.29.w2", "model.layers.47.block_sparse_moe.experts.30.w2", "model.layers.47.block_sparse_moe.experts.31.w2", "model.layers.47.block_sparse_moe.experts.32.w2", "model.layers.47.block_sparse_moe.experts.33.w2", "model.layers.47.block_sparse_moe.experts.34.w2", "model.layers.47.block_sparse_moe.experts.35.w2", "model.layers.47.block_sparse_moe.experts.36.w2", "model.layers.47.block_sparse_moe.experts.37.w2", "model.layers.47.block_sparse_moe.experts.38.w2", "model.layers.47.block_sparse_moe.experts.39.w2", "model.layers.47.block_sparse_moe.experts.40.w2", "model.layers.47.block_sparse_moe.experts.41.w2", "model.layers.47.block_sparse_moe.experts.42.w2", "model.layers.47.block_sparse_moe.experts.43.w2", "model.layers.47.block_sparse_moe.experts.44.w2", "model.layers.47.block_sparse_moe.experts.45.w2", "model.layers.47.block_sparse_moe.experts.46.w2", "model.layers.47.block_sparse_moe.experts.47.w2", "model.layers.47.block_sparse_moe.experts.48.w2", "model.layers.47.block_sparse_moe.experts.49.w2", "model.layers.47.block_sparse_moe.experts.50.w2", "model.layers.47.block_sparse_moe.experts.51.w2", "model.layers.47.block_sparse_moe.experts.52.w2", "model.layers.47.block_sparse_moe.experts.53.w2", "model.layers.47.block_sparse_moe.experts.54.w2", "model.layers.47.block_sparse_moe.experts.55.w2", "model.layers.47.block_sparse_moe.experts.56.w2", "model.layers.47.block_sparse_moe.experts.57.w2", "model.layers.47.block_sparse_moe.experts.58.w2", "model.layers.47.block_sparse_moe.experts.59.w2", "model.layers.47.block_sparse_moe.experts.60.w2", "model.layers.47.block_sparse_moe.experts.61.w2", "model.layers.47.block_sparse_moe.experts.62.w2", "model.layers.47.block_sparse_moe.experts.63.w2", "model.layers.47.block_sparse_moe.experts.64.w2", "model.layers.47.block_sparse_moe.experts.65.w2", "model.layers.47.block_sparse_moe.experts.66.w2", "model.layers.47.block_sparse_moe.experts.67.w2", "model.layers.47.block_sparse_moe.experts.68.w2", "model.layers.47.block_sparse_moe.experts.69.w2", "model.layers.47.block_sparse_moe.experts.70.w2", "model.layers.47.block_sparse_moe.experts.71.w2", "model.layers.47.block_sparse_moe.experts.72.w2", "model.layers.47.block_sparse_moe.experts.73.w2", "model.layers.47.block_sparse_moe.experts.74.w2", "model.layers.47.block_sparse_moe.experts.75.w2", "model.layers.47.block_sparse_moe.experts.76.w2", "model.layers.47.block_sparse_moe.experts.77.w2", "model.layers.47.block_sparse_moe.experts.78.w2", "model.layers.47.block_sparse_moe.experts.79.w2", "model.layers.47.block_sparse_moe.experts.80.w2", "model.layers.47.block_sparse_moe.experts.81.w2", "model.layers.47.block_sparse_moe.experts.82.w2", "model.layers.47.block_sparse_moe.experts.83.w2", "model.layers.47.block_sparse_moe.experts.84.w2", "model.layers.47.block_sparse_moe.experts.85.w2", "model.layers.47.block_sparse_moe.experts.86.w2", "model.layers.47.block_sparse_moe.experts.87.w2", "model.layers.47.block_sparse_moe.experts.88.w2", "model.layers.47.block_sparse_moe.experts.89.w2", "model.layers.47.block_sparse_moe.experts.90.w2", "model.layers.47.block_sparse_moe.experts.91.w2", "model.layers.47.block_sparse_moe.experts.92.w2", "model.layers.47.block_sparse_moe.experts.93.w2", "model.layers.47.block_sparse_moe.experts.94.w2", "model.layers.47.block_sparse_moe.experts.95.w2", "model.layers.47.block_sparse_moe.experts.96.w2", "model.layers.47.block_sparse_moe.experts.97.w2", "model.layers.47.block_sparse_moe.experts.98.w2", "model.layers.47.block_sparse_moe.experts.99.w2", "model.layers.47.block_sparse_moe.experts.100.w2", "model.layers.47.block_sparse_moe.experts.101.w2", "model.layers.47.block_sparse_moe.experts.102.w2", "model.layers.47.block_sparse_moe.experts.103.w2", "model.layers.47.block_sparse_moe.experts.104.w2", "model.layers.47.block_sparse_moe.experts.105.w2", "model.layers.47.block_sparse_moe.experts.106.w2", "model.layers.47.block_sparse_moe.experts.107.w2", "model.layers.47.block_sparse_moe.experts.108.w2", "model.layers.47.block_sparse_moe.experts.109.w2", "model.layers.47.block_sparse_moe.experts.110.w2", "model.layers.47.block_sparse_moe.experts.111.w2", "model.layers.47.block_sparse_moe.experts.112.w2", "model.layers.47.block_sparse_moe.experts.113.w2", "model.layers.47.block_sparse_moe.experts.114.w2", "model.layers.47.block_sparse_moe.experts.115.w2", "model.layers.47.block_sparse_moe.experts.116.w2", "model.layers.47.block_sparse_moe.experts.117.w2", "model.layers.47.block_sparse_moe.experts.118.w2", "model.layers.47.block_sparse_moe.experts.119.w2", "model.layers.47.block_sparse_moe.experts.120.w2", "model.layers.47.block_sparse_moe.experts.121.w2", "model.layers.47.block_sparse_moe.experts.122.w2", "model.layers.47.block_sparse_moe.experts.123.w2", "model.layers.47.block_sparse_moe.experts.124.w2", "model.layers.47.block_sparse_moe.experts.125.w2", "model.layers.47.block_sparse_moe.experts.126.w2", "model.layers.47.block_sparse_moe.experts.127.w2", "model.layers.47.block_sparse_moe.experts.128.w2", "model.layers.47.block_sparse_moe.experts.129.w2", "model.layers.47.block_sparse_moe.experts.130.w2", "model.layers.47.block_sparse_moe.experts.131.w2", "model.layers.47.block_sparse_moe.experts.132.w2", "model.layers.47.block_sparse_moe.experts.133.w2", "model.layers.47.block_sparse_moe.experts.134.w2", "model.layers.47.block_sparse_moe.experts.135.w2", "model.layers.47.block_sparse_moe.experts.136.w2", "model.layers.47.block_sparse_moe.experts.137.w2", "model.layers.47.block_sparse_moe.experts.138.w2", "model.layers.47.block_sparse_moe.experts.139.w2", "model.layers.47.block_sparse_moe.experts.140.w2", "model.layers.47.block_sparse_moe.experts.141.w2", "model.layers.47.block_sparse_moe.experts.142.w2", "model.layers.47.block_sparse_moe.experts.143.w2", "model.layers.47.block_sparse_moe.experts.144.w2", "model.layers.47.block_sparse_moe.experts.145.w2", "model.layers.47.block_sparse_moe.experts.146.w2", "model.layers.47.block_sparse_moe.experts.147.w2", "model.layers.47.block_sparse_moe.experts.148.w2", "model.layers.47.block_sparse_moe.experts.149.w2", "model.layers.47.block_sparse_moe.experts.150.w2", "model.layers.47.block_sparse_moe.experts.151.w2", "model.layers.47.block_sparse_moe.experts.152.w2", "model.layers.47.block_sparse_moe.experts.153.w2", "model.layers.47.block_sparse_moe.experts.154.w2", "model.layers.47.block_sparse_moe.experts.155.w2", "model.layers.47.block_sparse_moe.experts.156.w2", "model.layers.47.block_sparse_moe.experts.157.w2", "model.layers.47.block_sparse_moe.experts.158.w2", "model.layers.47.block_sparse_moe.experts.159.w2", "model.layers.47.block_sparse_moe.experts.160.w2", "model.layers.47.block_sparse_moe.experts.161.w2", "model.layers.47.block_sparse_moe.experts.162.w2", "model.layers.47.block_sparse_moe.experts.163.w2", "model.layers.47.block_sparse_moe.experts.164.w2", "model.layers.47.block_sparse_moe.experts.165.w2", "model.layers.47.block_sparse_moe.experts.166.w2", "model.layers.47.block_sparse_moe.experts.167.w2", "model.layers.47.block_sparse_moe.experts.168.w2", "model.layers.47.block_sparse_moe.experts.169.w2", "model.layers.47.block_sparse_moe.experts.170.w2", "model.layers.47.block_sparse_moe.experts.171.w2", "model.layers.47.block_sparse_moe.experts.172.w2", "model.layers.47.block_sparse_moe.experts.173.w2", "model.layers.47.block_sparse_moe.experts.174.w2", "model.layers.47.block_sparse_moe.experts.175.w2", "model.layers.47.block_sparse_moe.experts.176.w2", "model.layers.47.block_sparse_moe.experts.177.w2", "model.layers.47.block_sparse_moe.experts.178.w2", "model.layers.47.block_sparse_moe.experts.179.w2", "model.layers.47.block_sparse_moe.experts.180.w2", "model.layers.47.block_sparse_moe.experts.181.w2", "model.layers.47.block_sparse_moe.experts.182.w2", "model.layers.47.block_sparse_moe.experts.183.w2", "model.layers.47.block_sparse_moe.experts.184.w2", "model.layers.47.block_sparse_moe.experts.185.w2", "model.layers.47.block_sparse_moe.experts.186.w2", "model.layers.47.block_sparse_moe.experts.187.w2", "model.layers.47.block_sparse_moe.experts.188.w2", "model.layers.47.block_sparse_moe.experts.189.w2", "model.layers.47.block_sparse_moe.experts.190.w2", "model.layers.47.block_sparse_moe.experts.191.w2", "model.layers.47.block_sparse_moe.experts.192.w2", "model.layers.47.block_sparse_moe.experts.193.w2", "model.layers.47.block_sparse_moe.experts.194.w2", "model.layers.47.block_sparse_moe.experts.195.w2", "model.layers.47.block_sparse_moe.experts.196.w2", "model.layers.47.block_sparse_moe.experts.197.w2", "model.layers.47.block_sparse_moe.experts.198.w2", "model.layers.47.block_sparse_moe.experts.199.w2", "model.layers.47.block_sparse_moe.experts.200.w2", "model.layers.47.block_sparse_moe.experts.201.w2", "model.layers.47.block_sparse_moe.experts.202.w2", "model.layers.47.block_sparse_moe.experts.203.w2", "model.layers.47.block_sparse_moe.experts.204.w2", "model.layers.47.block_sparse_moe.experts.205.w2", "model.layers.47.block_sparse_moe.experts.206.w2", "model.layers.47.block_sparse_moe.experts.207.w2", "model.layers.47.block_sparse_moe.experts.208.w2", "model.layers.47.block_sparse_moe.experts.209.w2", "model.layers.47.block_sparse_moe.experts.210.w2", "model.layers.47.block_sparse_moe.experts.211.w2", "model.layers.47.block_sparse_moe.experts.212.w2", "model.layers.47.block_sparse_moe.experts.213.w2", "model.layers.47.block_sparse_moe.experts.214.w2", "model.layers.47.block_sparse_moe.experts.215.w2", "model.layers.47.block_sparse_moe.experts.216.w2", "model.layers.47.block_sparse_moe.experts.217.w2", "model.layers.47.block_sparse_moe.experts.218.w2", "model.layers.47.block_sparse_moe.experts.219.w2", "model.layers.47.block_sparse_moe.experts.220.w2", "model.layers.47.block_sparse_moe.experts.221.w2", "model.layers.47.block_sparse_moe.experts.222.w2", "model.layers.47.block_sparse_moe.experts.223.w2", "model.layers.47.block_sparse_moe.experts.224.w2", "model.layers.47.block_sparse_moe.experts.225.w2", "model.layers.47.block_sparse_moe.experts.226.w2", "model.layers.47.block_sparse_moe.experts.227.w2", "model.layers.47.block_sparse_moe.experts.228.w2", "model.layers.47.block_sparse_moe.experts.229.w2", "model.layers.47.block_sparse_moe.experts.230.w2", "model.layers.47.block_sparse_moe.experts.231.w2", "model.layers.47.block_sparse_moe.experts.232.w2", "model.layers.47.block_sparse_moe.experts.233.w2", "model.layers.47.block_sparse_moe.experts.234.w2", "model.layers.47.block_sparse_moe.experts.235.w2", "model.layers.47.block_sparse_moe.experts.236.w2", "model.layers.47.block_sparse_moe.experts.237.w2", "model.layers.47.block_sparse_moe.experts.238.w2", "model.layers.47.block_sparse_moe.experts.239.w2", "model.layers.47.block_sparse_moe.experts.240.w2", "model.layers.47.block_sparse_moe.experts.241.w2", "model.layers.47.block_sparse_moe.experts.242.w2", "model.layers.47.block_sparse_moe.experts.243.w2", "model.layers.47.block_sparse_moe.experts.244.w2", "model.layers.47.block_sparse_moe.experts.245.w2", "model.layers.47.block_sparse_moe.experts.246.w2", "model.layers.47.block_sparse_moe.experts.247.w2", "model.layers.47.block_sparse_moe.experts.248.w2", "model.layers.47.block_sparse_moe.experts.249.w2", "model.layers.47.block_sparse_moe.experts.250.w2", "model.layers.47.block_sparse_moe.experts.251.w2", "model.layers.47.block_sparse_moe.experts.252.w2", "model.layers.47.block_sparse_moe.experts.253.w2", "model.layers.47.block_sparse_moe.experts.254.w2", "model.layers.47.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 2.4948455393317137e-05, "dbits": 1207959552 } ] }, { "idx": 240, "layers": [ "model.layers.48.self_attn.q_proj" ], "candidates": [ { "dkld": -9.772498160601217e-05, "dbits": 18874368 } ] }, { "idx": 241, "layers": [ "model.layers.48.self_attn.k_proj", "model.layers.48.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0010104436427354896, "dbits": 6291456 } ] }, { "idx": 242, "layers": [ "model.layers.48.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0005497990176081685, "dbits": 18874368 } ] }, { "idx": 243, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w1", "model.layers.48.block_sparse_moe.experts.1.w1", "model.layers.48.block_sparse_moe.experts.2.w1", "model.layers.48.block_sparse_moe.experts.3.w1", "model.layers.48.block_sparse_moe.experts.4.w1", "model.layers.48.block_sparse_moe.experts.5.w1", "model.layers.48.block_sparse_moe.experts.6.w1", "model.layers.48.block_sparse_moe.experts.7.w1", "model.layers.48.block_sparse_moe.experts.8.w1", "model.layers.48.block_sparse_moe.experts.9.w1", "model.layers.48.block_sparse_moe.experts.10.w1", "model.layers.48.block_sparse_moe.experts.11.w1", "model.layers.48.block_sparse_moe.experts.12.w1", "model.layers.48.block_sparse_moe.experts.13.w1", "model.layers.48.block_sparse_moe.experts.14.w1", "model.layers.48.block_sparse_moe.experts.15.w1", "model.layers.48.block_sparse_moe.experts.16.w1", "model.layers.48.block_sparse_moe.experts.17.w1", "model.layers.48.block_sparse_moe.experts.18.w1", "model.layers.48.block_sparse_moe.experts.19.w1", "model.layers.48.block_sparse_moe.experts.20.w1", "model.layers.48.block_sparse_moe.experts.21.w1", "model.layers.48.block_sparse_moe.experts.22.w1", "model.layers.48.block_sparse_moe.experts.23.w1", "model.layers.48.block_sparse_moe.experts.24.w1", "model.layers.48.block_sparse_moe.experts.25.w1", "model.layers.48.block_sparse_moe.experts.26.w1", "model.layers.48.block_sparse_moe.experts.27.w1", "model.layers.48.block_sparse_moe.experts.28.w1", "model.layers.48.block_sparse_moe.experts.29.w1", "model.layers.48.block_sparse_moe.experts.30.w1", "model.layers.48.block_sparse_moe.experts.31.w1", "model.layers.48.block_sparse_moe.experts.32.w1", "model.layers.48.block_sparse_moe.experts.33.w1", "model.layers.48.block_sparse_moe.experts.34.w1", "model.layers.48.block_sparse_moe.experts.35.w1", "model.layers.48.block_sparse_moe.experts.36.w1", "model.layers.48.block_sparse_moe.experts.37.w1", "model.layers.48.block_sparse_moe.experts.38.w1", "model.layers.48.block_sparse_moe.experts.39.w1", "model.layers.48.block_sparse_moe.experts.40.w1", "model.layers.48.block_sparse_moe.experts.41.w1", "model.layers.48.block_sparse_moe.experts.42.w1", "model.layers.48.block_sparse_moe.experts.43.w1", "model.layers.48.block_sparse_moe.experts.44.w1", "model.layers.48.block_sparse_moe.experts.45.w1", "model.layers.48.block_sparse_moe.experts.46.w1", "model.layers.48.block_sparse_moe.experts.47.w1", "model.layers.48.block_sparse_moe.experts.48.w1", "model.layers.48.block_sparse_moe.experts.49.w1", "model.layers.48.block_sparse_moe.experts.50.w1", "model.layers.48.block_sparse_moe.experts.51.w1", "model.layers.48.block_sparse_moe.experts.52.w1", "model.layers.48.block_sparse_moe.experts.53.w1", "model.layers.48.block_sparse_moe.experts.54.w1", "model.layers.48.block_sparse_moe.experts.55.w1", "model.layers.48.block_sparse_moe.experts.56.w1", "model.layers.48.block_sparse_moe.experts.57.w1", "model.layers.48.block_sparse_moe.experts.58.w1", "model.layers.48.block_sparse_moe.experts.59.w1", "model.layers.48.block_sparse_moe.experts.60.w1", "model.layers.48.block_sparse_moe.experts.61.w1", "model.layers.48.block_sparse_moe.experts.62.w1", "model.layers.48.block_sparse_moe.experts.63.w1", "model.layers.48.block_sparse_moe.experts.64.w1", "model.layers.48.block_sparse_moe.experts.65.w1", "model.layers.48.block_sparse_moe.experts.66.w1", "model.layers.48.block_sparse_moe.experts.67.w1", "model.layers.48.block_sparse_moe.experts.68.w1", "model.layers.48.block_sparse_moe.experts.69.w1", "model.layers.48.block_sparse_moe.experts.70.w1", "model.layers.48.block_sparse_moe.experts.71.w1", "model.layers.48.block_sparse_moe.experts.72.w1", "model.layers.48.block_sparse_moe.experts.73.w1", "model.layers.48.block_sparse_moe.experts.74.w1", "model.layers.48.block_sparse_moe.experts.75.w1", "model.layers.48.block_sparse_moe.experts.76.w1", "model.layers.48.block_sparse_moe.experts.77.w1", "model.layers.48.block_sparse_moe.experts.78.w1", "model.layers.48.block_sparse_moe.experts.79.w1", "model.layers.48.block_sparse_moe.experts.80.w1", "model.layers.48.block_sparse_moe.experts.81.w1", "model.layers.48.block_sparse_moe.experts.82.w1", "model.layers.48.block_sparse_moe.experts.83.w1", "model.layers.48.block_sparse_moe.experts.84.w1", "model.layers.48.block_sparse_moe.experts.85.w1", "model.layers.48.block_sparse_moe.experts.86.w1", "model.layers.48.block_sparse_moe.experts.87.w1", "model.layers.48.block_sparse_moe.experts.88.w1", "model.layers.48.block_sparse_moe.experts.89.w1", "model.layers.48.block_sparse_moe.experts.90.w1", "model.layers.48.block_sparse_moe.experts.91.w1", "model.layers.48.block_sparse_moe.experts.92.w1", "model.layers.48.block_sparse_moe.experts.93.w1", "model.layers.48.block_sparse_moe.experts.94.w1", "model.layers.48.block_sparse_moe.experts.95.w1", "model.layers.48.block_sparse_moe.experts.96.w1", "model.layers.48.block_sparse_moe.experts.97.w1", "model.layers.48.block_sparse_moe.experts.98.w1", "model.layers.48.block_sparse_moe.experts.99.w1", "model.layers.48.block_sparse_moe.experts.100.w1", "model.layers.48.block_sparse_moe.experts.101.w1", "model.layers.48.block_sparse_moe.experts.102.w1", "model.layers.48.block_sparse_moe.experts.103.w1", "model.layers.48.block_sparse_moe.experts.104.w1", "model.layers.48.block_sparse_moe.experts.105.w1", "model.layers.48.block_sparse_moe.experts.106.w1", "model.layers.48.block_sparse_moe.experts.107.w1", "model.layers.48.block_sparse_moe.experts.108.w1", "model.layers.48.block_sparse_moe.experts.109.w1", "model.layers.48.block_sparse_moe.experts.110.w1", "model.layers.48.block_sparse_moe.experts.111.w1", "model.layers.48.block_sparse_moe.experts.112.w1", "model.layers.48.block_sparse_moe.experts.113.w1", "model.layers.48.block_sparse_moe.experts.114.w1", "model.layers.48.block_sparse_moe.experts.115.w1", "model.layers.48.block_sparse_moe.experts.116.w1", "model.layers.48.block_sparse_moe.experts.117.w1", "model.layers.48.block_sparse_moe.experts.118.w1", "model.layers.48.block_sparse_moe.experts.119.w1", "model.layers.48.block_sparse_moe.experts.120.w1", "model.layers.48.block_sparse_moe.experts.121.w1", "model.layers.48.block_sparse_moe.experts.122.w1", "model.layers.48.block_sparse_moe.experts.123.w1", "model.layers.48.block_sparse_moe.experts.124.w1", "model.layers.48.block_sparse_moe.experts.125.w1", "model.layers.48.block_sparse_moe.experts.126.w1", "model.layers.48.block_sparse_moe.experts.127.w1", "model.layers.48.block_sparse_moe.experts.128.w1", "model.layers.48.block_sparse_moe.experts.129.w1", "model.layers.48.block_sparse_moe.experts.130.w1", "model.layers.48.block_sparse_moe.experts.131.w1", "model.layers.48.block_sparse_moe.experts.132.w1", "model.layers.48.block_sparse_moe.experts.133.w1", "model.layers.48.block_sparse_moe.experts.134.w1", "model.layers.48.block_sparse_moe.experts.135.w1", "model.layers.48.block_sparse_moe.experts.136.w1", "model.layers.48.block_sparse_moe.experts.137.w1", "model.layers.48.block_sparse_moe.experts.138.w1", "model.layers.48.block_sparse_moe.experts.139.w1", "model.layers.48.block_sparse_moe.experts.140.w1", "model.layers.48.block_sparse_moe.experts.141.w1", "model.layers.48.block_sparse_moe.experts.142.w1", "model.layers.48.block_sparse_moe.experts.143.w1", "model.layers.48.block_sparse_moe.experts.144.w1", "model.layers.48.block_sparse_moe.experts.145.w1", "model.layers.48.block_sparse_moe.experts.146.w1", "model.layers.48.block_sparse_moe.experts.147.w1", "model.layers.48.block_sparse_moe.experts.148.w1", "model.layers.48.block_sparse_moe.experts.149.w1", "model.layers.48.block_sparse_moe.experts.150.w1", "model.layers.48.block_sparse_moe.experts.151.w1", "model.layers.48.block_sparse_moe.experts.152.w1", "model.layers.48.block_sparse_moe.experts.153.w1", "model.layers.48.block_sparse_moe.experts.154.w1", "model.layers.48.block_sparse_moe.experts.155.w1", "model.layers.48.block_sparse_moe.experts.156.w1", "model.layers.48.block_sparse_moe.experts.157.w1", "model.layers.48.block_sparse_moe.experts.158.w1", "model.layers.48.block_sparse_moe.experts.159.w1", "model.layers.48.block_sparse_moe.experts.160.w1", "model.layers.48.block_sparse_moe.experts.161.w1", "model.layers.48.block_sparse_moe.experts.162.w1", "model.layers.48.block_sparse_moe.experts.163.w1", "model.layers.48.block_sparse_moe.experts.164.w1", "model.layers.48.block_sparse_moe.experts.165.w1", "model.layers.48.block_sparse_moe.experts.166.w1", "model.layers.48.block_sparse_moe.experts.167.w1", "model.layers.48.block_sparse_moe.experts.168.w1", "model.layers.48.block_sparse_moe.experts.169.w1", "model.layers.48.block_sparse_moe.experts.170.w1", "model.layers.48.block_sparse_moe.experts.171.w1", "model.layers.48.block_sparse_moe.experts.172.w1", "model.layers.48.block_sparse_moe.experts.173.w1", "model.layers.48.block_sparse_moe.experts.174.w1", "model.layers.48.block_sparse_moe.experts.175.w1", "model.layers.48.block_sparse_moe.experts.176.w1", "model.layers.48.block_sparse_moe.experts.177.w1", "model.layers.48.block_sparse_moe.experts.178.w1", "model.layers.48.block_sparse_moe.experts.179.w1", "model.layers.48.block_sparse_moe.experts.180.w1", "model.layers.48.block_sparse_moe.experts.181.w1", "model.layers.48.block_sparse_moe.experts.182.w1", "model.layers.48.block_sparse_moe.experts.183.w1", "model.layers.48.block_sparse_moe.experts.184.w1", "model.layers.48.block_sparse_moe.experts.185.w1", "model.layers.48.block_sparse_moe.experts.186.w1", "model.layers.48.block_sparse_moe.experts.187.w1", "model.layers.48.block_sparse_moe.experts.188.w1", "model.layers.48.block_sparse_moe.experts.189.w1", "model.layers.48.block_sparse_moe.experts.190.w1", "model.layers.48.block_sparse_moe.experts.191.w1", "model.layers.48.block_sparse_moe.experts.192.w1", "model.layers.48.block_sparse_moe.experts.193.w1", "model.layers.48.block_sparse_moe.experts.194.w1", "model.layers.48.block_sparse_moe.experts.195.w1", "model.layers.48.block_sparse_moe.experts.196.w1", "model.layers.48.block_sparse_moe.experts.197.w1", "model.layers.48.block_sparse_moe.experts.198.w1", "model.layers.48.block_sparse_moe.experts.199.w1", "model.layers.48.block_sparse_moe.experts.200.w1", "model.layers.48.block_sparse_moe.experts.201.w1", "model.layers.48.block_sparse_moe.experts.202.w1", "model.layers.48.block_sparse_moe.experts.203.w1", "model.layers.48.block_sparse_moe.experts.204.w1", "model.layers.48.block_sparse_moe.experts.205.w1", "model.layers.48.block_sparse_moe.experts.206.w1", "model.layers.48.block_sparse_moe.experts.207.w1", "model.layers.48.block_sparse_moe.experts.208.w1", "model.layers.48.block_sparse_moe.experts.209.w1", "model.layers.48.block_sparse_moe.experts.210.w1", "model.layers.48.block_sparse_moe.experts.211.w1", "model.layers.48.block_sparse_moe.experts.212.w1", "model.layers.48.block_sparse_moe.experts.213.w1", "model.layers.48.block_sparse_moe.experts.214.w1", "model.layers.48.block_sparse_moe.experts.215.w1", "model.layers.48.block_sparse_moe.experts.216.w1", "model.layers.48.block_sparse_moe.experts.217.w1", "model.layers.48.block_sparse_moe.experts.218.w1", "model.layers.48.block_sparse_moe.experts.219.w1", "model.layers.48.block_sparse_moe.experts.220.w1", "model.layers.48.block_sparse_moe.experts.221.w1", "model.layers.48.block_sparse_moe.experts.222.w1", "model.layers.48.block_sparse_moe.experts.223.w1", "model.layers.48.block_sparse_moe.experts.224.w1", "model.layers.48.block_sparse_moe.experts.225.w1", "model.layers.48.block_sparse_moe.experts.226.w1", "model.layers.48.block_sparse_moe.experts.227.w1", "model.layers.48.block_sparse_moe.experts.228.w1", "model.layers.48.block_sparse_moe.experts.229.w1", "model.layers.48.block_sparse_moe.experts.230.w1", "model.layers.48.block_sparse_moe.experts.231.w1", "model.layers.48.block_sparse_moe.experts.232.w1", "model.layers.48.block_sparse_moe.experts.233.w1", "model.layers.48.block_sparse_moe.experts.234.w1", "model.layers.48.block_sparse_moe.experts.235.w1", "model.layers.48.block_sparse_moe.experts.236.w1", "model.layers.48.block_sparse_moe.experts.237.w1", "model.layers.48.block_sparse_moe.experts.238.w1", "model.layers.48.block_sparse_moe.experts.239.w1", "model.layers.48.block_sparse_moe.experts.240.w1", "model.layers.48.block_sparse_moe.experts.241.w1", "model.layers.48.block_sparse_moe.experts.242.w1", "model.layers.48.block_sparse_moe.experts.243.w1", "model.layers.48.block_sparse_moe.experts.244.w1", "model.layers.48.block_sparse_moe.experts.245.w1", "model.layers.48.block_sparse_moe.experts.246.w1", "model.layers.48.block_sparse_moe.experts.247.w1", "model.layers.48.block_sparse_moe.experts.248.w1", "model.layers.48.block_sparse_moe.experts.249.w1", "model.layers.48.block_sparse_moe.experts.250.w1", "model.layers.48.block_sparse_moe.experts.251.w1", "model.layers.48.block_sparse_moe.experts.252.w1", "model.layers.48.block_sparse_moe.experts.253.w1", "model.layers.48.block_sparse_moe.experts.254.w1", "model.layers.48.block_sparse_moe.experts.255.w1", "model.layers.48.block_sparse_moe.experts.0.w3", "model.layers.48.block_sparse_moe.experts.1.w3", "model.layers.48.block_sparse_moe.experts.2.w3", "model.layers.48.block_sparse_moe.experts.3.w3", "model.layers.48.block_sparse_moe.experts.4.w3", "model.layers.48.block_sparse_moe.experts.5.w3", "model.layers.48.block_sparse_moe.experts.6.w3", "model.layers.48.block_sparse_moe.experts.7.w3", "model.layers.48.block_sparse_moe.experts.8.w3", "model.layers.48.block_sparse_moe.experts.9.w3", "model.layers.48.block_sparse_moe.experts.10.w3", "model.layers.48.block_sparse_moe.experts.11.w3", "model.layers.48.block_sparse_moe.experts.12.w3", "model.layers.48.block_sparse_moe.experts.13.w3", "model.layers.48.block_sparse_moe.experts.14.w3", "model.layers.48.block_sparse_moe.experts.15.w3", "model.layers.48.block_sparse_moe.experts.16.w3", "model.layers.48.block_sparse_moe.experts.17.w3", "model.layers.48.block_sparse_moe.experts.18.w3", "model.layers.48.block_sparse_moe.experts.19.w3", "model.layers.48.block_sparse_moe.experts.20.w3", "model.layers.48.block_sparse_moe.experts.21.w3", "model.layers.48.block_sparse_moe.experts.22.w3", "model.layers.48.block_sparse_moe.experts.23.w3", "model.layers.48.block_sparse_moe.experts.24.w3", "model.layers.48.block_sparse_moe.experts.25.w3", "model.layers.48.block_sparse_moe.experts.26.w3", "model.layers.48.block_sparse_moe.experts.27.w3", "model.layers.48.block_sparse_moe.experts.28.w3", "model.layers.48.block_sparse_moe.experts.29.w3", "model.layers.48.block_sparse_moe.experts.30.w3", "model.layers.48.block_sparse_moe.experts.31.w3", "model.layers.48.block_sparse_moe.experts.32.w3", "model.layers.48.block_sparse_moe.experts.33.w3", "model.layers.48.block_sparse_moe.experts.34.w3", "model.layers.48.block_sparse_moe.experts.35.w3", "model.layers.48.block_sparse_moe.experts.36.w3", "model.layers.48.block_sparse_moe.experts.37.w3", "model.layers.48.block_sparse_moe.experts.38.w3", "model.layers.48.block_sparse_moe.experts.39.w3", "model.layers.48.block_sparse_moe.experts.40.w3", "model.layers.48.block_sparse_moe.experts.41.w3", "model.layers.48.block_sparse_moe.experts.42.w3", "model.layers.48.block_sparse_moe.experts.43.w3", "model.layers.48.block_sparse_moe.experts.44.w3", "model.layers.48.block_sparse_moe.experts.45.w3", "model.layers.48.block_sparse_moe.experts.46.w3", "model.layers.48.block_sparse_moe.experts.47.w3", "model.layers.48.block_sparse_moe.experts.48.w3", "model.layers.48.block_sparse_moe.experts.49.w3", "model.layers.48.block_sparse_moe.experts.50.w3", "model.layers.48.block_sparse_moe.experts.51.w3", "model.layers.48.block_sparse_moe.experts.52.w3", "model.layers.48.block_sparse_moe.experts.53.w3", "model.layers.48.block_sparse_moe.experts.54.w3", "model.layers.48.block_sparse_moe.experts.55.w3", "model.layers.48.block_sparse_moe.experts.56.w3", "model.layers.48.block_sparse_moe.experts.57.w3", "model.layers.48.block_sparse_moe.experts.58.w3", "model.layers.48.block_sparse_moe.experts.59.w3", "model.layers.48.block_sparse_moe.experts.60.w3", "model.layers.48.block_sparse_moe.experts.61.w3", "model.layers.48.block_sparse_moe.experts.62.w3", "model.layers.48.block_sparse_moe.experts.63.w3", "model.layers.48.block_sparse_moe.experts.64.w3", "model.layers.48.block_sparse_moe.experts.65.w3", "model.layers.48.block_sparse_moe.experts.66.w3", "model.layers.48.block_sparse_moe.experts.67.w3", "model.layers.48.block_sparse_moe.experts.68.w3", "model.layers.48.block_sparse_moe.experts.69.w3", "model.layers.48.block_sparse_moe.experts.70.w3", "model.layers.48.block_sparse_moe.experts.71.w3", "model.layers.48.block_sparse_moe.experts.72.w3", "model.layers.48.block_sparse_moe.experts.73.w3", "model.layers.48.block_sparse_moe.experts.74.w3", "model.layers.48.block_sparse_moe.experts.75.w3", "model.layers.48.block_sparse_moe.experts.76.w3", "model.layers.48.block_sparse_moe.experts.77.w3", "model.layers.48.block_sparse_moe.experts.78.w3", "model.layers.48.block_sparse_moe.experts.79.w3", "model.layers.48.block_sparse_moe.experts.80.w3", "model.layers.48.block_sparse_moe.experts.81.w3", "model.layers.48.block_sparse_moe.experts.82.w3", "model.layers.48.block_sparse_moe.experts.83.w3", "model.layers.48.block_sparse_moe.experts.84.w3", "model.layers.48.block_sparse_moe.experts.85.w3", "model.layers.48.block_sparse_moe.experts.86.w3", "model.layers.48.block_sparse_moe.experts.87.w3", "model.layers.48.block_sparse_moe.experts.88.w3", "model.layers.48.block_sparse_moe.experts.89.w3", "model.layers.48.block_sparse_moe.experts.90.w3", "model.layers.48.block_sparse_moe.experts.91.w3", "model.layers.48.block_sparse_moe.experts.92.w3", "model.layers.48.block_sparse_moe.experts.93.w3", "model.layers.48.block_sparse_moe.experts.94.w3", "model.layers.48.block_sparse_moe.experts.95.w3", "model.layers.48.block_sparse_moe.experts.96.w3", "model.layers.48.block_sparse_moe.experts.97.w3", "model.layers.48.block_sparse_moe.experts.98.w3", "model.layers.48.block_sparse_moe.experts.99.w3", "model.layers.48.block_sparse_moe.experts.100.w3", "model.layers.48.block_sparse_moe.experts.101.w3", "model.layers.48.block_sparse_moe.experts.102.w3", "model.layers.48.block_sparse_moe.experts.103.w3", "model.layers.48.block_sparse_moe.experts.104.w3", "model.layers.48.block_sparse_moe.experts.105.w3", "model.layers.48.block_sparse_moe.experts.106.w3", "model.layers.48.block_sparse_moe.experts.107.w3", "model.layers.48.block_sparse_moe.experts.108.w3", "model.layers.48.block_sparse_moe.experts.109.w3", "model.layers.48.block_sparse_moe.experts.110.w3", "model.layers.48.block_sparse_moe.experts.111.w3", "model.layers.48.block_sparse_moe.experts.112.w3", "model.layers.48.block_sparse_moe.experts.113.w3", "model.layers.48.block_sparse_moe.experts.114.w3", "model.layers.48.block_sparse_moe.experts.115.w3", "model.layers.48.block_sparse_moe.experts.116.w3", "model.layers.48.block_sparse_moe.experts.117.w3", "model.layers.48.block_sparse_moe.experts.118.w3", "model.layers.48.block_sparse_moe.experts.119.w3", "model.layers.48.block_sparse_moe.experts.120.w3", "model.layers.48.block_sparse_moe.experts.121.w3", "model.layers.48.block_sparse_moe.experts.122.w3", "model.layers.48.block_sparse_moe.experts.123.w3", "model.layers.48.block_sparse_moe.experts.124.w3", "model.layers.48.block_sparse_moe.experts.125.w3", "model.layers.48.block_sparse_moe.experts.126.w3", "model.layers.48.block_sparse_moe.experts.127.w3", "model.layers.48.block_sparse_moe.experts.128.w3", "model.layers.48.block_sparse_moe.experts.129.w3", "model.layers.48.block_sparse_moe.experts.130.w3", "model.layers.48.block_sparse_moe.experts.131.w3", "model.layers.48.block_sparse_moe.experts.132.w3", "model.layers.48.block_sparse_moe.experts.133.w3", "model.layers.48.block_sparse_moe.experts.134.w3", "model.layers.48.block_sparse_moe.experts.135.w3", "model.layers.48.block_sparse_moe.experts.136.w3", "model.layers.48.block_sparse_moe.experts.137.w3", "model.layers.48.block_sparse_moe.experts.138.w3", "model.layers.48.block_sparse_moe.experts.139.w3", "model.layers.48.block_sparse_moe.experts.140.w3", "model.layers.48.block_sparse_moe.experts.141.w3", "model.layers.48.block_sparse_moe.experts.142.w3", "model.layers.48.block_sparse_moe.experts.143.w3", "model.layers.48.block_sparse_moe.experts.144.w3", "model.layers.48.block_sparse_moe.experts.145.w3", "model.layers.48.block_sparse_moe.experts.146.w3", "model.layers.48.block_sparse_moe.experts.147.w3", "model.layers.48.block_sparse_moe.experts.148.w3", "model.layers.48.block_sparse_moe.experts.149.w3", "model.layers.48.block_sparse_moe.experts.150.w3", "model.layers.48.block_sparse_moe.experts.151.w3", "model.layers.48.block_sparse_moe.experts.152.w3", "model.layers.48.block_sparse_moe.experts.153.w3", "model.layers.48.block_sparse_moe.experts.154.w3", "model.layers.48.block_sparse_moe.experts.155.w3", "model.layers.48.block_sparse_moe.experts.156.w3", "model.layers.48.block_sparse_moe.experts.157.w3", "model.layers.48.block_sparse_moe.experts.158.w3", "model.layers.48.block_sparse_moe.experts.159.w3", "model.layers.48.block_sparse_moe.experts.160.w3", "model.layers.48.block_sparse_moe.experts.161.w3", "model.layers.48.block_sparse_moe.experts.162.w3", "model.layers.48.block_sparse_moe.experts.163.w3", "model.layers.48.block_sparse_moe.experts.164.w3", "model.layers.48.block_sparse_moe.experts.165.w3", "model.layers.48.block_sparse_moe.experts.166.w3", "model.layers.48.block_sparse_moe.experts.167.w3", "model.layers.48.block_sparse_moe.experts.168.w3", "model.layers.48.block_sparse_moe.experts.169.w3", "model.layers.48.block_sparse_moe.experts.170.w3", "model.layers.48.block_sparse_moe.experts.171.w3", "model.layers.48.block_sparse_moe.experts.172.w3", "model.layers.48.block_sparse_moe.experts.173.w3", "model.layers.48.block_sparse_moe.experts.174.w3", "model.layers.48.block_sparse_moe.experts.175.w3", "model.layers.48.block_sparse_moe.experts.176.w3", "model.layers.48.block_sparse_moe.experts.177.w3", "model.layers.48.block_sparse_moe.experts.178.w3", "model.layers.48.block_sparse_moe.experts.179.w3", "model.layers.48.block_sparse_moe.experts.180.w3", "model.layers.48.block_sparse_moe.experts.181.w3", "model.layers.48.block_sparse_moe.experts.182.w3", "model.layers.48.block_sparse_moe.experts.183.w3", "model.layers.48.block_sparse_moe.experts.184.w3", "model.layers.48.block_sparse_moe.experts.185.w3", "model.layers.48.block_sparse_moe.experts.186.w3", "model.layers.48.block_sparse_moe.experts.187.w3", "model.layers.48.block_sparse_moe.experts.188.w3", "model.layers.48.block_sparse_moe.experts.189.w3", "model.layers.48.block_sparse_moe.experts.190.w3", "model.layers.48.block_sparse_moe.experts.191.w3", "model.layers.48.block_sparse_moe.experts.192.w3", "model.layers.48.block_sparse_moe.experts.193.w3", "model.layers.48.block_sparse_moe.experts.194.w3", "model.layers.48.block_sparse_moe.experts.195.w3", "model.layers.48.block_sparse_moe.experts.196.w3", "model.layers.48.block_sparse_moe.experts.197.w3", "model.layers.48.block_sparse_moe.experts.198.w3", "model.layers.48.block_sparse_moe.experts.199.w3", "model.layers.48.block_sparse_moe.experts.200.w3", "model.layers.48.block_sparse_moe.experts.201.w3", "model.layers.48.block_sparse_moe.experts.202.w3", "model.layers.48.block_sparse_moe.experts.203.w3", "model.layers.48.block_sparse_moe.experts.204.w3", "model.layers.48.block_sparse_moe.experts.205.w3", "model.layers.48.block_sparse_moe.experts.206.w3", "model.layers.48.block_sparse_moe.experts.207.w3", "model.layers.48.block_sparse_moe.experts.208.w3", "model.layers.48.block_sparse_moe.experts.209.w3", "model.layers.48.block_sparse_moe.experts.210.w3", "model.layers.48.block_sparse_moe.experts.211.w3", "model.layers.48.block_sparse_moe.experts.212.w3", "model.layers.48.block_sparse_moe.experts.213.w3", "model.layers.48.block_sparse_moe.experts.214.w3", "model.layers.48.block_sparse_moe.experts.215.w3", "model.layers.48.block_sparse_moe.experts.216.w3", "model.layers.48.block_sparse_moe.experts.217.w3", "model.layers.48.block_sparse_moe.experts.218.w3", "model.layers.48.block_sparse_moe.experts.219.w3", "model.layers.48.block_sparse_moe.experts.220.w3", "model.layers.48.block_sparse_moe.experts.221.w3", "model.layers.48.block_sparse_moe.experts.222.w3", "model.layers.48.block_sparse_moe.experts.223.w3", "model.layers.48.block_sparse_moe.experts.224.w3", "model.layers.48.block_sparse_moe.experts.225.w3", "model.layers.48.block_sparse_moe.experts.226.w3", "model.layers.48.block_sparse_moe.experts.227.w3", "model.layers.48.block_sparse_moe.experts.228.w3", "model.layers.48.block_sparse_moe.experts.229.w3", "model.layers.48.block_sparse_moe.experts.230.w3", "model.layers.48.block_sparse_moe.experts.231.w3", "model.layers.48.block_sparse_moe.experts.232.w3", "model.layers.48.block_sparse_moe.experts.233.w3", "model.layers.48.block_sparse_moe.experts.234.w3", "model.layers.48.block_sparse_moe.experts.235.w3", "model.layers.48.block_sparse_moe.experts.236.w3", "model.layers.48.block_sparse_moe.experts.237.w3", "model.layers.48.block_sparse_moe.experts.238.w3", "model.layers.48.block_sparse_moe.experts.239.w3", "model.layers.48.block_sparse_moe.experts.240.w3", "model.layers.48.block_sparse_moe.experts.241.w3", "model.layers.48.block_sparse_moe.experts.242.w3", "model.layers.48.block_sparse_moe.experts.243.w3", "model.layers.48.block_sparse_moe.experts.244.w3", "model.layers.48.block_sparse_moe.experts.245.w3", "model.layers.48.block_sparse_moe.experts.246.w3", "model.layers.48.block_sparse_moe.experts.247.w3", "model.layers.48.block_sparse_moe.experts.248.w3", "model.layers.48.block_sparse_moe.experts.249.w3", "model.layers.48.block_sparse_moe.experts.250.w3", "model.layers.48.block_sparse_moe.experts.251.w3", "model.layers.48.block_sparse_moe.experts.252.w3", "model.layers.48.block_sparse_moe.experts.253.w3", "model.layers.48.block_sparse_moe.experts.254.w3", "model.layers.48.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00014571771025657931, "dbits": 2415919104 } ] }, { "idx": 244, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w2", "model.layers.48.block_sparse_moe.experts.1.w2", "model.layers.48.block_sparse_moe.experts.2.w2", "model.layers.48.block_sparse_moe.experts.3.w2", "model.layers.48.block_sparse_moe.experts.4.w2", "model.layers.48.block_sparse_moe.experts.5.w2", "model.layers.48.block_sparse_moe.experts.6.w2", "model.layers.48.block_sparse_moe.experts.7.w2", "model.layers.48.block_sparse_moe.experts.8.w2", "model.layers.48.block_sparse_moe.experts.9.w2", "model.layers.48.block_sparse_moe.experts.10.w2", "model.layers.48.block_sparse_moe.experts.11.w2", "model.layers.48.block_sparse_moe.experts.12.w2", "model.layers.48.block_sparse_moe.experts.13.w2", "model.layers.48.block_sparse_moe.experts.14.w2", "model.layers.48.block_sparse_moe.experts.15.w2", "model.layers.48.block_sparse_moe.experts.16.w2", "model.layers.48.block_sparse_moe.experts.17.w2", "model.layers.48.block_sparse_moe.experts.18.w2", "model.layers.48.block_sparse_moe.experts.19.w2", "model.layers.48.block_sparse_moe.experts.20.w2", "model.layers.48.block_sparse_moe.experts.21.w2", "model.layers.48.block_sparse_moe.experts.22.w2", "model.layers.48.block_sparse_moe.experts.23.w2", "model.layers.48.block_sparse_moe.experts.24.w2", "model.layers.48.block_sparse_moe.experts.25.w2", "model.layers.48.block_sparse_moe.experts.26.w2", "model.layers.48.block_sparse_moe.experts.27.w2", "model.layers.48.block_sparse_moe.experts.28.w2", "model.layers.48.block_sparse_moe.experts.29.w2", "model.layers.48.block_sparse_moe.experts.30.w2", "model.layers.48.block_sparse_moe.experts.31.w2", "model.layers.48.block_sparse_moe.experts.32.w2", "model.layers.48.block_sparse_moe.experts.33.w2", "model.layers.48.block_sparse_moe.experts.34.w2", "model.layers.48.block_sparse_moe.experts.35.w2", "model.layers.48.block_sparse_moe.experts.36.w2", "model.layers.48.block_sparse_moe.experts.37.w2", "model.layers.48.block_sparse_moe.experts.38.w2", "model.layers.48.block_sparse_moe.experts.39.w2", "model.layers.48.block_sparse_moe.experts.40.w2", "model.layers.48.block_sparse_moe.experts.41.w2", "model.layers.48.block_sparse_moe.experts.42.w2", "model.layers.48.block_sparse_moe.experts.43.w2", "model.layers.48.block_sparse_moe.experts.44.w2", "model.layers.48.block_sparse_moe.experts.45.w2", "model.layers.48.block_sparse_moe.experts.46.w2", "model.layers.48.block_sparse_moe.experts.47.w2", "model.layers.48.block_sparse_moe.experts.48.w2", "model.layers.48.block_sparse_moe.experts.49.w2", "model.layers.48.block_sparse_moe.experts.50.w2", "model.layers.48.block_sparse_moe.experts.51.w2", "model.layers.48.block_sparse_moe.experts.52.w2", "model.layers.48.block_sparse_moe.experts.53.w2", "model.layers.48.block_sparse_moe.experts.54.w2", "model.layers.48.block_sparse_moe.experts.55.w2", "model.layers.48.block_sparse_moe.experts.56.w2", "model.layers.48.block_sparse_moe.experts.57.w2", "model.layers.48.block_sparse_moe.experts.58.w2", "model.layers.48.block_sparse_moe.experts.59.w2", "model.layers.48.block_sparse_moe.experts.60.w2", "model.layers.48.block_sparse_moe.experts.61.w2", "model.layers.48.block_sparse_moe.experts.62.w2", "model.layers.48.block_sparse_moe.experts.63.w2", "model.layers.48.block_sparse_moe.experts.64.w2", "model.layers.48.block_sparse_moe.experts.65.w2", "model.layers.48.block_sparse_moe.experts.66.w2", "model.layers.48.block_sparse_moe.experts.67.w2", "model.layers.48.block_sparse_moe.experts.68.w2", "model.layers.48.block_sparse_moe.experts.69.w2", "model.layers.48.block_sparse_moe.experts.70.w2", "model.layers.48.block_sparse_moe.experts.71.w2", "model.layers.48.block_sparse_moe.experts.72.w2", "model.layers.48.block_sparse_moe.experts.73.w2", "model.layers.48.block_sparse_moe.experts.74.w2", "model.layers.48.block_sparse_moe.experts.75.w2", "model.layers.48.block_sparse_moe.experts.76.w2", "model.layers.48.block_sparse_moe.experts.77.w2", "model.layers.48.block_sparse_moe.experts.78.w2", "model.layers.48.block_sparse_moe.experts.79.w2", "model.layers.48.block_sparse_moe.experts.80.w2", "model.layers.48.block_sparse_moe.experts.81.w2", "model.layers.48.block_sparse_moe.experts.82.w2", "model.layers.48.block_sparse_moe.experts.83.w2", "model.layers.48.block_sparse_moe.experts.84.w2", "model.layers.48.block_sparse_moe.experts.85.w2", "model.layers.48.block_sparse_moe.experts.86.w2", "model.layers.48.block_sparse_moe.experts.87.w2", "model.layers.48.block_sparse_moe.experts.88.w2", "model.layers.48.block_sparse_moe.experts.89.w2", "model.layers.48.block_sparse_moe.experts.90.w2", "model.layers.48.block_sparse_moe.experts.91.w2", "model.layers.48.block_sparse_moe.experts.92.w2", "model.layers.48.block_sparse_moe.experts.93.w2", "model.layers.48.block_sparse_moe.experts.94.w2", "model.layers.48.block_sparse_moe.experts.95.w2", "model.layers.48.block_sparse_moe.experts.96.w2", "model.layers.48.block_sparse_moe.experts.97.w2", "model.layers.48.block_sparse_moe.experts.98.w2", "model.layers.48.block_sparse_moe.experts.99.w2", "model.layers.48.block_sparse_moe.experts.100.w2", "model.layers.48.block_sparse_moe.experts.101.w2", "model.layers.48.block_sparse_moe.experts.102.w2", "model.layers.48.block_sparse_moe.experts.103.w2", "model.layers.48.block_sparse_moe.experts.104.w2", "model.layers.48.block_sparse_moe.experts.105.w2", "model.layers.48.block_sparse_moe.experts.106.w2", "model.layers.48.block_sparse_moe.experts.107.w2", "model.layers.48.block_sparse_moe.experts.108.w2", "model.layers.48.block_sparse_moe.experts.109.w2", "model.layers.48.block_sparse_moe.experts.110.w2", "model.layers.48.block_sparse_moe.experts.111.w2", "model.layers.48.block_sparse_moe.experts.112.w2", "model.layers.48.block_sparse_moe.experts.113.w2", "model.layers.48.block_sparse_moe.experts.114.w2", "model.layers.48.block_sparse_moe.experts.115.w2", "model.layers.48.block_sparse_moe.experts.116.w2", "model.layers.48.block_sparse_moe.experts.117.w2", "model.layers.48.block_sparse_moe.experts.118.w2", "model.layers.48.block_sparse_moe.experts.119.w2", "model.layers.48.block_sparse_moe.experts.120.w2", "model.layers.48.block_sparse_moe.experts.121.w2", "model.layers.48.block_sparse_moe.experts.122.w2", "model.layers.48.block_sparse_moe.experts.123.w2", "model.layers.48.block_sparse_moe.experts.124.w2", "model.layers.48.block_sparse_moe.experts.125.w2", "model.layers.48.block_sparse_moe.experts.126.w2", "model.layers.48.block_sparse_moe.experts.127.w2", "model.layers.48.block_sparse_moe.experts.128.w2", "model.layers.48.block_sparse_moe.experts.129.w2", "model.layers.48.block_sparse_moe.experts.130.w2", "model.layers.48.block_sparse_moe.experts.131.w2", "model.layers.48.block_sparse_moe.experts.132.w2", "model.layers.48.block_sparse_moe.experts.133.w2", "model.layers.48.block_sparse_moe.experts.134.w2", "model.layers.48.block_sparse_moe.experts.135.w2", "model.layers.48.block_sparse_moe.experts.136.w2", "model.layers.48.block_sparse_moe.experts.137.w2", "model.layers.48.block_sparse_moe.experts.138.w2", "model.layers.48.block_sparse_moe.experts.139.w2", "model.layers.48.block_sparse_moe.experts.140.w2", "model.layers.48.block_sparse_moe.experts.141.w2", "model.layers.48.block_sparse_moe.experts.142.w2", "model.layers.48.block_sparse_moe.experts.143.w2", "model.layers.48.block_sparse_moe.experts.144.w2", "model.layers.48.block_sparse_moe.experts.145.w2", "model.layers.48.block_sparse_moe.experts.146.w2", "model.layers.48.block_sparse_moe.experts.147.w2", "model.layers.48.block_sparse_moe.experts.148.w2", "model.layers.48.block_sparse_moe.experts.149.w2", "model.layers.48.block_sparse_moe.experts.150.w2", "model.layers.48.block_sparse_moe.experts.151.w2", "model.layers.48.block_sparse_moe.experts.152.w2", "model.layers.48.block_sparse_moe.experts.153.w2", "model.layers.48.block_sparse_moe.experts.154.w2", "model.layers.48.block_sparse_moe.experts.155.w2", "model.layers.48.block_sparse_moe.experts.156.w2", "model.layers.48.block_sparse_moe.experts.157.w2", "model.layers.48.block_sparse_moe.experts.158.w2", "model.layers.48.block_sparse_moe.experts.159.w2", "model.layers.48.block_sparse_moe.experts.160.w2", "model.layers.48.block_sparse_moe.experts.161.w2", "model.layers.48.block_sparse_moe.experts.162.w2", "model.layers.48.block_sparse_moe.experts.163.w2", "model.layers.48.block_sparse_moe.experts.164.w2", "model.layers.48.block_sparse_moe.experts.165.w2", "model.layers.48.block_sparse_moe.experts.166.w2", "model.layers.48.block_sparse_moe.experts.167.w2", "model.layers.48.block_sparse_moe.experts.168.w2", "model.layers.48.block_sparse_moe.experts.169.w2", "model.layers.48.block_sparse_moe.experts.170.w2", "model.layers.48.block_sparse_moe.experts.171.w2", "model.layers.48.block_sparse_moe.experts.172.w2", "model.layers.48.block_sparse_moe.experts.173.w2", "model.layers.48.block_sparse_moe.experts.174.w2", "model.layers.48.block_sparse_moe.experts.175.w2", "model.layers.48.block_sparse_moe.experts.176.w2", "model.layers.48.block_sparse_moe.experts.177.w2", "model.layers.48.block_sparse_moe.experts.178.w2", "model.layers.48.block_sparse_moe.experts.179.w2", "model.layers.48.block_sparse_moe.experts.180.w2", "model.layers.48.block_sparse_moe.experts.181.w2", "model.layers.48.block_sparse_moe.experts.182.w2", "model.layers.48.block_sparse_moe.experts.183.w2", "model.layers.48.block_sparse_moe.experts.184.w2", "model.layers.48.block_sparse_moe.experts.185.w2", "model.layers.48.block_sparse_moe.experts.186.w2", "model.layers.48.block_sparse_moe.experts.187.w2", "model.layers.48.block_sparse_moe.experts.188.w2", "model.layers.48.block_sparse_moe.experts.189.w2", "model.layers.48.block_sparse_moe.experts.190.w2", "model.layers.48.block_sparse_moe.experts.191.w2", "model.layers.48.block_sparse_moe.experts.192.w2", "model.layers.48.block_sparse_moe.experts.193.w2", "model.layers.48.block_sparse_moe.experts.194.w2", "model.layers.48.block_sparse_moe.experts.195.w2", "model.layers.48.block_sparse_moe.experts.196.w2", "model.layers.48.block_sparse_moe.experts.197.w2", "model.layers.48.block_sparse_moe.experts.198.w2", "model.layers.48.block_sparse_moe.experts.199.w2", "model.layers.48.block_sparse_moe.experts.200.w2", "model.layers.48.block_sparse_moe.experts.201.w2", "model.layers.48.block_sparse_moe.experts.202.w2", "model.layers.48.block_sparse_moe.experts.203.w2", "model.layers.48.block_sparse_moe.experts.204.w2", "model.layers.48.block_sparse_moe.experts.205.w2", "model.layers.48.block_sparse_moe.experts.206.w2", "model.layers.48.block_sparse_moe.experts.207.w2", "model.layers.48.block_sparse_moe.experts.208.w2", "model.layers.48.block_sparse_moe.experts.209.w2", "model.layers.48.block_sparse_moe.experts.210.w2", "model.layers.48.block_sparse_moe.experts.211.w2", "model.layers.48.block_sparse_moe.experts.212.w2", "model.layers.48.block_sparse_moe.experts.213.w2", "model.layers.48.block_sparse_moe.experts.214.w2", "model.layers.48.block_sparse_moe.experts.215.w2", "model.layers.48.block_sparse_moe.experts.216.w2", "model.layers.48.block_sparse_moe.experts.217.w2", "model.layers.48.block_sparse_moe.experts.218.w2", "model.layers.48.block_sparse_moe.experts.219.w2", "model.layers.48.block_sparse_moe.experts.220.w2", "model.layers.48.block_sparse_moe.experts.221.w2", "model.layers.48.block_sparse_moe.experts.222.w2", "model.layers.48.block_sparse_moe.experts.223.w2", "model.layers.48.block_sparse_moe.experts.224.w2", "model.layers.48.block_sparse_moe.experts.225.w2", "model.layers.48.block_sparse_moe.experts.226.w2", "model.layers.48.block_sparse_moe.experts.227.w2", "model.layers.48.block_sparse_moe.experts.228.w2", "model.layers.48.block_sparse_moe.experts.229.w2", "model.layers.48.block_sparse_moe.experts.230.w2", "model.layers.48.block_sparse_moe.experts.231.w2", "model.layers.48.block_sparse_moe.experts.232.w2", "model.layers.48.block_sparse_moe.experts.233.w2", "model.layers.48.block_sparse_moe.experts.234.w2", "model.layers.48.block_sparse_moe.experts.235.w2", "model.layers.48.block_sparse_moe.experts.236.w2", "model.layers.48.block_sparse_moe.experts.237.w2", "model.layers.48.block_sparse_moe.experts.238.w2", "model.layers.48.block_sparse_moe.experts.239.w2", "model.layers.48.block_sparse_moe.experts.240.w2", "model.layers.48.block_sparse_moe.experts.241.w2", "model.layers.48.block_sparse_moe.experts.242.w2", "model.layers.48.block_sparse_moe.experts.243.w2", "model.layers.48.block_sparse_moe.experts.244.w2", "model.layers.48.block_sparse_moe.experts.245.w2", "model.layers.48.block_sparse_moe.experts.246.w2", "model.layers.48.block_sparse_moe.experts.247.w2", "model.layers.48.block_sparse_moe.experts.248.w2", "model.layers.48.block_sparse_moe.experts.249.w2", "model.layers.48.block_sparse_moe.experts.250.w2", "model.layers.48.block_sparse_moe.experts.251.w2", "model.layers.48.block_sparse_moe.experts.252.w2", "model.layers.48.block_sparse_moe.experts.253.w2", "model.layers.48.block_sparse_moe.experts.254.w2", "model.layers.48.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.792003452772311e-06, "dbits": 1207959552 } ] }, { "idx": 245, "layers": [ "model.layers.49.self_attn.q_proj" ], "candidates": [ { "dkld": -3.016553819179535e-06, "dbits": 18874368 } ] }, { "idx": 246, "layers": [ "model.layers.49.self_attn.k_proj", "model.layers.49.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00020412374287843982, "dbits": 6291456 } ] }, { "idx": 247, "layers": [ "model.layers.49.self_attn.o_proj" ], "candidates": [ { "dkld": 8.316673338412683e-05, "dbits": 18874368 } ] }, { "idx": 248, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w1", "model.layers.49.block_sparse_moe.experts.1.w1", "model.layers.49.block_sparse_moe.experts.2.w1", "model.layers.49.block_sparse_moe.experts.3.w1", "model.layers.49.block_sparse_moe.experts.4.w1", "model.layers.49.block_sparse_moe.experts.5.w1", "model.layers.49.block_sparse_moe.experts.6.w1", "model.layers.49.block_sparse_moe.experts.7.w1", "model.layers.49.block_sparse_moe.experts.8.w1", "model.layers.49.block_sparse_moe.experts.9.w1", "model.layers.49.block_sparse_moe.experts.10.w1", "model.layers.49.block_sparse_moe.experts.11.w1", "model.layers.49.block_sparse_moe.experts.12.w1", "model.layers.49.block_sparse_moe.experts.13.w1", "model.layers.49.block_sparse_moe.experts.14.w1", "model.layers.49.block_sparse_moe.experts.15.w1", "model.layers.49.block_sparse_moe.experts.16.w1", "model.layers.49.block_sparse_moe.experts.17.w1", "model.layers.49.block_sparse_moe.experts.18.w1", "model.layers.49.block_sparse_moe.experts.19.w1", "model.layers.49.block_sparse_moe.experts.20.w1", "model.layers.49.block_sparse_moe.experts.21.w1", "model.layers.49.block_sparse_moe.experts.22.w1", "model.layers.49.block_sparse_moe.experts.23.w1", "model.layers.49.block_sparse_moe.experts.24.w1", "model.layers.49.block_sparse_moe.experts.25.w1", "model.layers.49.block_sparse_moe.experts.26.w1", "model.layers.49.block_sparse_moe.experts.27.w1", "model.layers.49.block_sparse_moe.experts.28.w1", "model.layers.49.block_sparse_moe.experts.29.w1", "model.layers.49.block_sparse_moe.experts.30.w1", "model.layers.49.block_sparse_moe.experts.31.w1", "model.layers.49.block_sparse_moe.experts.32.w1", "model.layers.49.block_sparse_moe.experts.33.w1", "model.layers.49.block_sparse_moe.experts.34.w1", "model.layers.49.block_sparse_moe.experts.35.w1", "model.layers.49.block_sparse_moe.experts.36.w1", "model.layers.49.block_sparse_moe.experts.37.w1", "model.layers.49.block_sparse_moe.experts.38.w1", "model.layers.49.block_sparse_moe.experts.39.w1", "model.layers.49.block_sparse_moe.experts.40.w1", "model.layers.49.block_sparse_moe.experts.41.w1", "model.layers.49.block_sparse_moe.experts.42.w1", "model.layers.49.block_sparse_moe.experts.43.w1", "model.layers.49.block_sparse_moe.experts.44.w1", "model.layers.49.block_sparse_moe.experts.45.w1", "model.layers.49.block_sparse_moe.experts.46.w1", "model.layers.49.block_sparse_moe.experts.47.w1", "model.layers.49.block_sparse_moe.experts.48.w1", "model.layers.49.block_sparse_moe.experts.49.w1", "model.layers.49.block_sparse_moe.experts.50.w1", "model.layers.49.block_sparse_moe.experts.51.w1", "model.layers.49.block_sparse_moe.experts.52.w1", "model.layers.49.block_sparse_moe.experts.53.w1", "model.layers.49.block_sparse_moe.experts.54.w1", "model.layers.49.block_sparse_moe.experts.55.w1", "model.layers.49.block_sparse_moe.experts.56.w1", "model.layers.49.block_sparse_moe.experts.57.w1", "model.layers.49.block_sparse_moe.experts.58.w1", "model.layers.49.block_sparse_moe.experts.59.w1", "model.layers.49.block_sparse_moe.experts.60.w1", "model.layers.49.block_sparse_moe.experts.61.w1", "model.layers.49.block_sparse_moe.experts.62.w1", "model.layers.49.block_sparse_moe.experts.63.w1", "model.layers.49.block_sparse_moe.experts.64.w1", "model.layers.49.block_sparse_moe.experts.65.w1", "model.layers.49.block_sparse_moe.experts.66.w1", "model.layers.49.block_sparse_moe.experts.67.w1", "model.layers.49.block_sparse_moe.experts.68.w1", "model.layers.49.block_sparse_moe.experts.69.w1", "model.layers.49.block_sparse_moe.experts.70.w1", "model.layers.49.block_sparse_moe.experts.71.w1", "model.layers.49.block_sparse_moe.experts.72.w1", "model.layers.49.block_sparse_moe.experts.73.w1", "model.layers.49.block_sparse_moe.experts.74.w1", "model.layers.49.block_sparse_moe.experts.75.w1", "model.layers.49.block_sparse_moe.experts.76.w1", "model.layers.49.block_sparse_moe.experts.77.w1", "model.layers.49.block_sparse_moe.experts.78.w1", "model.layers.49.block_sparse_moe.experts.79.w1", "model.layers.49.block_sparse_moe.experts.80.w1", "model.layers.49.block_sparse_moe.experts.81.w1", "model.layers.49.block_sparse_moe.experts.82.w1", "model.layers.49.block_sparse_moe.experts.83.w1", "model.layers.49.block_sparse_moe.experts.84.w1", "model.layers.49.block_sparse_moe.experts.85.w1", "model.layers.49.block_sparse_moe.experts.86.w1", "model.layers.49.block_sparse_moe.experts.87.w1", "model.layers.49.block_sparse_moe.experts.88.w1", "model.layers.49.block_sparse_moe.experts.89.w1", "model.layers.49.block_sparse_moe.experts.90.w1", "model.layers.49.block_sparse_moe.experts.91.w1", "model.layers.49.block_sparse_moe.experts.92.w1", "model.layers.49.block_sparse_moe.experts.93.w1", "model.layers.49.block_sparse_moe.experts.94.w1", "model.layers.49.block_sparse_moe.experts.95.w1", "model.layers.49.block_sparse_moe.experts.96.w1", "model.layers.49.block_sparse_moe.experts.97.w1", "model.layers.49.block_sparse_moe.experts.98.w1", "model.layers.49.block_sparse_moe.experts.99.w1", "model.layers.49.block_sparse_moe.experts.100.w1", "model.layers.49.block_sparse_moe.experts.101.w1", "model.layers.49.block_sparse_moe.experts.102.w1", "model.layers.49.block_sparse_moe.experts.103.w1", "model.layers.49.block_sparse_moe.experts.104.w1", "model.layers.49.block_sparse_moe.experts.105.w1", "model.layers.49.block_sparse_moe.experts.106.w1", "model.layers.49.block_sparse_moe.experts.107.w1", "model.layers.49.block_sparse_moe.experts.108.w1", "model.layers.49.block_sparse_moe.experts.109.w1", "model.layers.49.block_sparse_moe.experts.110.w1", "model.layers.49.block_sparse_moe.experts.111.w1", "model.layers.49.block_sparse_moe.experts.112.w1", "model.layers.49.block_sparse_moe.experts.113.w1", "model.layers.49.block_sparse_moe.experts.114.w1", "model.layers.49.block_sparse_moe.experts.115.w1", "model.layers.49.block_sparse_moe.experts.116.w1", "model.layers.49.block_sparse_moe.experts.117.w1", "model.layers.49.block_sparse_moe.experts.118.w1", "model.layers.49.block_sparse_moe.experts.119.w1", "model.layers.49.block_sparse_moe.experts.120.w1", "model.layers.49.block_sparse_moe.experts.121.w1", "model.layers.49.block_sparse_moe.experts.122.w1", "model.layers.49.block_sparse_moe.experts.123.w1", "model.layers.49.block_sparse_moe.experts.124.w1", "model.layers.49.block_sparse_moe.experts.125.w1", "model.layers.49.block_sparse_moe.experts.126.w1", "model.layers.49.block_sparse_moe.experts.127.w1", "model.layers.49.block_sparse_moe.experts.128.w1", "model.layers.49.block_sparse_moe.experts.129.w1", "model.layers.49.block_sparse_moe.experts.130.w1", "model.layers.49.block_sparse_moe.experts.131.w1", "model.layers.49.block_sparse_moe.experts.132.w1", "model.layers.49.block_sparse_moe.experts.133.w1", "model.layers.49.block_sparse_moe.experts.134.w1", "model.layers.49.block_sparse_moe.experts.135.w1", "model.layers.49.block_sparse_moe.experts.136.w1", "model.layers.49.block_sparse_moe.experts.137.w1", "model.layers.49.block_sparse_moe.experts.138.w1", "model.layers.49.block_sparse_moe.experts.139.w1", "model.layers.49.block_sparse_moe.experts.140.w1", "model.layers.49.block_sparse_moe.experts.141.w1", "model.layers.49.block_sparse_moe.experts.142.w1", "model.layers.49.block_sparse_moe.experts.143.w1", "model.layers.49.block_sparse_moe.experts.144.w1", "model.layers.49.block_sparse_moe.experts.145.w1", "model.layers.49.block_sparse_moe.experts.146.w1", "model.layers.49.block_sparse_moe.experts.147.w1", "model.layers.49.block_sparse_moe.experts.148.w1", "model.layers.49.block_sparse_moe.experts.149.w1", "model.layers.49.block_sparse_moe.experts.150.w1", "model.layers.49.block_sparse_moe.experts.151.w1", "model.layers.49.block_sparse_moe.experts.152.w1", "model.layers.49.block_sparse_moe.experts.153.w1", "model.layers.49.block_sparse_moe.experts.154.w1", "model.layers.49.block_sparse_moe.experts.155.w1", "model.layers.49.block_sparse_moe.experts.156.w1", "model.layers.49.block_sparse_moe.experts.157.w1", "model.layers.49.block_sparse_moe.experts.158.w1", "model.layers.49.block_sparse_moe.experts.159.w1", "model.layers.49.block_sparse_moe.experts.160.w1", "model.layers.49.block_sparse_moe.experts.161.w1", "model.layers.49.block_sparse_moe.experts.162.w1", "model.layers.49.block_sparse_moe.experts.163.w1", "model.layers.49.block_sparse_moe.experts.164.w1", "model.layers.49.block_sparse_moe.experts.165.w1", "model.layers.49.block_sparse_moe.experts.166.w1", "model.layers.49.block_sparse_moe.experts.167.w1", "model.layers.49.block_sparse_moe.experts.168.w1", "model.layers.49.block_sparse_moe.experts.169.w1", "model.layers.49.block_sparse_moe.experts.170.w1", "model.layers.49.block_sparse_moe.experts.171.w1", "model.layers.49.block_sparse_moe.experts.172.w1", "model.layers.49.block_sparse_moe.experts.173.w1", "model.layers.49.block_sparse_moe.experts.174.w1", "model.layers.49.block_sparse_moe.experts.175.w1", "model.layers.49.block_sparse_moe.experts.176.w1", "model.layers.49.block_sparse_moe.experts.177.w1", "model.layers.49.block_sparse_moe.experts.178.w1", "model.layers.49.block_sparse_moe.experts.179.w1", "model.layers.49.block_sparse_moe.experts.180.w1", "model.layers.49.block_sparse_moe.experts.181.w1", "model.layers.49.block_sparse_moe.experts.182.w1", "model.layers.49.block_sparse_moe.experts.183.w1", "model.layers.49.block_sparse_moe.experts.184.w1", "model.layers.49.block_sparse_moe.experts.185.w1", "model.layers.49.block_sparse_moe.experts.186.w1", "model.layers.49.block_sparse_moe.experts.187.w1", "model.layers.49.block_sparse_moe.experts.188.w1", "model.layers.49.block_sparse_moe.experts.189.w1", "model.layers.49.block_sparse_moe.experts.190.w1", "model.layers.49.block_sparse_moe.experts.191.w1", "model.layers.49.block_sparse_moe.experts.192.w1", "model.layers.49.block_sparse_moe.experts.193.w1", "model.layers.49.block_sparse_moe.experts.194.w1", "model.layers.49.block_sparse_moe.experts.195.w1", "model.layers.49.block_sparse_moe.experts.196.w1", "model.layers.49.block_sparse_moe.experts.197.w1", "model.layers.49.block_sparse_moe.experts.198.w1", "model.layers.49.block_sparse_moe.experts.199.w1", "model.layers.49.block_sparse_moe.experts.200.w1", "model.layers.49.block_sparse_moe.experts.201.w1", "model.layers.49.block_sparse_moe.experts.202.w1", "model.layers.49.block_sparse_moe.experts.203.w1", "model.layers.49.block_sparse_moe.experts.204.w1", "model.layers.49.block_sparse_moe.experts.205.w1", "model.layers.49.block_sparse_moe.experts.206.w1", "model.layers.49.block_sparse_moe.experts.207.w1", "model.layers.49.block_sparse_moe.experts.208.w1", "model.layers.49.block_sparse_moe.experts.209.w1", "model.layers.49.block_sparse_moe.experts.210.w1", "model.layers.49.block_sparse_moe.experts.211.w1", "model.layers.49.block_sparse_moe.experts.212.w1", "model.layers.49.block_sparse_moe.experts.213.w1", "model.layers.49.block_sparse_moe.experts.214.w1", "model.layers.49.block_sparse_moe.experts.215.w1", "model.layers.49.block_sparse_moe.experts.216.w1", "model.layers.49.block_sparse_moe.experts.217.w1", "model.layers.49.block_sparse_moe.experts.218.w1", "model.layers.49.block_sparse_moe.experts.219.w1", "model.layers.49.block_sparse_moe.experts.220.w1", "model.layers.49.block_sparse_moe.experts.221.w1", "model.layers.49.block_sparse_moe.experts.222.w1", "model.layers.49.block_sparse_moe.experts.223.w1", "model.layers.49.block_sparse_moe.experts.224.w1", "model.layers.49.block_sparse_moe.experts.225.w1", "model.layers.49.block_sparse_moe.experts.226.w1", "model.layers.49.block_sparse_moe.experts.227.w1", "model.layers.49.block_sparse_moe.experts.228.w1", "model.layers.49.block_sparse_moe.experts.229.w1", "model.layers.49.block_sparse_moe.experts.230.w1", "model.layers.49.block_sparse_moe.experts.231.w1", "model.layers.49.block_sparse_moe.experts.232.w1", "model.layers.49.block_sparse_moe.experts.233.w1", "model.layers.49.block_sparse_moe.experts.234.w1", "model.layers.49.block_sparse_moe.experts.235.w1", "model.layers.49.block_sparse_moe.experts.236.w1", "model.layers.49.block_sparse_moe.experts.237.w1", "model.layers.49.block_sparse_moe.experts.238.w1", "model.layers.49.block_sparse_moe.experts.239.w1", "model.layers.49.block_sparse_moe.experts.240.w1", "model.layers.49.block_sparse_moe.experts.241.w1", "model.layers.49.block_sparse_moe.experts.242.w1", "model.layers.49.block_sparse_moe.experts.243.w1", "model.layers.49.block_sparse_moe.experts.244.w1", "model.layers.49.block_sparse_moe.experts.245.w1", "model.layers.49.block_sparse_moe.experts.246.w1", "model.layers.49.block_sparse_moe.experts.247.w1", "model.layers.49.block_sparse_moe.experts.248.w1", "model.layers.49.block_sparse_moe.experts.249.w1", "model.layers.49.block_sparse_moe.experts.250.w1", "model.layers.49.block_sparse_moe.experts.251.w1", "model.layers.49.block_sparse_moe.experts.252.w1", "model.layers.49.block_sparse_moe.experts.253.w1", "model.layers.49.block_sparse_moe.experts.254.w1", "model.layers.49.block_sparse_moe.experts.255.w1", "model.layers.49.block_sparse_moe.experts.0.w3", "model.layers.49.block_sparse_moe.experts.1.w3", "model.layers.49.block_sparse_moe.experts.2.w3", "model.layers.49.block_sparse_moe.experts.3.w3", "model.layers.49.block_sparse_moe.experts.4.w3", "model.layers.49.block_sparse_moe.experts.5.w3", "model.layers.49.block_sparse_moe.experts.6.w3", "model.layers.49.block_sparse_moe.experts.7.w3", "model.layers.49.block_sparse_moe.experts.8.w3", "model.layers.49.block_sparse_moe.experts.9.w3", "model.layers.49.block_sparse_moe.experts.10.w3", "model.layers.49.block_sparse_moe.experts.11.w3", "model.layers.49.block_sparse_moe.experts.12.w3", "model.layers.49.block_sparse_moe.experts.13.w3", "model.layers.49.block_sparse_moe.experts.14.w3", "model.layers.49.block_sparse_moe.experts.15.w3", "model.layers.49.block_sparse_moe.experts.16.w3", "model.layers.49.block_sparse_moe.experts.17.w3", "model.layers.49.block_sparse_moe.experts.18.w3", "model.layers.49.block_sparse_moe.experts.19.w3", "model.layers.49.block_sparse_moe.experts.20.w3", "model.layers.49.block_sparse_moe.experts.21.w3", "model.layers.49.block_sparse_moe.experts.22.w3", "model.layers.49.block_sparse_moe.experts.23.w3", "model.layers.49.block_sparse_moe.experts.24.w3", "model.layers.49.block_sparse_moe.experts.25.w3", "model.layers.49.block_sparse_moe.experts.26.w3", "model.layers.49.block_sparse_moe.experts.27.w3", "model.layers.49.block_sparse_moe.experts.28.w3", "model.layers.49.block_sparse_moe.experts.29.w3", "model.layers.49.block_sparse_moe.experts.30.w3", "model.layers.49.block_sparse_moe.experts.31.w3", "model.layers.49.block_sparse_moe.experts.32.w3", "model.layers.49.block_sparse_moe.experts.33.w3", "model.layers.49.block_sparse_moe.experts.34.w3", "model.layers.49.block_sparse_moe.experts.35.w3", "model.layers.49.block_sparse_moe.experts.36.w3", "model.layers.49.block_sparse_moe.experts.37.w3", "model.layers.49.block_sparse_moe.experts.38.w3", "model.layers.49.block_sparse_moe.experts.39.w3", "model.layers.49.block_sparse_moe.experts.40.w3", "model.layers.49.block_sparse_moe.experts.41.w3", "model.layers.49.block_sparse_moe.experts.42.w3", "model.layers.49.block_sparse_moe.experts.43.w3", "model.layers.49.block_sparse_moe.experts.44.w3", "model.layers.49.block_sparse_moe.experts.45.w3", "model.layers.49.block_sparse_moe.experts.46.w3", "model.layers.49.block_sparse_moe.experts.47.w3", "model.layers.49.block_sparse_moe.experts.48.w3", "model.layers.49.block_sparse_moe.experts.49.w3", "model.layers.49.block_sparse_moe.experts.50.w3", "model.layers.49.block_sparse_moe.experts.51.w3", "model.layers.49.block_sparse_moe.experts.52.w3", "model.layers.49.block_sparse_moe.experts.53.w3", "model.layers.49.block_sparse_moe.experts.54.w3", "model.layers.49.block_sparse_moe.experts.55.w3", "model.layers.49.block_sparse_moe.experts.56.w3", "model.layers.49.block_sparse_moe.experts.57.w3", "model.layers.49.block_sparse_moe.experts.58.w3", "model.layers.49.block_sparse_moe.experts.59.w3", "model.layers.49.block_sparse_moe.experts.60.w3", "model.layers.49.block_sparse_moe.experts.61.w3", "model.layers.49.block_sparse_moe.experts.62.w3", "model.layers.49.block_sparse_moe.experts.63.w3", "model.layers.49.block_sparse_moe.experts.64.w3", "model.layers.49.block_sparse_moe.experts.65.w3", "model.layers.49.block_sparse_moe.experts.66.w3", "model.layers.49.block_sparse_moe.experts.67.w3", "model.layers.49.block_sparse_moe.experts.68.w3", "model.layers.49.block_sparse_moe.experts.69.w3", "model.layers.49.block_sparse_moe.experts.70.w3", "model.layers.49.block_sparse_moe.experts.71.w3", "model.layers.49.block_sparse_moe.experts.72.w3", "model.layers.49.block_sparse_moe.experts.73.w3", "model.layers.49.block_sparse_moe.experts.74.w3", "model.layers.49.block_sparse_moe.experts.75.w3", "model.layers.49.block_sparse_moe.experts.76.w3", "model.layers.49.block_sparse_moe.experts.77.w3", "model.layers.49.block_sparse_moe.experts.78.w3", "model.layers.49.block_sparse_moe.experts.79.w3", "model.layers.49.block_sparse_moe.experts.80.w3", "model.layers.49.block_sparse_moe.experts.81.w3", "model.layers.49.block_sparse_moe.experts.82.w3", "model.layers.49.block_sparse_moe.experts.83.w3", "model.layers.49.block_sparse_moe.experts.84.w3", "model.layers.49.block_sparse_moe.experts.85.w3", "model.layers.49.block_sparse_moe.experts.86.w3", "model.layers.49.block_sparse_moe.experts.87.w3", "model.layers.49.block_sparse_moe.experts.88.w3", "model.layers.49.block_sparse_moe.experts.89.w3", "model.layers.49.block_sparse_moe.experts.90.w3", "model.layers.49.block_sparse_moe.experts.91.w3", "model.layers.49.block_sparse_moe.experts.92.w3", "model.layers.49.block_sparse_moe.experts.93.w3", "model.layers.49.block_sparse_moe.experts.94.w3", "model.layers.49.block_sparse_moe.experts.95.w3", "model.layers.49.block_sparse_moe.experts.96.w3", "model.layers.49.block_sparse_moe.experts.97.w3", "model.layers.49.block_sparse_moe.experts.98.w3", "model.layers.49.block_sparse_moe.experts.99.w3", "model.layers.49.block_sparse_moe.experts.100.w3", "model.layers.49.block_sparse_moe.experts.101.w3", "model.layers.49.block_sparse_moe.experts.102.w3", "model.layers.49.block_sparse_moe.experts.103.w3", "model.layers.49.block_sparse_moe.experts.104.w3", "model.layers.49.block_sparse_moe.experts.105.w3", "model.layers.49.block_sparse_moe.experts.106.w3", "model.layers.49.block_sparse_moe.experts.107.w3", "model.layers.49.block_sparse_moe.experts.108.w3", "model.layers.49.block_sparse_moe.experts.109.w3", "model.layers.49.block_sparse_moe.experts.110.w3", "model.layers.49.block_sparse_moe.experts.111.w3", "model.layers.49.block_sparse_moe.experts.112.w3", "model.layers.49.block_sparse_moe.experts.113.w3", "model.layers.49.block_sparse_moe.experts.114.w3", "model.layers.49.block_sparse_moe.experts.115.w3", "model.layers.49.block_sparse_moe.experts.116.w3", "model.layers.49.block_sparse_moe.experts.117.w3", "model.layers.49.block_sparse_moe.experts.118.w3", "model.layers.49.block_sparse_moe.experts.119.w3", "model.layers.49.block_sparse_moe.experts.120.w3", "model.layers.49.block_sparse_moe.experts.121.w3", "model.layers.49.block_sparse_moe.experts.122.w3", "model.layers.49.block_sparse_moe.experts.123.w3", "model.layers.49.block_sparse_moe.experts.124.w3", "model.layers.49.block_sparse_moe.experts.125.w3", "model.layers.49.block_sparse_moe.experts.126.w3", "model.layers.49.block_sparse_moe.experts.127.w3", "model.layers.49.block_sparse_moe.experts.128.w3", "model.layers.49.block_sparse_moe.experts.129.w3", "model.layers.49.block_sparse_moe.experts.130.w3", "model.layers.49.block_sparse_moe.experts.131.w3", "model.layers.49.block_sparse_moe.experts.132.w3", "model.layers.49.block_sparse_moe.experts.133.w3", "model.layers.49.block_sparse_moe.experts.134.w3", "model.layers.49.block_sparse_moe.experts.135.w3", "model.layers.49.block_sparse_moe.experts.136.w3", "model.layers.49.block_sparse_moe.experts.137.w3", "model.layers.49.block_sparse_moe.experts.138.w3", "model.layers.49.block_sparse_moe.experts.139.w3", "model.layers.49.block_sparse_moe.experts.140.w3", "model.layers.49.block_sparse_moe.experts.141.w3", "model.layers.49.block_sparse_moe.experts.142.w3", "model.layers.49.block_sparse_moe.experts.143.w3", "model.layers.49.block_sparse_moe.experts.144.w3", "model.layers.49.block_sparse_moe.experts.145.w3", "model.layers.49.block_sparse_moe.experts.146.w3", "model.layers.49.block_sparse_moe.experts.147.w3", "model.layers.49.block_sparse_moe.experts.148.w3", "model.layers.49.block_sparse_moe.experts.149.w3", "model.layers.49.block_sparse_moe.experts.150.w3", "model.layers.49.block_sparse_moe.experts.151.w3", "model.layers.49.block_sparse_moe.experts.152.w3", "model.layers.49.block_sparse_moe.experts.153.w3", "model.layers.49.block_sparse_moe.experts.154.w3", "model.layers.49.block_sparse_moe.experts.155.w3", "model.layers.49.block_sparse_moe.experts.156.w3", "model.layers.49.block_sparse_moe.experts.157.w3", "model.layers.49.block_sparse_moe.experts.158.w3", "model.layers.49.block_sparse_moe.experts.159.w3", "model.layers.49.block_sparse_moe.experts.160.w3", "model.layers.49.block_sparse_moe.experts.161.w3", "model.layers.49.block_sparse_moe.experts.162.w3", "model.layers.49.block_sparse_moe.experts.163.w3", "model.layers.49.block_sparse_moe.experts.164.w3", "model.layers.49.block_sparse_moe.experts.165.w3", "model.layers.49.block_sparse_moe.experts.166.w3", "model.layers.49.block_sparse_moe.experts.167.w3", "model.layers.49.block_sparse_moe.experts.168.w3", "model.layers.49.block_sparse_moe.experts.169.w3", "model.layers.49.block_sparse_moe.experts.170.w3", "model.layers.49.block_sparse_moe.experts.171.w3", "model.layers.49.block_sparse_moe.experts.172.w3", "model.layers.49.block_sparse_moe.experts.173.w3", "model.layers.49.block_sparse_moe.experts.174.w3", "model.layers.49.block_sparse_moe.experts.175.w3", "model.layers.49.block_sparse_moe.experts.176.w3", "model.layers.49.block_sparse_moe.experts.177.w3", "model.layers.49.block_sparse_moe.experts.178.w3", "model.layers.49.block_sparse_moe.experts.179.w3", "model.layers.49.block_sparse_moe.experts.180.w3", "model.layers.49.block_sparse_moe.experts.181.w3", "model.layers.49.block_sparse_moe.experts.182.w3", "model.layers.49.block_sparse_moe.experts.183.w3", "model.layers.49.block_sparse_moe.experts.184.w3", "model.layers.49.block_sparse_moe.experts.185.w3", "model.layers.49.block_sparse_moe.experts.186.w3", "model.layers.49.block_sparse_moe.experts.187.w3", "model.layers.49.block_sparse_moe.experts.188.w3", "model.layers.49.block_sparse_moe.experts.189.w3", "model.layers.49.block_sparse_moe.experts.190.w3", "model.layers.49.block_sparse_moe.experts.191.w3", "model.layers.49.block_sparse_moe.experts.192.w3", "model.layers.49.block_sparse_moe.experts.193.w3", "model.layers.49.block_sparse_moe.experts.194.w3", "model.layers.49.block_sparse_moe.experts.195.w3", "model.layers.49.block_sparse_moe.experts.196.w3", "model.layers.49.block_sparse_moe.experts.197.w3", "model.layers.49.block_sparse_moe.experts.198.w3", "model.layers.49.block_sparse_moe.experts.199.w3", "model.layers.49.block_sparse_moe.experts.200.w3", "model.layers.49.block_sparse_moe.experts.201.w3", "model.layers.49.block_sparse_moe.experts.202.w3", "model.layers.49.block_sparse_moe.experts.203.w3", "model.layers.49.block_sparse_moe.experts.204.w3", "model.layers.49.block_sparse_moe.experts.205.w3", "model.layers.49.block_sparse_moe.experts.206.w3", "model.layers.49.block_sparse_moe.experts.207.w3", "model.layers.49.block_sparse_moe.experts.208.w3", "model.layers.49.block_sparse_moe.experts.209.w3", "model.layers.49.block_sparse_moe.experts.210.w3", "model.layers.49.block_sparse_moe.experts.211.w3", "model.layers.49.block_sparse_moe.experts.212.w3", "model.layers.49.block_sparse_moe.experts.213.w3", "model.layers.49.block_sparse_moe.experts.214.w3", "model.layers.49.block_sparse_moe.experts.215.w3", "model.layers.49.block_sparse_moe.experts.216.w3", "model.layers.49.block_sparse_moe.experts.217.w3", "model.layers.49.block_sparse_moe.experts.218.w3", "model.layers.49.block_sparse_moe.experts.219.w3", "model.layers.49.block_sparse_moe.experts.220.w3", "model.layers.49.block_sparse_moe.experts.221.w3", "model.layers.49.block_sparse_moe.experts.222.w3", "model.layers.49.block_sparse_moe.experts.223.w3", "model.layers.49.block_sparse_moe.experts.224.w3", "model.layers.49.block_sparse_moe.experts.225.w3", "model.layers.49.block_sparse_moe.experts.226.w3", "model.layers.49.block_sparse_moe.experts.227.w3", "model.layers.49.block_sparse_moe.experts.228.w3", "model.layers.49.block_sparse_moe.experts.229.w3", "model.layers.49.block_sparse_moe.experts.230.w3", "model.layers.49.block_sparse_moe.experts.231.w3", "model.layers.49.block_sparse_moe.experts.232.w3", "model.layers.49.block_sparse_moe.experts.233.w3", "model.layers.49.block_sparse_moe.experts.234.w3", "model.layers.49.block_sparse_moe.experts.235.w3", "model.layers.49.block_sparse_moe.experts.236.w3", "model.layers.49.block_sparse_moe.experts.237.w3", "model.layers.49.block_sparse_moe.experts.238.w3", "model.layers.49.block_sparse_moe.experts.239.w3", "model.layers.49.block_sparse_moe.experts.240.w3", "model.layers.49.block_sparse_moe.experts.241.w3", "model.layers.49.block_sparse_moe.experts.242.w3", "model.layers.49.block_sparse_moe.experts.243.w3", "model.layers.49.block_sparse_moe.experts.244.w3", "model.layers.49.block_sparse_moe.experts.245.w3", "model.layers.49.block_sparse_moe.experts.246.w3", "model.layers.49.block_sparse_moe.experts.247.w3", "model.layers.49.block_sparse_moe.experts.248.w3", "model.layers.49.block_sparse_moe.experts.249.w3", "model.layers.49.block_sparse_moe.experts.250.w3", "model.layers.49.block_sparse_moe.experts.251.w3", "model.layers.49.block_sparse_moe.experts.252.w3", "model.layers.49.block_sparse_moe.experts.253.w3", "model.layers.49.block_sparse_moe.experts.254.w3", "model.layers.49.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -1.0527670383453369e-05, "dbits": 2415919104 } ] }, { "idx": 249, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w2", "model.layers.49.block_sparse_moe.experts.1.w2", "model.layers.49.block_sparse_moe.experts.2.w2", "model.layers.49.block_sparse_moe.experts.3.w2", "model.layers.49.block_sparse_moe.experts.4.w2", "model.layers.49.block_sparse_moe.experts.5.w2", "model.layers.49.block_sparse_moe.experts.6.w2", "model.layers.49.block_sparse_moe.experts.7.w2", "model.layers.49.block_sparse_moe.experts.8.w2", "model.layers.49.block_sparse_moe.experts.9.w2", "model.layers.49.block_sparse_moe.experts.10.w2", "model.layers.49.block_sparse_moe.experts.11.w2", "model.layers.49.block_sparse_moe.experts.12.w2", "model.layers.49.block_sparse_moe.experts.13.w2", "model.layers.49.block_sparse_moe.experts.14.w2", "model.layers.49.block_sparse_moe.experts.15.w2", "model.layers.49.block_sparse_moe.experts.16.w2", "model.layers.49.block_sparse_moe.experts.17.w2", "model.layers.49.block_sparse_moe.experts.18.w2", "model.layers.49.block_sparse_moe.experts.19.w2", "model.layers.49.block_sparse_moe.experts.20.w2", "model.layers.49.block_sparse_moe.experts.21.w2", "model.layers.49.block_sparse_moe.experts.22.w2", "model.layers.49.block_sparse_moe.experts.23.w2", "model.layers.49.block_sparse_moe.experts.24.w2", "model.layers.49.block_sparse_moe.experts.25.w2", "model.layers.49.block_sparse_moe.experts.26.w2", "model.layers.49.block_sparse_moe.experts.27.w2", "model.layers.49.block_sparse_moe.experts.28.w2", "model.layers.49.block_sparse_moe.experts.29.w2", "model.layers.49.block_sparse_moe.experts.30.w2", "model.layers.49.block_sparse_moe.experts.31.w2", "model.layers.49.block_sparse_moe.experts.32.w2", "model.layers.49.block_sparse_moe.experts.33.w2", "model.layers.49.block_sparse_moe.experts.34.w2", "model.layers.49.block_sparse_moe.experts.35.w2", "model.layers.49.block_sparse_moe.experts.36.w2", "model.layers.49.block_sparse_moe.experts.37.w2", "model.layers.49.block_sparse_moe.experts.38.w2", "model.layers.49.block_sparse_moe.experts.39.w2", "model.layers.49.block_sparse_moe.experts.40.w2", "model.layers.49.block_sparse_moe.experts.41.w2", "model.layers.49.block_sparse_moe.experts.42.w2", "model.layers.49.block_sparse_moe.experts.43.w2", "model.layers.49.block_sparse_moe.experts.44.w2", "model.layers.49.block_sparse_moe.experts.45.w2", "model.layers.49.block_sparse_moe.experts.46.w2", "model.layers.49.block_sparse_moe.experts.47.w2", "model.layers.49.block_sparse_moe.experts.48.w2", "model.layers.49.block_sparse_moe.experts.49.w2", "model.layers.49.block_sparse_moe.experts.50.w2", "model.layers.49.block_sparse_moe.experts.51.w2", "model.layers.49.block_sparse_moe.experts.52.w2", "model.layers.49.block_sparse_moe.experts.53.w2", "model.layers.49.block_sparse_moe.experts.54.w2", "model.layers.49.block_sparse_moe.experts.55.w2", "model.layers.49.block_sparse_moe.experts.56.w2", "model.layers.49.block_sparse_moe.experts.57.w2", "model.layers.49.block_sparse_moe.experts.58.w2", "model.layers.49.block_sparse_moe.experts.59.w2", "model.layers.49.block_sparse_moe.experts.60.w2", "model.layers.49.block_sparse_moe.experts.61.w2", "model.layers.49.block_sparse_moe.experts.62.w2", "model.layers.49.block_sparse_moe.experts.63.w2", "model.layers.49.block_sparse_moe.experts.64.w2", "model.layers.49.block_sparse_moe.experts.65.w2", "model.layers.49.block_sparse_moe.experts.66.w2", "model.layers.49.block_sparse_moe.experts.67.w2", "model.layers.49.block_sparse_moe.experts.68.w2", "model.layers.49.block_sparse_moe.experts.69.w2", "model.layers.49.block_sparse_moe.experts.70.w2", "model.layers.49.block_sparse_moe.experts.71.w2", "model.layers.49.block_sparse_moe.experts.72.w2", "model.layers.49.block_sparse_moe.experts.73.w2", "model.layers.49.block_sparse_moe.experts.74.w2", "model.layers.49.block_sparse_moe.experts.75.w2", "model.layers.49.block_sparse_moe.experts.76.w2", "model.layers.49.block_sparse_moe.experts.77.w2", "model.layers.49.block_sparse_moe.experts.78.w2", "model.layers.49.block_sparse_moe.experts.79.w2", "model.layers.49.block_sparse_moe.experts.80.w2", "model.layers.49.block_sparse_moe.experts.81.w2", "model.layers.49.block_sparse_moe.experts.82.w2", "model.layers.49.block_sparse_moe.experts.83.w2", "model.layers.49.block_sparse_moe.experts.84.w2", "model.layers.49.block_sparse_moe.experts.85.w2", "model.layers.49.block_sparse_moe.experts.86.w2", "model.layers.49.block_sparse_moe.experts.87.w2", "model.layers.49.block_sparse_moe.experts.88.w2", "model.layers.49.block_sparse_moe.experts.89.w2", "model.layers.49.block_sparse_moe.experts.90.w2", "model.layers.49.block_sparse_moe.experts.91.w2", "model.layers.49.block_sparse_moe.experts.92.w2", "model.layers.49.block_sparse_moe.experts.93.w2", "model.layers.49.block_sparse_moe.experts.94.w2", "model.layers.49.block_sparse_moe.experts.95.w2", "model.layers.49.block_sparse_moe.experts.96.w2", "model.layers.49.block_sparse_moe.experts.97.w2", "model.layers.49.block_sparse_moe.experts.98.w2", "model.layers.49.block_sparse_moe.experts.99.w2", "model.layers.49.block_sparse_moe.experts.100.w2", "model.layers.49.block_sparse_moe.experts.101.w2", "model.layers.49.block_sparse_moe.experts.102.w2", "model.layers.49.block_sparse_moe.experts.103.w2", "model.layers.49.block_sparse_moe.experts.104.w2", "model.layers.49.block_sparse_moe.experts.105.w2", "model.layers.49.block_sparse_moe.experts.106.w2", "model.layers.49.block_sparse_moe.experts.107.w2", "model.layers.49.block_sparse_moe.experts.108.w2", "model.layers.49.block_sparse_moe.experts.109.w2", "model.layers.49.block_sparse_moe.experts.110.w2", "model.layers.49.block_sparse_moe.experts.111.w2", "model.layers.49.block_sparse_moe.experts.112.w2", "model.layers.49.block_sparse_moe.experts.113.w2", "model.layers.49.block_sparse_moe.experts.114.w2", "model.layers.49.block_sparse_moe.experts.115.w2", "model.layers.49.block_sparse_moe.experts.116.w2", "model.layers.49.block_sparse_moe.experts.117.w2", "model.layers.49.block_sparse_moe.experts.118.w2", "model.layers.49.block_sparse_moe.experts.119.w2", "model.layers.49.block_sparse_moe.experts.120.w2", "model.layers.49.block_sparse_moe.experts.121.w2", "model.layers.49.block_sparse_moe.experts.122.w2", "model.layers.49.block_sparse_moe.experts.123.w2", "model.layers.49.block_sparse_moe.experts.124.w2", "model.layers.49.block_sparse_moe.experts.125.w2", "model.layers.49.block_sparse_moe.experts.126.w2", "model.layers.49.block_sparse_moe.experts.127.w2", "model.layers.49.block_sparse_moe.experts.128.w2", "model.layers.49.block_sparse_moe.experts.129.w2", "model.layers.49.block_sparse_moe.experts.130.w2", "model.layers.49.block_sparse_moe.experts.131.w2", "model.layers.49.block_sparse_moe.experts.132.w2", "model.layers.49.block_sparse_moe.experts.133.w2", "model.layers.49.block_sparse_moe.experts.134.w2", "model.layers.49.block_sparse_moe.experts.135.w2", "model.layers.49.block_sparse_moe.experts.136.w2", "model.layers.49.block_sparse_moe.experts.137.w2", "model.layers.49.block_sparse_moe.experts.138.w2", "model.layers.49.block_sparse_moe.experts.139.w2", "model.layers.49.block_sparse_moe.experts.140.w2", "model.layers.49.block_sparse_moe.experts.141.w2", "model.layers.49.block_sparse_moe.experts.142.w2", "model.layers.49.block_sparse_moe.experts.143.w2", "model.layers.49.block_sparse_moe.experts.144.w2", "model.layers.49.block_sparse_moe.experts.145.w2", "model.layers.49.block_sparse_moe.experts.146.w2", "model.layers.49.block_sparse_moe.experts.147.w2", "model.layers.49.block_sparse_moe.experts.148.w2", "model.layers.49.block_sparse_moe.experts.149.w2", "model.layers.49.block_sparse_moe.experts.150.w2", "model.layers.49.block_sparse_moe.experts.151.w2", "model.layers.49.block_sparse_moe.experts.152.w2", "model.layers.49.block_sparse_moe.experts.153.w2", "model.layers.49.block_sparse_moe.experts.154.w2", "model.layers.49.block_sparse_moe.experts.155.w2", "model.layers.49.block_sparse_moe.experts.156.w2", "model.layers.49.block_sparse_moe.experts.157.w2", "model.layers.49.block_sparse_moe.experts.158.w2", "model.layers.49.block_sparse_moe.experts.159.w2", "model.layers.49.block_sparse_moe.experts.160.w2", "model.layers.49.block_sparse_moe.experts.161.w2", "model.layers.49.block_sparse_moe.experts.162.w2", "model.layers.49.block_sparse_moe.experts.163.w2", "model.layers.49.block_sparse_moe.experts.164.w2", "model.layers.49.block_sparse_moe.experts.165.w2", "model.layers.49.block_sparse_moe.experts.166.w2", "model.layers.49.block_sparse_moe.experts.167.w2", "model.layers.49.block_sparse_moe.experts.168.w2", "model.layers.49.block_sparse_moe.experts.169.w2", "model.layers.49.block_sparse_moe.experts.170.w2", "model.layers.49.block_sparse_moe.experts.171.w2", "model.layers.49.block_sparse_moe.experts.172.w2", "model.layers.49.block_sparse_moe.experts.173.w2", "model.layers.49.block_sparse_moe.experts.174.w2", "model.layers.49.block_sparse_moe.experts.175.w2", "model.layers.49.block_sparse_moe.experts.176.w2", "model.layers.49.block_sparse_moe.experts.177.w2", "model.layers.49.block_sparse_moe.experts.178.w2", "model.layers.49.block_sparse_moe.experts.179.w2", "model.layers.49.block_sparse_moe.experts.180.w2", "model.layers.49.block_sparse_moe.experts.181.w2", "model.layers.49.block_sparse_moe.experts.182.w2", "model.layers.49.block_sparse_moe.experts.183.w2", "model.layers.49.block_sparse_moe.experts.184.w2", "model.layers.49.block_sparse_moe.experts.185.w2", "model.layers.49.block_sparse_moe.experts.186.w2", "model.layers.49.block_sparse_moe.experts.187.w2", "model.layers.49.block_sparse_moe.experts.188.w2", "model.layers.49.block_sparse_moe.experts.189.w2", "model.layers.49.block_sparse_moe.experts.190.w2", "model.layers.49.block_sparse_moe.experts.191.w2", "model.layers.49.block_sparse_moe.experts.192.w2", "model.layers.49.block_sparse_moe.experts.193.w2", "model.layers.49.block_sparse_moe.experts.194.w2", "model.layers.49.block_sparse_moe.experts.195.w2", "model.layers.49.block_sparse_moe.experts.196.w2", "model.layers.49.block_sparse_moe.experts.197.w2", "model.layers.49.block_sparse_moe.experts.198.w2", "model.layers.49.block_sparse_moe.experts.199.w2", "model.layers.49.block_sparse_moe.experts.200.w2", "model.layers.49.block_sparse_moe.experts.201.w2", "model.layers.49.block_sparse_moe.experts.202.w2", "model.layers.49.block_sparse_moe.experts.203.w2", "model.layers.49.block_sparse_moe.experts.204.w2", "model.layers.49.block_sparse_moe.experts.205.w2", "model.layers.49.block_sparse_moe.experts.206.w2", "model.layers.49.block_sparse_moe.experts.207.w2", "model.layers.49.block_sparse_moe.experts.208.w2", "model.layers.49.block_sparse_moe.experts.209.w2", "model.layers.49.block_sparse_moe.experts.210.w2", "model.layers.49.block_sparse_moe.experts.211.w2", "model.layers.49.block_sparse_moe.experts.212.w2", "model.layers.49.block_sparse_moe.experts.213.w2", "model.layers.49.block_sparse_moe.experts.214.w2", "model.layers.49.block_sparse_moe.experts.215.w2", "model.layers.49.block_sparse_moe.experts.216.w2", "model.layers.49.block_sparse_moe.experts.217.w2", "model.layers.49.block_sparse_moe.experts.218.w2", "model.layers.49.block_sparse_moe.experts.219.w2", "model.layers.49.block_sparse_moe.experts.220.w2", "model.layers.49.block_sparse_moe.experts.221.w2", "model.layers.49.block_sparse_moe.experts.222.w2", "model.layers.49.block_sparse_moe.experts.223.w2", "model.layers.49.block_sparse_moe.experts.224.w2", "model.layers.49.block_sparse_moe.experts.225.w2", "model.layers.49.block_sparse_moe.experts.226.w2", "model.layers.49.block_sparse_moe.experts.227.w2", "model.layers.49.block_sparse_moe.experts.228.w2", "model.layers.49.block_sparse_moe.experts.229.w2", "model.layers.49.block_sparse_moe.experts.230.w2", "model.layers.49.block_sparse_moe.experts.231.w2", "model.layers.49.block_sparse_moe.experts.232.w2", "model.layers.49.block_sparse_moe.experts.233.w2", "model.layers.49.block_sparse_moe.experts.234.w2", "model.layers.49.block_sparse_moe.experts.235.w2", "model.layers.49.block_sparse_moe.experts.236.w2", "model.layers.49.block_sparse_moe.experts.237.w2", "model.layers.49.block_sparse_moe.experts.238.w2", "model.layers.49.block_sparse_moe.experts.239.w2", "model.layers.49.block_sparse_moe.experts.240.w2", "model.layers.49.block_sparse_moe.experts.241.w2", "model.layers.49.block_sparse_moe.experts.242.w2", "model.layers.49.block_sparse_moe.experts.243.w2", "model.layers.49.block_sparse_moe.experts.244.w2", "model.layers.49.block_sparse_moe.experts.245.w2", "model.layers.49.block_sparse_moe.experts.246.w2", "model.layers.49.block_sparse_moe.experts.247.w2", "model.layers.49.block_sparse_moe.experts.248.w2", "model.layers.49.block_sparse_moe.experts.249.w2", "model.layers.49.block_sparse_moe.experts.250.w2", "model.layers.49.block_sparse_moe.experts.251.w2", "model.layers.49.block_sparse_moe.experts.252.w2", "model.layers.49.block_sparse_moe.experts.253.w2", "model.layers.49.block_sparse_moe.experts.254.w2", "model.layers.49.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -4.317015409470437e-05, "dbits": 1207959552 } ] }, { "idx": 250, "layers": [ "model.layers.50.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00023286566138266962, "dbits": 18874368 } ] }, { "idx": 251, "layers": [ "model.layers.50.self_attn.k_proj", "model.layers.50.self_attn.v_proj" ], "candidates": [ { "dkld": 0.000248185358941555, "dbits": 6291456 } ] }, { "idx": 252, "layers": [ "model.layers.50.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0007236965000629453, "dbits": 18874368 } ] }, { "idx": 253, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w1", "model.layers.50.block_sparse_moe.experts.1.w1", "model.layers.50.block_sparse_moe.experts.2.w1", "model.layers.50.block_sparse_moe.experts.3.w1", "model.layers.50.block_sparse_moe.experts.4.w1", "model.layers.50.block_sparse_moe.experts.5.w1", "model.layers.50.block_sparse_moe.experts.6.w1", "model.layers.50.block_sparse_moe.experts.7.w1", "model.layers.50.block_sparse_moe.experts.8.w1", "model.layers.50.block_sparse_moe.experts.9.w1", "model.layers.50.block_sparse_moe.experts.10.w1", "model.layers.50.block_sparse_moe.experts.11.w1", "model.layers.50.block_sparse_moe.experts.12.w1", "model.layers.50.block_sparse_moe.experts.13.w1", "model.layers.50.block_sparse_moe.experts.14.w1", "model.layers.50.block_sparse_moe.experts.15.w1", "model.layers.50.block_sparse_moe.experts.16.w1", "model.layers.50.block_sparse_moe.experts.17.w1", "model.layers.50.block_sparse_moe.experts.18.w1", "model.layers.50.block_sparse_moe.experts.19.w1", "model.layers.50.block_sparse_moe.experts.20.w1", "model.layers.50.block_sparse_moe.experts.21.w1", "model.layers.50.block_sparse_moe.experts.22.w1", "model.layers.50.block_sparse_moe.experts.23.w1", "model.layers.50.block_sparse_moe.experts.24.w1", "model.layers.50.block_sparse_moe.experts.25.w1", "model.layers.50.block_sparse_moe.experts.26.w1", "model.layers.50.block_sparse_moe.experts.27.w1", "model.layers.50.block_sparse_moe.experts.28.w1", "model.layers.50.block_sparse_moe.experts.29.w1", "model.layers.50.block_sparse_moe.experts.30.w1", "model.layers.50.block_sparse_moe.experts.31.w1", "model.layers.50.block_sparse_moe.experts.32.w1", "model.layers.50.block_sparse_moe.experts.33.w1", "model.layers.50.block_sparse_moe.experts.34.w1", "model.layers.50.block_sparse_moe.experts.35.w1", "model.layers.50.block_sparse_moe.experts.36.w1", "model.layers.50.block_sparse_moe.experts.37.w1", "model.layers.50.block_sparse_moe.experts.38.w1", "model.layers.50.block_sparse_moe.experts.39.w1", "model.layers.50.block_sparse_moe.experts.40.w1", "model.layers.50.block_sparse_moe.experts.41.w1", "model.layers.50.block_sparse_moe.experts.42.w1", "model.layers.50.block_sparse_moe.experts.43.w1", "model.layers.50.block_sparse_moe.experts.44.w1", "model.layers.50.block_sparse_moe.experts.45.w1", "model.layers.50.block_sparse_moe.experts.46.w1", "model.layers.50.block_sparse_moe.experts.47.w1", "model.layers.50.block_sparse_moe.experts.48.w1", "model.layers.50.block_sparse_moe.experts.49.w1", "model.layers.50.block_sparse_moe.experts.50.w1", "model.layers.50.block_sparse_moe.experts.51.w1", "model.layers.50.block_sparse_moe.experts.52.w1", "model.layers.50.block_sparse_moe.experts.53.w1", "model.layers.50.block_sparse_moe.experts.54.w1", "model.layers.50.block_sparse_moe.experts.55.w1", "model.layers.50.block_sparse_moe.experts.56.w1", "model.layers.50.block_sparse_moe.experts.57.w1", "model.layers.50.block_sparse_moe.experts.58.w1", "model.layers.50.block_sparse_moe.experts.59.w1", "model.layers.50.block_sparse_moe.experts.60.w1", "model.layers.50.block_sparse_moe.experts.61.w1", "model.layers.50.block_sparse_moe.experts.62.w1", "model.layers.50.block_sparse_moe.experts.63.w1", "model.layers.50.block_sparse_moe.experts.64.w1", "model.layers.50.block_sparse_moe.experts.65.w1", "model.layers.50.block_sparse_moe.experts.66.w1", "model.layers.50.block_sparse_moe.experts.67.w1", "model.layers.50.block_sparse_moe.experts.68.w1", "model.layers.50.block_sparse_moe.experts.69.w1", "model.layers.50.block_sparse_moe.experts.70.w1", "model.layers.50.block_sparse_moe.experts.71.w1", "model.layers.50.block_sparse_moe.experts.72.w1", "model.layers.50.block_sparse_moe.experts.73.w1", "model.layers.50.block_sparse_moe.experts.74.w1", "model.layers.50.block_sparse_moe.experts.75.w1", "model.layers.50.block_sparse_moe.experts.76.w1", "model.layers.50.block_sparse_moe.experts.77.w1", "model.layers.50.block_sparse_moe.experts.78.w1", "model.layers.50.block_sparse_moe.experts.79.w1", "model.layers.50.block_sparse_moe.experts.80.w1", "model.layers.50.block_sparse_moe.experts.81.w1", "model.layers.50.block_sparse_moe.experts.82.w1", "model.layers.50.block_sparse_moe.experts.83.w1", "model.layers.50.block_sparse_moe.experts.84.w1", "model.layers.50.block_sparse_moe.experts.85.w1", "model.layers.50.block_sparse_moe.experts.86.w1", "model.layers.50.block_sparse_moe.experts.87.w1", "model.layers.50.block_sparse_moe.experts.88.w1", "model.layers.50.block_sparse_moe.experts.89.w1", "model.layers.50.block_sparse_moe.experts.90.w1", "model.layers.50.block_sparse_moe.experts.91.w1", "model.layers.50.block_sparse_moe.experts.92.w1", "model.layers.50.block_sparse_moe.experts.93.w1", "model.layers.50.block_sparse_moe.experts.94.w1", "model.layers.50.block_sparse_moe.experts.95.w1", "model.layers.50.block_sparse_moe.experts.96.w1", "model.layers.50.block_sparse_moe.experts.97.w1", "model.layers.50.block_sparse_moe.experts.98.w1", "model.layers.50.block_sparse_moe.experts.99.w1", "model.layers.50.block_sparse_moe.experts.100.w1", "model.layers.50.block_sparse_moe.experts.101.w1", "model.layers.50.block_sparse_moe.experts.102.w1", "model.layers.50.block_sparse_moe.experts.103.w1", "model.layers.50.block_sparse_moe.experts.104.w1", "model.layers.50.block_sparse_moe.experts.105.w1", "model.layers.50.block_sparse_moe.experts.106.w1", "model.layers.50.block_sparse_moe.experts.107.w1", "model.layers.50.block_sparse_moe.experts.108.w1", "model.layers.50.block_sparse_moe.experts.109.w1", "model.layers.50.block_sparse_moe.experts.110.w1", "model.layers.50.block_sparse_moe.experts.111.w1", "model.layers.50.block_sparse_moe.experts.112.w1", "model.layers.50.block_sparse_moe.experts.113.w1", "model.layers.50.block_sparse_moe.experts.114.w1", "model.layers.50.block_sparse_moe.experts.115.w1", "model.layers.50.block_sparse_moe.experts.116.w1", "model.layers.50.block_sparse_moe.experts.117.w1", "model.layers.50.block_sparse_moe.experts.118.w1", "model.layers.50.block_sparse_moe.experts.119.w1", "model.layers.50.block_sparse_moe.experts.120.w1", "model.layers.50.block_sparse_moe.experts.121.w1", "model.layers.50.block_sparse_moe.experts.122.w1", "model.layers.50.block_sparse_moe.experts.123.w1", "model.layers.50.block_sparse_moe.experts.124.w1", "model.layers.50.block_sparse_moe.experts.125.w1", "model.layers.50.block_sparse_moe.experts.126.w1", "model.layers.50.block_sparse_moe.experts.127.w1", "model.layers.50.block_sparse_moe.experts.128.w1", "model.layers.50.block_sparse_moe.experts.129.w1", "model.layers.50.block_sparse_moe.experts.130.w1", "model.layers.50.block_sparse_moe.experts.131.w1", "model.layers.50.block_sparse_moe.experts.132.w1", "model.layers.50.block_sparse_moe.experts.133.w1", "model.layers.50.block_sparse_moe.experts.134.w1", "model.layers.50.block_sparse_moe.experts.135.w1", "model.layers.50.block_sparse_moe.experts.136.w1", "model.layers.50.block_sparse_moe.experts.137.w1", "model.layers.50.block_sparse_moe.experts.138.w1", "model.layers.50.block_sparse_moe.experts.139.w1", "model.layers.50.block_sparse_moe.experts.140.w1", "model.layers.50.block_sparse_moe.experts.141.w1", "model.layers.50.block_sparse_moe.experts.142.w1", "model.layers.50.block_sparse_moe.experts.143.w1", "model.layers.50.block_sparse_moe.experts.144.w1", "model.layers.50.block_sparse_moe.experts.145.w1", "model.layers.50.block_sparse_moe.experts.146.w1", "model.layers.50.block_sparse_moe.experts.147.w1", "model.layers.50.block_sparse_moe.experts.148.w1", "model.layers.50.block_sparse_moe.experts.149.w1", "model.layers.50.block_sparse_moe.experts.150.w1", "model.layers.50.block_sparse_moe.experts.151.w1", "model.layers.50.block_sparse_moe.experts.152.w1", "model.layers.50.block_sparse_moe.experts.153.w1", "model.layers.50.block_sparse_moe.experts.154.w1", "model.layers.50.block_sparse_moe.experts.155.w1", "model.layers.50.block_sparse_moe.experts.156.w1", "model.layers.50.block_sparse_moe.experts.157.w1", "model.layers.50.block_sparse_moe.experts.158.w1", "model.layers.50.block_sparse_moe.experts.159.w1", "model.layers.50.block_sparse_moe.experts.160.w1", "model.layers.50.block_sparse_moe.experts.161.w1", "model.layers.50.block_sparse_moe.experts.162.w1", "model.layers.50.block_sparse_moe.experts.163.w1", "model.layers.50.block_sparse_moe.experts.164.w1", "model.layers.50.block_sparse_moe.experts.165.w1", "model.layers.50.block_sparse_moe.experts.166.w1", "model.layers.50.block_sparse_moe.experts.167.w1", "model.layers.50.block_sparse_moe.experts.168.w1", "model.layers.50.block_sparse_moe.experts.169.w1", "model.layers.50.block_sparse_moe.experts.170.w1", "model.layers.50.block_sparse_moe.experts.171.w1", "model.layers.50.block_sparse_moe.experts.172.w1", "model.layers.50.block_sparse_moe.experts.173.w1", "model.layers.50.block_sparse_moe.experts.174.w1", "model.layers.50.block_sparse_moe.experts.175.w1", "model.layers.50.block_sparse_moe.experts.176.w1", "model.layers.50.block_sparse_moe.experts.177.w1", "model.layers.50.block_sparse_moe.experts.178.w1", "model.layers.50.block_sparse_moe.experts.179.w1", "model.layers.50.block_sparse_moe.experts.180.w1", "model.layers.50.block_sparse_moe.experts.181.w1", "model.layers.50.block_sparse_moe.experts.182.w1", "model.layers.50.block_sparse_moe.experts.183.w1", "model.layers.50.block_sparse_moe.experts.184.w1", "model.layers.50.block_sparse_moe.experts.185.w1", "model.layers.50.block_sparse_moe.experts.186.w1", "model.layers.50.block_sparse_moe.experts.187.w1", "model.layers.50.block_sparse_moe.experts.188.w1", "model.layers.50.block_sparse_moe.experts.189.w1", "model.layers.50.block_sparse_moe.experts.190.w1", "model.layers.50.block_sparse_moe.experts.191.w1", "model.layers.50.block_sparse_moe.experts.192.w1", "model.layers.50.block_sparse_moe.experts.193.w1", "model.layers.50.block_sparse_moe.experts.194.w1", "model.layers.50.block_sparse_moe.experts.195.w1", "model.layers.50.block_sparse_moe.experts.196.w1", "model.layers.50.block_sparse_moe.experts.197.w1", "model.layers.50.block_sparse_moe.experts.198.w1", "model.layers.50.block_sparse_moe.experts.199.w1", "model.layers.50.block_sparse_moe.experts.200.w1", "model.layers.50.block_sparse_moe.experts.201.w1", "model.layers.50.block_sparse_moe.experts.202.w1", "model.layers.50.block_sparse_moe.experts.203.w1", "model.layers.50.block_sparse_moe.experts.204.w1", "model.layers.50.block_sparse_moe.experts.205.w1", "model.layers.50.block_sparse_moe.experts.206.w1", "model.layers.50.block_sparse_moe.experts.207.w1", "model.layers.50.block_sparse_moe.experts.208.w1", "model.layers.50.block_sparse_moe.experts.209.w1", "model.layers.50.block_sparse_moe.experts.210.w1", "model.layers.50.block_sparse_moe.experts.211.w1", "model.layers.50.block_sparse_moe.experts.212.w1", "model.layers.50.block_sparse_moe.experts.213.w1", "model.layers.50.block_sparse_moe.experts.214.w1", "model.layers.50.block_sparse_moe.experts.215.w1", "model.layers.50.block_sparse_moe.experts.216.w1", "model.layers.50.block_sparse_moe.experts.217.w1", "model.layers.50.block_sparse_moe.experts.218.w1", "model.layers.50.block_sparse_moe.experts.219.w1", "model.layers.50.block_sparse_moe.experts.220.w1", "model.layers.50.block_sparse_moe.experts.221.w1", "model.layers.50.block_sparse_moe.experts.222.w1", "model.layers.50.block_sparse_moe.experts.223.w1", "model.layers.50.block_sparse_moe.experts.224.w1", "model.layers.50.block_sparse_moe.experts.225.w1", "model.layers.50.block_sparse_moe.experts.226.w1", "model.layers.50.block_sparse_moe.experts.227.w1", "model.layers.50.block_sparse_moe.experts.228.w1", "model.layers.50.block_sparse_moe.experts.229.w1", "model.layers.50.block_sparse_moe.experts.230.w1", "model.layers.50.block_sparse_moe.experts.231.w1", "model.layers.50.block_sparse_moe.experts.232.w1", "model.layers.50.block_sparse_moe.experts.233.w1", "model.layers.50.block_sparse_moe.experts.234.w1", "model.layers.50.block_sparse_moe.experts.235.w1", "model.layers.50.block_sparse_moe.experts.236.w1", "model.layers.50.block_sparse_moe.experts.237.w1", "model.layers.50.block_sparse_moe.experts.238.w1", "model.layers.50.block_sparse_moe.experts.239.w1", "model.layers.50.block_sparse_moe.experts.240.w1", "model.layers.50.block_sparse_moe.experts.241.w1", "model.layers.50.block_sparse_moe.experts.242.w1", "model.layers.50.block_sparse_moe.experts.243.w1", "model.layers.50.block_sparse_moe.experts.244.w1", "model.layers.50.block_sparse_moe.experts.245.w1", "model.layers.50.block_sparse_moe.experts.246.w1", "model.layers.50.block_sparse_moe.experts.247.w1", "model.layers.50.block_sparse_moe.experts.248.w1", "model.layers.50.block_sparse_moe.experts.249.w1", "model.layers.50.block_sparse_moe.experts.250.w1", "model.layers.50.block_sparse_moe.experts.251.w1", "model.layers.50.block_sparse_moe.experts.252.w1", "model.layers.50.block_sparse_moe.experts.253.w1", "model.layers.50.block_sparse_moe.experts.254.w1", "model.layers.50.block_sparse_moe.experts.255.w1", "model.layers.50.block_sparse_moe.experts.0.w3", "model.layers.50.block_sparse_moe.experts.1.w3", "model.layers.50.block_sparse_moe.experts.2.w3", "model.layers.50.block_sparse_moe.experts.3.w3", "model.layers.50.block_sparse_moe.experts.4.w3", "model.layers.50.block_sparse_moe.experts.5.w3", "model.layers.50.block_sparse_moe.experts.6.w3", "model.layers.50.block_sparse_moe.experts.7.w3", "model.layers.50.block_sparse_moe.experts.8.w3", "model.layers.50.block_sparse_moe.experts.9.w3", "model.layers.50.block_sparse_moe.experts.10.w3", "model.layers.50.block_sparse_moe.experts.11.w3", "model.layers.50.block_sparse_moe.experts.12.w3", "model.layers.50.block_sparse_moe.experts.13.w3", "model.layers.50.block_sparse_moe.experts.14.w3", "model.layers.50.block_sparse_moe.experts.15.w3", "model.layers.50.block_sparse_moe.experts.16.w3", "model.layers.50.block_sparse_moe.experts.17.w3", "model.layers.50.block_sparse_moe.experts.18.w3", "model.layers.50.block_sparse_moe.experts.19.w3", "model.layers.50.block_sparse_moe.experts.20.w3", "model.layers.50.block_sparse_moe.experts.21.w3", "model.layers.50.block_sparse_moe.experts.22.w3", "model.layers.50.block_sparse_moe.experts.23.w3", "model.layers.50.block_sparse_moe.experts.24.w3", "model.layers.50.block_sparse_moe.experts.25.w3", "model.layers.50.block_sparse_moe.experts.26.w3", "model.layers.50.block_sparse_moe.experts.27.w3", "model.layers.50.block_sparse_moe.experts.28.w3", "model.layers.50.block_sparse_moe.experts.29.w3", "model.layers.50.block_sparse_moe.experts.30.w3", "model.layers.50.block_sparse_moe.experts.31.w3", "model.layers.50.block_sparse_moe.experts.32.w3", "model.layers.50.block_sparse_moe.experts.33.w3", "model.layers.50.block_sparse_moe.experts.34.w3", "model.layers.50.block_sparse_moe.experts.35.w3", "model.layers.50.block_sparse_moe.experts.36.w3", "model.layers.50.block_sparse_moe.experts.37.w3", "model.layers.50.block_sparse_moe.experts.38.w3", "model.layers.50.block_sparse_moe.experts.39.w3", "model.layers.50.block_sparse_moe.experts.40.w3", "model.layers.50.block_sparse_moe.experts.41.w3", "model.layers.50.block_sparse_moe.experts.42.w3", "model.layers.50.block_sparse_moe.experts.43.w3", "model.layers.50.block_sparse_moe.experts.44.w3", "model.layers.50.block_sparse_moe.experts.45.w3", "model.layers.50.block_sparse_moe.experts.46.w3", "model.layers.50.block_sparse_moe.experts.47.w3", "model.layers.50.block_sparse_moe.experts.48.w3", "model.layers.50.block_sparse_moe.experts.49.w3", "model.layers.50.block_sparse_moe.experts.50.w3", "model.layers.50.block_sparse_moe.experts.51.w3", "model.layers.50.block_sparse_moe.experts.52.w3", "model.layers.50.block_sparse_moe.experts.53.w3", "model.layers.50.block_sparse_moe.experts.54.w3", "model.layers.50.block_sparse_moe.experts.55.w3", "model.layers.50.block_sparse_moe.experts.56.w3", "model.layers.50.block_sparse_moe.experts.57.w3", "model.layers.50.block_sparse_moe.experts.58.w3", "model.layers.50.block_sparse_moe.experts.59.w3", "model.layers.50.block_sparse_moe.experts.60.w3", "model.layers.50.block_sparse_moe.experts.61.w3", "model.layers.50.block_sparse_moe.experts.62.w3", "model.layers.50.block_sparse_moe.experts.63.w3", "model.layers.50.block_sparse_moe.experts.64.w3", "model.layers.50.block_sparse_moe.experts.65.w3", "model.layers.50.block_sparse_moe.experts.66.w3", "model.layers.50.block_sparse_moe.experts.67.w3", "model.layers.50.block_sparse_moe.experts.68.w3", "model.layers.50.block_sparse_moe.experts.69.w3", "model.layers.50.block_sparse_moe.experts.70.w3", "model.layers.50.block_sparse_moe.experts.71.w3", "model.layers.50.block_sparse_moe.experts.72.w3", "model.layers.50.block_sparse_moe.experts.73.w3", "model.layers.50.block_sparse_moe.experts.74.w3", "model.layers.50.block_sparse_moe.experts.75.w3", "model.layers.50.block_sparse_moe.experts.76.w3", "model.layers.50.block_sparse_moe.experts.77.w3", "model.layers.50.block_sparse_moe.experts.78.w3", "model.layers.50.block_sparse_moe.experts.79.w3", "model.layers.50.block_sparse_moe.experts.80.w3", "model.layers.50.block_sparse_moe.experts.81.w3", "model.layers.50.block_sparse_moe.experts.82.w3", "model.layers.50.block_sparse_moe.experts.83.w3", "model.layers.50.block_sparse_moe.experts.84.w3", "model.layers.50.block_sparse_moe.experts.85.w3", "model.layers.50.block_sparse_moe.experts.86.w3", "model.layers.50.block_sparse_moe.experts.87.w3", "model.layers.50.block_sparse_moe.experts.88.w3", "model.layers.50.block_sparse_moe.experts.89.w3", "model.layers.50.block_sparse_moe.experts.90.w3", "model.layers.50.block_sparse_moe.experts.91.w3", "model.layers.50.block_sparse_moe.experts.92.w3", "model.layers.50.block_sparse_moe.experts.93.w3", "model.layers.50.block_sparse_moe.experts.94.w3", "model.layers.50.block_sparse_moe.experts.95.w3", "model.layers.50.block_sparse_moe.experts.96.w3", "model.layers.50.block_sparse_moe.experts.97.w3", "model.layers.50.block_sparse_moe.experts.98.w3", "model.layers.50.block_sparse_moe.experts.99.w3", "model.layers.50.block_sparse_moe.experts.100.w3", "model.layers.50.block_sparse_moe.experts.101.w3", "model.layers.50.block_sparse_moe.experts.102.w3", "model.layers.50.block_sparse_moe.experts.103.w3", "model.layers.50.block_sparse_moe.experts.104.w3", "model.layers.50.block_sparse_moe.experts.105.w3", "model.layers.50.block_sparse_moe.experts.106.w3", "model.layers.50.block_sparse_moe.experts.107.w3", "model.layers.50.block_sparse_moe.experts.108.w3", "model.layers.50.block_sparse_moe.experts.109.w3", "model.layers.50.block_sparse_moe.experts.110.w3", "model.layers.50.block_sparse_moe.experts.111.w3", "model.layers.50.block_sparse_moe.experts.112.w3", "model.layers.50.block_sparse_moe.experts.113.w3", "model.layers.50.block_sparse_moe.experts.114.w3", "model.layers.50.block_sparse_moe.experts.115.w3", "model.layers.50.block_sparse_moe.experts.116.w3", "model.layers.50.block_sparse_moe.experts.117.w3", "model.layers.50.block_sparse_moe.experts.118.w3", "model.layers.50.block_sparse_moe.experts.119.w3", "model.layers.50.block_sparse_moe.experts.120.w3", "model.layers.50.block_sparse_moe.experts.121.w3", "model.layers.50.block_sparse_moe.experts.122.w3", "model.layers.50.block_sparse_moe.experts.123.w3", "model.layers.50.block_sparse_moe.experts.124.w3", "model.layers.50.block_sparse_moe.experts.125.w3", "model.layers.50.block_sparse_moe.experts.126.w3", "model.layers.50.block_sparse_moe.experts.127.w3", "model.layers.50.block_sparse_moe.experts.128.w3", "model.layers.50.block_sparse_moe.experts.129.w3", "model.layers.50.block_sparse_moe.experts.130.w3", "model.layers.50.block_sparse_moe.experts.131.w3", "model.layers.50.block_sparse_moe.experts.132.w3", "model.layers.50.block_sparse_moe.experts.133.w3", "model.layers.50.block_sparse_moe.experts.134.w3", "model.layers.50.block_sparse_moe.experts.135.w3", "model.layers.50.block_sparse_moe.experts.136.w3", "model.layers.50.block_sparse_moe.experts.137.w3", "model.layers.50.block_sparse_moe.experts.138.w3", "model.layers.50.block_sparse_moe.experts.139.w3", "model.layers.50.block_sparse_moe.experts.140.w3", "model.layers.50.block_sparse_moe.experts.141.w3", "model.layers.50.block_sparse_moe.experts.142.w3", "model.layers.50.block_sparse_moe.experts.143.w3", "model.layers.50.block_sparse_moe.experts.144.w3", "model.layers.50.block_sparse_moe.experts.145.w3", "model.layers.50.block_sparse_moe.experts.146.w3", "model.layers.50.block_sparse_moe.experts.147.w3", "model.layers.50.block_sparse_moe.experts.148.w3", "model.layers.50.block_sparse_moe.experts.149.w3", "model.layers.50.block_sparse_moe.experts.150.w3", "model.layers.50.block_sparse_moe.experts.151.w3", "model.layers.50.block_sparse_moe.experts.152.w3", "model.layers.50.block_sparse_moe.experts.153.w3", "model.layers.50.block_sparse_moe.experts.154.w3", "model.layers.50.block_sparse_moe.experts.155.w3", "model.layers.50.block_sparse_moe.experts.156.w3", "model.layers.50.block_sparse_moe.experts.157.w3", "model.layers.50.block_sparse_moe.experts.158.w3", "model.layers.50.block_sparse_moe.experts.159.w3", "model.layers.50.block_sparse_moe.experts.160.w3", "model.layers.50.block_sparse_moe.experts.161.w3", "model.layers.50.block_sparse_moe.experts.162.w3", "model.layers.50.block_sparse_moe.experts.163.w3", "model.layers.50.block_sparse_moe.experts.164.w3", "model.layers.50.block_sparse_moe.experts.165.w3", "model.layers.50.block_sparse_moe.experts.166.w3", "model.layers.50.block_sparse_moe.experts.167.w3", "model.layers.50.block_sparse_moe.experts.168.w3", "model.layers.50.block_sparse_moe.experts.169.w3", "model.layers.50.block_sparse_moe.experts.170.w3", "model.layers.50.block_sparse_moe.experts.171.w3", "model.layers.50.block_sparse_moe.experts.172.w3", "model.layers.50.block_sparse_moe.experts.173.w3", "model.layers.50.block_sparse_moe.experts.174.w3", "model.layers.50.block_sparse_moe.experts.175.w3", "model.layers.50.block_sparse_moe.experts.176.w3", "model.layers.50.block_sparse_moe.experts.177.w3", "model.layers.50.block_sparse_moe.experts.178.w3", "model.layers.50.block_sparse_moe.experts.179.w3", "model.layers.50.block_sparse_moe.experts.180.w3", "model.layers.50.block_sparse_moe.experts.181.w3", "model.layers.50.block_sparse_moe.experts.182.w3", "model.layers.50.block_sparse_moe.experts.183.w3", "model.layers.50.block_sparse_moe.experts.184.w3", "model.layers.50.block_sparse_moe.experts.185.w3", "model.layers.50.block_sparse_moe.experts.186.w3", "model.layers.50.block_sparse_moe.experts.187.w3", "model.layers.50.block_sparse_moe.experts.188.w3", "model.layers.50.block_sparse_moe.experts.189.w3", "model.layers.50.block_sparse_moe.experts.190.w3", "model.layers.50.block_sparse_moe.experts.191.w3", "model.layers.50.block_sparse_moe.experts.192.w3", "model.layers.50.block_sparse_moe.experts.193.w3", "model.layers.50.block_sparse_moe.experts.194.w3", "model.layers.50.block_sparse_moe.experts.195.w3", "model.layers.50.block_sparse_moe.experts.196.w3", "model.layers.50.block_sparse_moe.experts.197.w3", "model.layers.50.block_sparse_moe.experts.198.w3", "model.layers.50.block_sparse_moe.experts.199.w3", "model.layers.50.block_sparse_moe.experts.200.w3", "model.layers.50.block_sparse_moe.experts.201.w3", "model.layers.50.block_sparse_moe.experts.202.w3", "model.layers.50.block_sparse_moe.experts.203.w3", "model.layers.50.block_sparse_moe.experts.204.w3", "model.layers.50.block_sparse_moe.experts.205.w3", "model.layers.50.block_sparse_moe.experts.206.w3", "model.layers.50.block_sparse_moe.experts.207.w3", "model.layers.50.block_sparse_moe.experts.208.w3", "model.layers.50.block_sparse_moe.experts.209.w3", "model.layers.50.block_sparse_moe.experts.210.w3", "model.layers.50.block_sparse_moe.experts.211.w3", "model.layers.50.block_sparse_moe.experts.212.w3", "model.layers.50.block_sparse_moe.experts.213.w3", "model.layers.50.block_sparse_moe.experts.214.w3", "model.layers.50.block_sparse_moe.experts.215.w3", "model.layers.50.block_sparse_moe.experts.216.w3", "model.layers.50.block_sparse_moe.experts.217.w3", "model.layers.50.block_sparse_moe.experts.218.w3", "model.layers.50.block_sparse_moe.experts.219.w3", "model.layers.50.block_sparse_moe.experts.220.w3", "model.layers.50.block_sparse_moe.experts.221.w3", "model.layers.50.block_sparse_moe.experts.222.w3", "model.layers.50.block_sparse_moe.experts.223.w3", "model.layers.50.block_sparse_moe.experts.224.w3", "model.layers.50.block_sparse_moe.experts.225.w3", "model.layers.50.block_sparse_moe.experts.226.w3", "model.layers.50.block_sparse_moe.experts.227.w3", "model.layers.50.block_sparse_moe.experts.228.w3", "model.layers.50.block_sparse_moe.experts.229.w3", "model.layers.50.block_sparse_moe.experts.230.w3", "model.layers.50.block_sparse_moe.experts.231.w3", "model.layers.50.block_sparse_moe.experts.232.w3", "model.layers.50.block_sparse_moe.experts.233.w3", "model.layers.50.block_sparse_moe.experts.234.w3", "model.layers.50.block_sparse_moe.experts.235.w3", "model.layers.50.block_sparse_moe.experts.236.w3", "model.layers.50.block_sparse_moe.experts.237.w3", "model.layers.50.block_sparse_moe.experts.238.w3", "model.layers.50.block_sparse_moe.experts.239.w3", "model.layers.50.block_sparse_moe.experts.240.w3", "model.layers.50.block_sparse_moe.experts.241.w3", "model.layers.50.block_sparse_moe.experts.242.w3", "model.layers.50.block_sparse_moe.experts.243.w3", "model.layers.50.block_sparse_moe.experts.244.w3", "model.layers.50.block_sparse_moe.experts.245.w3", "model.layers.50.block_sparse_moe.experts.246.w3", "model.layers.50.block_sparse_moe.experts.247.w3", "model.layers.50.block_sparse_moe.experts.248.w3", "model.layers.50.block_sparse_moe.experts.249.w3", "model.layers.50.block_sparse_moe.experts.250.w3", "model.layers.50.block_sparse_moe.experts.251.w3", "model.layers.50.block_sparse_moe.experts.252.w3", "model.layers.50.block_sparse_moe.experts.253.w3", "model.layers.50.block_sparse_moe.experts.254.w3", "model.layers.50.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 5.840752273797434e-05, "dbits": 2415919104 } ] }, { "idx": 254, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w2", "model.layers.50.block_sparse_moe.experts.1.w2", "model.layers.50.block_sparse_moe.experts.2.w2", "model.layers.50.block_sparse_moe.experts.3.w2", "model.layers.50.block_sparse_moe.experts.4.w2", "model.layers.50.block_sparse_moe.experts.5.w2", "model.layers.50.block_sparse_moe.experts.6.w2", "model.layers.50.block_sparse_moe.experts.7.w2", "model.layers.50.block_sparse_moe.experts.8.w2", "model.layers.50.block_sparse_moe.experts.9.w2", "model.layers.50.block_sparse_moe.experts.10.w2", "model.layers.50.block_sparse_moe.experts.11.w2", "model.layers.50.block_sparse_moe.experts.12.w2", "model.layers.50.block_sparse_moe.experts.13.w2", "model.layers.50.block_sparse_moe.experts.14.w2", "model.layers.50.block_sparse_moe.experts.15.w2", "model.layers.50.block_sparse_moe.experts.16.w2", "model.layers.50.block_sparse_moe.experts.17.w2", "model.layers.50.block_sparse_moe.experts.18.w2", "model.layers.50.block_sparse_moe.experts.19.w2", "model.layers.50.block_sparse_moe.experts.20.w2", "model.layers.50.block_sparse_moe.experts.21.w2", "model.layers.50.block_sparse_moe.experts.22.w2", "model.layers.50.block_sparse_moe.experts.23.w2", "model.layers.50.block_sparse_moe.experts.24.w2", "model.layers.50.block_sparse_moe.experts.25.w2", "model.layers.50.block_sparse_moe.experts.26.w2", "model.layers.50.block_sparse_moe.experts.27.w2", "model.layers.50.block_sparse_moe.experts.28.w2", "model.layers.50.block_sparse_moe.experts.29.w2", "model.layers.50.block_sparse_moe.experts.30.w2", "model.layers.50.block_sparse_moe.experts.31.w2", "model.layers.50.block_sparse_moe.experts.32.w2", "model.layers.50.block_sparse_moe.experts.33.w2", "model.layers.50.block_sparse_moe.experts.34.w2", "model.layers.50.block_sparse_moe.experts.35.w2", "model.layers.50.block_sparse_moe.experts.36.w2", "model.layers.50.block_sparse_moe.experts.37.w2", "model.layers.50.block_sparse_moe.experts.38.w2", "model.layers.50.block_sparse_moe.experts.39.w2", "model.layers.50.block_sparse_moe.experts.40.w2", "model.layers.50.block_sparse_moe.experts.41.w2", "model.layers.50.block_sparse_moe.experts.42.w2", "model.layers.50.block_sparse_moe.experts.43.w2", "model.layers.50.block_sparse_moe.experts.44.w2", "model.layers.50.block_sparse_moe.experts.45.w2", "model.layers.50.block_sparse_moe.experts.46.w2", "model.layers.50.block_sparse_moe.experts.47.w2", "model.layers.50.block_sparse_moe.experts.48.w2", "model.layers.50.block_sparse_moe.experts.49.w2", "model.layers.50.block_sparse_moe.experts.50.w2", "model.layers.50.block_sparse_moe.experts.51.w2", "model.layers.50.block_sparse_moe.experts.52.w2", "model.layers.50.block_sparse_moe.experts.53.w2", "model.layers.50.block_sparse_moe.experts.54.w2", "model.layers.50.block_sparse_moe.experts.55.w2", "model.layers.50.block_sparse_moe.experts.56.w2", "model.layers.50.block_sparse_moe.experts.57.w2", "model.layers.50.block_sparse_moe.experts.58.w2", "model.layers.50.block_sparse_moe.experts.59.w2", "model.layers.50.block_sparse_moe.experts.60.w2", "model.layers.50.block_sparse_moe.experts.61.w2", "model.layers.50.block_sparse_moe.experts.62.w2", "model.layers.50.block_sparse_moe.experts.63.w2", "model.layers.50.block_sparse_moe.experts.64.w2", "model.layers.50.block_sparse_moe.experts.65.w2", "model.layers.50.block_sparse_moe.experts.66.w2", "model.layers.50.block_sparse_moe.experts.67.w2", "model.layers.50.block_sparse_moe.experts.68.w2", "model.layers.50.block_sparse_moe.experts.69.w2", "model.layers.50.block_sparse_moe.experts.70.w2", "model.layers.50.block_sparse_moe.experts.71.w2", "model.layers.50.block_sparse_moe.experts.72.w2", "model.layers.50.block_sparse_moe.experts.73.w2", "model.layers.50.block_sparse_moe.experts.74.w2", "model.layers.50.block_sparse_moe.experts.75.w2", "model.layers.50.block_sparse_moe.experts.76.w2", "model.layers.50.block_sparse_moe.experts.77.w2", "model.layers.50.block_sparse_moe.experts.78.w2", "model.layers.50.block_sparse_moe.experts.79.w2", "model.layers.50.block_sparse_moe.experts.80.w2", "model.layers.50.block_sparse_moe.experts.81.w2", "model.layers.50.block_sparse_moe.experts.82.w2", "model.layers.50.block_sparse_moe.experts.83.w2", "model.layers.50.block_sparse_moe.experts.84.w2", "model.layers.50.block_sparse_moe.experts.85.w2", "model.layers.50.block_sparse_moe.experts.86.w2", "model.layers.50.block_sparse_moe.experts.87.w2", "model.layers.50.block_sparse_moe.experts.88.w2", "model.layers.50.block_sparse_moe.experts.89.w2", "model.layers.50.block_sparse_moe.experts.90.w2", "model.layers.50.block_sparse_moe.experts.91.w2", "model.layers.50.block_sparse_moe.experts.92.w2", "model.layers.50.block_sparse_moe.experts.93.w2", "model.layers.50.block_sparse_moe.experts.94.w2", "model.layers.50.block_sparse_moe.experts.95.w2", "model.layers.50.block_sparse_moe.experts.96.w2", "model.layers.50.block_sparse_moe.experts.97.w2", "model.layers.50.block_sparse_moe.experts.98.w2", "model.layers.50.block_sparse_moe.experts.99.w2", "model.layers.50.block_sparse_moe.experts.100.w2", "model.layers.50.block_sparse_moe.experts.101.w2", "model.layers.50.block_sparse_moe.experts.102.w2", "model.layers.50.block_sparse_moe.experts.103.w2", "model.layers.50.block_sparse_moe.experts.104.w2", "model.layers.50.block_sparse_moe.experts.105.w2", "model.layers.50.block_sparse_moe.experts.106.w2", "model.layers.50.block_sparse_moe.experts.107.w2", "model.layers.50.block_sparse_moe.experts.108.w2", "model.layers.50.block_sparse_moe.experts.109.w2", "model.layers.50.block_sparse_moe.experts.110.w2", "model.layers.50.block_sparse_moe.experts.111.w2", "model.layers.50.block_sparse_moe.experts.112.w2", "model.layers.50.block_sparse_moe.experts.113.w2", "model.layers.50.block_sparse_moe.experts.114.w2", "model.layers.50.block_sparse_moe.experts.115.w2", "model.layers.50.block_sparse_moe.experts.116.w2", "model.layers.50.block_sparse_moe.experts.117.w2", "model.layers.50.block_sparse_moe.experts.118.w2", "model.layers.50.block_sparse_moe.experts.119.w2", "model.layers.50.block_sparse_moe.experts.120.w2", "model.layers.50.block_sparse_moe.experts.121.w2", "model.layers.50.block_sparse_moe.experts.122.w2", "model.layers.50.block_sparse_moe.experts.123.w2", "model.layers.50.block_sparse_moe.experts.124.w2", "model.layers.50.block_sparse_moe.experts.125.w2", "model.layers.50.block_sparse_moe.experts.126.w2", "model.layers.50.block_sparse_moe.experts.127.w2", "model.layers.50.block_sparse_moe.experts.128.w2", "model.layers.50.block_sparse_moe.experts.129.w2", "model.layers.50.block_sparse_moe.experts.130.w2", "model.layers.50.block_sparse_moe.experts.131.w2", "model.layers.50.block_sparse_moe.experts.132.w2", "model.layers.50.block_sparse_moe.experts.133.w2", "model.layers.50.block_sparse_moe.experts.134.w2", "model.layers.50.block_sparse_moe.experts.135.w2", "model.layers.50.block_sparse_moe.experts.136.w2", "model.layers.50.block_sparse_moe.experts.137.w2", "model.layers.50.block_sparse_moe.experts.138.w2", "model.layers.50.block_sparse_moe.experts.139.w2", "model.layers.50.block_sparse_moe.experts.140.w2", "model.layers.50.block_sparse_moe.experts.141.w2", "model.layers.50.block_sparse_moe.experts.142.w2", "model.layers.50.block_sparse_moe.experts.143.w2", "model.layers.50.block_sparse_moe.experts.144.w2", "model.layers.50.block_sparse_moe.experts.145.w2", "model.layers.50.block_sparse_moe.experts.146.w2", "model.layers.50.block_sparse_moe.experts.147.w2", "model.layers.50.block_sparse_moe.experts.148.w2", "model.layers.50.block_sparse_moe.experts.149.w2", "model.layers.50.block_sparse_moe.experts.150.w2", "model.layers.50.block_sparse_moe.experts.151.w2", "model.layers.50.block_sparse_moe.experts.152.w2", "model.layers.50.block_sparse_moe.experts.153.w2", "model.layers.50.block_sparse_moe.experts.154.w2", "model.layers.50.block_sparse_moe.experts.155.w2", "model.layers.50.block_sparse_moe.experts.156.w2", "model.layers.50.block_sparse_moe.experts.157.w2", "model.layers.50.block_sparse_moe.experts.158.w2", "model.layers.50.block_sparse_moe.experts.159.w2", "model.layers.50.block_sparse_moe.experts.160.w2", "model.layers.50.block_sparse_moe.experts.161.w2", "model.layers.50.block_sparse_moe.experts.162.w2", "model.layers.50.block_sparse_moe.experts.163.w2", "model.layers.50.block_sparse_moe.experts.164.w2", "model.layers.50.block_sparse_moe.experts.165.w2", "model.layers.50.block_sparse_moe.experts.166.w2", "model.layers.50.block_sparse_moe.experts.167.w2", "model.layers.50.block_sparse_moe.experts.168.w2", "model.layers.50.block_sparse_moe.experts.169.w2", "model.layers.50.block_sparse_moe.experts.170.w2", "model.layers.50.block_sparse_moe.experts.171.w2", "model.layers.50.block_sparse_moe.experts.172.w2", "model.layers.50.block_sparse_moe.experts.173.w2", "model.layers.50.block_sparse_moe.experts.174.w2", "model.layers.50.block_sparse_moe.experts.175.w2", "model.layers.50.block_sparse_moe.experts.176.w2", "model.layers.50.block_sparse_moe.experts.177.w2", "model.layers.50.block_sparse_moe.experts.178.w2", "model.layers.50.block_sparse_moe.experts.179.w2", "model.layers.50.block_sparse_moe.experts.180.w2", "model.layers.50.block_sparse_moe.experts.181.w2", "model.layers.50.block_sparse_moe.experts.182.w2", "model.layers.50.block_sparse_moe.experts.183.w2", "model.layers.50.block_sparse_moe.experts.184.w2", "model.layers.50.block_sparse_moe.experts.185.w2", "model.layers.50.block_sparse_moe.experts.186.w2", "model.layers.50.block_sparse_moe.experts.187.w2", "model.layers.50.block_sparse_moe.experts.188.w2", "model.layers.50.block_sparse_moe.experts.189.w2", "model.layers.50.block_sparse_moe.experts.190.w2", "model.layers.50.block_sparse_moe.experts.191.w2", "model.layers.50.block_sparse_moe.experts.192.w2", "model.layers.50.block_sparse_moe.experts.193.w2", "model.layers.50.block_sparse_moe.experts.194.w2", "model.layers.50.block_sparse_moe.experts.195.w2", "model.layers.50.block_sparse_moe.experts.196.w2", "model.layers.50.block_sparse_moe.experts.197.w2", "model.layers.50.block_sparse_moe.experts.198.w2", "model.layers.50.block_sparse_moe.experts.199.w2", "model.layers.50.block_sparse_moe.experts.200.w2", "model.layers.50.block_sparse_moe.experts.201.w2", "model.layers.50.block_sparse_moe.experts.202.w2", "model.layers.50.block_sparse_moe.experts.203.w2", "model.layers.50.block_sparse_moe.experts.204.w2", "model.layers.50.block_sparse_moe.experts.205.w2", "model.layers.50.block_sparse_moe.experts.206.w2", "model.layers.50.block_sparse_moe.experts.207.w2", "model.layers.50.block_sparse_moe.experts.208.w2", "model.layers.50.block_sparse_moe.experts.209.w2", "model.layers.50.block_sparse_moe.experts.210.w2", "model.layers.50.block_sparse_moe.experts.211.w2", "model.layers.50.block_sparse_moe.experts.212.w2", "model.layers.50.block_sparse_moe.experts.213.w2", "model.layers.50.block_sparse_moe.experts.214.w2", "model.layers.50.block_sparse_moe.experts.215.w2", "model.layers.50.block_sparse_moe.experts.216.w2", "model.layers.50.block_sparse_moe.experts.217.w2", "model.layers.50.block_sparse_moe.experts.218.w2", "model.layers.50.block_sparse_moe.experts.219.w2", "model.layers.50.block_sparse_moe.experts.220.w2", "model.layers.50.block_sparse_moe.experts.221.w2", "model.layers.50.block_sparse_moe.experts.222.w2", "model.layers.50.block_sparse_moe.experts.223.w2", "model.layers.50.block_sparse_moe.experts.224.w2", "model.layers.50.block_sparse_moe.experts.225.w2", "model.layers.50.block_sparse_moe.experts.226.w2", "model.layers.50.block_sparse_moe.experts.227.w2", "model.layers.50.block_sparse_moe.experts.228.w2", "model.layers.50.block_sparse_moe.experts.229.w2", "model.layers.50.block_sparse_moe.experts.230.w2", "model.layers.50.block_sparse_moe.experts.231.w2", "model.layers.50.block_sparse_moe.experts.232.w2", "model.layers.50.block_sparse_moe.experts.233.w2", "model.layers.50.block_sparse_moe.experts.234.w2", "model.layers.50.block_sparse_moe.experts.235.w2", "model.layers.50.block_sparse_moe.experts.236.w2", "model.layers.50.block_sparse_moe.experts.237.w2", "model.layers.50.block_sparse_moe.experts.238.w2", "model.layers.50.block_sparse_moe.experts.239.w2", "model.layers.50.block_sparse_moe.experts.240.w2", "model.layers.50.block_sparse_moe.experts.241.w2", "model.layers.50.block_sparse_moe.experts.242.w2", "model.layers.50.block_sparse_moe.experts.243.w2", "model.layers.50.block_sparse_moe.experts.244.w2", "model.layers.50.block_sparse_moe.experts.245.w2", "model.layers.50.block_sparse_moe.experts.246.w2", "model.layers.50.block_sparse_moe.experts.247.w2", "model.layers.50.block_sparse_moe.experts.248.w2", "model.layers.50.block_sparse_moe.experts.249.w2", "model.layers.50.block_sparse_moe.experts.250.w2", "model.layers.50.block_sparse_moe.experts.251.w2", "model.layers.50.block_sparse_moe.experts.252.w2", "model.layers.50.block_sparse_moe.experts.253.w2", "model.layers.50.block_sparse_moe.experts.254.w2", "model.layers.50.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 8.27834010123929e-06, "dbits": 1207959552 } ] }, { "idx": 255, "layers": [ "model.layers.51.self_attn.q_proj" ], "candidates": [ { "dkld": 7.693953812122623e-05, "dbits": 18874368 } ] }, { "idx": 256, "layers": [ "model.layers.51.self_attn.k_proj", "model.layers.51.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00040344558656216223, "dbits": 6291456 } ] }, { "idx": 257, "layers": [ "model.layers.51.self_attn.o_proj" ], "candidates": [ { "dkld": 1.3854354619924347e-06, "dbits": 18874368 } ] }, { "idx": 258, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w1", "model.layers.51.block_sparse_moe.experts.1.w1", "model.layers.51.block_sparse_moe.experts.2.w1", "model.layers.51.block_sparse_moe.experts.3.w1", "model.layers.51.block_sparse_moe.experts.4.w1", "model.layers.51.block_sparse_moe.experts.5.w1", "model.layers.51.block_sparse_moe.experts.6.w1", "model.layers.51.block_sparse_moe.experts.7.w1", "model.layers.51.block_sparse_moe.experts.8.w1", "model.layers.51.block_sparse_moe.experts.9.w1", "model.layers.51.block_sparse_moe.experts.10.w1", "model.layers.51.block_sparse_moe.experts.11.w1", "model.layers.51.block_sparse_moe.experts.12.w1", "model.layers.51.block_sparse_moe.experts.13.w1", "model.layers.51.block_sparse_moe.experts.14.w1", "model.layers.51.block_sparse_moe.experts.15.w1", "model.layers.51.block_sparse_moe.experts.16.w1", "model.layers.51.block_sparse_moe.experts.17.w1", "model.layers.51.block_sparse_moe.experts.18.w1", "model.layers.51.block_sparse_moe.experts.19.w1", "model.layers.51.block_sparse_moe.experts.20.w1", "model.layers.51.block_sparse_moe.experts.21.w1", "model.layers.51.block_sparse_moe.experts.22.w1", "model.layers.51.block_sparse_moe.experts.23.w1", "model.layers.51.block_sparse_moe.experts.24.w1", "model.layers.51.block_sparse_moe.experts.25.w1", "model.layers.51.block_sparse_moe.experts.26.w1", "model.layers.51.block_sparse_moe.experts.27.w1", "model.layers.51.block_sparse_moe.experts.28.w1", "model.layers.51.block_sparse_moe.experts.29.w1", "model.layers.51.block_sparse_moe.experts.30.w1", "model.layers.51.block_sparse_moe.experts.31.w1", "model.layers.51.block_sparse_moe.experts.32.w1", "model.layers.51.block_sparse_moe.experts.33.w1", "model.layers.51.block_sparse_moe.experts.34.w1", "model.layers.51.block_sparse_moe.experts.35.w1", "model.layers.51.block_sparse_moe.experts.36.w1", "model.layers.51.block_sparse_moe.experts.37.w1", "model.layers.51.block_sparse_moe.experts.38.w1", "model.layers.51.block_sparse_moe.experts.39.w1", "model.layers.51.block_sparse_moe.experts.40.w1", "model.layers.51.block_sparse_moe.experts.41.w1", "model.layers.51.block_sparse_moe.experts.42.w1", "model.layers.51.block_sparse_moe.experts.43.w1", "model.layers.51.block_sparse_moe.experts.44.w1", "model.layers.51.block_sparse_moe.experts.45.w1", "model.layers.51.block_sparse_moe.experts.46.w1", "model.layers.51.block_sparse_moe.experts.47.w1", "model.layers.51.block_sparse_moe.experts.48.w1", "model.layers.51.block_sparse_moe.experts.49.w1", "model.layers.51.block_sparse_moe.experts.50.w1", "model.layers.51.block_sparse_moe.experts.51.w1", "model.layers.51.block_sparse_moe.experts.52.w1", "model.layers.51.block_sparse_moe.experts.53.w1", "model.layers.51.block_sparse_moe.experts.54.w1", "model.layers.51.block_sparse_moe.experts.55.w1", "model.layers.51.block_sparse_moe.experts.56.w1", "model.layers.51.block_sparse_moe.experts.57.w1", "model.layers.51.block_sparse_moe.experts.58.w1", "model.layers.51.block_sparse_moe.experts.59.w1", "model.layers.51.block_sparse_moe.experts.60.w1", "model.layers.51.block_sparse_moe.experts.61.w1", "model.layers.51.block_sparse_moe.experts.62.w1", "model.layers.51.block_sparse_moe.experts.63.w1", "model.layers.51.block_sparse_moe.experts.64.w1", "model.layers.51.block_sparse_moe.experts.65.w1", "model.layers.51.block_sparse_moe.experts.66.w1", "model.layers.51.block_sparse_moe.experts.67.w1", "model.layers.51.block_sparse_moe.experts.68.w1", "model.layers.51.block_sparse_moe.experts.69.w1", "model.layers.51.block_sparse_moe.experts.70.w1", "model.layers.51.block_sparse_moe.experts.71.w1", "model.layers.51.block_sparse_moe.experts.72.w1", "model.layers.51.block_sparse_moe.experts.73.w1", "model.layers.51.block_sparse_moe.experts.74.w1", "model.layers.51.block_sparse_moe.experts.75.w1", "model.layers.51.block_sparse_moe.experts.76.w1", "model.layers.51.block_sparse_moe.experts.77.w1", "model.layers.51.block_sparse_moe.experts.78.w1", "model.layers.51.block_sparse_moe.experts.79.w1", "model.layers.51.block_sparse_moe.experts.80.w1", "model.layers.51.block_sparse_moe.experts.81.w1", "model.layers.51.block_sparse_moe.experts.82.w1", "model.layers.51.block_sparse_moe.experts.83.w1", "model.layers.51.block_sparse_moe.experts.84.w1", "model.layers.51.block_sparse_moe.experts.85.w1", "model.layers.51.block_sparse_moe.experts.86.w1", "model.layers.51.block_sparse_moe.experts.87.w1", "model.layers.51.block_sparse_moe.experts.88.w1", "model.layers.51.block_sparse_moe.experts.89.w1", "model.layers.51.block_sparse_moe.experts.90.w1", "model.layers.51.block_sparse_moe.experts.91.w1", "model.layers.51.block_sparse_moe.experts.92.w1", "model.layers.51.block_sparse_moe.experts.93.w1", "model.layers.51.block_sparse_moe.experts.94.w1", "model.layers.51.block_sparse_moe.experts.95.w1", "model.layers.51.block_sparse_moe.experts.96.w1", "model.layers.51.block_sparse_moe.experts.97.w1", "model.layers.51.block_sparse_moe.experts.98.w1", "model.layers.51.block_sparse_moe.experts.99.w1", "model.layers.51.block_sparse_moe.experts.100.w1", "model.layers.51.block_sparse_moe.experts.101.w1", "model.layers.51.block_sparse_moe.experts.102.w1", "model.layers.51.block_sparse_moe.experts.103.w1", "model.layers.51.block_sparse_moe.experts.104.w1", "model.layers.51.block_sparse_moe.experts.105.w1", "model.layers.51.block_sparse_moe.experts.106.w1", "model.layers.51.block_sparse_moe.experts.107.w1", "model.layers.51.block_sparse_moe.experts.108.w1", "model.layers.51.block_sparse_moe.experts.109.w1", "model.layers.51.block_sparse_moe.experts.110.w1", "model.layers.51.block_sparse_moe.experts.111.w1", "model.layers.51.block_sparse_moe.experts.112.w1", "model.layers.51.block_sparse_moe.experts.113.w1", "model.layers.51.block_sparse_moe.experts.114.w1", "model.layers.51.block_sparse_moe.experts.115.w1", "model.layers.51.block_sparse_moe.experts.116.w1", "model.layers.51.block_sparse_moe.experts.117.w1", "model.layers.51.block_sparse_moe.experts.118.w1", "model.layers.51.block_sparse_moe.experts.119.w1", "model.layers.51.block_sparse_moe.experts.120.w1", "model.layers.51.block_sparse_moe.experts.121.w1", "model.layers.51.block_sparse_moe.experts.122.w1", "model.layers.51.block_sparse_moe.experts.123.w1", "model.layers.51.block_sparse_moe.experts.124.w1", "model.layers.51.block_sparse_moe.experts.125.w1", "model.layers.51.block_sparse_moe.experts.126.w1", "model.layers.51.block_sparse_moe.experts.127.w1", "model.layers.51.block_sparse_moe.experts.128.w1", "model.layers.51.block_sparse_moe.experts.129.w1", "model.layers.51.block_sparse_moe.experts.130.w1", "model.layers.51.block_sparse_moe.experts.131.w1", "model.layers.51.block_sparse_moe.experts.132.w1", "model.layers.51.block_sparse_moe.experts.133.w1", "model.layers.51.block_sparse_moe.experts.134.w1", "model.layers.51.block_sparse_moe.experts.135.w1", "model.layers.51.block_sparse_moe.experts.136.w1", "model.layers.51.block_sparse_moe.experts.137.w1", "model.layers.51.block_sparse_moe.experts.138.w1", "model.layers.51.block_sparse_moe.experts.139.w1", "model.layers.51.block_sparse_moe.experts.140.w1", "model.layers.51.block_sparse_moe.experts.141.w1", "model.layers.51.block_sparse_moe.experts.142.w1", "model.layers.51.block_sparse_moe.experts.143.w1", "model.layers.51.block_sparse_moe.experts.144.w1", "model.layers.51.block_sparse_moe.experts.145.w1", "model.layers.51.block_sparse_moe.experts.146.w1", "model.layers.51.block_sparse_moe.experts.147.w1", "model.layers.51.block_sparse_moe.experts.148.w1", "model.layers.51.block_sparse_moe.experts.149.w1", "model.layers.51.block_sparse_moe.experts.150.w1", "model.layers.51.block_sparse_moe.experts.151.w1", "model.layers.51.block_sparse_moe.experts.152.w1", "model.layers.51.block_sparse_moe.experts.153.w1", "model.layers.51.block_sparse_moe.experts.154.w1", "model.layers.51.block_sparse_moe.experts.155.w1", "model.layers.51.block_sparse_moe.experts.156.w1", "model.layers.51.block_sparse_moe.experts.157.w1", "model.layers.51.block_sparse_moe.experts.158.w1", "model.layers.51.block_sparse_moe.experts.159.w1", "model.layers.51.block_sparse_moe.experts.160.w1", "model.layers.51.block_sparse_moe.experts.161.w1", "model.layers.51.block_sparse_moe.experts.162.w1", "model.layers.51.block_sparse_moe.experts.163.w1", "model.layers.51.block_sparse_moe.experts.164.w1", "model.layers.51.block_sparse_moe.experts.165.w1", "model.layers.51.block_sparse_moe.experts.166.w1", "model.layers.51.block_sparse_moe.experts.167.w1", "model.layers.51.block_sparse_moe.experts.168.w1", "model.layers.51.block_sparse_moe.experts.169.w1", "model.layers.51.block_sparse_moe.experts.170.w1", "model.layers.51.block_sparse_moe.experts.171.w1", "model.layers.51.block_sparse_moe.experts.172.w1", "model.layers.51.block_sparse_moe.experts.173.w1", "model.layers.51.block_sparse_moe.experts.174.w1", "model.layers.51.block_sparse_moe.experts.175.w1", "model.layers.51.block_sparse_moe.experts.176.w1", "model.layers.51.block_sparse_moe.experts.177.w1", "model.layers.51.block_sparse_moe.experts.178.w1", "model.layers.51.block_sparse_moe.experts.179.w1", "model.layers.51.block_sparse_moe.experts.180.w1", "model.layers.51.block_sparse_moe.experts.181.w1", "model.layers.51.block_sparse_moe.experts.182.w1", "model.layers.51.block_sparse_moe.experts.183.w1", "model.layers.51.block_sparse_moe.experts.184.w1", "model.layers.51.block_sparse_moe.experts.185.w1", "model.layers.51.block_sparse_moe.experts.186.w1", "model.layers.51.block_sparse_moe.experts.187.w1", "model.layers.51.block_sparse_moe.experts.188.w1", "model.layers.51.block_sparse_moe.experts.189.w1", "model.layers.51.block_sparse_moe.experts.190.w1", "model.layers.51.block_sparse_moe.experts.191.w1", "model.layers.51.block_sparse_moe.experts.192.w1", "model.layers.51.block_sparse_moe.experts.193.w1", "model.layers.51.block_sparse_moe.experts.194.w1", "model.layers.51.block_sparse_moe.experts.195.w1", "model.layers.51.block_sparse_moe.experts.196.w1", "model.layers.51.block_sparse_moe.experts.197.w1", "model.layers.51.block_sparse_moe.experts.198.w1", "model.layers.51.block_sparse_moe.experts.199.w1", "model.layers.51.block_sparse_moe.experts.200.w1", "model.layers.51.block_sparse_moe.experts.201.w1", "model.layers.51.block_sparse_moe.experts.202.w1", "model.layers.51.block_sparse_moe.experts.203.w1", "model.layers.51.block_sparse_moe.experts.204.w1", "model.layers.51.block_sparse_moe.experts.205.w1", "model.layers.51.block_sparse_moe.experts.206.w1", "model.layers.51.block_sparse_moe.experts.207.w1", "model.layers.51.block_sparse_moe.experts.208.w1", "model.layers.51.block_sparse_moe.experts.209.w1", "model.layers.51.block_sparse_moe.experts.210.w1", "model.layers.51.block_sparse_moe.experts.211.w1", "model.layers.51.block_sparse_moe.experts.212.w1", "model.layers.51.block_sparse_moe.experts.213.w1", "model.layers.51.block_sparse_moe.experts.214.w1", "model.layers.51.block_sparse_moe.experts.215.w1", "model.layers.51.block_sparse_moe.experts.216.w1", "model.layers.51.block_sparse_moe.experts.217.w1", "model.layers.51.block_sparse_moe.experts.218.w1", "model.layers.51.block_sparse_moe.experts.219.w1", "model.layers.51.block_sparse_moe.experts.220.w1", "model.layers.51.block_sparse_moe.experts.221.w1", "model.layers.51.block_sparse_moe.experts.222.w1", "model.layers.51.block_sparse_moe.experts.223.w1", "model.layers.51.block_sparse_moe.experts.224.w1", "model.layers.51.block_sparse_moe.experts.225.w1", "model.layers.51.block_sparse_moe.experts.226.w1", "model.layers.51.block_sparse_moe.experts.227.w1", "model.layers.51.block_sparse_moe.experts.228.w1", "model.layers.51.block_sparse_moe.experts.229.w1", "model.layers.51.block_sparse_moe.experts.230.w1", "model.layers.51.block_sparse_moe.experts.231.w1", "model.layers.51.block_sparse_moe.experts.232.w1", "model.layers.51.block_sparse_moe.experts.233.w1", "model.layers.51.block_sparse_moe.experts.234.w1", "model.layers.51.block_sparse_moe.experts.235.w1", "model.layers.51.block_sparse_moe.experts.236.w1", "model.layers.51.block_sparse_moe.experts.237.w1", "model.layers.51.block_sparse_moe.experts.238.w1", "model.layers.51.block_sparse_moe.experts.239.w1", "model.layers.51.block_sparse_moe.experts.240.w1", "model.layers.51.block_sparse_moe.experts.241.w1", "model.layers.51.block_sparse_moe.experts.242.w1", "model.layers.51.block_sparse_moe.experts.243.w1", "model.layers.51.block_sparse_moe.experts.244.w1", "model.layers.51.block_sparse_moe.experts.245.w1", "model.layers.51.block_sparse_moe.experts.246.w1", "model.layers.51.block_sparse_moe.experts.247.w1", "model.layers.51.block_sparse_moe.experts.248.w1", "model.layers.51.block_sparse_moe.experts.249.w1", "model.layers.51.block_sparse_moe.experts.250.w1", "model.layers.51.block_sparse_moe.experts.251.w1", "model.layers.51.block_sparse_moe.experts.252.w1", "model.layers.51.block_sparse_moe.experts.253.w1", "model.layers.51.block_sparse_moe.experts.254.w1", "model.layers.51.block_sparse_moe.experts.255.w1", "model.layers.51.block_sparse_moe.experts.0.w3", "model.layers.51.block_sparse_moe.experts.1.w3", "model.layers.51.block_sparse_moe.experts.2.w3", "model.layers.51.block_sparse_moe.experts.3.w3", "model.layers.51.block_sparse_moe.experts.4.w3", "model.layers.51.block_sparse_moe.experts.5.w3", "model.layers.51.block_sparse_moe.experts.6.w3", "model.layers.51.block_sparse_moe.experts.7.w3", "model.layers.51.block_sparse_moe.experts.8.w3", "model.layers.51.block_sparse_moe.experts.9.w3", "model.layers.51.block_sparse_moe.experts.10.w3", "model.layers.51.block_sparse_moe.experts.11.w3", "model.layers.51.block_sparse_moe.experts.12.w3", "model.layers.51.block_sparse_moe.experts.13.w3", "model.layers.51.block_sparse_moe.experts.14.w3", "model.layers.51.block_sparse_moe.experts.15.w3", "model.layers.51.block_sparse_moe.experts.16.w3", "model.layers.51.block_sparse_moe.experts.17.w3", "model.layers.51.block_sparse_moe.experts.18.w3", "model.layers.51.block_sparse_moe.experts.19.w3", "model.layers.51.block_sparse_moe.experts.20.w3", "model.layers.51.block_sparse_moe.experts.21.w3", "model.layers.51.block_sparse_moe.experts.22.w3", "model.layers.51.block_sparse_moe.experts.23.w3", "model.layers.51.block_sparse_moe.experts.24.w3", "model.layers.51.block_sparse_moe.experts.25.w3", "model.layers.51.block_sparse_moe.experts.26.w3", "model.layers.51.block_sparse_moe.experts.27.w3", "model.layers.51.block_sparse_moe.experts.28.w3", "model.layers.51.block_sparse_moe.experts.29.w3", "model.layers.51.block_sparse_moe.experts.30.w3", "model.layers.51.block_sparse_moe.experts.31.w3", "model.layers.51.block_sparse_moe.experts.32.w3", "model.layers.51.block_sparse_moe.experts.33.w3", "model.layers.51.block_sparse_moe.experts.34.w3", "model.layers.51.block_sparse_moe.experts.35.w3", "model.layers.51.block_sparse_moe.experts.36.w3", "model.layers.51.block_sparse_moe.experts.37.w3", "model.layers.51.block_sparse_moe.experts.38.w3", "model.layers.51.block_sparse_moe.experts.39.w3", "model.layers.51.block_sparse_moe.experts.40.w3", "model.layers.51.block_sparse_moe.experts.41.w3", "model.layers.51.block_sparse_moe.experts.42.w3", "model.layers.51.block_sparse_moe.experts.43.w3", "model.layers.51.block_sparse_moe.experts.44.w3", "model.layers.51.block_sparse_moe.experts.45.w3", "model.layers.51.block_sparse_moe.experts.46.w3", "model.layers.51.block_sparse_moe.experts.47.w3", "model.layers.51.block_sparse_moe.experts.48.w3", "model.layers.51.block_sparse_moe.experts.49.w3", "model.layers.51.block_sparse_moe.experts.50.w3", "model.layers.51.block_sparse_moe.experts.51.w3", "model.layers.51.block_sparse_moe.experts.52.w3", "model.layers.51.block_sparse_moe.experts.53.w3", "model.layers.51.block_sparse_moe.experts.54.w3", "model.layers.51.block_sparse_moe.experts.55.w3", "model.layers.51.block_sparse_moe.experts.56.w3", "model.layers.51.block_sparse_moe.experts.57.w3", "model.layers.51.block_sparse_moe.experts.58.w3", "model.layers.51.block_sparse_moe.experts.59.w3", "model.layers.51.block_sparse_moe.experts.60.w3", "model.layers.51.block_sparse_moe.experts.61.w3", "model.layers.51.block_sparse_moe.experts.62.w3", "model.layers.51.block_sparse_moe.experts.63.w3", "model.layers.51.block_sparse_moe.experts.64.w3", "model.layers.51.block_sparse_moe.experts.65.w3", "model.layers.51.block_sparse_moe.experts.66.w3", "model.layers.51.block_sparse_moe.experts.67.w3", "model.layers.51.block_sparse_moe.experts.68.w3", "model.layers.51.block_sparse_moe.experts.69.w3", "model.layers.51.block_sparse_moe.experts.70.w3", "model.layers.51.block_sparse_moe.experts.71.w3", "model.layers.51.block_sparse_moe.experts.72.w3", "model.layers.51.block_sparse_moe.experts.73.w3", "model.layers.51.block_sparse_moe.experts.74.w3", "model.layers.51.block_sparse_moe.experts.75.w3", "model.layers.51.block_sparse_moe.experts.76.w3", "model.layers.51.block_sparse_moe.experts.77.w3", "model.layers.51.block_sparse_moe.experts.78.w3", "model.layers.51.block_sparse_moe.experts.79.w3", "model.layers.51.block_sparse_moe.experts.80.w3", "model.layers.51.block_sparse_moe.experts.81.w3", "model.layers.51.block_sparse_moe.experts.82.w3", "model.layers.51.block_sparse_moe.experts.83.w3", "model.layers.51.block_sparse_moe.experts.84.w3", "model.layers.51.block_sparse_moe.experts.85.w3", "model.layers.51.block_sparse_moe.experts.86.w3", "model.layers.51.block_sparse_moe.experts.87.w3", "model.layers.51.block_sparse_moe.experts.88.w3", "model.layers.51.block_sparse_moe.experts.89.w3", "model.layers.51.block_sparse_moe.experts.90.w3", "model.layers.51.block_sparse_moe.experts.91.w3", "model.layers.51.block_sparse_moe.experts.92.w3", "model.layers.51.block_sparse_moe.experts.93.w3", "model.layers.51.block_sparse_moe.experts.94.w3", "model.layers.51.block_sparse_moe.experts.95.w3", "model.layers.51.block_sparse_moe.experts.96.w3", "model.layers.51.block_sparse_moe.experts.97.w3", "model.layers.51.block_sparse_moe.experts.98.w3", "model.layers.51.block_sparse_moe.experts.99.w3", "model.layers.51.block_sparse_moe.experts.100.w3", "model.layers.51.block_sparse_moe.experts.101.w3", "model.layers.51.block_sparse_moe.experts.102.w3", "model.layers.51.block_sparse_moe.experts.103.w3", "model.layers.51.block_sparse_moe.experts.104.w3", "model.layers.51.block_sparse_moe.experts.105.w3", "model.layers.51.block_sparse_moe.experts.106.w3", "model.layers.51.block_sparse_moe.experts.107.w3", "model.layers.51.block_sparse_moe.experts.108.w3", "model.layers.51.block_sparse_moe.experts.109.w3", "model.layers.51.block_sparse_moe.experts.110.w3", "model.layers.51.block_sparse_moe.experts.111.w3", "model.layers.51.block_sparse_moe.experts.112.w3", "model.layers.51.block_sparse_moe.experts.113.w3", "model.layers.51.block_sparse_moe.experts.114.w3", "model.layers.51.block_sparse_moe.experts.115.w3", "model.layers.51.block_sparse_moe.experts.116.w3", "model.layers.51.block_sparse_moe.experts.117.w3", "model.layers.51.block_sparse_moe.experts.118.w3", "model.layers.51.block_sparse_moe.experts.119.w3", "model.layers.51.block_sparse_moe.experts.120.w3", "model.layers.51.block_sparse_moe.experts.121.w3", "model.layers.51.block_sparse_moe.experts.122.w3", "model.layers.51.block_sparse_moe.experts.123.w3", "model.layers.51.block_sparse_moe.experts.124.w3", "model.layers.51.block_sparse_moe.experts.125.w3", "model.layers.51.block_sparse_moe.experts.126.w3", "model.layers.51.block_sparse_moe.experts.127.w3", "model.layers.51.block_sparse_moe.experts.128.w3", "model.layers.51.block_sparse_moe.experts.129.w3", "model.layers.51.block_sparse_moe.experts.130.w3", "model.layers.51.block_sparse_moe.experts.131.w3", "model.layers.51.block_sparse_moe.experts.132.w3", "model.layers.51.block_sparse_moe.experts.133.w3", "model.layers.51.block_sparse_moe.experts.134.w3", "model.layers.51.block_sparse_moe.experts.135.w3", "model.layers.51.block_sparse_moe.experts.136.w3", "model.layers.51.block_sparse_moe.experts.137.w3", "model.layers.51.block_sparse_moe.experts.138.w3", "model.layers.51.block_sparse_moe.experts.139.w3", "model.layers.51.block_sparse_moe.experts.140.w3", "model.layers.51.block_sparse_moe.experts.141.w3", "model.layers.51.block_sparse_moe.experts.142.w3", "model.layers.51.block_sparse_moe.experts.143.w3", "model.layers.51.block_sparse_moe.experts.144.w3", "model.layers.51.block_sparse_moe.experts.145.w3", "model.layers.51.block_sparse_moe.experts.146.w3", "model.layers.51.block_sparse_moe.experts.147.w3", "model.layers.51.block_sparse_moe.experts.148.w3", "model.layers.51.block_sparse_moe.experts.149.w3", "model.layers.51.block_sparse_moe.experts.150.w3", "model.layers.51.block_sparse_moe.experts.151.w3", "model.layers.51.block_sparse_moe.experts.152.w3", "model.layers.51.block_sparse_moe.experts.153.w3", "model.layers.51.block_sparse_moe.experts.154.w3", "model.layers.51.block_sparse_moe.experts.155.w3", "model.layers.51.block_sparse_moe.experts.156.w3", "model.layers.51.block_sparse_moe.experts.157.w3", "model.layers.51.block_sparse_moe.experts.158.w3", "model.layers.51.block_sparse_moe.experts.159.w3", "model.layers.51.block_sparse_moe.experts.160.w3", "model.layers.51.block_sparse_moe.experts.161.w3", "model.layers.51.block_sparse_moe.experts.162.w3", "model.layers.51.block_sparse_moe.experts.163.w3", "model.layers.51.block_sparse_moe.experts.164.w3", "model.layers.51.block_sparse_moe.experts.165.w3", "model.layers.51.block_sparse_moe.experts.166.w3", "model.layers.51.block_sparse_moe.experts.167.w3", "model.layers.51.block_sparse_moe.experts.168.w3", "model.layers.51.block_sparse_moe.experts.169.w3", "model.layers.51.block_sparse_moe.experts.170.w3", "model.layers.51.block_sparse_moe.experts.171.w3", "model.layers.51.block_sparse_moe.experts.172.w3", "model.layers.51.block_sparse_moe.experts.173.w3", "model.layers.51.block_sparse_moe.experts.174.w3", "model.layers.51.block_sparse_moe.experts.175.w3", "model.layers.51.block_sparse_moe.experts.176.w3", "model.layers.51.block_sparse_moe.experts.177.w3", "model.layers.51.block_sparse_moe.experts.178.w3", "model.layers.51.block_sparse_moe.experts.179.w3", "model.layers.51.block_sparse_moe.experts.180.w3", "model.layers.51.block_sparse_moe.experts.181.w3", "model.layers.51.block_sparse_moe.experts.182.w3", "model.layers.51.block_sparse_moe.experts.183.w3", "model.layers.51.block_sparse_moe.experts.184.w3", "model.layers.51.block_sparse_moe.experts.185.w3", "model.layers.51.block_sparse_moe.experts.186.w3", "model.layers.51.block_sparse_moe.experts.187.w3", "model.layers.51.block_sparse_moe.experts.188.w3", "model.layers.51.block_sparse_moe.experts.189.w3", "model.layers.51.block_sparse_moe.experts.190.w3", "model.layers.51.block_sparse_moe.experts.191.w3", "model.layers.51.block_sparse_moe.experts.192.w3", "model.layers.51.block_sparse_moe.experts.193.w3", "model.layers.51.block_sparse_moe.experts.194.w3", "model.layers.51.block_sparse_moe.experts.195.w3", "model.layers.51.block_sparse_moe.experts.196.w3", "model.layers.51.block_sparse_moe.experts.197.w3", "model.layers.51.block_sparse_moe.experts.198.w3", "model.layers.51.block_sparse_moe.experts.199.w3", "model.layers.51.block_sparse_moe.experts.200.w3", "model.layers.51.block_sparse_moe.experts.201.w3", "model.layers.51.block_sparse_moe.experts.202.w3", "model.layers.51.block_sparse_moe.experts.203.w3", "model.layers.51.block_sparse_moe.experts.204.w3", "model.layers.51.block_sparse_moe.experts.205.w3", "model.layers.51.block_sparse_moe.experts.206.w3", "model.layers.51.block_sparse_moe.experts.207.w3", "model.layers.51.block_sparse_moe.experts.208.w3", "model.layers.51.block_sparse_moe.experts.209.w3", "model.layers.51.block_sparse_moe.experts.210.w3", "model.layers.51.block_sparse_moe.experts.211.w3", "model.layers.51.block_sparse_moe.experts.212.w3", "model.layers.51.block_sparse_moe.experts.213.w3", "model.layers.51.block_sparse_moe.experts.214.w3", "model.layers.51.block_sparse_moe.experts.215.w3", "model.layers.51.block_sparse_moe.experts.216.w3", "model.layers.51.block_sparse_moe.experts.217.w3", "model.layers.51.block_sparse_moe.experts.218.w3", "model.layers.51.block_sparse_moe.experts.219.w3", "model.layers.51.block_sparse_moe.experts.220.w3", "model.layers.51.block_sparse_moe.experts.221.w3", "model.layers.51.block_sparse_moe.experts.222.w3", "model.layers.51.block_sparse_moe.experts.223.w3", "model.layers.51.block_sparse_moe.experts.224.w3", "model.layers.51.block_sparse_moe.experts.225.w3", "model.layers.51.block_sparse_moe.experts.226.w3", "model.layers.51.block_sparse_moe.experts.227.w3", "model.layers.51.block_sparse_moe.experts.228.w3", "model.layers.51.block_sparse_moe.experts.229.w3", "model.layers.51.block_sparse_moe.experts.230.w3", "model.layers.51.block_sparse_moe.experts.231.w3", "model.layers.51.block_sparse_moe.experts.232.w3", "model.layers.51.block_sparse_moe.experts.233.w3", "model.layers.51.block_sparse_moe.experts.234.w3", "model.layers.51.block_sparse_moe.experts.235.w3", "model.layers.51.block_sparse_moe.experts.236.w3", "model.layers.51.block_sparse_moe.experts.237.w3", "model.layers.51.block_sparse_moe.experts.238.w3", "model.layers.51.block_sparse_moe.experts.239.w3", "model.layers.51.block_sparse_moe.experts.240.w3", "model.layers.51.block_sparse_moe.experts.241.w3", "model.layers.51.block_sparse_moe.experts.242.w3", "model.layers.51.block_sparse_moe.experts.243.w3", "model.layers.51.block_sparse_moe.experts.244.w3", "model.layers.51.block_sparse_moe.experts.245.w3", "model.layers.51.block_sparse_moe.experts.246.w3", "model.layers.51.block_sparse_moe.experts.247.w3", "model.layers.51.block_sparse_moe.experts.248.w3", "model.layers.51.block_sparse_moe.experts.249.w3", "model.layers.51.block_sparse_moe.experts.250.w3", "model.layers.51.block_sparse_moe.experts.251.w3", "model.layers.51.block_sparse_moe.experts.252.w3", "model.layers.51.block_sparse_moe.experts.253.w3", "model.layers.51.block_sparse_moe.experts.254.w3", "model.layers.51.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 8.650589734315872e-05, "dbits": 2415919104 } ] }, { "idx": 259, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w2", "model.layers.51.block_sparse_moe.experts.1.w2", "model.layers.51.block_sparse_moe.experts.2.w2", "model.layers.51.block_sparse_moe.experts.3.w2", "model.layers.51.block_sparse_moe.experts.4.w2", "model.layers.51.block_sparse_moe.experts.5.w2", "model.layers.51.block_sparse_moe.experts.6.w2", "model.layers.51.block_sparse_moe.experts.7.w2", "model.layers.51.block_sparse_moe.experts.8.w2", "model.layers.51.block_sparse_moe.experts.9.w2", "model.layers.51.block_sparse_moe.experts.10.w2", "model.layers.51.block_sparse_moe.experts.11.w2", "model.layers.51.block_sparse_moe.experts.12.w2", "model.layers.51.block_sparse_moe.experts.13.w2", "model.layers.51.block_sparse_moe.experts.14.w2", "model.layers.51.block_sparse_moe.experts.15.w2", "model.layers.51.block_sparse_moe.experts.16.w2", "model.layers.51.block_sparse_moe.experts.17.w2", "model.layers.51.block_sparse_moe.experts.18.w2", "model.layers.51.block_sparse_moe.experts.19.w2", "model.layers.51.block_sparse_moe.experts.20.w2", "model.layers.51.block_sparse_moe.experts.21.w2", "model.layers.51.block_sparse_moe.experts.22.w2", "model.layers.51.block_sparse_moe.experts.23.w2", "model.layers.51.block_sparse_moe.experts.24.w2", "model.layers.51.block_sparse_moe.experts.25.w2", "model.layers.51.block_sparse_moe.experts.26.w2", "model.layers.51.block_sparse_moe.experts.27.w2", "model.layers.51.block_sparse_moe.experts.28.w2", "model.layers.51.block_sparse_moe.experts.29.w2", "model.layers.51.block_sparse_moe.experts.30.w2", "model.layers.51.block_sparse_moe.experts.31.w2", "model.layers.51.block_sparse_moe.experts.32.w2", "model.layers.51.block_sparse_moe.experts.33.w2", "model.layers.51.block_sparse_moe.experts.34.w2", "model.layers.51.block_sparse_moe.experts.35.w2", "model.layers.51.block_sparse_moe.experts.36.w2", "model.layers.51.block_sparse_moe.experts.37.w2", "model.layers.51.block_sparse_moe.experts.38.w2", "model.layers.51.block_sparse_moe.experts.39.w2", "model.layers.51.block_sparse_moe.experts.40.w2", "model.layers.51.block_sparse_moe.experts.41.w2", "model.layers.51.block_sparse_moe.experts.42.w2", "model.layers.51.block_sparse_moe.experts.43.w2", "model.layers.51.block_sparse_moe.experts.44.w2", "model.layers.51.block_sparse_moe.experts.45.w2", "model.layers.51.block_sparse_moe.experts.46.w2", "model.layers.51.block_sparse_moe.experts.47.w2", "model.layers.51.block_sparse_moe.experts.48.w2", "model.layers.51.block_sparse_moe.experts.49.w2", "model.layers.51.block_sparse_moe.experts.50.w2", "model.layers.51.block_sparse_moe.experts.51.w2", "model.layers.51.block_sparse_moe.experts.52.w2", "model.layers.51.block_sparse_moe.experts.53.w2", "model.layers.51.block_sparse_moe.experts.54.w2", "model.layers.51.block_sparse_moe.experts.55.w2", "model.layers.51.block_sparse_moe.experts.56.w2", "model.layers.51.block_sparse_moe.experts.57.w2", "model.layers.51.block_sparse_moe.experts.58.w2", "model.layers.51.block_sparse_moe.experts.59.w2", "model.layers.51.block_sparse_moe.experts.60.w2", "model.layers.51.block_sparse_moe.experts.61.w2", "model.layers.51.block_sparse_moe.experts.62.w2", "model.layers.51.block_sparse_moe.experts.63.w2", "model.layers.51.block_sparse_moe.experts.64.w2", "model.layers.51.block_sparse_moe.experts.65.w2", "model.layers.51.block_sparse_moe.experts.66.w2", "model.layers.51.block_sparse_moe.experts.67.w2", "model.layers.51.block_sparse_moe.experts.68.w2", "model.layers.51.block_sparse_moe.experts.69.w2", "model.layers.51.block_sparse_moe.experts.70.w2", "model.layers.51.block_sparse_moe.experts.71.w2", "model.layers.51.block_sparse_moe.experts.72.w2", "model.layers.51.block_sparse_moe.experts.73.w2", "model.layers.51.block_sparse_moe.experts.74.w2", "model.layers.51.block_sparse_moe.experts.75.w2", "model.layers.51.block_sparse_moe.experts.76.w2", "model.layers.51.block_sparse_moe.experts.77.w2", "model.layers.51.block_sparse_moe.experts.78.w2", "model.layers.51.block_sparse_moe.experts.79.w2", "model.layers.51.block_sparse_moe.experts.80.w2", "model.layers.51.block_sparse_moe.experts.81.w2", "model.layers.51.block_sparse_moe.experts.82.w2", "model.layers.51.block_sparse_moe.experts.83.w2", "model.layers.51.block_sparse_moe.experts.84.w2", "model.layers.51.block_sparse_moe.experts.85.w2", "model.layers.51.block_sparse_moe.experts.86.w2", "model.layers.51.block_sparse_moe.experts.87.w2", "model.layers.51.block_sparse_moe.experts.88.w2", "model.layers.51.block_sparse_moe.experts.89.w2", "model.layers.51.block_sparse_moe.experts.90.w2", "model.layers.51.block_sparse_moe.experts.91.w2", "model.layers.51.block_sparse_moe.experts.92.w2", "model.layers.51.block_sparse_moe.experts.93.w2", "model.layers.51.block_sparse_moe.experts.94.w2", "model.layers.51.block_sparse_moe.experts.95.w2", "model.layers.51.block_sparse_moe.experts.96.w2", "model.layers.51.block_sparse_moe.experts.97.w2", "model.layers.51.block_sparse_moe.experts.98.w2", "model.layers.51.block_sparse_moe.experts.99.w2", "model.layers.51.block_sparse_moe.experts.100.w2", "model.layers.51.block_sparse_moe.experts.101.w2", "model.layers.51.block_sparse_moe.experts.102.w2", "model.layers.51.block_sparse_moe.experts.103.w2", "model.layers.51.block_sparse_moe.experts.104.w2", "model.layers.51.block_sparse_moe.experts.105.w2", "model.layers.51.block_sparse_moe.experts.106.w2", "model.layers.51.block_sparse_moe.experts.107.w2", "model.layers.51.block_sparse_moe.experts.108.w2", "model.layers.51.block_sparse_moe.experts.109.w2", "model.layers.51.block_sparse_moe.experts.110.w2", "model.layers.51.block_sparse_moe.experts.111.w2", "model.layers.51.block_sparse_moe.experts.112.w2", "model.layers.51.block_sparse_moe.experts.113.w2", "model.layers.51.block_sparse_moe.experts.114.w2", "model.layers.51.block_sparse_moe.experts.115.w2", "model.layers.51.block_sparse_moe.experts.116.w2", "model.layers.51.block_sparse_moe.experts.117.w2", "model.layers.51.block_sparse_moe.experts.118.w2", "model.layers.51.block_sparse_moe.experts.119.w2", "model.layers.51.block_sparse_moe.experts.120.w2", "model.layers.51.block_sparse_moe.experts.121.w2", "model.layers.51.block_sparse_moe.experts.122.w2", "model.layers.51.block_sparse_moe.experts.123.w2", "model.layers.51.block_sparse_moe.experts.124.w2", "model.layers.51.block_sparse_moe.experts.125.w2", "model.layers.51.block_sparse_moe.experts.126.w2", "model.layers.51.block_sparse_moe.experts.127.w2", "model.layers.51.block_sparse_moe.experts.128.w2", "model.layers.51.block_sparse_moe.experts.129.w2", "model.layers.51.block_sparse_moe.experts.130.w2", "model.layers.51.block_sparse_moe.experts.131.w2", "model.layers.51.block_sparse_moe.experts.132.w2", "model.layers.51.block_sparse_moe.experts.133.w2", "model.layers.51.block_sparse_moe.experts.134.w2", "model.layers.51.block_sparse_moe.experts.135.w2", "model.layers.51.block_sparse_moe.experts.136.w2", "model.layers.51.block_sparse_moe.experts.137.w2", "model.layers.51.block_sparse_moe.experts.138.w2", "model.layers.51.block_sparse_moe.experts.139.w2", "model.layers.51.block_sparse_moe.experts.140.w2", "model.layers.51.block_sparse_moe.experts.141.w2", "model.layers.51.block_sparse_moe.experts.142.w2", "model.layers.51.block_sparse_moe.experts.143.w2", "model.layers.51.block_sparse_moe.experts.144.w2", "model.layers.51.block_sparse_moe.experts.145.w2", "model.layers.51.block_sparse_moe.experts.146.w2", "model.layers.51.block_sparse_moe.experts.147.w2", "model.layers.51.block_sparse_moe.experts.148.w2", "model.layers.51.block_sparse_moe.experts.149.w2", "model.layers.51.block_sparse_moe.experts.150.w2", "model.layers.51.block_sparse_moe.experts.151.w2", "model.layers.51.block_sparse_moe.experts.152.w2", "model.layers.51.block_sparse_moe.experts.153.w2", "model.layers.51.block_sparse_moe.experts.154.w2", "model.layers.51.block_sparse_moe.experts.155.w2", "model.layers.51.block_sparse_moe.experts.156.w2", "model.layers.51.block_sparse_moe.experts.157.w2", "model.layers.51.block_sparse_moe.experts.158.w2", "model.layers.51.block_sparse_moe.experts.159.w2", "model.layers.51.block_sparse_moe.experts.160.w2", "model.layers.51.block_sparse_moe.experts.161.w2", "model.layers.51.block_sparse_moe.experts.162.w2", "model.layers.51.block_sparse_moe.experts.163.w2", "model.layers.51.block_sparse_moe.experts.164.w2", "model.layers.51.block_sparse_moe.experts.165.w2", "model.layers.51.block_sparse_moe.experts.166.w2", "model.layers.51.block_sparse_moe.experts.167.w2", "model.layers.51.block_sparse_moe.experts.168.w2", "model.layers.51.block_sparse_moe.experts.169.w2", "model.layers.51.block_sparse_moe.experts.170.w2", "model.layers.51.block_sparse_moe.experts.171.w2", "model.layers.51.block_sparse_moe.experts.172.w2", "model.layers.51.block_sparse_moe.experts.173.w2", "model.layers.51.block_sparse_moe.experts.174.w2", "model.layers.51.block_sparse_moe.experts.175.w2", "model.layers.51.block_sparse_moe.experts.176.w2", "model.layers.51.block_sparse_moe.experts.177.w2", "model.layers.51.block_sparse_moe.experts.178.w2", "model.layers.51.block_sparse_moe.experts.179.w2", "model.layers.51.block_sparse_moe.experts.180.w2", "model.layers.51.block_sparse_moe.experts.181.w2", "model.layers.51.block_sparse_moe.experts.182.w2", "model.layers.51.block_sparse_moe.experts.183.w2", "model.layers.51.block_sparse_moe.experts.184.w2", "model.layers.51.block_sparse_moe.experts.185.w2", "model.layers.51.block_sparse_moe.experts.186.w2", "model.layers.51.block_sparse_moe.experts.187.w2", "model.layers.51.block_sparse_moe.experts.188.w2", "model.layers.51.block_sparse_moe.experts.189.w2", "model.layers.51.block_sparse_moe.experts.190.w2", "model.layers.51.block_sparse_moe.experts.191.w2", "model.layers.51.block_sparse_moe.experts.192.w2", "model.layers.51.block_sparse_moe.experts.193.w2", "model.layers.51.block_sparse_moe.experts.194.w2", "model.layers.51.block_sparse_moe.experts.195.w2", "model.layers.51.block_sparse_moe.experts.196.w2", "model.layers.51.block_sparse_moe.experts.197.w2", "model.layers.51.block_sparse_moe.experts.198.w2", "model.layers.51.block_sparse_moe.experts.199.w2", "model.layers.51.block_sparse_moe.experts.200.w2", "model.layers.51.block_sparse_moe.experts.201.w2", "model.layers.51.block_sparse_moe.experts.202.w2", "model.layers.51.block_sparse_moe.experts.203.w2", "model.layers.51.block_sparse_moe.experts.204.w2", "model.layers.51.block_sparse_moe.experts.205.w2", "model.layers.51.block_sparse_moe.experts.206.w2", "model.layers.51.block_sparse_moe.experts.207.w2", "model.layers.51.block_sparse_moe.experts.208.w2", "model.layers.51.block_sparse_moe.experts.209.w2", "model.layers.51.block_sparse_moe.experts.210.w2", "model.layers.51.block_sparse_moe.experts.211.w2", "model.layers.51.block_sparse_moe.experts.212.w2", "model.layers.51.block_sparse_moe.experts.213.w2", "model.layers.51.block_sparse_moe.experts.214.w2", "model.layers.51.block_sparse_moe.experts.215.w2", "model.layers.51.block_sparse_moe.experts.216.w2", "model.layers.51.block_sparse_moe.experts.217.w2", "model.layers.51.block_sparse_moe.experts.218.w2", "model.layers.51.block_sparse_moe.experts.219.w2", "model.layers.51.block_sparse_moe.experts.220.w2", "model.layers.51.block_sparse_moe.experts.221.w2", "model.layers.51.block_sparse_moe.experts.222.w2", "model.layers.51.block_sparse_moe.experts.223.w2", "model.layers.51.block_sparse_moe.experts.224.w2", "model.layers.51.block_sparse_moe.experts.225.w2", "model.layers.51.block_sparse_moe.experts.226.w2", "model.layers.51.block_sparse_moe.experts.227.w2", "model.layers.51.block_sparse_moe.experts.228.w2", "model.layers.51.block_sparse_moe.experts.229.w2", "model.layers.51.block_sparse_moe.experts.230.w2", "model.layers.51.block_sparse_moe.experts.231.w2", "model.layers.51.block_sparse_moe.experts.232.w2", "model.layers.51.block_sparse_moe.experts.233.w2", "model.layers.51.block_sparse_moe.experts.234.w2", "model.layers.51.block_sparse_moe.experts.235.w2", "model.layers.51.block_sparse_moe.experts.236.w2", "model.layers.51.block_sparse_moe.experts.237.w2", "model.layers.51.block_sparse_moe.experts.238.w2", "model.layers.51.block_sparse_moe.experts.239.w2", "model.layers.51.block_sparse_moe.experts.240.w2", "model.layers.51.block_sparse_moe.experts.241.w2", "model.layers.51.block_sparse_moe.experts.242.w2", "model.layers.51.block_sparse_moe.experts.243.w2", "model.layers.51.block_sparse_moe.experts.244.w2", "model.layers.51.block_sparse_moe.experts.245.w2", "model.layers.51.block_sparse_moe.experts.246.w2", "model.layers.51.block_sparse_moe.experts.247.w2", "model.layers.51.block_sparse_moe.experts.248.w2", "model.layers.51.block_sparse_moe.experts.249.w2", "model.layers.51.block_sparse_moe.experts.250.w2", "model.layers.51.block_sparse_moe.experts.251.w2", "model.layers.51.block_sparse_moe.experts.252.w2", "model.layers.51.block_sparse_moe.experts.253.w2", "model.layers.51.block_sparse_moe.experts.254.w2", "model.layers.51.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 2.6896223425859622e-05, "dbits": 1207959552 } ] }, { "idx": 260, "layers": [ "model.layers.52.self_attn.q_proj" ], "candidates": [ { "dkld": 4.4187344610688295e-05, "dbits": 18874368 } ] }, { "idx": 261, "layers": [ "model.layers.52.self_attn.k_proj", "model.layers.52.self_attn.v_proj" ], "candidates": [ { "dkld": -0.001047939248383048, "dbits": 6291456 } ] }, { "idx": 262, "layers": [ "model.layers.52.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0009757934138178853, "dbits": 18874368 } ] }, { "idx": 263, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w1", "model.layers.52.block_sparse_moe.experts.1.w1", "model.layers.52.block_sparse_moe.experts.2.w1", "model.layers.52.block_sparse_moe.experts.3.w1", "model.layers.52.block_sparse_moe.experts.4.w1", "model.layers.52.block_sparse_moe.experts.5.w1", "model.layers.52.block_sparse_moe.experts.6.w1", "model.layers.52.block_sparse_moe.experts.7.w1", "model.layers.52.block_sparse_moe.experts.8.w1", "model.layers.52.block_sparse_moe.experts.9.w1", "model.layers.52.block_sparse_moe.experts.10.w1", "model.layers.52.block_sparse_moe.experts.11.w1", "model.layers.52.block_sparse_moe.experts.12.w1", "model.layers.52.block_sparse_moe.experts.13.w1", "model.layers.52.block_sparse_moe.experts.14.w1", "model.layers.52.block_sparse_moe.experts.15.w1", "model.layers.52.block_sparse_moe.experts.16.w1", "model.layers.52.block_sparse_moe.experts.17.w1", "model.layers.52.block_sparse_moe.experts.18.w1", "model.layers.52.block_sparse_moe.experts.19.w1", "model.layers.52.block_sparse_moe.experts.20.w1", "model.layers.52.block_sparse_moe.experts.21.w1", "model.layers.52.block_sparse_moe.experts.22.w1", "model.layers.52.block_sparse_moe.experts.23.w1", "model.layers.52.block_sparse_moe.experts.24.w1", "model.layers.52.block_sparse_moe.experts.25.w1", "model.layers.52.block_sparse_moe.experts.26.w1", "model.layers.52.block_sparse_moe.experts.27.w1", "model.layers.52.block_sparse_moe.experts.28.w1", "model.layers.52.block_sparse_moe.experts.29.w1", "model.layers.52.block_sparse_moe.experts.30.w1", "model.layers.52.block_sparse_moe.experts.31.w1", "model.layers.52.block_sparse_moe.experts.32.w1", "model.layers.52.block_sparse_moe.experts.33.w1", "model.layers.52.block_sparse_moe.experts.34.w1", "model.layers.52.block_sparse_moe.experts.35.w1", "model.layers.52.block_sparse_moe.experts.36.w1", "model.layers.52.block_sparse_moe.experts.37.w1", "model.layers.52.block_sparse_moe.experts.38.w1", "model.layers.52.block_sparse_moe.experts.39.w1", "model.layers.52.block_sparse_moe.experts.40.w1", "model.layers.52.block_sparse_moe.experts.41.w1", "model.layers.52.block_sparse_moe.experts.42.w1", "model.layers.52.block_sparse_moe.experts.43.w1", "model.layers.52.block_sparse_moe.experts.44.w1", "model.layers.52.block_sparse_moe.experts.45.w1", "model.layers.52.block_sparse_moe.experts.46.w1", "model.layers.52.block_sparse_moe.experts.47.w1", "model.layers.52.block_sparse_moe.experts.48.w1", "model.layers.52.block_sparse_moe.experts.49.w1", "model.layers.52.block_sparse_moe.experts.50.w1", "model.layers.52.block_sparse_moe.experts.51.w1", "model.layers.52.block_sparse_moe.experts.52.w1", "model.layers.52.block_sparse_moe.experts.53.w1", "model.layers.52.block_sparse_moe.experts.54.w1", "model.layers.52.block_sparse_moe.experts.55.w1", "model.layers.52.block_sparse_moe.experts.56.w1", "model.layers.52.block_sparse_moe.experts.57.w1", "model.layers.52.block_sparse_moe.experts.58.w1", "model.layers.52.block_sparse_moe.experts.59.w1", "model.layers.52.block_sparse_moe.experts.60.w1", "model.layers.52.block_sparse_moe.experts.61.w1", "model.layers.52.block_sparse_moe.experts.62.w1", "model.layers.52.block_sparse_moe.experts.63.w1", "model.layers.52.block_sparse_moe.experts.64.w1", "model.layers.52.block_sparse_moe.experts.65.w1", "model.layers.52.block_sparse_moe.experts.66.w1", "model.layers.52.block_sparse_moe.experts.67.w1", "model.layers.52.block_sparse_moe.experts.68.w1", "model.layers.52.block_sparse_moe.experts.69.w1", "model.layers.52.block_sparse_moe.experts.70.w1", "model.layers.52.block_sparse_moe.experts.71.w1", "model.layers.52.block_sparse_moe.experts.72.w1", "model.layers.52.block_sparse_moe.experts.73.w1", "model.layers.52.block_sparse_moe.experts.74.w1", "model.layers.52.block_sparse_moe.experts.75.w1", "model.layers.52.block_sparse_moe.experts.76.w1", "model.layers.52.block_sparse_moe.experts.77.w1", "model.layers.52.block_sparse_moe.experts.78.w1", "model.layers.52.block_sparse_moe.experts.79.w1", "model.layers.52.block_sparse_moe.experts.80.w1", "model.layers.52.block_sparse_moe.experts.81.w1", "model.layers.52.block_sparse_moe.experts.82.w1", "model.layers.52.block_sparse_moe.experts.83.w1", "model.layers.52.block_sparse_moe.experts.84.w1", "model.layers.52.block_sparse_moe.experts.85.w1", "model.layers.52.block_sparse_moe.experts.86.w1", "model.layers.52.block_sparse_moe.experts.87.w1", "model.layers.52.block_sparse_moe.experts.88.w1", "model.layers.52.block_sparse_moe.experts.89.w1", "model.layers.52.block_sparse_moe.experts.90.w1", "model.layers.52.block_sparse_moe.experts.91.w1", "model.layers.52.block_sparse_moe.experts.92.w1", "model.layers.52.block_sparse_moe.experts.93.w1", "model.layers.52.block_sparse_moe.experts.94.w1", "model.layers.52.block_sparse_moe.experts.95.w1", "model.layers.52.block_sparse_moe.experts.96.w1", "model.layers.52.block_sparse_moe.experts.97.w1", "model.layers.52.block_sparse_moe.experts.98.w1", "model.layers.52.block_sparse_moe.experts.99.w1", "model.layers.52.block_sparse_moe.experts.100.w1", "model.layers.52.block_sparse_moe.experts.101.w1", "model.layers.52.block_sparse_moe.experts.102.w1", "model.layers.52.block_sparse_moe.experts.103.w1", "model.layers.52.block_sparse_moe.experts.104.w1", "model.layers.52.block_sparse_moe.experts.105.w1", "model.layers.52.block_sparse_moe.experts.106.w1", "model.layers.52.block_sparse_moe.experts.107.w1", "model.layers.52.block_sparse_moe.experts.108.w1", "model.layers.52.block_sparse_moe.experts.109.w1", "model.layers.52.block_sparse_moe.experts.110.w1", "model.layers.52.block_sparse_moe.experts.111.w1", "model.layers.52.block_sparse_moe.experts.112.w1", "model.layers.52.block_sparse_moe.experts.113.w1", "model.layers.52.block_sparse_moe.experts.114.w1", "model.layers.52.block_sparse_moe.experts.115.w1", "model.layers.52.block_sparse_moe.experts.116.w1", "model.layers.52.block_sparse_moe.experts.117.w1", "model.layers.52.block_sparse_moe.experts.118.w1", "model.layers.52.block_sparse_moe.experts.119.w1", "model.layers.52.block_sparse_moe.experts.120.w1", "model.layers.52.block_sparse_moe.experts.121.w1", "model.layers.52.block_sparse_moe.experts.122.w1", "model.layers.52.block_sparse_moe.experts.123.w1", "model.layers.52.block_sparse_moe.experts.124.w1", "model.layers.52.block_sparse_moe.experts.125.w1", "model.layers.52.block_sparse_moe.experts.126.w1", "model.layers.52.block_sparse_moe.experts.127.w1", "model.layers.52.block_sparse_moe.experts.128.w1", "model.layers.52.block_sparse_moe.experts.129.w1", "model.layers.52.block_sparse_moe.experts.130.w1", "model.layers.52.block_sparse_moe.experts.131.w1", "model.layers.52.block_sparse_moe.experts.132.w1", "model.layers.52.block_sparse_moe.experts.133.w1", "model.layers.52.block_sparse_moe.experts.134.w1", "model.layers.52.block_sparse_moe.experts.135.w1", "model.layers.52.block_sparse_moe.experts.136.w1", "model.layers.52.block_sparse_moe.experts.137.w1", "model.layers.52.block_sparse_moe.experts.138.w1", "model.layers.52.block_sparse_moe.experts.139.w1", "model.layers.52.block_sparse_moe.experts.140.w1", "model.layers.52.block_sparse_moe.experts.141.w1", "model.layers.52.block_sparse_moe.experts.142.w1", "model.layers.52.block_sparse_moe.experts.143.w1", "model.layers.52.block_sparse_moe.experts.144.w1", "model.layers.52.block_sparse_moe.experts.145.w1", "model.layers.52.block_sparse_moe.experts.146.w1", "model.layers.52.block_sparse_moe.experts.147.w1", "model.layers.52.block_sparse_moe.experts.148.w1", "model.layers.52.block_sparse_moe.experts.149.w1", "model.layers.52.block_sparse_moe.experts.150.w1", "model.layers.52.block_sparse_moe.experts.151.w1", "model.layers.52.block_sparse_moe.experts.152.w1", "model.layers.52.block_sparse_moe.experts.153.w1", "model.layers.52.block_sparse_moe.experts.154.w1", "model.layers.52.block_sparse_moe.experts.155.w1", "model.layers.52.block_sparse_moe.experts.156.w1", "model.layers.52.block_sparse_moe.experts.157.w1", "model.layers.52.block_sparse_moe.experts.158.w1", "model.layers.52.block_sparse_moe.experts.159.w1", "model.layers.52.block_sparse_moe.experts.160.w1", "model.layers.52.block_sparse_moe.experts.161.w1", "model.layers.52.block_sparse_moe.experts.162.w1", "model.layers.52.block_sparse_moe.experts.163.w1", "model.layers.52.block_sparse_moe.experts.164.w1", "model.layers.52.block_sparse_moe.experts.165.w1", "model.layers.52.block_sparse_moe.experts.166.w1", "model.layers.52.block_sparse_moe.experts.167.w1", "model.layers.52.block_sparse_moe.experts.168.w1", "model.layers.52.block_sparse_moe.experts.169.w1", "model.layers.52.block_sparse_moe.experts.170.w1", "model.layers.52.block_sparse_moe.experts.171.w1", "model.layers.52.block_sparse_moe.experts.172.w1", "model.layers.52.block_sparse_moe.experts.173.w1", "model.layers.52.block_sparse_moe.experts.174.w1", "model.layers.52.block_sparse_moe.experts.175.w1", "model.layers.52.block_sparse_moe.experts.176.w1", "model.layers.52.block_sparse_moe.experts.177.w1", "model.layers.52.block_sparse_moe.experts.178.w1", "model.layers.52.block_sparse_moe.experts.179.w1", "model.layers.52.block_sparse_moe.experts.180.w1", "model.layers.52.block_sparse_moe.experts.181.w1", "model.layers.52.block_sparse_moe.experts.182.w1", "model.layers.52.block_sparse_moe.experts.183.w1", "model.layers.52.block_sparse_moe.experts.184.w1", "model.layers.52.block_sparse_moe.experts.185.w1", "model.layers.52.block_sparse_moe.experts.186.w1", "model.layers.52.block_sparse_moe.experts.187.w1", "model.layers.52.block_sparse_moe.experts.188.w1", "model.layers.52.block_sparse_moe.experts.189.w1", "model.layers.52.block_sparse_moe.experts.190.w1", "model.layers.52.block_sparse_moe.experts.191.w1", "model.layers.52.block_sparse_moe.experts.192.w1", "model.layers.52.block_sparse_moe.experts.193.w1", "model.layers.52.block_sparse_moe.experts.194.w1", "model.layers.52.block_sparse_moe.experts.195.w1", "model.layers.52.block_sparse_moe.experts.196.w1", "model.layers.52.block_sparse_moe.experts.197.w1", "model.layers.52.block_sparse_moe.experts.198.w1", "model.layers.52.block_sparse_moe.experts.199.w1", "model.layers.52.block_sparse_moe.experts.200.w1", "model.layers.52.block_sparse_moe.experts.201.w1", "model.layers.52.block_sparse_moe.experts.202.w1", "model.layers.52.block_sparse_moe.experts.203.w1", "model.layers.52.block_sparse_moe.experts.204.w1", "model.layers.52.block_sparse_moe.experts.205.w1", "model.layers.52.block_sparse_moe.experts.206.w1", "model.layers.52.block_sparse_moe.experts.207.w1", "model.layers.52.block_sparse_moe.experts.208.w1", "model.layers.52.block_sparse_moe.experts.209.w1", "model.layers.52.block_sparse_moe.experts.210.w1", "model.layers.52.block_sparse_moe.experts.211.w1", "model.layers.52.block_sparse_moe.experts.212.w1", "model.layers.52.block_sparse_moe.experts.213.w1", "model.layers.52.block_sparse_moe.experts.214.w1", "model.layers.52.block_sparse_moe.experts.215.w1", "model.layers.52.block_sparse_moe.experts.216.w1", "model.layers.52.block_sparse_moe.experts.217.w1", "model.layers.52.block_sparse_moe.experts.218.w1", "model.layers.52.block_sparse_moe.experts.219.w1", "model.layers.52.block_sparse_moe.experts.220.w1", "model.layers.52.block_sparse_moe.experts.221.w1", "model.layers.52.block_sparse_moe.experts.222.w1", "model.layers.52.block_sparse_moe.experts.223.w1", "model.layers.52.block_sparse_moe.experts.224.w1", "model.layers.52.block_sparse_moe.experts.225.w1", "model.layers.52.block_sparse_moe.experts.226.w1", "model.layers.52.block_sparse_moe.experts.227.w1", "model.layers.52.block_sparse_moe.experts.228.w1", "model.layers.52.block_sparse_moe.experts.229.w1", "model.layers.52.block_sparse_moe.experts.230.w1", "model.layers.52.block_sparse_moe.experts.231.w1", "model.layers.52.block_sparse_moe.experts.232.w1", "model.layers.52.block_sparse_moe.experts.233.w1", "model.layers.52.block_sparse_moe.experts.234.w1", "model.layers.52.block_sparse_moe.experts.235.w1", "model.layers.52.block_sparse_moe.experts.236.w1", "model.layers.52.block_sparse_moe.experts.237.w1", "model.layers.52.block_sparse_moe.experts.238.w1", "model.layers.52.block_sparse_moe.experts.239.w1", "model.layers.52.block_sparse_moe.experts.240.w1", "model.layers.52.block_sparse_moe.experts.241.w1", "model.layers.52.block_sparse_moe.experts.242.w1", "model.layers.52.block_sparse_moe.experts.243.w1", "model.layers.52.block_sparse_moe.experts.244.w1", "model.layers.52.block_sparse_moe.experts.245.w1", "model.layers.52.block_sparse_moe.experts.246.w1", "model.layers.52.block_sparse_moe.experts.247.w1", "model.layers.52.block_sparse_moe.experts.248.w1", "model.layers.52.block_sparse_moe.experts.249.w1", "model.layers.52.block_sparse_moe.experts.250.w1", "model.layers.52.block_sparse_moe.experts.251.w1", "model.layers.52.block_sparse_moe.experts.252.w1", "model.layers.52.block_sparse_moe.experts.253.w1", "model.layers.52.block_sparse_moe.experts.254.w1", "model.layers.52.block_sparse_moe.experts.255.w1", "model.layers.52.block_sparse_moe.experts.0.w3", "model.layers.52.block_sparse_moe.experts.1.w3", "model.layers.52.block_sparse_moe.experts.2.w3", "model.layers.52.block_sparse_moe.experts.3.w3", "model.layers.52.block_sparse_moe.experts.4.w3", "model.layers.52.block_sparse_moe.experts.5.w3", "model.layers.52.block_sparse_moe.experts.6.w3", "model.layers.52.block_sparse_moe.experts.7.w3", "model.layers.52.block_sparse_moe.experts.8.w3", "model.layers.52.block_sparse_moe.experts.9.w3", "model.layers.52.block_sparse_moe.experts.10.w3", "model.layers.52.block_sparse_moe.experts.11.w3", "model.layers.52.block_sparse_moe.experts.12.w3", "model.layers.52.block_sparse_moe.experts.13.w3", "model.layers.52.block_sparse_moe.experts.14.w3", "model.layers.52.block_sparse_moe.experts.15.w3", "model.layers.52.block_sparse_moe.experts.16.w3", "model.layers.52.block_sparse_moe.experts.17.w3", "model.layers.52.block_sparse_moe.experts.18.w3", "model.layers.52.block_sparse_moe.experts.19.w3", "model.layers.52.block_sparse_moe.experts.20.w3", "model.layers.52.block_sparse_moe.experts.21.w3", "model.layers.52.block_sparse_moe.experts.22.w3", "model.layers.52.block_sparse_moe.experts.23.w3", "model.layers.52.block_sparse_moe.experts.24.w3", "model.layers.52.block_sparse_moe.experts.25.w3", "model.layers.52.block_sparse_moe.experts.26.w3", "model.layers.52.block_sparse_moe.experts.27.w3", "model.layers.52.block_sparse_moe.experts.28.w3", "model.layers.52.block_sparse_moe.experts.29.w3", "model.layers.52.block_sparse_moe.experts.30.w3", "model.layers.52.block_sparse_moe.experts.31.w3", "model.layers.52.block_sparse_moe.experts.32.w3", "model.layers.52.block_sparse_moe.experts.33.w3", "model.layers.52.block_sparse_moe.experts.34.w3", "model.layers.52.block_sparse_moe.experts.35.w3", "model.layers.52.block_sparse_moe.experts.36.w3", "model.layers.52.block_sparse_moe.experts.37.w3", "model.layers.52.block_sparse_moe.experts.38.w3", "model.layers.52.block_sparse_moe.experts.39.w3", "model.layers.52.block_sparse_moe.experts.40.w3", "model.layers.52.block_sparse_moe.experts.41.w3", "model.layers.52.block_sparse_moe.experts.42.w3", "model.layers.52.block_sparse_moe.experts.43.w3", "model.layers.52.block_sparse_moe.experts.44.w3", "model.layers.52.block_sparse_moe.experts.45.w3", "model.layers.52.block_sparse_moe.experts.46.w3", "model.layers.52.block_sparse_moe.experts.47.w3", "model.layers.52.block_sparse_moe.experts.48.w3", "model.layers.52.block_sparse_moe.experts.49.w3", "model.layers.52.block_sparse_moe.experts.50.w3", "model.layers.52.block_sparse_moe.experts.51.w3", "model.layers.52.block_sparse_moe.experts.52.w3", "model.layers.52.block_sparse_moe.experts.53.w3", "model.layers.52.block_sparse_moe.experts.54.w3", "model.layers.52.block_sparse_moe.experts.55.w3", "model.layers.52.block_sparse_moe.experts.56.w3", "model.layers.52.block_sparse_moe.experts.57.w3", "model.layers.52.block_sparse_moe.experts.58.w3", "model.layers.52.block_sparse_moe.experts.59.w3", "model.layers.52.block_sparse_moe.experts.60.w3", "model.layers.52.block_sparse_moe.experts.61.w3", "model.layers.52.block_sparse_moe.experts.62.w3", "model.layers.52.block_sparse_moe.experts.63.w3", "model.layers.52.block_sparse_moe.experts.64.w3", "model.layers.52.block_sparse_moe.experts.65.w3", "model.layers.52.block_sparse_moe.experts.66.w3", "model.layers.52.block_sparse_moe.experts.67.w3", "model.layers.52.block_sparse_moe.experts.68.w3", "model.layers.52.block_sparse_moe.experts.69.w3", "model.layers.52.block_sparse_moe.experts.70.w3", "model.layers.52.block_sparse_moe.experts.71.w3", "model.layers.52.block_sparse_moe.experts.72.w3", "model.layers.52.block_sparse_moe.experts.73.w3", "model.layers.52.block_sparse_moe.experts.74.w3", "model.layers.52.block_sparse_moe.experts.75.w3", "model.layers.52.block_sparse_moe.experts.76.w3", "model.layers.52.block_sparse_moe.experts.77.w3", "model.layers.52.block_sparse_moe.experts.78.w3", "model.layers.52.block_sparse_moe.experts.79.w3", "model.layers.52.block_sparse_moe.experts.80.w3", "model.layers.52.block_sparse_moe.experts.81.w3", "model.layers.52.block_sparse_moe.experts.82.w3", "model.layers.52.block_sparse_moe.experts.83.w3", "model.layers.52.block_sparse_moe.experts.84.w3", "model.layers.52.block_sparse_moe.experts.85.w3", "model.layers.52.block_sparse_moe.experts.86.w3", "model.layers.52.block_sparse_moe.experts.87.w3", "model.layers.52.block_sparse_moe.experts.88.w3", "model.layers.52.block_sparse_moe.experts.89.w3", "model.layers.52.block_sparse_moe.experts.90.w3", "model.layers.52.block_sparse_moe.experts.91.w3", "model.layers.52.block_sparse_moe.experts.92.w3", "model.layers.52.block_sparse_moe.experts.93.w3", "model.layers.52.block_sparse_moe.experts.94.w3", "model.layers.52.block_sparse_moe.experts.95.w3", "model.layers.52.block_sparse_moe.experts.96.w3", "model.layers.52.block_sparse_moe.experts.97.w3", "model.layers.52.block_sparse_moe.experts.98.w3", "model.layers.52.block_sparse_moe.experts.99.w3", "model.layers.52.block_sparse_moe.experts.100.w3", "model.layers.52.block_sparse_moe.experts.101.w3", "model.layers.52.block_sparse_moe.experts.102.w3", "model.layers.52.block_sparse_moe.experts.103.w3", "model.layers.52.block_sparse_moe.experts.104.w3", "model.layers.52.block_sparse_moe.experts.105.w3", "model.layers.52.block_sparse_moe.experts.106.w3", "model.layers.52.block_sparse_moe.experts.107.w3", "model.layers.52.block_sparse_moe.experts.108.w3", "model.layers.52.block_sparse_moe.experts.109.w3", "model.layers.52.block_sparse_moe.experts.110.w3", "model.layers.52.block_sparse_moe.experts.111.w3", "model.layers.52.block_sparse_moe.experts.112.w3", "model.layers.52.block_sparse_moe.experts.113.w3", "model.layers.52.block_sparse_moe.experts.114.w3", "model.layers.52.block_sparse_moe.experts.115.w3", "model.layers.52.block_sparse_moe.experts.116.w3", "model.layers.52.block_sparse_moe.experts.117.w3", "model.layers.52.block_sparse_moe.experts.118.w3", "model.layers.52.block_sparse_moe.experts.119.w3", "model.layers.52.block_sparse_moe.experts.120.w3", "model.layers.52.block_sparse_moe.experts.121.w3", "model.layers.52.block_sparse_moe.experts.122.w3", "model.layers.52.block_sparse_moe.experts.123.w3", "model.layers.52.block_sparse_moe.experts.124.w3", "model.layers.52.block_sparse_moe.experts.125.w3", "model.layers.52.block_sparse_moe.experts.126.w3", "model.layers.52.block_sparse_moe.experts.127.w3", "model.layers.52.block_sparse_moe.experts.128.w3", "model.layers.52.block_sparse_moe.experts.129.w3", "model.layers.52.block_sparse_moe.experts.130.w3", "model.layers.52.block_sparse_moe.experts.131.w3", "model.layers.52.block_sparse_moe.experts.132.w3", "model.layers.52.block_sparse_moe.experts.133.w3", "model.layers.52.block_sparse_moe.experts.134.w3", "model.layers.52.block_sparse_moe.experts.135.w3", "model.layers.52.block_sparse_moe.experts.136.w3", "model.layers.52.block_sparse_moe.experts.137.w3", "model.layers.52.block_sparse_moe.experts.138.w3", "model.layers.52.block_sparse_moe.experts.139.w3", "model.layers.52.block_sparse_moe.experts.140.w3", "model.layers.52.block_sparse_moe.experts.141.w3", "model.layers.52.block_sparse_moe.experts.142.w3", "model.layers.52.block_sparse_moe.experts.143.w3", "model.layers.52.block_sparse_moe.experts.144.w3", "model.layers.52.block_sparse_moe.experts.145.w3", "model.layers.52.block_sparse_moe.experts.146.w3", "model.layers.52.block_sparse_moe.experts.147.w3", "model.layers.52.block_sparse_moe.experts.148.w3", "model.layers.52.block_sparse_moe.experts.149.w3", "model.layers.52.block_sparse_moe.experts.150.w3", "model.layers.52.block_sparse_moe.experts.151.w3", "model.layers.52.block_sparse_moe.experts.152.w3", "model.layers.52.block_sparse_moe.experts.153.w3", "model.layers.52.block_sparse_moe.experts.154.w3", "model.layers.52.block_sparse_moe.experts.155.w3", "model.layers.52.block_sparse_moe.experts.156.w3", "model.layers.52.block_sparse_moe.experts.157.w3", "model.layers.52.block_sparse_moe.experts.158.w3", "model.layers.52.block_sparse_moe.experts.159.w3", "model.layers.52.block_sparse_moe.experts.160.w3", "model.layers.52.block_sparse_moe.experts.161.w3", "model.layers.52.block_sparse_moe.experts.162.w3", "model.layers.52.block_sparse_moe.experts.163.w3", "model.layers.52.block_sparse_moe.experts.164.w3", "model.layers.52.block_sparse_moe.experts.165.w3", "model.layers.52.block_sparse_moe.experts.166.w3", "model.layers.52.block_sparse_moe.experts.167.w3", "model.layers.52.block_sparse_moe.experts.168.w3", "model.layers.52.block_sparse_moe.experts.169.w3", "model.layers.52.block_sparse_moe.experts.170.w3", "model.layers.52.block_sparse_moe.experts.171.w3", "model.layers.52.block_sparse_moe.experts.172.w3", "model.layers.52.block_sparse_moe.experts.173.w3", "model.layers.52.block_sparse_moe.experts.174.w3", "model.layers.52.block_sparse_moe.experts.175.w3", "model.layers.52.block_sparse_moe.experts.176.w3", "model.layers.52.block_sparse_moe.experts.177.w3", "model.layers.52.block_sparse_moe.experts.178.w3", "model.layers.52.block_sparse_moe.experts.179.w3", "model.layers.52.block_sparse_moe.experts.180.w3", "model.layers.52.block_sparse_moe.experts.181.w3", "model.layers.52.block_sparse_moe.experts.182.w3", "model.layers.52.block_sparse_moe.experts.183.w3", "model.layers.52.block_sparse_moe.experts.184.w3", "model.layers.52.block_sparse_moe.experts.185.w3", "model.layers.52.block_sparse_moe.experts.186.w3", "model.layers.52.block_sparse_moe.experts.187.w3", "model.layers.52.block_sparse_moe.experts.188.w3", "model.layers.52.block_sparse_moe.experts.189.w3", "model.layers.52.block_sparse_moe.experts.190.w3", "model.layers.52.block_sparse_moe.experts.191.w3", "model.layers.52.block_sparse_moe.experts.192.w3", "model.layers.52.block_sparse_moe.experts.193.w3", "model.layers.52.block_sparse_moe.experts.194.w3", "model.layers.52.block_sparse_moe.experts.195.w3", "model.layers.52.block_sparse_moe.experts.196.w3", "model.layers.52.block_sparse_moe.experts.197.w3", "model.layers.52.block_sparse_moe.experts.198.w3", "model.layers.52.block_sparse_moe.experts.199.w3", "model.layers.52.block_sparse_moe.experts.200.w3", "model.layers.52.block_sparse_moe.experts.201.w3", "model.layers.52.block_sparse_moe.experts.202.w3", "model.layers.52.block_sparse_moe.experts.203.w3", "model.layers.52.block_sparse_moe.experts.204.w3", "model.layers.52.block_sparse_moe.experts.205.w3", "model.layers.52.block_sparse_moe.experts.206.w3", "model.layers.52.block_sparse_moe.experts.207.w3", "model.layers.52.block_sparse_moe.experts.208.w3", "model.layers.52.block_sparse_moe.experts.209.w3", "model.layers.52.block_sparse_moe.experts.210.w3", "model.layers.52.block_sparse_moe.experts.211.w3", "model.layers.52.block_sparse_moe.experts.212.w3", "model.layers.52.block_sparse_moe.experts.213.w3", "model.layers.52.block_sparse_moe.experts.214.w3", "model.layers.52.block_sparse_moe.experts.215.w3", "model.layers.52.block_sparse_moe.experts.216.w3", "model.layers.52.block_sparse_moe.experts.217.w3", "model.layers.52.block_sparse_moe.experts.218.w3", "model.layers.52.block_sparse_moe.experts.219.w3", "model.layers.52.block_sparse_moe.experts.220.w3", "model.layers.52.block_sparse_moe.experts.221.w3", "model.layers.52.block_sparse_moe.experts.222.w3", "model.layers.52.block_sparse_moe.experts.223.w3", "model.layers.52.block_sparse_moe.experts.224.w3", "model.layers.52.block_sparse_moe.experts.225.w3", "model.layers.52.block_sparse_moe.experts.226.w3", "model.layers.52.block_sparse_moe.experts.227.w3", "model.layers.52.block_sparse_moe.experts.228.w3", "model.layers.52.block_sparse_moe.experts.229.w3", "model.layers.52.block_sparse_moe.experts.230.w3", "model.layers.52.block_sparse_moe.experts.231.w3", "model.layers.52.block_sparse_moe.experts.232.w3", "model.layers.52.block_sparse_moe.experts.233.w3", "model.layers.52.block_sparse_moe.experts.234.w3", "model.layers.52.block_sparse_moe.experts.235.w3", "model.layers.52.block_sparse_moe.experts.236.w3", "model.layers.52.block_sparse_moe.experts.237.w3", "model.layers.52.block_sparse_moe.experts.238.w3", "model.layers.52.block_sparse_moe.experts.239.w3", "model.layers.52.block_sparse_moe.experts.240.w3", "model.layers.52.block_sparse_moe.experts.241.w3", "model.layers.52.block_sparse_moe.experts.242.w3", "model.layers.52.block_sparse_moe.experts.243.w3", "model.layers.52.block_sparse_moe.experts.244.w3", "model.layers.52.block_sparse_moe.experts.245.w3", "model.layers.52.block_sparse_moe.experts.246.w3", "model.layers.52.block_sparse_moe.experts.247.w3", "model.layers.52.block_sparse_moe.experts.248.w3", "model.layers.52.block_sparse_moe.experts.249.w3", "model.layers.52.block_sparse_moe.experts.250.w3", "model.layers.52.block_sparse_moe.experts.251.w3", "model.layers.52.block_sparse_moe.experts.252.w3", "model.layers.52.block_sparse_moe.experts.253.w3", "model.layers.52.block_sparse_moe.experts.254.w3", "model.layers.52.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -6.765220314264297e-05, "dbits": 2415919104 } ] }, { "idx": 264, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w2", "model.layers.52.block_sparse_moe.experts.1.w2", "model.layers.52.block_sparse_moe.experts.2.w2", "model.layers.52.block_sparse_moe.experts.3.w2", "model.layers.52.block_sparse_moe.experts.4.w2", "model.layers.52.block_sparse_moe.experts.5.w2", "model.layers.52.block_sparse_moe.experts.6.w2", "model.layers.52.block_sparse_moe.experts.7.w2", "model.layers.52.block_sparse_moe.experts.8.w2", "model.layers.52.block_sparse_moe.experts.9.w2", "model.layers.52.block_sparse_moe.experts.10.w2", "model.layers.52.block_sparse_moe.experts.11.w2", "model.layers.52.block_sparse_moe.experts.12.w2", "model.layers.52.block_sparse_moe.experts.13.w2", "model.layers.52.block_sparse_moe.experts.14.w2", "model.layers.52.block_sparse_moe.experts.15.w2", "model.layers.52.block_sparse_moe.experts.16.w2", "model.layers.52.block_sparse_moe.experts.17.w2", "model.layers.52.block_sparse_moe.experts.18.w2", "model.layers.52.block_sparse_moe.experts.19.w2", "model.layers.52.block_sparse_moe.experts.20.w2", "model.layers.52.block_sparse_moe.experts.21.w2", "model.layers.52.block_sparse_moe.experts.22.w2", "model.layers.52.block_sparse_moe.experts.23.w2", "model.layers.52.block_sparse_moe.experts.24.w2", "model.layers.52.block_sparse_moe.experts.25.w2", "model.layers.52.block_sparse_moe.experts.26.w2", "model.layers.52.block_sparse_moe.experts.27.w2", "model.layers.52.block_sparse_moe.experts.28.w2", "model.layers.52.block_sparse_moe.experts.29.w2", "model.layers.52.block_sparse_moe.experts.30.w2", "model.layers.52.block_sparse_moe.experts.31.w2", "model.layers.52.block_sparse_moe.experts.32.w2", "model.layers.52.block_sparse_moe.experts.33.w2", "model.layers.52.block_sparse_moe.experts.34.w2", "model.layers.52.block_sparse_moe.experts.35.w2", "model.layers.52.block_sparse_moe.experts.36.w2", "model.layers.52.block_sparse_moe.experts.37.w2", "model.layers.52.block_sparse_moe.experts.38.w2", "model.layers.52.block_sparse_moe.experts.39.w2", "model.layers.52.block_sparse_moe.experts.40.w2", "model.layers.52.block_sparse_moe.experts.41.w2", "model.layers.52.block_sparse_moe.experts.42.w2", "model.layers.52.block_sparse_moe.experts.43.w2", "model.layers.52.block_sparse_moe.experts.44.w2", "model.layers.52.block_sparse_moe.experts.45.w2", "model.layers.52.block_sparse_moe.experts.46.w2", "model.layers.52.block_sparse_moe.experts.47.w2", "model.layers.52.block_sparse_moe.experts.48.w2", "model.layers.52.block_sparse_moe.experts.49.w2", "model.layers.52.block_sparse_moe.experts.50.w2", "model.layers.52.block_sparse_moe.experts.51.w2", "model.layers.52.block_sparse_moe.experts.52.w2", "model.layers.52.block_sparse_moe.experts.53.w2", "model.layers.52.block_sparse_moe.experts.54.w2", "model.layers.52.block_sparse_moe.experts.55.w2", "model.layers.52.block_sparse_moe.experts.56.w2", "model.layers.52.block_sparse_moe.experts.57.w2", "model.layers.52.block_sparse_moe.experts.58.w2", "model.layers.52.block_sparse_moe.experts.59.w2", "model.layers.52.block_sparse_moe.experts.60.w2", "model.layers.52.block_sparse_moe.experts.61.w2", "model.layers.52.block_sparse_moe.experts.62.w2", "model.layers.52.block_sparse_moe.experts.63.w2", "model.layers.52.block_sparse_moe.experts.64.w2", "model.layers.52.block_sparse_moe.experts.65.w2", "model.layers.52.block_sparse_moe.experts.66.w2", "model.layers.52.block_sparse_moe.experts.67.w2", "model.layers.52.block_sparse_moe.experts.68.w2", "model.layers.52.block_sparse_moe.experts.69.w2", "model.layers.52.block_sparse_moe.experts.70.w2", "model.layers.52.block_sparse_moe.experts.71.w2", "model.layers.52.block_sparse_moe.experts.72.w2", "model.layers.52.block_sparse_moe.experts.73.w2", "model.layers.52.block_sparse_moe.experts.74.w2", "model.layers.52.block_sparse_moe.experts.75.w2", "model.layers.52.block_sparse_moe.experts.76.w2", "model.layers.52.block_sparse_moe.experts.77.w2", "model.layers.52.block_sparse_moe.experts.78.w2", "model.layers.52.block_sparse_moe.experts.79.w2", "model.layers.52.block_sparse_moe.experts.80.w2", "model.layers.52.block_sparse_moe.experts.81.w2", "model.layers.52.block_sparse_moe.experts.82.w2", "model.layers.52.block_sparse_moe.experts.83.w2", "model.layers.52.block_sparse_moe.experts.84.w2", "model.layers.52.block_sparse_moe.experts.85.w2", "model.layers.52.block_sparse_moe.experts.86.w2", "model.layers.52.block_sparse_moe.experts.87.w2", "model.layers.52.block_sparse_moe.experts.88.w2", "model.layers.52.block_sparse_moe.experts.89.w2", "model.layers.52.block_sparse_moe.experts.90.w2", "model.layers.52.block_sparse_moe.experts.91.w2", "model.layers.52.block_sparse_moe.experts.92.w2", "model.layers.52.block_sparse_moe.experts.93.w2", "model.layers.52.block_sparse_moe.experts.94.w2", "model.layers.52.block_sparse_moe.experts.95.w2", "model.layers.52.block_sparse_moe.experts.96.w2", "model.layers.52.block_sparse_moe.experts.97.w2", "model.layers.52.block_sparse_moe.experts.98.w2", "model.layers.52.block_sparse_moe.experts.99.w2", "model.layers.52.block_sparse_moe.experts.100.w2", "model.layers.52.block_sparse_moe.experts.101.w2", "model.layers.52.block_sparse_moe.experts.102.w2", "model.layers.52.block_sparse_moe.experts.103.w2", "model.layers.52.block_sparse_moe.experts.104.w2", "model.layers.52.block_sparse_moe.experts.105.w2", "model.layers.52.block_sparse_moe.experts.106.w2", "model.layers.52.block_sparse_moe.experts.107.w2", "model.layers.52.block_sparse_moe.experts.108.w2", "model.layers.52.block_sparse_moe.experts.109.w2", "model.layers.52.block_sparse_moe.experts.110.w2", "model.layers.52.block_sparse_moe.experts.111.w2", "model.layers.52.block_sparse_moe.experts.112.w2", "model.layers.52.block_sparse_moe.experts.113.w2", "model.layers.52.block_sparse_moe.experts.114.w2", "model.layers.52.block_sparse_moe.experts.115.w2", "model.layers.52.block_sparse_moe.experts.116.w2", "model.layers.52.block_sparse_moe.experts.117.w2", "model.layers.52.block_sparse_moe.experts.118.w2", "model.layers.52.block_sparse_moe.experts.119.w2", "model.layers.52.block_sparse_moe.experts.120.w2", "model.layers.52.block_sparse_moe.experts.121.w2", "model.layers.52.block_sparse_moe.experts.122.w2", "model.layers.52.block_sparse_moe.experts.123.w2", "model.layers.52.block_sparse_moe.experts.124.w2", "model.layers.52.block_sparse_moe.experts.125.w2", "model.layers.52.block_sparse_moe.experts.126.w2", "model.layers.52.block_sparse_moe.experts.127.w2", "model.layers.52.block_sparse_moe.experts.128.w2", "model.layers.52.block_sparse_moe.experts.129.w2", "model.layers.52.block_sparse_moe.experts.130.w2", "model.layers.52.block_sparse_moe.experts.131.w2", "model.layers.52.block_sparse_moe.experts.132.w2", "model.layers.52.block_sparse_moe.experts.133.w2", "model.layers.52.block_sparse_moe.experts.134.w2", "model.layers.52.block_sparse_moe.experts.135.w2", "model.layers.52.block_sparse_moe.experts.136.w2", "model.layers.52.block_sparse_moe.experts.137.w2", "model.layers.52.block_sparse_moe.experts.138.w2", "model.layers.52.block_sparse_moe.experts.139.w2", "model.layers.52.block_sparse_moe.experts.140.w2", "model.layers.52.block_sparse_moe.experts.141.w2", "model.layers.52.block_sparse_moe.experts.142.w2", "model.layers.52.block_sparse_moe.experts.143.w2", "model.layers.52.block_sparse_moe.experts.144.w2", "model.layers.52.block_sparse_moe.experts.145.w2", "model.layers.52.block_sparse_moe.experts.146.w2", "model.layers.52.block_sparse_moe.experts.147.w2", "model.layers.52.block_sparse_moe.experts.148.w2", "model.layers.52.block_sparse_moe.experts.149.w2", "model.layers.52.block_sparse_moe.experts.150.w2", "model.layers.52.block_sparse_moe.experts.151.w2", "model.layers.52.block_sparse_moe.experts.152.w2", "model.layers.52.block_sparse_moe.experts.153.w2", "model.layers.52.block_sparse_moe.experts.154.w2", "model.layers.52.block_sparse_moe.experts.155.w2", "model.layers.52.block_sparse_moe.experts.156.w2", "model.layers.52.block_sparse_moe.experts.157.w2", "model.layers.52.block_sparse_moe.experts.158.w2", "model.layers.52.block_sparse_moe.experts.159.w2", "model.layers.52.block_sparse_moe.experts.160.w2", "model.layers.52.block_sparse_moe.experts.161.w2", "model.layers.52.block_sparse_moe.experts.162.w2", "model.layers.52.block_sparse_moe.experts.163.w2", "model.layers.52.block_sparse_moe.experts.164.w2", "model.layers.52.block_sparse_moe.experts.165.w2", "model.layers.52.block_sparse_moe.experts.166.w2", "model.layers.52.block_sparse_moe.experts.167.w2", "model.layers.52.block_sparse_moe.experts.168.w2", "model.layers.52.block_sparse_moe.experts.169.w2", "model.layers.52.block_sparse_moe.experts.170.w2", "model.layers.52.block_sparse_moe.experts.171.w2", "model.layers.52.block_sparse_moe.experts.172.w2", "model.layers.52.block_sparse_moe.experts.173.w2", "model.layers.52.block_sparse_moe.experts.174.w2", "model.layers.52.block_sparse_moe.experts.175.w2", "model.layers.52.block_sparse_moe.experts.176.w2", "model.layers.52.block_sparse_moe.experts.177.w2", "model.layers.52.block_sparse_moe.experts.178.w2", "model.layers.52.block_sparse_moe.experts.179.w2", "model.layers.52.block_sparse_moe.experts.180.w2", "model.layers.52.block_sparse_moe.experts.181.w2", "model.layers.52.block_sparse_moe.experts.182.w2", "model.layers.52.block_sparse_moe.experts.183.w2", "model.layers.52.block_sparse_moe.experts.184.w2", "model.layers.52.block_sparse_moe.experts.185.w2", "model.layers.52.block_sparse_moe.experts.186.w2", "model.layers.52.block_sparse_moe.experts.187.w2", "model.layers.52.block_sparse_moe.experts.188.w2", "model.layers.52.block_sparse_moe.experts.189.w2", "model.layers.52.block_sparse_moe.experts.190.w2", "model.layers.52.block_sparse_moe.experts.191.w2", "model.layers.52.block_sparse_moe.experts.192.w2", "model.layers.52.block_sparse_moe.experts.193.w2", "model.layers.52.block_sparse_moe.experts.194.w2", "model.layers.52.block_sparse_moe.experts.195.w2", "model.layers.52.block_sparse_moe.experts.196.w2", "model.layers.52.block_sparse_moe.experts.197.w2", "model.layers.52.block_sparse_moe.experts.198.w2", "model.layers.52.block_sparse_moe.experts.199.w2", "model.layers.52.block_sparse_moe.experts.200.w2", "model.layers.52.block_sparse_moe.experts.201.w2", "model.layers.52.block_sparse_moe.experts.202.w2", "model.layers.52.block_sparse_moe.experts.203.w2", "model.layers.52.block_sparse_moe.experts.204.w2", "model.layers.52.block_sparse_moe.experts.205.w2", "model.layers.52.block_sparse_moe.experts.206.w2", "model.layers.52.block_sparse_moe.experts.207.w2", "model.layers.52.block_sparse_moe.experts.208.w2", "model.layers.52.block_sparse_moe.experts.209.w2", "model.layers.52.block_sparse_moe.experts.210.w2", "model.layers.52.block_sparse_moe.experts.211.w2", "model.layers.52.block_sparse_moe.experts.212.w2", "model.layers.52.block_sparse_moe.experts.213.w2", "model.layers.52.block_sparse_moe.experts.214.w2", "model.layers.52.block_sparse_moe.experts.215.w2", "model.layers.52.block_sparse_moe.experts.216.w2", "model.layers.52.block_sparse_moe.experts.217.w2", "model.layers.52.block_sparse_moe.experts.218.w2", "model.layers.52.block_sparse_moe.experts.219.w2", "model.layers.52.block_sparse_moe.experts.220.w2", "model.layers.52.block_sparse_moe.experts.221.w2", "model.layers.52.block_sparse_moe.experts.222.w2", "model.layers.52.block_sparse_moe.experts.223.w2", "model.layers.52.block_sparse_moe.experts.224.w2", "model.layers.52.block_sparse_moe.experts.225.w2", "model.layers.52.block_sparse_moe.experts.226.w2", "model.layers.52.block_sparse_moe.experts.227.w2", "model.layers.52.block_sparse_moe.experts.228.w2", "model.layers.52.block_sparse_moe.experts.229.w2", "model.layers.52.block_sparse_moe.experts.230.w2", "model.layers.52.block_sparse_moe.experts.231.w2", "model.layers.52.block_sparse_moe.experts.232.w2", "model.layers.52.block_sparse_moe.experts.233.w2", "model.layers.52.block_sparse_moe.experts.234.w2", "model.layers.52.block_sparse_moe.experts.235.w2", "model.layers.52.block_sparse_moe.experts.236.w2", "model.layers.52.block_sparse_moe.experts.237.w2", "model.layers.52.block_sparse_moe.experts.238.w2", "model.layers.52.block_sparse_moe.experts.239.w2", "model.layers.52.block_sparse_moe.experts.240.w2", "model.layers.52.block_sparse_moe.experts.241.w2", "model.layers.52.block_sparse_moe.experts.242.w2", "model.layers.52.block_sparse_moe.experts.243.w2", "model.layers.52.block_sparse_moe.experts.244.w2", "model.layers.52.block_sparse_moe.experts.245.w2", "model.layers.52.block_sparse_moe.experts.246.w2", "model.layers.52.block_sparse_moe.experts.247.w2", "model.layers.52.block_sparse_moe.experts.248.w2", "model.layers.52.block_sparse_moe.experts.249.w2", "model.layers.52.block_sparse_moe.experts.250.w2", "model.layers.52.block_sparse_moe.experts.251.w2", "model.layers.52.block_sparse_moe.experts.252.w2", "model.layers.52.block_sparse_moe.experts.253.w2", "model.layers.52.block_sparse_moe.experts.254.w2", "model.layers.52.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -1.5316531062209426e-06, "dbits": 1207959552 } ] }, { "idx": 265, "layers": [ "model.layers.53.self_attn.q_proj" ], "candidates": [ { "dkld": -7.073171436787484e-05, "dbits": 18874368 } ] }, { "idx": 266, "layers": [ "model.layers.53.self_attn.k_proj", "model.layers.53.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0007229927927255714, "dbits": 6291456 } ] }, { "idx": 267, "layers": [ "model.layers.53.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0008459491655230522, "dbits": 18874368 } ] }, { "idx": 268, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w1", "model.layers.53.block_sparse_moe.experts.1.w1", "model.layers.53.block_sparse_moe.experts.2.w1", "model.layers.53.block_sparse_moe.experts.3.w1", "model.layers.53.block_sparse_moe.experts.4.w1", "model.layers.53.block_sparse_moe.experts.5.w1", "model.layers.53.block_sparse_moe.experts.6.w1", "model.layers.53.block_sparse_moe.experts.7.w1", "model.layers.53.block_sparse_moe.experts.8.w1", "model.layers.53.block_sparse_moe.experts.9.w1", "model.layers.53.block_sparse_moe.experts.10.w1", "model.layers.53.block_sparse_moe.experts.11.w1", "model.layers.53.block_sparse_moe.experts.12.w1", "model.layers.53.block_sparse_moe.experts.13.w1", "model.layers.53.block_sparse_moe.experts.14.w1", "model.layers.53.block_sparse_moe.experts.15.w1", "model.layers.53.block_sparse_moe.experts.16.w1", "model.layers.53.block_sparse_moe.experts.17.w1", "model.layers.53.block_sparse_moe.experts.18.w1", "model.layers.53.block_sparse_moe.experts.19.w1", "model.layers.53.block_sparse_moe.experts.20.w1", "model.layers.53.block_sparse_moe.experts.21.w1", "model.layers.53.block_sparse_moe.experts.22.w1", "model.layers.53.block_sparse_moe.experts.23.w1", "model.layers.53.block_sparse_moe.experts.24.w1", "model.layers.53.block_sparse_moe.experts.25.w1", "model.layers.53.block_sparse_moe.experts.26.w1", "model.layers.53.block_sparse_moe.experts.27.w1", "model.layers.53.block_sparse_moe.experts.28.w1", "model.layers.53.block_sparse_moe.experts.29.w1", "model.layers.53.block_sparse_moe.experts.30.w1", "model.layers.53.block_sparse_moe.experts.31.w1", "model.layers.53.block_sparse_moe.experts.32.w1", "model.layers.53.block_sparse_moe.experts.33.w1", "model.layers.53.block_sparse_moe.experts.34.w1", "model.layers.53.block_sparse_moe.experts.35.w1", "model.layers.53.block_sparse_moe.experts.36.w1", "model.layers.53.block_sparse_moe.experts.37.w1", "model.layers.53.block_sparse_moe.experts.38.w1", "model.layers.53.block_sparse_moe.experts.39.w1", "model.layers.53.block_sparse_moe.experts.40.w1", "model.layers.53.block_sparse_moe.experts.41.w1", "model.layers.53.block_sparse_moe.experts.42.w1", "model.layers.53.block_sparse_moe.experts.43.w1", "model.layers.53.block_sparse_moe.experts.44.w1", "model.layers.53.block_sparse_moe.experts.45.w1", "model.layers.53.block_sparse_moe.experts.46.w1", "model.layers.53.block_sparse_moe.experts.47.w1", "model.layers.53.block_sparse_moe.experts.48.w1", "model.layers.53.block_sparse_moe.experts.49.w1", "model.layers.53.block_sparse_moe.experts.50.w1", "model.layers.53.block_sparse_moe.experts.51.w1", "model.layers.53.block_sparse_moe.experts.52.w1", "model.layers.53.block_sparse_moe.experts.53.w1", "model.layers.53.block_sparse_moe.experts.54.w1", "model.layers.53.block_sparse_moe.experts.55.w1", "model.layers.53.block_sparse_moe.experts.56.w1", "model.layers.53.block_sparse_moe.experts.57.w1", "model.layers.53.block_sparse_moe.experts.58.w1", "model.layers.53.block_sparse_moe.experts.59.w1", "model.layers.53.block_sparse_moe.experts.60.w1", "model.layers.53.block_sparse_moe.experts.61.w1", "model.layers.53.block_sparse_moe.experts.62.w1", "model.layers.53.block_sparse_moe.experts.63.w1", "model.layers.53.block_sparse_moe.experts.64.w1", "model.layers.53.block_sparse_moe.experts.65.w1", "model.layers.53.block_sparse_moe.experts.66.w1", "model.layers.53.block_sparse_moe.experts.67.w1", "model.layers.53.block_sparse_moe.experts.68.w1", "model.layers.53.block_sparse_moe.experts.69.w1", "model.layers.53.block_sparse_moe.experts.70.w1", "model.layers.53.block_sparse_moe.experts.71.w1", "model.layers.53.block_sparse_moe.experts.72.w1", "model.layers.53.block_sparse_moe.experts.73.w1", "model.layers.53.block_sparse_moe.experts.74.w1", "model.layers.53.block_sparse_moe.experts.75.w1", "model.layers.53.block_sparse_moe.experts.76.w1", "model.layers.53.block_sparse_moe.experts.77.w1", "model.layers.53.block_sparse_moe.experts.78.w1", "model.layers.53.block_sparse_moe.experts.79.w1", "model.layers.53.block_sparse_moe.experts.80.w1", "model.layers.53.block_sparse_moe.experts.81.w1", "model.layers.53.block_sparse_moe.experts.82.w1", "model.layers.53.block_sparse_moe.experts.83.w1", "model.layers.53.block_sparse_moe.experts.84.w1", "model.layers.53.block_sparse_moe.experts.85.w1", "model.layers.53.block_sparse_moe.experts.86.w1", "model.layers.53.block_sparse_moe.experts.87.w1", "model.layers.53.block_sparse_moe.experts.88.w1", "model.layers.53.block_sparse_moe.experts.89.w1", "model.layers.53.block_sparse_moe.experts.90.w1", "model.layers.53.block_sparse_moe.experts.91.w1", "model.layers.53.block_sparse_moe.experts.92.w1", "model.layers.53.block_sparse_moe.experts.93.w1", "model.layers.53.block_sparse_moe.experts.94.w1", "model.layers.53.block_sparse_moe.experts.95.w1", "model.layers.53.block_sparse_moe.experts.96.w1", "model.layers.53.block_sparse_moe.experts.97.w1", "model.layers.53.block_sparse_moe.experts.98.w1", "model.layers.53.block_sparse_moe.experts.99.w1", "model.layers.53.block_sparse_moe.experts.100.w1", "model.layers.53.block_sparse_moe.experts.101.w1", "model.layers.53.block_sparse_moe.experts.102.w1", "model.layers.53.block_sparse_moe.experts.103.w1", "model.layers.53.block_sparse_moe.experts.104.w1", "model.layers.53.block_sparse_moe.experts.105.w1", "model.layers.53.block_sparse_moe.experts.106.w1", "model.layers.53.block_sparse_moe.experts.107.w1", "model.layers.53.block_sparse_moe.experts.108.w1", "model.layers.53.block_sparse_moe.experts.109.w1", "model.layers.53.block_sparse_moe.experts.110.w1", "model.layers.53.block_sparse_moe.experts.111.w1", "model.layers.53.block_sparse_moe.experts.112.w1", "model.layers.53.block_sparse_moe.experts.113.w1", "model.layers.53.block_sparse_moe.experts.114.w1", "model.layers.53.block_sparse_moe.experts.115.w1", "model.layers.53.block_sparse_moe.experts.116.w1", "model.layers.53.block_sparse_moe.experts.117.w1", "model.layers.53.block_sparse_moe.experts.118.w1", "model.layers.53.block_sparse_moe.experts.119.w1", "model.layers.53.block_sparse_moe.experts.120.w1", "model.layers.53.block_sparse_moe.experts.121.w1", "model.layers.53.block_sparse_moe.experts.122.w1", "model.layers.53.block_sparse_moe.experts.123.w1", "model.layers.53.block_sparse_moe.experts.124.w1", "model.layers.53.block_sparse_moe.experts.125.w1", "model.layers.53.block_sparse_moe.experts.126.w1", "model.layers.53.block_sparse_moe.experts.127.w1", "model.layers.53.block_sparse_moe.experts.128.w1", "model.layers.53.block_sparse_moe.experts.129.w1", "model.layers.53.block_sparse_moe.experts.130.w1", "model.layers.53.block_sparse_moe.experts.131.w1", "model.layers.53.block_sparse_moe.experts.132.w1", "model.layers.53.block_sparse_moe.experts.133.w1", "model.layers.53.block_sparse_moe.experts.134.w1", "model.layers.53.block_sparse_moe.experts.135.w1", "model.layers.53.block_sparse_moe.experts.136.w1", "model.layers.53.block_sparse_moe.experts.137.w1", "model.layers.53.block_sparse_moe.experts.138.w1", "model.layers.53.block_sparse_moe.experts.139.w1", "model.layers.53.block_sparse_moe.experts.140.w1", "model.layers.53.block_sparse_moe.experts.141.w1", "model.layers.53.block_sparse_moe.experts.142.w1", "model.layers.53.block_sparse_moe.experts.143.w1", "model.layers.53.block_sparse_moe.experts.144.w1", "model.layers.53.block_sparse_moe.experts.145.w1", "model.layers.53.block_sparse_moe.experts.146.w1", "model.layers.53.block_sparse_moe.experts.147.w1", "model.layers.53.block_sparse_moe.experts.148.w1", "model.layers.53.block_sparse_moe.experts.149.w1", "model.layers.53.block_sparse_moe.experts.150.w1", "model.layers.53.block_sparse_moe.experts.151.w1", "model.layers.53.block_sparse_moe.experts.152.w1", "model.layers.53.block_sparse_moe.experts.153.w1", "model.layers.53.block_sparse_moe.experts.154.w1", "model.layers.53.block_sparse_moe.experts.155.w1", "model.layers.53.block_sparse_moe.experts.156.w1", "model.layers.53.block_sparse_moe.experts.157.w1", "model.layers.53.block_sparse_moe.experts.158.w1", "model.layers.53.block_sparse_moe.experts.159.w1", "model.layers.53.block_sparse_moe.experts.160.w1", "model.layers.53.block_sparse_moe.experts.161.w1", "model.layers.53.block_sparse_moe.experts.162.w1", "model.layers.53.block_sparse_moe.experts.163.w1", "model.layers.53.block_sparse_moe.experts.164.w1", "model.layers.53.block_sparse_moe.experts.165.w1", "model.layers.53.block_sparse_moe.experts.166.w1", "model.layers.53.block_sparse_moe.experts.167.w1", "model.layers.53.block_sparse_moe.experts.168.w1", "model.layers.53.block_sparse_moe.experts.169.w1", "model.layers.53.block_sparse_moe.experts.170.w1", "model.layers.53.block_sparse_moe.experts.171.w1", "model.layers.53.block_sparse_moe.experts.172.w1", "model.layers.53.block_sparse_moe.experts.173.w1", "model.layers.53.block_sparse_moe.experts.174.w1", "model.layers.53.block_sparse_moe.experts.175.w1", "model.layers.53.block_sparse_moe.experts.176.w1", "model.layers.53.block_sparse_moe.experts.177.w1", "model.layers.53.block_sparse_moe.experts.178.w1", "model.layers.53.block_sparse_moe.experts.179.w1", "model.layers.53.block_sparse_moe.experts.180.w1", "model.layers.53.block_sparse_moe.experts.181.w1", "model.layers.53.block_sparse_moe.experts.182.w1", "model.layers.53.block_sparse_moe.experts.183.w1", "model.layers.53.block_sparse_moe.experts.184.w1", "model.layers.53.block_sparse_moe.experts.185.w1", "model.layers.53.block_sparse_moe.experts.186.w1", "model.layers.53.block_sparse_moe.experts.187.w1", "model.layers.53.block_sparse_moe.experts.188.w1", "model.layers.53.block_sparse_moe.experts.189.w1", "model.layers.53.block_sparse_moe.experts.190.w1", "model.layers.53.block_sparse_moe.experts.191.w1", "model.layers.53.block_sparse_moe.experts.192.w1", "model.layers.53.block_sparse_moe.experts.193.w1", "model.layers.53.block_sparse_moe.experts.194.w1", "model.layers.53.block_sparse_moe.experts.195.w1", "model.layers.53.block_sparse_moe.experts.196.w1", "model.layers.53.block_sparse_moe.experts.197.w1", "model.layers.53.block_sparse_moe.experts.198.w1", "model.layers.53.block_sparse_moe.experts.199.w1", "model.layers.53.block_sparse_moe.experts.200.w1", "model.layers.53.block_sparse_moe.experts.201.w1", "model.layers.53.block_sparse_moe.experts.202.w1", "model.layers.53.block_sparse_moe.experts.203.w1", "model.layers.53.block_sparse_moe.experts.204.w1", "model.layers.53.block_sparse_moe.experts.205.w1", "model.layers.53.block_sparse_moe.experts.206.w1", "model.layers.53.block_sparse_moe.experts.207.w1", "model.layers.53.block_sparse_moe.experts.208.w1", "model.layers.53.block_sparse_moe.experts.209.w1", "model.layers.53.block_sparse_moe.experts.210.w1", "model.layers.53.block_sparse_moe.experts.211.w1", "model.layers.53.block_sparse_moe.experts.212.w1", "model.layers.53.block_sparse_moe.experts.213.w1", "model.layers.53.block_sparse_moe.experts.214.w1", "model.layers.53.block_sparse_moe.experts.215.w1", "model.layers.53.block_sparse_moe.experts.216.w1", "model.layers.53.block_sparse_moe.experts.217.w1", "model.layers.53.block_sparse_moe.experts.218.w1", "model.layers.53.block_sparse_moe.experts.219.w1", "model.layers.53.block_sparse_moe.experts.220.w1", "model.layers.53.block_sparse_moe.experts.221.w1", "model.layers.53.block_sparse_moe.experts.222.w1", "model.layers.53.block_sparse_moe.experts.223.w1", "model.layers.53.block_sparse_moe.experts.224.w1", "model.layers.53.block_sparse_moe.experts.225.w1", "model.layers.53.block_sparse_moe.experts.226.w1", "model.layers.53.block_sparse_moe.experts.227.w1", "model.layers.53.block_sparse_moe.experts.228.w1", "model.layers.53.block_sparse_moe.experts.229.w1", "model.layers.53.block_sparse_moe.experts.230.w1", "model.layers.53.block_sparse_moe.experts.231.w1", "model.layers.53.block_sparse_moe.experts.232.w1", "model.layers.53.block_sparse_moe.experts.233.w1", "model.layers.53.block_sparse_moe.experts.234.w1", "model.layers.53.block_sparse_moe.experts.235.w1", "model.layers.53.block_sparse_moe.experts.236.w1", "model.layers.53.block_sparse_moe.experts.237.w1", "model.layers.53.block_sparse_moe.experts.238.w1", "model.layers.53.block_sparse_moe.experts.239.w1", "model.layers.53.block_sparse_moe.experts.240.w1", "model.layers.53.block_sparse_moe.experts.241.w1", "model.layers.53.block_sparse_moe.experts.242.w1", "model.layers.53.block_sparse_moe.experts.243.w1", "model.layers.53.block_sparse_moe.experts.244.w1", "model.layers.53.block_sparse_moe.experts.245.w1", "model.layers.53.block_sparse_moe.experts.246.w1", "model.layers.53.block_sparse_moe.experts.247.w1", "model.layers.53.block_sparse_moe.experts.248.w1", "model.layers.53.block_sparse_moe.experts.249.w1", "model.layers.53.block_sparse_moe.experts.250.w1", "model.layers.53.block_sparse_moe.experts.251.w1", "model.layers.53.block_sparse_moe.experts.252.w1", "model.layers.53.block_sparse_moe.experts.253.w1", "model.layers.53.block_sparse_moe.experts.254.w1", "model.layers.53.block_sparse_moe.experts.255.w1", "model.layers.53.block_sparse_moe.experts.0.w3", "model.layers.53.block_sparse_moe.experts.1.w3", "model.layers.53.block_sparse_moe.experts.2.w3", "model.layers.53.block_sparse_moe.experts.3.w3", "model.layers.53.block_sparse_moe.experts.4.w3", "model.layers.53.block_sparse_moe.experts.5.w3", "model.layers.53.block_sparse_moe.experts.6.w3", "model.layers.53.block_sparse_moe.experts.7.w3", "model.layers.53.block_sparse_moe.experts.8.w3", "model.layers.53.block_sparse_moe.experts.9.w3", "model.layers.53.block_sparse_moe.experts.10.w3", "model.layers.53.block_sparse_moe.experts.11.w3", "model.layers.53.block_sparse_moe.experts.12.w3", "model.layers.53.block_sparse_moe.experts.13.w3", "model.layers.53.block_sparse_moe.experts.14.w3", "model.layers.53.block_sparse_moe.experts.15.w3", "model.layers.53.block_sparse_moe.experts.16.w3", "model.layers.53.block_sparse_moe.experts.17.w3", "model.layers.53.block_sparse_moe.experts.18.w3", "model.layers.53.block_sparse_moe.experts.19.w3", "model.layers.53.block_sparse_moe.experts.20.w3", "model.layers.53.block_sparse_moe.experts.21.w3", "model.layers.53.block_sparse_moe.experts.22.w3", "model.layers.53.block_sparse_moe.experts.23.w3", "model.layers.53.block_sparse_moe.experts.24.w3", "model.layers.53.block_sparse_moe.experts.25.w3", "model.layers.53.block_sparse_moe.experts.26.w3", "model.layers.53.block_sparse_moe.experts.27.w3", "model.layers.53.block_sparse_moe.experts.28.w3", "model.layers.53.block_sparse_moe.experts.29.w3", "model.layers.53.block_sparse_moe.experts.30.w3", "model.layers.53.block_sparse_moe.experts.31.w3", "model.layers.53.block_sparse_moe.experts.32.w3", "model.layers.53.block_sparse_moe.experts.33.w3", "model.layers.53.block_sparse_moe.experts.34.w3", "model.layers.53.block_sparse_moe.experts.35.w3", "model.layers.53.block_sparse_moe.experts.36.w3", "model.layers.53.block_sparse_moe.experts.37.w3", "model.layers.53.block_sparse_moe.experts.38.w3", "model.layers.53.block_sparse_moe.experts.39.w3", "model.layers.53.block_sparse_moe.experts.40.w3", "model.layers.53.block_sparse_moe.experts.41.w3", "model.layers.53.block_sparse_moe.experts.42.w3", "model.layers.53.block_sparse_moe.experts.43.w3", "model.layers.53.block_sparse_moe.experts.44.w3", "model.layers.53.block_sparse_moe.experts.45.w3", "model.layers.53.block_sparse_moe.experts.46.w3", "model.layers.53.block_sparse_moe.experts.47.w3", "model.layers.53.block_sparse_moe.experts.48.w3", "model.layers.53.block_sparse_moe.experts.49.w3", "model.layers.53.block_sparse_moe.experts.50.w3", "model.layers.53.block_sparse_moe.experts.51.w3", "model.layers.53.block_sparse_moe.experts.52.w3", "model.layers.53.block_sparse_moe.experts.53.w3", "model.layers.53.block_sparse_moe.experts.54.w3", "model.layers.53.block_sparse_moe.experts.55.w3", "model.layers.53.block_sparse_moe.experts.56.w3", "model.layers.53.block_sparse_moe.experts.57.w3", "model.layers.53.block_sparse_moe.experts.58.w3", "model.layers.53.block_sparse_moe.experts.59.w3", "model.layers.53.block_sparse_moe.experts.60.w3", "model.layers.53.block_sparse_moe.experts.61.w3", "model.layers.53.block_sparse_moe.experts.62.w3", "model.layers.53.block_sparse_moe.experts.63.w3", "model.layers.53.block_sparse_moe.experts.64.w3", "model.layers.53.block_sparse_moe.experts.65.w3", "model.layers.53.block_sparse_moe.experts.66.w3", "model.layers.53.block_sparse_moe.experts.67.w3", "model.layers.53.block_sparse_moe.experts.68.w3", "model.layers.53.block_sparse_moe.experts.69.w3", "model.layers.53.block_sparse_moe.experts.70.w3", "model.layers.53.block_sparse_moe.experts.71.w3", "model.layers.53.block_sparse_moe.experts.72.w3", "model.layers.53.block_sparse_moe.experts.73.w3", "model.layers.53.block_sparse_moe.experts.74.w3", "model.layers.53.block_sparse_moe.experts.75.w3", "model.layers.53.block_sparse_moe.experts.76.w3", "model.layers.53.block_sparse_moe.experts.77.w3", "model.layers.53.block_sparse_moe.experts.78.w3", "model.layers.53.block_sparse_moe.experts.79.w3", "model.layers.53.block_sparse_moe.experts.80.w3", "model.layers.53.block_sparse_moe.experts.81.w3", "model.layers.53.block_sparse_moe.experts.82.w3", "model.layers.53.block_sparse_moe.experts.83.w3", "model.layers.53.block_sparse_moe.experts.84.w3", "model.layers.53.block_sparse_moe.experts.85.w3", "model.layers.53.block_sparse_moe.experts.86.w3", "model.layers.53.block_sparse_moe.experts.87.w3", "model.layers.53.block_sparse_moe.experts.88.w3", "model.layers.53.block_sparse_moe.experts.89.w3", "model.layers.53.block_sparse_moe.experts.90.w3", "model.layers.53.block_sparse_moe.experts.91.w3", "model.layers.53.block_sparse_moe.experts.92.w3", "model.layers.53.block_sparse_moe.experts.93.w3", "model.layers.53.block_sparse_moe.experts.94.w3", "model.layers.53.block_sparse_moe.experts.95.w3", "model.layers.53.block_sparse_moe.experts.96.w3", "model.layers.53.block_sparse_moe.experts.97.w3", "model.layers.53.block_sparse_moe.experts.98.w3", "model.layers.53.block_sparse_moe.experts.99.w3", "model.layers.53.block_sparse_moe.experts.100.w3", "model.layers.53.block_sparse_moe.experts.101.w3", "model.layers.53.block_sparse_moe.experts.102.w3", "model.layers.53.block_sparse_moe.experts.103.w3", "model.layers.53.block_sparse_moe.experts.104.w3", "model.layers.53.block_sparse_moe.experts.105.w3", "model.layers.53.block_sparse_moe.experts.106.w3", "model.layers.53.block_sparse_moe.experts.107.w3", "model.layers.53.block_sparse_moe.experts.108.w3", "model.layers.53.block_sparse_moe.experts.109.w3", "model.layers.53.block_sparse_moe.experts.110.w3", "model.layers.53.block_sparse_moe.experts.111.w3", "model.layers.53.block_sparse_moe.experts.112.w3", "model.layers.53.block_sparse_moe.experts.113.w3", "model.layers.53.block_sparse_moe.experts.114.w3", "model.layers.53.block_sparse_moe.experts.115.w3", "model.layers.53.block_sparse_moe.experts.116.w3", "model.layers.53.block_sparse_moe.experts.117.w3", "model.layers.53.block_sparse_moe.experts.118.w3", "model.layers.53.block_sparse_moe.experts.119.w3", "model.layers.53.block_sparse_moe.experts.120.w3", "model.layers.53.block_sparse_moe.experts.121.w3", "model.layers.53.block_sparse_moe.experts.122.w3", "model.layers.53.block_sparse_moe.experts.123.w3", "model.layers.53.block_sparse_moe.experts.124.w3", "model.layers.53.block_sparse_moe.experts.125.w3", "model.layers.53.block_sparse_moe.experts.126.w3", "model.layers.53.block_sparse_moe.experts.127.w3", "model.layers.53.block_sparse_moe.experts.128.w3", "model.layers.53.block_sparse_moe.experts.129.w3", "model.layers.53.block_sparse_moe.experts.130.w3", "model.layers.53.block_sparse_moe.experts.131.w3", "model.layers.53.block_sparse_moe.experts.132.w3", "model.layers.53.block_sparse_moe.experts.133.w3", "model.layers.53.block_sparse_moe.experts.134.w3", "model.layers.53.block_sparse_moe.experts.135.w3", "model.layers.53.block_sparse_moe.experts.136.w3", "model.layers.53.block_sparse_moe.experts.137.w3", "model.layers.53.block_sparse_moe.experts.138.w3", "model.layers.53.block_sparse_moe.experts.139.w3", "model.layers.53.block_sparse_moe.experts.140.w3", "model.layers.53.block_sparse_moe.experts.141.w3", "model.layers.53.block_sparse_moe.experts.142.w3", "model.layers.53.block_sparse_moe.experts.143.w3", "model.layers.53.block_sparse_moe.experts.144.w3", "model.layers.53.block_sparse_moe.experts.145.w3", "model.layers.53.block_sparse_moe.experts.146.w3", "model.layers.53.block_sparse_moe.experts.147.w3", "model.layers.53.block_sparse_moe.experts.148.w3", "model.layers.53.block_sparse_moe.experts.149.w3", "model.layers.53.block_sparse_moe.experts.150.w3", "model.layers.53.block_sparse_moe.experts.151.w3", "model.layers.53.block_sparse_moe.experts.152.w3", "model.layers.53.block_sparse_moe.experts.153.w3", "model.layers.53.block_sparse_moe.experts.154.w3", "model.layers.53.block_sparse_moe.experts.155.w3", "model.layers.53.block_sparse_moe.experts.156.w3", "model.layers.53.block_sparse_moe.experts.157.w3", "model.layers.53.block_sparse_moe.experts.158.w3", "model.layers.53.block_sparse_moe.experts.159.w3", "model.layers.53.block_sparse_moe.experts.160.w3", "model.layers.53.block_sparse_moe.experts.161.w3", "model.layers.53.block_sparse_moe.experts.162.w3", "model.layers.53.block_sparse_moe.experts.163.w3", "model.layers.53.block_sparse_moe.experts.164.w3", "model.layers.53.block_sparse_moe.experts.165.w3", "model.layers.53.block_sparse_moe.experts.166.w3", "model.layers.53.block_sparse_moe.experts.167.w3", "model.layers.53.block_sparse_moe.experts.168.w3", "model.layers.53.block_sparse_moe.experts.169.w3", "model.layers.53.block_sparse_moe.experts.170.w3", "model.layers.53.block_sparse_moe.experts.171.w3", "model.layers.53.block_sparse_moe.experts.172.w3", "model.layers.53.block_sparse_moe.experts.173.w3", "model.layers.53.block_sparse_moe.experts.174.w3", "model.layers.53.block_sparse_moe.experts.175.w3", "model.layers.53.block_sparse_moe.experts.176.w3", "model.layers.53.block_sparse_moe.experts.177.w3", "model.layers.53.block_sparse_moe.experts.178.w3", "model.layers.53.block_sparse_moe.experts.179.w3", "model.layers.53.block_sparse_moe.experts.180.w3", "model.layers.53.block_sparse_moe.experts.181.w3", "model.layers.53.block_sparse_moe.experts.182.w3", "model.layers.53.block_sparse_moe.experts.183.w3", "model.layers.53.block_sparse_moe.experts.184.w3", "model.layers.53.block_sparse_moe.experts.185.w3", "model.layers.53.block_sparse_moe.experts.186.w3", "model.layers.53.block_sparse_moe.experts.187.w3", "model.layers.53.block_sparse_moe.experts.188.w3", "model.layers.53.block_sparse_moe.experts.189.w3", "model.layers.53.block_sparse_moe.experts.190.w3", "model.layers.53.block_sparse_moe.experts.191.w3", "model.layers.53.block_sparse_moe.experts.192.w3", "model.layers.53.block_sparse_moe.experts.193.w3", "model.layers.53.block_sparse_moe.experts.194.w3", "model.layers.53.block_sparse_moe.experts.195.w3", "model.layers.53.block_sparse_moe.experts.196.w3", "model.layers.53.block_sparse_moe.experts.197.w3", "model.layers.53.block_sparse_moe.experts.198.w3", "model.layers.53.block_sparse_moe.experts.199.w3", "model.layers.53.block_sparse_moe.experts.200.w3", "model.layers.53.block_sparse_moe.experts.201.w3", "model.layers.53.block_sparse_moe.experts.202.w3", "model.layers.53.block_sparse_moe.experts.203.w3", "model.layers.53.block_sparse_moe.experts.204.w3", "model.layers.53.block_sparse_moe.experts.205.w3", "model.layers.53.block_sparse_moe.experts.206.w3", "model.layers.53.block_sparse_moe.experts.207.w3", "model.layers.53.block_sparse_moe.experts.208.w3", "model.layers.53.block_sparse_moe.experts.209.w3", "model.layers.53.block_sparse_moe.experts.210.w3", "model.layers.53.block_sparse_moe.experts.211.w3", "model.layers.53.block_sparse_moe.experts.212.w3", "model.layers.53.block_sparse_moe.experts.213.w3", "model.layers.53.block_sparse_moe.experts.214.w3", "model.layers.53.block_sparse_moe.experts.215.w3", "model.layers.53.block_sparse_moe.experts.216.w3", "model.layers.53.block_sparse_moe.experts.217.w3", "model.layers.53.block_sparse_moe.experts.218.w3", "model.layers.53.block_sparse_moe.experts.219.w3", "model.layers.53.block_sparse_moe.experts.220.w3", "model.layers.53.block_sparse_moe.experts.221.w3", "model.layers.53.block_sparse_moe.experts.222.w3", "model.layers.53.block_sparse_moe.experts.223.w3", "model.layers.53.block_sparse_moe.experts.224.w3", "model.layers.53.block_sparse_moe.experts.225.w3", "model.layers.53.block_sparse_moe.experts.226.w3", "model.layers.53.block_sparse_moe.experts.227.w3", "model.layers.53.block_sparse_moe.experts.228.w3", "model.layers.53.block_sparse_moe.experts.229.w3", "model.layers.53.block_sparse_moe.experts.230.w3", "model.layers.53.block_sparse_moe.experts.231.w3", "model.layers.53.block_sparse_moe.experts.232.w3", "model.layers.53.block_sparse_moe.experts.233.w3", "model.layers.53.block_sparse_moe.experts.234.w3", "model.layers.53.block_sparse_moe.experts.235.w3", "model.layers.53.block_sparse_moe.experts.236.w3", "model.layers.53.block_sparse_moe.experts.237.w3", "model.layers.53.block_sparse_moe.experts.238.w3", "model.layers.53.block_sparse_moe.experts.239.w3", "model.layers.53.block_sparse_moe.experts.240.w3", "model.layers.53.block_sparse_moe.experts.241.w3", "model.layers.53.block_sparse_moe.experts.242.w3", "model.layers.53.block_sparse_moe.experts.243.w3", "model.layers.53.block_sparse_moe.experts.244.w3", "model.layers.53.block_sparse_moe.experts.245.w3", "model.layers.53.block_sparse_moe.experts.246.w3", "model.layers.53.block_sparse_moe.experts.247.w3", "model.layers.53.block_sparse_moe.experts.248.w3", "model.layers.53.block_sparse_moe.experts.249.w3", "model.layers.53.block_sparse_moe.experts.250.w3", "model.layers.53.block_sparse_moe.experts.251.w3", "model.layers.53.block_sparse_moe.experts.252.w3", "model.layers.53.block_sparse_moe.experts.253.w3", "model.layers.53.block_sparse_moe.experts.254.w3", "model.layers.53.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -2.0728446543216705e-05, "dbits": 2415919104 } ] }, { "idx": 269, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w2", "model.layers.53.block_sparse_moe.experts.1.w2", "model.layers.53.block_sparse_moe.experts.2.w2", "model.layers.53.block_sparse_moe.experts.3.w2", "model.layers.53.block_sparse_moe.experts.4.w2", "model.layers.53.block_sparse_moe.experts.5.w2", "model.layers.53.block_sparse_moe.experts.6.w2", "model.layers.53.block_sparse_moe.experts.7.w2", "model.layers.53.block_sparse_moe.experts.8.w2", "model.layers.53.block_sparse_moe.experts.9.w2", "model.layers.53.block_sparse_moe.experts.10.w2", "model.layers.53.block_sparse_moe.experts.11.w2", "model.layers.53.block_sparse_moe.experts.12.w2", "model.layers.53.block_sparse_moe.experts.13.w2", "model.layers.53.block_sparse_moe.experts.14.w2", "model.layers.53.block_sparse_moe.experts.15.w2", "model.layers.53.block_sparse_moe.experts.16.w2", "model.layers.53.block_sparse_moe.experts.17.w2", "model.layers.53.block_sparse_moe.experts.18.w2", "model.layers.53.block_sparse_moe.experts.19.w2", "model.layers.53.block_sparse_moe.experts.20.w2", "model.layers.53.block_sparse_moe.experts.21.w2", "model.layers.53.block_sparse_moe.experts.22.w2", "model.layers.53.block_sparse_moe.experts.23.w2", "model.layers.53.block_sparse_moe.experts.24.w2", "model.layers.53.block_sparse_moe.experts.25.w2", "model.layers.53.block_sparse_moe.experts.26.w2", "model.layers.53.block_sparse_moe.experts.27.w2", "model.layers.53.block_sparse_moe.experts.28.w2", "model.layers.53.block_sparse_moe.experts.29.w2", "model.layers.53.block_sparse_moe.experts.30.w2", "model.layers.53.block_sparse_moe.experts.31.w2", "model.layers.53.block_sparse_moe.experts.32.w2", "model.layers.53.block_sparse_moe.experts.33.w2", "model.layers.53.block_sparse_moe.experts.34.w2", "model.layers.53.block_sparse_moe.experts.35.w2", "model.layers.53.block_sparse_moe.experts.36.w2", "model.layers.53.block_sparse_moe.experts.37.w2", "model.layers.53.block_sparse_moe.experts.38.w2", "model.layers.53.block_sparse_moe.experts.39.w2", "model.layers.53.block_sparse_moe.experts.40.w2", "model.layers.53.block_sparse_moe.experts.41.w2", "model.layers.53.block_sparse_moe.experts.42.w2", "model.layers.53.block_sparse_moe.experts.43.w2", "model.layers.53.block_sparse_moe.experts.44.w2", "model.layers.53.block_sparse_moe.experts.45.w2", "model.layers.53.block_sparse_moe.experts.46.w2", "model.layers.53.block_sparse_moe.experts.47.w2", "model.layers.53.block_sparse_moe.experts.48.w2", "model.layers.53.block_sparse_moe.experts.49.w2", "model.layers.53.block_sparse_moe.experts.50.w2", "model.layers.53.block_sparse_moe.experts.51.w2", "model.layers.53.block_sparse_moe.experts.52.w2", "model.layers.53.block_sparse_moe.experts.53.w2", "model.layers.53.block_sparse_moe.experts.54.w2", "model.layers.53.block_sparse_moe.experts.55.w2", "model.layers.53.block_sparse_moe.experts.56.w2", "model.layers.53.block_sparse_moe.experts.57.w2", "model.layers.53.block_sparse_moe.experts.58.w2", "model.layers.53.block_sparse_moe.experts.59.w2", "model.layers.53.block_sparse_moe.experts.60.w2", "model.layers.53.block_sparse_moe.experts.61.w2", "model.layers.53.block_sparse_moe.experts.62.w2", "model.layers.53.block_sparse_moe.experts.63.w2", "model.layers.53.block_sparse_moe.experts.64.w2", "model.layers.53.block_sparse_moe.experts.65.w2", "model.layers.53.block_sparse_moe.experts.66.w2", "model.layers.53.block_sparse_moe.experts.67.w2", "model.layers.53.block_sparse_moe.experts.68.w2", "model.layers.53.block_sparse_moe.experts.69.w2", "model.layers.53.block_sparse_moe.experts.70.w2", "model.layers.53.block_sparse_moe.experts.71.w2", "model.layers.53.block_sparse_moe.experts.72.w2", "model.layers.53.block_sparse_moe.experts.73.w2", "model.layers.53.block_sparse_moe.experts.74.w2", "model.layers.53.block_sparse_moe.experts.75.w2", "model.layers.53.block_sparse_moe.experts.76.w2", "model.layers.53.block_sparse_moe.experts.77.w2", "model.layers.53.block_sparse_moe.experts.78.w2", "model.layers.53.block_sparse_moe.experts.79.w2", "model.layers.53.block_sparse_moe.experts.80.w2", "model.layers.53.block_sparse_moe.experts.81.w2", "model.layers.53.block_sparse_moe.experts.82.w2", "model.layers.53.block_sparse_moe.experts.83.w2", "model.layers.53.block_sparse_moe.experts.84.w2", "model.layers.53.block_sparse_moe.experts.85.w2", "model.layers.53.block_sparse_moe.experts.86.w2", "model.layers.53.block_sparse_moe.experts.87.w2", "model.layers.53.block_sparse_moe.experts.88.w2", "model.layers.53.block_sparse_moe.experts.89.w2", "model.layers.53.block_sparse_moe.experts.90.w2", "model.layers.53.block_sparse_moe.experts.91.w2", "model.layers.53.block_sparse_moe.experts.92.w2", "model.layers.53.block_sparse_moe.experts.93.w2", "model.layers.53.block_sparse_moe.experts.94.w2", "model.layers.53.block_sparse_moe.experts.95.w2", "model.layers.53.block_sparse_moe.experts.96.w2", "model.layers.53.block_sparse_moe.experts.97.w2", "model.layers.53.block_sparse_moe.experts.98.w2", "model.layers.53.block_sparse_moe.experts.99.w2", "model.layers.53.block_sparse_moe.experts.100.w2", "model.layers.53.block_sparse_moe.experts.101.w2", "model.layers.53.block_sparse_moe.experts.102.w2", "model.layers.53.block_sparse_moe.experts.103.w2", "model.layers.53.block_sparse_moe.experts.104.w2", "model.layers.53.block_sparse_moe.experts.105.w2", "model.layers.53.block_sparse_moe.experts.106.w2", "model.layers.53.block_sparse_moe.experts.107.w2", "model.layers.53.block_sparse_moe.experts.108.w2", "model.layers.53.block_sparse_moe.experts.109.w2", "model.layers.53.block_sparse_moe.experts.110.w2", "model.layers.53.block_sparse_moe.experts.111.w2", "model.layers.53.block_sparse_moe.experts.112.w2", "model.layers.53.block_sparse_moe.experts.113.w2", "model.layers.53.block_sparse_moe.experts.114.w2", "model.layers.53.block_sparse_moe.experts.115.w2", "model.layers.53.block_sparse_moe.experts.116.w2", "model.layers.53.block_sparse_moe.experts.117.w2", "model.layers.53.block_sparse_moe.experts.118.w2", "model.layers.53.block_sparse_moe.experts.119.w2", "model.layers.53.block_sparse_moe.experts.120.w2", "model.layers.53.block_sparse_moe.experts.121.w2", "model.layers.53.block_sparse_moe.experts.122.w2", "model.layers.53.block_sparse_moe.experts.123.w2", "model.layers.53.block_sparse_moe.experts.124.w2", "model.layers.53.block_sparse_moe.experts.125.w2", "model.layers.53.block_sparse_moe.experts.126.w2", "model.layers.53.block_sparse_moe.experts.127.w2", "model.layers.53.block_sparse_moe.experts.128.w2", "model.layers.53.block_sparse_moe.experts.129.w2", "model.layers.53.block_sparse_moe.experts.130.w2", "model.layers.53.block_sparse_moe.experts.131.w2", "model.layers.53.block_sparse_moe.experts.132.w2", "model.layers.53.block_sparse_moe.experts.133.w2", "model.layers.53.block_sparse_moe.experts.134.w2", "model.layers.53.block_sparse_moe.experts.135.w2", "model.layers.53.block_sparse_moe.experts.136.w2", "model.layers.53.block_sparse_moe.experts.137.w2", "model.layers.53.block_sparse_moe.experts.138.w2", "model.layers.53.block_sparse_moe.experts.139.w2", "model.layers.53.block_sparse_moe.experts.140.w2", "model.layers.53.block_sparse_moe.experts.141.w2", "model.layers.53.block_sparse_moe.experts.142.w2", "model.layers.53.block_sparse_moe.experts.143.w2", "model.layers.53.block_sparse_moe.experts.144.w2", "model.layers.53.block_sparse_moe.experts.145.w2", "model.layers.53.block_sparse_moe.experts.146.w2", "model.layers.53.block_sparse_moe.experts.147.w2", "model.layers.53.block_sparse_moe.experts.148.w2", "model.layers.53.block_sparse_moe.experts.149.w2", "model.layers.53.block_sparse_moe.experts.150.w2", "model.layers.53.block_sparse_moe.experts.151.w2", "model.layers.53.block_sparse_moe.experts.152.w2", "model.layers.53.block_sparse_moe.experts.153.w2", "model.layers.53.block_sparse_moe.experts.154.w2", "model.layers.53.block_sparse_moe.experts.155.w2", "model.layers.53.block_sparse_moe.experts.156.w2", "model.layers.53.block_sparse_moe.experts.157.w2", "model.layers.53.block_sparse_moe.experts.158.w2", "model.layers.53.block_sparse_moe.experts.159.w2", "model.layers.53.block_sparse_moe.experts.160.w2", "model.layers.53.block_sparse_moe.experts.161.w2", "model.layers.53.block_sparse_moe.experts.162.w2", "model.layers.53.block_sparse_moe.experts.163.w2", "model.layers.53.block_sparse_moe.experts.164.w2", "model.layers.53.block_sparse_moe.experts.165.w2", "model.layers.53.block_sparse_moe.experts.166.w2", "model.layers.53.block_sparse_moe.experts.167.w2", "model.layers.53.block_sparse_moe.experts.168.w2", "model.layers.53.block_sparse_moe.experts.169.w2", "model.layers.53.block_sparse_moe.experts.170.w2", "model.layers.53.block_sparse_moe.experts.171.w2", "model.layers.53.block_sparse_moe.experts.172.w2", "model.layers.53.block_sparse_moe.experts.173.w2", "model.layers.53.block_sparse_moe.experts.174.w2", "model.layers.53.block_sparse_moe.experts.175.w2", "model.layers.53.block_sparse_moe.experts.176.w2", "model.layers.53.block_sparse_moe.experts.177.w2", "model.layers.53.block_sparse_moe.experts.178.w2", "model.layers.53.block_sparse_moe.experts.179.w2", "model.layers.53.block_sparse_moe.experts.180.w2", "model.layers.53.block_sparse_moe.experts.181.w2", "model.layers.53.block_sparse_moe.experts.182.w2", "model.layers.53.block_sparse_moe.experts.183.w2", "model.layers.53.block_sparse_moe.experts.184.w2", "model.layers.53.block_sparse_moe.experts.185.w2", "model.layers.53.block_sparse_moe.experts.186.w2", "model.layers.53.block_sparse_moe.experts.187.w2", "model.layers.53.block_sparse_moe.experts.188.w2", "model.layers.53.block_sparse_moe.experts.189.w2", "model.layers.53.block_sparse_moe.experts.190.w2", "model.layers.53.block_sparse_moe.experts.191.w2", "model.layers.53.block_sparse_moe.experts.192.w2", "model.layers.53.block_sparse_moe.experts.193.w2", "model.layers.53.block_sparse_moe.experts.194.w2", "model.layers.53.block_sparse_moe.experts.195.w2", "model.layers.53.block_sparse_moe.experts.196.w2", "model.layers.53.block_sparse_moe.experts.197.w2", "model.layers.53.block_sparse_moe.experts.198.w2", "model.layers.53.block_sparse_moe.experts.199.w2", "model.layers.53.block_sparse_moe.experts.200.w2", "model.layers.53.block_sparse_moe.experts.201.w2", "model.layers.53.block_sparse_moe.experts.202.w2", "model.layers.53.block_sparse_moe.experts.203.w2", "model.layers.53.block_sparse_moe.experts.204.w2", "model.layers.53.block_sparse_moe.experts.205.w2", "model.layers.53.block_sparse_moe.experts.206.w2", "model.layers.53.block_sparse_moe.experts.207.w2", "model.layers.53.block_sparse_moe.experts.208.w2", "model.layers.53.block_sparse_moe.experts.209.w2", "model.layers.53.block_sparse_moe.experts.210.w2", "model.layers.53.block_sparse_moe.experts.211.w2", "model.layers.53.block_sparse_moe.experts.212.w2", "model.layers.53.block_sparse_moe.experts.213.w2", "model.layers.53.block_sparse_moe.experts.214.w2", "model.layers.53.block_sparse_moe.experts.215.w2", "model.layers.53.block_sparse_moe.experts.216.w2", "model.layers.53.block_sparse_moe.experts.217.w2", "model.layers.53.block_sparse_moe.experts.218.w2", "model.layers.53.block_sparse_moe.experts.219.w2", "model.layers.53.block_sparse_moe.experts.220.w2", "model.layers.53.block_sparse_moe.experts.221.w2", "model.layers.53.block_sparse_moe.experts.222.w2", "model.layers.53.block_sparse_moe.experts.223.w2", "model.layers.53.block_sparse_moe.experts.224.w2", "model.layers.53.block_sparse_moe.experts.225.w2", "model.layers.53.block_sparse_moe.experts.226.w2", "model.layers.53.block_sparse_moe.experts.227.w2", "model.layers.53.block_sparse_moe.experts.228.w2", "model.layers.53.block_sparse_moe.experts.229.w2", "model.layers.53.block_sparse_moe.experts.230.w2", "model.layers.53.block_sparse_moe.experts.231.w2", "model.layers.53.block_sparse_moe.experts.232.w2", "model.layers.53.block_sparse_moe.experts.233.w2", "model.layers.53.block_sparse_moe.experts.234.w2", "model.layers.53.block_sparse_moe.experts.235.w2", "model.layers.53.block_sparse_moe.experts.236.w2", "model.layers.53.block_sparse_moe.experts.237.w2", "model.layers.53.block_sparse_moe.experts.238.w2", "model.layers.53.block_sparse_moe.experts.239.w2", "model.layers.53.block_sparse_moe.experts.240.w2", "model.layers.53.block_sparse_moe.experts.241.w2", "model.layers.53.block_sparse_moe.experts.242.w2", "model.layers.53.block_sparse_moe.experts.243.w2", "model.layers.53.block_sparse_moe.experts.244.w2", "model.layers.53.block_sparse_moe.experts.245.w2", "model.layers.53.block_sparse_moe.experts.246.w2", "model.layers.53.block_sparse_moe.experts.247.w2", "model.layers.53.block_sparse_moe.experts.248.w2", "model.layers.53.block_sparse_moe.experts.249.w2", "model.layers.53.block_sparse_moe.experts.250.w2", "model.layers.53.block_sparse_moe.experts.251.w2", "model.layers.53.block_sparse_moe.experts.252.w2", "model.layers.53.block_sparse_moe.experts.253.w2", "model.layers.53.block_sparse_moe.experts.254.w2", "model.layers.53.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.723122835158469e-05, "dbits": 1207959552 } ] }, { "idx": 270, "layers": [ "model.layers.54.self_attn.q_proj" ], "candidates": [ { "dkld": -2.0590797066691313e-05, "dbits": 18874368 } ] }, { "idx": 271, "layers": [ "model.layers.54.self_attn.k_proj", "model.layers.54.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0005306340754032135, "dbits": 6291456 } ] }, { "idx": 272, "layers": [ "model.layers.54.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00018329471349716464, "dbits": 18874368 } ] }, { "idx": 273, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w1", "model.layers.54.block_sparse_moe.experts.1.w1", "model.layers.54.block_sparse_moe.experts.2.w1", "model.layers.54.block_sparse_moe.experts.3.w1", "model.layers.54.block_sparse_moe.experts.4.w1", "model.layers.54.block_sparse_moe.experts.5.w1", "model.layers.54.block_sparse_moe.experts.6.w1", "model.layers.54.block_sparse_moe.experts.7.w1", "model.layers.54.block_sparse_moe.experts.8.w1", "model.layers.54.block_sparse_moe.experts.9.w1", "model.layers.54.block_sparse_moe.experts.10.w1", "model.layers.54.block_sparse_moe.experts.11.w1", "model.layers.54.block_sparse_moe.experts.12.w1", "model.layers.54.block_sparse_moe.experts.13.w1", "model.layers.54.block_sparse_moe.experts.14.w1", "model.layers.54.block_sparse_moe.experts.15.w1", "model.layers.54.block_sparse_moe.experts.16.w1", "model.layers.54.block_sparse_moe.experts.17.w1", "model.layers.54.block_sparse_moe.experts.18.w1", "model.layers.54.block_sparse_moe.experts.19.w1", "model.layers.54.block_sparse_moe.experts.20.w1", "model.layers.54.block_sparse_moe.experts.21.w1", "model.layers.54.block_sparse_moe.experts.22.w1", "model.layers.54.block_sparse_moe.experts.23.w1", "model.layers.54.block_sparse_moe.experts.24.w1", "model.layers.54.block_sparse_moe.experts.25.w1", "model.layers.54.block_sparse_moe.experts.26.w1", "model.layers.54.block_sparse_moe.experts.27.w1", "model.layers.54.block_sparse_moe.experts.28.w1", "model.layers.54.block_sparse_moe.experts.29.w1", "model.layers.54.block_sparse_moe.experts.30.w1", "model.layers.54.block_sparse_moe.experts.31.w1", "model.layers.54.block_sparse_moe.experts.32.w1", "model.layers.54.block_sparse_moe.experts.33.w1", "model.layers.54.block_sparse_moe.experts.34.w1", "model.layers.54.block_sparse_moe.experts.35.w1", "model.layers.54.block_sparse_moe.experts.36.w1", "model.layers.54.block_sparse_moe.experts.37.w1", "model.layers.54.block_sparse_moe.experts.38.w1", "model.layers.54.block_sparse_moe.experts.39.w1", "model.layers.54.block_sparse_moe.experts.40.w1", "model.layers.54.block_sparse_moe.experts.41.w1", "model.layers.54.block_sparse_moe.experts.42.w1", "model.layers.54.block_sparse_moe.experts.43.w1", "model.layers.54.block_sparse_moe.experts.44.w1", "model.layers.54.block_sparse_moe.experts.45.w1", "model.layers.54.block_sparse_moe.experts.46.w1", "model.layers.54.block_sparse_moe.experts.47.w1", "model.layers.54.block_sparse_moe.experts.48.w1", "model.layers.54.block_sparse_moe.experts.49.w1", "model.layers.54.block_sparse_moe.experts.50.w1", "model.layers.54.block_sparse_moe.experts.51.w1", "model.layers.54.block_sparse_moe.experts.52.w1", "model.layers.54.block_sparse_moe.experts.53.w1", "model.layers.54.block_sparse_moe.experts.54.w1", "model.layers.54.block_sparse_moe.experts.55.w1", "model.layers.54.block_sparse_moe.experts.56.w1", "model.layers.54.block_sparse_moe.experts.57.w1", "model.layers.54.block_sparse_moe.experts.58.w1", "model.layers.54.block_sparse_moe.experts.59.w1", "model.layers.54.block_sparse_moe.experts.60.w1", "model.layers.54.block_sparse_moe.experts.61.w1", "model.layers.54.block_sparse_moe.experts.62.w1", "model.layers.54.block_sparse_moe.experts.63.w1", "model.layers.54.block_sparse_moe.experts.64.w1", "model.layers.54.block_sparse_moe.experts.65.w1", "model.layers.54.block_sparse_moe.experts.66.w1", "model.layers.54.block_sparse_moe.experts.67.w1", "model.layers.54.block_sparse_moe.experts.68.w1", "model.layers.54.block_sparse_moe.experts.69.w1", "model.layers.54.block_sparse_moe.experts.70.w1", "model.layers.54.block_sparse_moe.experts.71.w1", "model.layers.54.block_sparse_moe.experts.72.w1", "model.layers.54.block_sparse_moe.experts.73.w1", "model.layers.54.block_sparse_moe.experts.74.w1", "model.layers.54.block_sparse_moe.experts.75.w1", "model.layers.54.block_sparse_moe.experts.76.w1", "model.layers.54.block_sparse_moe.experts.77.w1", "model.layers.54.block_sparse_moe.experts.78.w1", "model.layers.54.block_sparse_moe.experts.79.w1", "model.layers.54.block_sparse_moe.experts.80.w1", "model.layers.54.block_sparse_moe.experts.81.w1", "model.layers.54.block_sparse_moe.experts.82.w1", "model.layers.54.block_sparse_moe.experts.83.w1", "model.layers.54.block_sparse_moe.experts.84.w1", "model.layers.54.block_sparse_moe.experts.85.w1", "model.layers.54.block_sparse_moe.experts.86.w1", "model.layers.54.block_sparse_moe.experts.87.w1", "model.layers.54.block_sparse_moe.experts.88.w1", "model.layers.54.block_sparse_moe.experts.89.w1", "model.layers.54.block_sparse_moe.experts.90.w1", "model.layers.54.block_sparse_moe.experts.91.w1", "model.layers.54.block_sparse_moe.experts.92.w1", "model.layers.54.block_sparse_moe.experts.93.w1", "model.layers.54.block_sparse_moe.experts.94.w1", "model.layers.54.block_sparse_moe.experts.95.w1", "model.layers.54.block_sparse_moe.experts.96.w1", "model.layers.54.block_sparse_moe.experts.97.w1", "model.layers.54.block_sparse_moe.experts.98.w1", "model.layers.54.block_sparse_moe.experts.99.w1", "model.layers.54.block_sparse_moe.experts.100.w1", "model.layers.54.block_sparse_moe.experts.101.w1", "model.layers.54.block_sparse_moe.experts.102.w1", "model.layers.54.block_sparse_moe.experts.103.w1", "model.layers.54.block_sparse_moe.experts.104.w1", "model.layers.54.block_sparse_moe.experts.105.w1", "model.layers.54.block_sparse_moe.experts.106.w1", "model.layers.54.block_sparse_moe.experts.107.w1", "model.layers.54.block_sparse_moe.experts.108.w1", "model.layers.54.block_sparse_moe.experts.109.w1", "model.layers.54.block_sparse_moe.experts.110.w1", "model.layers.54.block_sparse_moe.experts.111.w1", "model.layers.54.block_sparse_moe.experts.112.w1", "model.layers.54.block_sparse_moe.experts.113.w1", "model.layers.54.block_sparse_moe.experts.114.w1", "model.layers.54.block_sparse_moe.experts.115.w1", "model.layers.54.block_sparse_moe.experts.116.w1", "model.layers.54.block_sparse_moe.experts.117.w1", "model.layers.54.block_sparse_moe.experts.118.w1", "model.layers.54.block_sparse_moe.experts.119.w1", "model.layers.54.block_sparse_moe.experts.120.w1", "model.layers.54.block_sparse_moe.experts.121.w1", "model.layers.54.block_sparse_moe.experts.122.w1", "model.layers.54.block_sparse_moe.experts.123.w1", "model.layers.54.block_sparse_moe.experts.124.w1", "model.layers.54.block_sparse_moe.experts.125.w1", "model.layers.54.block_sparse_moe.experts.126.w1", "model.layers.54.block_sparse_moe.experts.127.w1", "model.layers.54.block_sparse_moe.experts.128.w1", "model.layers.54.block_sparse_moe.experts.129.w1", "model.layers.54.block_sparse_moe.experts.130.w1", "model.layers.54.block_sparse_moe.experts.131.w1", "model.layers.54.block_sparse_moe.experts.132.w1", "model.layers.54.block_sparse_moe.experts.133.w1", "model.layers.54.block_sparse_moe.experts.134.w1", "model.layers.54.block_sparse_moe.experts.135.w1", "model.layers.54.block_sparse_moe.experts.136.w1", "model.layers.54.block_sparse_moe.experts.137.w1", "model.layers.54.block_sparse_moe.experts.138.w1", "model.layers.54.block_sparse_moe.experts.139.w1", "model.layers.54.block_sparse_moe.experts.140.w1", "model.layers.54.block_sparse_moe.experts.141.w1", "model.layers.54.block_sparse_moe.experts.142.w1", "model.layers.54.block_sparse_moe.experts.143.w1", "model.layers.54.block_sparse_moe.experts.144.w1", "model.layers.54.block_sparse_moe.experts.145.w1", "model.layers.54.block_sparse_moe.experts.146.w1", "model.layers.54.block_sparse_moe.experts.147.w1", "model.layers.54.block_sparse_moe.experts.148.w1", "model.layers.54.block_sparse_moe.experts.149.w1", "model.layers.54.block_sparse_moe.experts.150.w1", "model.layers.54.block_sparse_moe.experts.151.w1", "model.layers.54.block_sparse_moe.experts.152.w1", "model.layers.54.block_sparse_moe.experts.153.w1", "model.layers.54.block_sparse_moe.experts.154.w1", "model.layers.54.block_sparse_moe.experts.155.w1", "model.layers.54.block_sparse_moe.experts.156.w1", "model.layers.54.block_sparse_moe.experts.157.w1", "model.layers.54.block_sparse_moe.experts.158.w1", "model.layers.54.block_sparse_moe.experts.159.w1", "model.layers.54.block_sparse_moe.experts.160.w1", "model.layers.54.block_sparse_moe.experts.161.w1", "model.layers.54.block_sparse_moe.experts.162.w1", "model.layers.54.block_sparse_moe.experts.163.w1", "model.layers.54.block_sparse_moe.experts.164.w1", "model.layers.54.block_sparse_moe.experts.165.w1", "model.layers.54.block_sparse_moe.experts.166.w1", "model.layers.54.block_sparse_moe.experts.167.w1", "model.layers.54.block_sparse_moe.experts.168.w1", "model.layers.54.block_sparse_moe.experts.169.w1", "model.layers.54.block_sparse_moe.experts.170.w1", "model.layers.54.block_sparse_moe.experts.171.w1", "model.layers.54.block_sparse_moe.experts.172.w1", "model.layers.54.block_sparse_moe.experts.173.w1", "model.layers.54.block_sparse_moe.experts.174.w1", "model.layers.54.block_sparse_moe.experts.175.w1", "model.layers.54.block_sparse_moe.experts.176.w1", "model.layers.54.block_sparse_moe.experts.177.w1", "model.layers.54.block_sparse_moe.experts.178.w1", "model.layers.54.block_sparse_moe.experts.179.w1", "model.layers.54.block_sparse_moe.experts.180.w1", "model.layers.54.block_sparse_moe.experts.181.w1", "model.layers.54.block_sparse_moe.experts.182.w1", "model.layers.54.block_sparse_moe.experts.183.w1", "model.layers.54.block_sparse_moe.experts.184.w1", "model.layers.54.block_sparse_moe.experts.185.w1", "model.layers.54.block_sparse_moe.experts.186.w1", "model.layers.54.block_sparse_moe.experts.187.w1", "model.layers.54.block_sparse_moe.experts.188.w1", "model.layers.54.block_sparse_moe.experts.189.w1", "model.layers.54.block_sparse_moe.experts.190.w1", "model.layers.54.block_sparse_moe.experts.191.w1", "model.layers.54.block_sparse_moe.experts.192.w1", "model.layers.54.block_sparse_moe.experts.193.w1", "model.layers.54.block_sparse_moe.experts.194.w1", "model.layers.54.block_sparse_moe.experts.195.w1", "model.layers.54.block_sparse_moe.experts.196.w1", "model.layers.54.block_sparse_moe.experts.197.w1", "model.layers.54.block_sparse_moe.experts.198.w1", "model.layers.54.block_sparse_moe.experts.199.w1", "model.layers.54.block_sparse_moe.experts.200.w1", "model.layers.54.block_sparse_moe.experts.201.w1", "model.layers.54.block_sparse_moe.experts.202.w1", "model.layers.54.block_sparse_moe.experts.203.w1", "model.layers.54.block_sparse_moe.experts.204.w1", "model.layers.54.block_sparse_moe.experts.205.w1", "model.layers.54.block_sparse_moe.experts.206.w1", "model.layers.54.block_sparse_moe.experts.207.w1", "model.layers.54.block_sparse_moe.experts.208.w1", "model.layers.54.block_sparse_moe.experts.209.w1", "model.layers.54.block_sparse_moe.experts.210.w1", "model.layers.54.block_sparse_moe.experts.211.w1", "model.layers.54.block_sparse_moe.experts.212.w1", "model.layers.54.block_sparse_moe.experts.213.w1", "model.layers.54.block_sparse_moe.experts.214.w1", "model.layers.54.block_sparse_moe.experts.215.w1", "model.layers.54.block_sparse_moe.experts.216.w1", "model.layers.54.block_sparse_moe.experts.217.w1", "model.layers.54.block_sparse_moe.experts.218.w1", "model.layers.54.block_sparse_moe.experts.219.w1", "model.layers.54.block_sparse_moe.experts.220.w1", "model.layers.54.block_sparse_moe.experts.221.w1", "model.layers.54.block_sparse_moe.experts.222.w1", "model.layers.54.block_sparse_moe.experts.223.w1", "model.layers.54.block_sparse_moe.experts.224.w1", "model.layers.54.block_sparse_moe.experts.225.w1", "model.layers.54.block_sparse_moe.experts.226.w1", "model.layers.54.block_sparse_moe.experts.227.w1", "model.layers.54.block_sparse_moe.experts.228.w1", "model.layers.54.block_sparse_moe.experts.229.w1", "model.layers.54.block_sparse_moe.experts.230.w1", "model.layers.54.block_sparse_moe.experts.231.w1", "model.layers.54.block_sparse_moe.experts.232.w1", "model.layers.54.block_sparse_moe.experts.233.w1", "model.layers.54.block_sparse_moe.experts.234.w1", "model.layers.54.block_sparse_moe.experts.235.w1", "model.layers.54.block_sparse_moe.experts.236.w1", "model.layers.54.block_sparse_moe.experts.237.w1", "model.layers.54.block_sparse_moe.experts.238.w1", "model.layers.54.block_sparse_moe.experts.239.w1", "model.layers.54.block_sparse_moe.experts.240.w1", "model.layers.54.block_sparse_moe.experts.241.w1", "model.layers.54.block_sparse_moe.experts.242.w1", "model.layers.54.block_sparse_moe.experts.243.w1", "model.layers.54.block_sparse_moe.experts.244.w1", "model.layers.54.block_sparse_moe.experts.245.w1", "model.layers.54.block_sparse_moe.experts.246.w1", "model.layers.54.block_sparse_moe.experts.247.w1", "model.layers.54.block_sparse_moe.experts.248.w1", "model.layers.54.block_sparse_moe.experts.249.w1", "model.layers.54.block_sparse_moe.experts.250.w1", "model.layers.54.block_sparse_moe.experts.251.w1", "model.layers.54.block_sparse_moe.experts.252.w1", "model.layers.54.block_sparse_moe.experts.253.w1", "model.layers.54.block_sparse_moe.experts.254.w1", "model.layers.54.block_sparse_moe.experts.255.w1", "model.layers.54.block_sparse_moe.experts.0.w3", "model.layers.54.block_sparse_moe.experts.1.w3", "model.layers.54.block_sparse_moe.experts.2.w3", "model.layers.54.block_sparse_moe.experts.3.w3", "model.layers.54.block_sparse_moe.experts.4.w3", "model.layers.54.block_sparse_moe.experts.5.w3", "model.layers.54.block_sparse_moe.experts.6.w3", "model.layers.54.block_sparse_moe.experts.7.w3", "model.layers.54.block_sparse_moe.experts.8.w3", "model.layers.54.block_sparse_moe.experts.9.w3", "model.layers.54.block_sparse_moe.experts.10.w3", "model.layers.54.block_sparse_moe.experts.11.w3", "model.layers.54.block_sparse_moe.experts.12.w3", "model.layers.54.block_sparse_moe.experts.13.w3", "model.layers.54.block_sparse_moe.experts.14.w3", "model.layers.54.block_sparse_moe.experts.15.w3", "model.layers.54.block_sparse_moe.experts.16.w3", "model.layers.54.block_sparse_moe.experts.17.w3", "model.layers.54.block_sparse_moe.experts.18.w3", "model.layers.54.block_sparse_moe.experts.19.w3", "model.layers.54.block_sparse_moe.experts.20.w3", "model.layers.54.block_sparse_moe.experts.21.w3", "model.layers.54.block_sparse_moe.experts.22.w3", "model.layers.54.block_sparse_moe.experts.23.w3", "model.layers.54.block_sparse_moe.experts.24.w3", "model.layers.54.block_sparse_moe.experts.25.w3", "model.layers.54.block_sparse_moe.experts.26.w3", "model.layers.54.block_sparse_moe.experts.27.w3", "model.layers.54.block_sparse_moe.experts.28.w3", "model.layers.54.block_sparse_moe.experts.29.w3", "model.layers.54.block_sparse_moe.experts.30.w3", "model.layers.54.block_sparse_moe.experts.31.w3", "model.layers.54.block_sparse_moe.experts.32.w3", "model.layers.54.block_sparse_moe.experts.33.w3", "model.layers.54.block_sparse_moe.experts.34.w3", "model.layers.54.block_sparse_moe.experts.35.w3", "model.layers.54.block_sparse_moe.experts.36.w3", "model.layers.54.block_sparse_moe.experts.37.w3", "model.layers.54.block_sparse_moe.experts.38.w3", "model.layers.54.block_sparse_moe.experts.39.w3", "model.layers.54.block_sparse_moe.experts.40.w3", "model.layers.54.block_sparse_moe.experts.41.w3", "model.layers.54.block_sparse_moe.experts.42.w3", "model.layers.54.block_sparse_moe.experts.43.w3", "model.layers.54.block_sparse_moe.experts.44.w3", "model.layers.54.block_sparse_moe.experts.45.w3", "model.layers.54.block_sparse_moe.experts.46.w3", "model.layers.54.block_sparse_moe.experts.47.w3", "model.layers.54.block_sparse_moe.experts.48.w3", "model.layers.54.block_sparse_moe.experts.49.w3", "model.layers.54.block_sparse_moe.experts.50.w3", "model.layers.54.block_sparse_moe.experts.51.w3", "model.layers.54.block_sparse_moe.experts.52.w3", "model.layers.54.block_sparse_moe.experts.53.w3", "model.layers.54.block_sparse_moe.experts.54.w3", "model.layers.54.block_sparse_moe.experts.55.w3", "model.layers.54.block_sparse_moe.experts.56.w3", "model.layers.54.block_sparse_moe.experts.57.w3", "model.layers.54.block_sparse_moe.experts.58.w3", "model.layers.54.block_sparse_moe.experts.59.w3", "model.layers.54.block_sparse_moe.experts.60.w3", "model.layers.54.block_sparse_moe.experts.61.w3", "model.layers.54.block_sparse_moe.experts.62.w3", "model.layers.54.block_sparse_moe.experts.63.w3", "model.layers.54.block_sparse_moe.experts.64.w3", "model.layers.54.block_sparse_moe.experts.65.w3", "model.layers.54.block_sparse_moe.experts.66.w3", "model.layers.54.block_sparse_moe.experts.67.w3", "model.layers.54.block_sparse_moe.experts.68.w3", "model.layers.54.block_sparse_moe.experts.69.w3", "model.layers.54.block_sparse_moe.experts.70.w3", "model.layers.54.block_sparse_moe.experts.71.w3", "model.layers.54.block_sparse_moe.experts.72.w3", "model.layers.54.block_sparse_moe.experts.73.w3", "model.layers.54.block_sparse_moe.experts.74.w3", "model.layers.54.block_sparse_moe.experts.75.w3", "model.layers.54.block_sparse_moe.experts.76.w3", "model.layers.54.block_sparse_moe.experts.77.w3", "model.layers.54.block_sparse_moe.experts.78.w3", "model.layers.54.block_sparse_moe.experts.79.w3", "model.layers.54.block_sparse_moe.experts.80.w3", "model.layers.54.block_sparse_moe.experts.81.w3", "model.layers.54.block_sparse_moe.experts.82.w3", "model.layers.54.block_sparse_moe.experts.83.w3", "model.layers.54.block_sparse_moe.experts.84.w3", "model.layers.54.block_sparse_moe.experts.85.w3", "model.layers.54.block_sparse_moe.experts.86.w3", "model.layers.54.block_sparse_moe.experts.87.w3", "model.layers.54.block_sparse_moe.experts.88.w3", "model.layers.54.block_sparse_moe.experts.89.w3", "model.layers.54.block_sparse_moe.experts.90.w3", "model.layers.54.block_sparse_moe.experts.91.w3", "model.layers.54.block_sparse_moe.experts.92.w3", "model.layers.54.block_sparse_moe.experts.93.w3", "model.layers.54.block_sparse_moe.experts.94.w3", "model.layers.54.block_sparse_moe.experts.95.w3", "model.layers.54.block_sparse_moe.experts.96.w3", "model.layers.54.block_sparse_moe.experts.97.w3", "model.layers.54.block_sparse_moe.experts.98.w3", "model.layers.54.block_sparse_moe.experts.99.w3", "model.layers.54.block_sparse_moe.experts.100.w3", "model.layers.54.block_sparse_moe.experts.101.w3", "model.layers.54.block_sparse_moe.experts.102.w3", "model.layers.54.block_sparse_moe.experts.103.w3", "model.layers.54.block_sparse_moe.experts.104.w3", "model.layers.54.block_sparse_moe.experts.105.w3", "model.layers.54.block_sparse_moe.experts.106.w3", "model.layers.54.block_sparse_moe.experts.107.w3", "model.layers.54.block_sparse_moe.experts.108.w3", "model.layers.54.block_sparse_moe.experts.109.w3", "model.layers.54.block_sparse_moe.experts.110.w3", "model.layers.54.block_sparse_moe.experts.111.w3", "model.layers.54.block_sparse_moe.experts.112.w3", "model.layers.54.block_sparse_moe.experts.113.w3", "model.layers.54.block_sparse_moe.experts.114.w3", "model.layers.54.block_sparse_moe.experts.115.w3", "model.layers.54.block_sparse_moe.experts.116.w3", "model.layers.54.block_sparse_moe.experts.117.w3", "model.layers.54.block_sparse_moe.experts.118.w3", "model.layers.54.block_sparse_moe.experts.119.w3", "model.layers.54.block_sparse_moe.experts.120.w3", "model.layers.54.block_sparse_moe.experts.121.w3", "model.layers.54.block_sparse_moe.experts.122.w3", "model.layers.54.block_sparse_moe.experts.123.w3", "model.layers.54.block_sparse_moe.experts.124.w3", "model.layers.54.block_sparse_moe.experts.125.w3", "model.layers.54.block_sparse_moe.experts.126.w3", "model.layers.54.block_sparse_moe.experts.127.w3", "model.layers.54.block_sparse_moe.experts.128.w3", "model.layers.54.block_sparse_moe.experts.129.w3", "model.layers.54.block_sparse_moe.experts.130.w3", "model.layers.54.block_sparse_moe.experts.131.w3", "model.layers.54.block_sparse_moe.experts.132.w3", "model.layers.54.block_sparse_moe.experts.133.w3", "model.layers.54.block_sparse_moe.experts.134.w3", "model.layers.54.block_sparse_moe.experts.135.w3", "model.layers.54.block_sparse_moe.experts.136.w3", "model.layers.54.block_sparse_moe.experts.137.w3", "model.layers.54.block_sparse_moe.experts.138.w3", "model.layers.54.block_sparse_moe.experts.139.w3", "model.layers.54.block_sparse_moe.experts.140.w3", "model.layers.54.block_sparse_moe.experts.141.w3", "model.layers.54.block_sparse_moe.experts.142.w3", "model.layers.54.block_sparse_moe.experts.143.w3", "model.layers.54.block_sparse_moe.experts.144.w3", "model.layers.54.block_sparse_moe.experts.145.w3", "model.layers.54.block_sparse_moe.experts.146.w3", "model.layers.54.block_sparse_moe.experts.147.w3", "model.layers.54.block_sparse_moe.experts.148.w3", "model.layers.54.block_sparse_moe.experts.149.w3", "model.layers.54.block_sparse_moe.experts.150.w3", "model.layers.54.block_sparse_moe.experts.151.w3", "model.layers.54.block_sparse_moe.experts.152.w3", "model.layers.54.block_sparse_moe.experts.153.w3", "model.layers.54.block_sparse_moe.experts.154.w3", "model.layers.54.block_sparse_moe.experts.155.w3", "model.layers.54.block_sparse_moe.experts.156.w3", "model.layers.54.block_sparse_moe.experts.157.w3", "model.layers.54.block_sparse_moe.experts.158.w3", "model.layers.54.block_sparse_moe.experts.159.w3", "model.layers.54.block_sparse_moe.experts.160.w3", "model.layers.54.block_sparse_moe.experts.161.w3", "model.layers.54.block_sparse_moe.experts.162.w3", "model.layers.54.block_sparse_moe.experts.163.w3", "model.layers.54.block_sparse_moe.experts.164.w3", "model.layers.54.block_sparse_moe.experts.165.w3", "model.layers.54.block_sparse_moe.experts.166.w3", "model.layers.54.block_sparse_moe.experts.167.w3", "model.layers.54.block_sparse_moe.experts.168.w3", "model.layers.54.block_sparse_moe.experts.169.w3", "model.layers.54.block_sparse_moe.experts.170.w3", "model.layers.54.block_sparse_moe.experts.171.w3", "model.layers.54.block_sparse_moe.experts.172.w3", "model.layers.54.block_sparse_moe.experts.173.w3", "model.layers.54.block_sparse_moe.experts.174.w3", "model.layers.54.block_sparse_moe.experts.175.w3", "model.layers.54.block_sparse_moe.experts.176.w3", "model.layers.54.block_sparse_moe.experts.177.w3", "model.layers.54.block_sparse_moe.experts.178.w3", "model.layers.54.block_sparse_moe.experts.179.w3", "model.layers.54.block_sparse_moe.experts.180.w3", "model.layers.54.block_sparse_moe.experts.181.w3", "model.layers.54.block_sparse_moe.experts.182.w3", "model.layers.54.block_sparse_moe.experts.183.w3", "model.layers.54.block_sparse_moe.experts.184.w3", "model.layers.54.block_sparse_moe.experts.185.w3", "model.layers.54.block_sparse_moe.experts.186.w3", "model.layers.54.block_sparse_moe.experts.187.w3", "model.layers.54.block_sparse_moe.experts.188.w3", "model.layers.54.block_sparse_moe.experts.189.w3", "model.layers.54.block_sparse_moe.experts.190.w3", "model.layers.54.block_sparse_moe.experts.191.w3", "model.layers.54.block_sparse_moe.experts.192.w3", "model.layers.54.block_sparse_moe.experts.193.w3", "model.layers.54.block_sparse_moe.experts.194.w3", "model.layers.54.block_sparse_moe.experts.195.w3", "model.layers.54.block_sparse_moe.experts.196.w3", "model.layers.54.block_sparse_moe.experts.197.w3", "model.layers.54.block_sparse_moe.experts.198.w3", "model.layers.54.block_sparse_moe.experts.199.w3", "model.layers.54.block_sparse_moe.experts.200.w3", "model.layers.54.block_sparse_moe.experts.201.w3", "model.layers.54.block_sparse_moe.experts.202.w3", "model.layers.54.block_sparse_moe.experts.203.w3", "model.layers.54.block_sparse_moe.experts.204.w3", "model.layers.54.block_sparse_moe.experts.205.w3", "model.layers.54.block_sparse_moe.experts.206.w3", "model.layers.54.block_sparse_moe.experts.207.w3", "model.layers.54.block_sparse_moe.experts.208.w3", "model.layers.54.block_sparse_moe.experts.209.w3", "model.layers.54.block_sparse_moe.experts.210.w3", "model.layers.54.block_sparse_moe.experts.211.w3", "model.layers.54.block_sparse_moe.experts.212.w3", "model.layers.54.block_sparse_moe.experts.213.w3", "model.layers.54.block_sparse_moe.experts.214.w3", "model.layers.54.block_sparse_moe.experts.215.w3", "model.layers.54.block_sparse_moe.experts.216.w3", "model.layers.54.block_sparse_moe.experts.217.w3", "model.layers.54.block_sparse_moe.experts.218.w3", "model.layers.54.block_sparse_moe.experts.219.w3", "model.layers.54.block_sparse_moe.experts.220.w3", "model.layers.54.block_sparse_moe.experts.221.w3", "model.layers.54.block_sparse_moe.experts.222.w3", "model.layers.54.block_sparse_moe.experts.223.w3", "model.layers.54.block_sparse_moe.experts.224.w3", "model.layers.54.block_sparse_moe.experts.225.w3", "model.layers.54.block_sparse_moe.experts.226.w3", "model.layers.54.block_sparse_moe.experts.227.w3", "model.layers.54.block_sparse_moe.experts.228.w3", "model.layers.54.block_sparse_moe.experts.229.w3", "model.layers.54.block_sparse_moe.experts.230.w3", "model.layers.54.block_sparse_moe.experts.231.w3", "model.layers.54.block_sparse_moe.experts.232.w3", "model.layers.54.block_sparse_moe.experts.233.w3", "model.layers.54.block_sparse_moe.experts.234.w3", "model.layers.54.block_sparse_moe.experts.235.w3", "model.layers.54.block_sparse_moe.experts.236.w3", "model.layers.54.block_sparse_moe.experts.237.w3", "model.layers.54.block_sparse_moe.experts.238.w3", "model.layers.54.block_sparse_moe.experts.239.w3", "model.layers.54.block_sparse_moe.experts.240.w3", "model.layers.54.block_sparse_moe.experts.241.w3", "model.layers.54.block_sparse_moe.experts.242.w3", "model.layers.54.block_sparse_moe.experts.243.w3", "model.layers.54.block_sparse_moe.experts.244.w3", "model.layers.54.block_sparse_moe.experts.245.w3", "model.layers.54.block_sparse_moe.experts.246.w3", "model.layers.54.block_sparse_moe.experts.247.w3", "model.layers.54.block_sparse_moe.experts.248.w3", "model.layers.54.block_sparse_moe.experts.249.w3", "model.layers.54.block_sparse_moe.experts.250.w3", "model.layers.54.block_sparse_moe.experts.251.w3", "model.layers.54.block_sparse_moe.experts.252.w3", "model.layers.54.block_sparse_moe.experts.253.w3", "model.layers.54.block_sparse_moe.experts.254.w3", "model.layers.54.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -1.0674074292185698e-05, "dbits": 2415919104 } ] }, { "idx": 274, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w2", "model.layers.54.block_sparse_moe.experts.1.w2", "model.layers.54.block_sparse_moe.experts.2.w2", "model.layers.54.block_sparse_moe.experts.3.w2", "model.layers.54.block_sparse_moe.experts.4.w2", "model.layers.54.block_sparse_moe.experts.5.w2", "model.layers.54.block_sparse_moe.experts.6.w2", "model.layers.54.block_sparse_moe.experts.7.w2", "model.layers.54.block_sparse_moe.experts.8.w2", "model.layers.54.block_sparse_moe.experts.9.w2", "model.layers.54.block_sparse_moe.experts.10.w2", "model.layers.54.block_sparse_moe.experts.11.w2", "model.layers.54.block_sparse_moe.experts.12.w2", "model.layers.54.block_sparse_moe.experts.13.w2", "model.layers.54.block_sparse_moe.experts.14.w2", "model.layers.54.block_sparse_moe.experts.15.w2", "model.layers.54.block_sparse_moe.experts.16.w2", "model.layers.54.block_sparse_moe.experts.17.w2", "model.layers.54.block_sparse_moe.experts.18.w2", "model.layers.54.block_sparse_moe.experts.19.w2", "model.layers.54.block_sparse_moe.experts.20.w2", "model.layers.54.block_sparse_moe.experts.21.w2", "model.layers.54.block_sparse_moe.experts.22.w2", "model.layers.54.block_sparse_moe.experts.23.w2", "model.layers.54.block_sparse_moe.experts.24.w2", "model.layers.54.block_sparse_moe.experts.25.w2", "model.layers.54.block_sparse_moe.experts.26.w2", "model.layers.54.block_sparse_moe.experts.27.w2", "model.layers.54.block_sparse_moe.experts.28.w2", "model.layers.54.block_sparse_moe.experts.29.w2", "model.layers.54.block_sparse_moe.experts.30.w2", "model.layers.54.block_sparse_moe.experts.31.w2", "model.layers.54.block_sparse_moe.experts.32.w2", "model.layers.54.block_sparse_moe.experts.33.w2", "model.layers.54.block_sparse_moe.experts.34.w2", "model.layers.54.block_sparse_moe.experts.35.w2", "model.layers.54.block_sparse_moe.experts.36.w2", "model.layers.54.block_sparse_moe.experts.37.w2", "model.layers.54.block_sparse_moe.experts.38.w2", "model.layers.54.block_sparse_moe.experts.39.w2", "model.layers.54.block_sparse_moe.experts.40.w2", "model.layers.54.block_sparse_moe.experts.41.w2", "model.layers.54.block_sparse_moe.experts.42.w2", "model.layers.54.block_sparse_moe.experts.43.w2", "model.layers.54.block_sparse_moe.experts.44.w2", "model.layers.54.block_sparse_moe.experts.45.w2", "model.layers.54.block_sparse_moe.experts.46.w2", "model.layers.54.block_sparse_moe.experts.47.w2", "model.layers.54.block_sparse_moe.experts.48.w2", "model.layers.54.block_sparse_moe.experts.49.w2", "model.layers.54.block_sparse_moe.experts.50.w2", "model.layers.54.block_sparse_moe.experts.51.w2", "model.layers.54.block_sparse_moe.experts.52.w2", "model.layers.54.block_sparse_moe.experts.53.w2", "model.layers.54.block_sparse_moe.experts.54.w2", "model.layers.54.block_sparse_moe.experts.55.w2", "model.layers.54.block_sparse_moe.experts.56.w2", "model.layers.54.block_sparse_moe.experts.57.w2", "model.layers.54.block_sparse_moe.experts.58.w2", "model.layers.54.block_sparse_moe.experts.59.w2", "model.layers.54.block_sparse_moe.experts.60.w2", "model.layers.54.block_sparse_moe.experts.61.w2", "model.layers.54.block_sparse_moe.experts.62.w2", "model.layers.54.block_sparse_moe.experts.63.w2", "model.layers.54.block_sparse_moe.experts.64.w2", "model.layers.54.block_sparse_moe.experts.65.w2", "model.layers.54.block_sparse_moe.experts.66.w2", "model.layers.54.block_sparse_moe.experts.67.w2", "model.layers.54.block_sparse_moe.experts.68.w2", "model.layers.54.block_sparse_moe.experts.69.w2", "model.layers.54.block_sparse_moe.experts.70.w2", "model.layers.54.block_sparse_moe.experts.71.w2", "model.layers.54.block_sparse_moe.experts.72.w2", "model.layers.54.block_sparse_moe.experts.73.w2", "model.layers.54.block_sparse_moe.experts.74.w2", "model.layers.54.block_sparse_moe.experts.75.w2", "model.layers.54.block_sparse_moe.experts.76.w2", "model.layers.54.block_sparse_moe.experts.77.w2", "model.layers.54.block_sparse_moe.experts.78.w2", "model.layers.54.block_sparse_moe.experts.79.w2", "model.layers.54.block_sparse_moe.experts.80.w2", "model.layers.54.block_sparse_moe.experts.81.w2", "model.layers.54.block_sparse_moe.experts.82.w2", "model.layers.54.block_sparse_moe.experts.83.w2", "model.layers.54.block_sparse_moe.experts.84.w2", "model.layers.54.block_sparse_moe.experts.85.w2", "model.layers.54.block_sparse_moe.experts.86.w2", "model.layers.54.block_sparse_moe.experts.87.w2", "model.layers.54.block_sparse_moe.experts.88.w2", "model.layers.54.block_sparse_moe.experts.89.w2", "model.layers.54.block_sparse_moe.experts.90.w2", "model.layers.54.block_sparse_moe.experts.91.w2", "model.layers.54.block_sparse_moe.experts.92.w2", "model.layers.54.block_sparse_moe.experts.93.w2", "model.layers.54.block_sparse_moe.experts.94.w2", "model.layers.54.block_sparse_moe.experts.95.w2", "model.layers.54.block_sparse_moe.experts.96.w2", "model.layers.54.block_sparse_moe.experts.97.w2", "model.layers.54.block_sparse_moe.experts.98.w2", "model.layers.54.block_sparse_moe.experts.99.w2", "model.layers.54.block_sparse_moe.experts.100.w2", "model.layers.54.block_sparse_moe.experts.101.w2", "model.layers.54.block_sparse_moe.experts.102.w2", "model.layers.54.block_sparse_moe.experts.103.w2", "model.layers.54.block_sparse_moe.experts.104.w2", "model.layers.54.block_sparse_moe.experts.105.w2", "model.layers.54.block_sparse_moe.experts.106.w2", "model.layers.54.block_sparse_moe.experts.107.w2", "model.layers.54.block_sparse_moe.experts.108.w2", "model.layers.54.block_sparse_moe.experts.109.w2", "model.layers.54.block_sparse_moe.experts.110.w2", "model.layers.54.block_sparse_moe.experts.111.w2", "model.layers.54.block_sparse_moe.experts.112.w2", "model.layers.54.block_sparse_moe.experts.113.w2", "model.layers.54.block_sparse_moe.experts.114.w2", "model.layers.54.block_sparse_moe.experts.115.w2", "model.layers.54.block_sparse_moe.experts.116.w2", "model.layers.54.block_sparse_moe.experts.117.w2", "model.layers.54.block_sparse_moe.experts.118.w2", "model.layers.54.block_sparse_moe.experts.119.w2", "model.layers.54.block_sparse_moe.experts.120.w2", "model.layers.54.block_sparse_moe.experts.121.w2", "model.layers.54.block_sparse_moe.experts.122.w2", "model.layers.54.block_sparse_moe.experts.123.w2", "model.layers.54.block_sparse_moe.experts.124.w2", "model.layers.54.block_sparse_moe.experts.125.w2", "model.layers.54.block_sparse_moe.experts.126.w2", "model.layers.54.block_sparse_moe.experts.127.w2", "model.layers.54.block_sparse_moe.experts.128.w2", "model.layers.54.block_sparse_moe.experts.129.w2", "model.layers.54.block_sparse_moe.experts.130.w2", "model.layers.54.block_sparse_moe.experts.131.w2", "model.layers.54.block_sparse_moe.experts.132.w2", "model.layers.54.block_sparse_moe.experts.133.w2", "model.layers.54.block_sparse_moe.experts.134.w2", "model.layers.54.block_sparse_moe.experts.135.w2", "model.layers.54.block_sparse_moe.experts.136.w2", "model.layers.54.block_sparse_moe.experts.137.w2", "model.layers.54.block_sparse_moe.experts.138.w2", "model.layers.54.block_sparse_moe.experts.139.w2", "model.layers.54.block_sparse_moe.experts.140.w2", "model.layers.54.block_sparse_moe.experts.141.w2", "model.layers.54.block_sparse_moe.experts.142.w2", "model.layers.54.block_sparse_moe.experts.143.w2", "model.layers.54.block_sparse_moe.experts.144.w2", "model.layers.54.block_sparse_moe.experts.145.w2", "model.layers.54.block_sparse_moe.experts.146.w2", "model.layers.54.block_sparse_moe.experts.147.w2", "model.layers.54.block_sparse_moe.experts.148.w2", "model.layers.54.block_sparse_moe.experts.149.w2", "model.layers.54.block_sparse_moe.experts.150.w2", "model.layers.54.block_sparse_moe.experts.151.w2", "model.layers.54.block_sparse_moe.experts.152.w2", "model.layers.54.block_sparse_moe.experts.153.w2", "model.layers.54.block_sparse_moe.experts.154.w2", "model.layers.54.block_sparse_moe.experts.155.w2", "model.layers.54.block_sparse_moe.experts.156.w2", "model.layers.54.block_sparse_moe.experts.157.w2", "model.layers.54.block_sparse_moe.experts.158.w2", "model.layers.54.block_sparse_moe.experts.159.w2", "model.layers.54.block_sparse_moe.experts.160.w2", "model.layers.54.block_sparse_moe.experts.161.w2", "model.layers.54.block_sparse_moe.experts.162.w2", "model.layers.54.block_sparse_moe.experts.163.w2", "model.layers.54.block_sparse_moe.experts.164.w2", "model.layers.54.block_sparse_moe.experts.165.w2", "model.layers.54.block_sparse_moe.experts.166.w2", "model.layers.54.block_sparse_moe.experts.167.w2", "model.layers.54.block_sparse_moe.experts.168.w2", "model.layers.54.block_sparse_moe.experts.169.w2", "model.layers.54.block_sparse_moe.experts.170.w2", "model.layers.54.block_sparse_moe.experts.171.w2", "model.layers.54.block_sparse_moe.experts.172.w2", "model.layers.54.block_sparse_moe.experts.173.w2", "model.layers.54.block_sparse_moe.experts.174.w2", "model.layers.54.block_sparse_moe.experts.175.w2", "model.layers.54.block_sparse_moe.experts.176.w2", "model.layers.54.block_sparse_moe.experts.177.w2", "model.layers.54.block_sparse_moe.experts.178.w2", "model.layers.54.block_sparse_moe.experts.179.w2", "model.layers.54.block_sparse_moe.experts.180.w2", "model.layers.54.block_sparse_moe.experts.181.w2", "model.layers.54.block_sparse_moe.experts.182.w2", "model.layers.54.block_sparse_moe.experts.183.w2", "model.layers.54.block_sparse_moe.experts.184.w2", "model.layers.54.block_sparse_moe.experts.185.w2", "model.layers.54.block_sparse_moe.experts.186.w2", "model.layers.54.block_sparse_moe.experts.187.w2", "model.layers.54.block_sparse_moe.experts.188.w2", "model.layers.54.block_sparse_moe.experts.189.w2", "model.layers.54.block_sparse_moe.experts.190.w2", "model.layers.54.block_sparse_moe.experts.191.w2", "model.layers.54.block_sparse_moe.experts.192.w2", "model.layers.54.block_sparse_moe.experts.193.w2", "model.layers.54.block_sparse_moe.experts.194.w2", "model.layers.54.block_sparse_moe.experts.195.w2", "model.layers.54.block_sparse_moe.experts.196.w2", "model.layers.54.block_sparse_moe.experts.197.w2", "model.layers.54.block_sparse_moe.experts.198.w2", "model.layers.54.block_sparse_moe.experts.199.w2", "model.layers.54.block_sparse_moe.experts.200.w2", "model.layers.54.block_sparse_moe.experts.201.w2", "model.layers.54.block_sparse_moe.experts.202.w2", "model.layers.54.block_sparse_moe.experts.203.w2", "model.layers.54.block_sparse_moe.experts.204.w2", "model.layers.54.block_sparse_moe.experts.205.w2", "model.layers.54.block_sparse_moe.experts.206.w2", "model.layers.54.block_sparse_moe.experts.207.w2", "model.layers.54.block_sparse_moe.experts.208.w2", "model.layers.54.block_sparse_moe.experts.209.w2", "model.layers.54.block_sparse_moe.experts.210.w2", "model.layers.54.block_sparse_moe.experts.211.w2", "model.layers.54.block_sparse_moe.experts.212.w2", "model.layers.54.block_sparse_moe.experts.213.w2", "model.layers.54.block_sparse_moe.experts.214.w2", "model.layers.54.block_sparse_moe.experts.215.w2", "model.layers.54.block_sparse_moe.experts.216.w2", "model.layers.54.block_sparse_moe.experts.217.w2", "model.layers.54.block_sparse_moe.experts.218.w2", "model.layers.54.block_sparse_moe.experts.219.w2", "model.layers.54.block_sparse_moe.experts.220.w2", "model.layers.54.block_sparse_moe.experts.221.w2", "model.layers.54.block_sparse_moe.experts.222.w2", "model.layers.54.block_sparse_moe.experts.223.w2", "model.layers.54.block_sparse_moe.experts.224.w2", "model.layers.54.block_sparse_moe.experts.225.w2", "model.layers.54.block_sparse_moe.experts.226.w2", "model.layers.54.block_sparse_moe.experts.227.w2", "model.layers.54.block_sparse_moe.experts.228.w2", "model.layers.54.block_sparse_moe.experts.229.w2", "model.layers.54.block_sparse_moe.experts.230.w2", "model.layers.54.block_sparse_moe.experts.231.w2", "model.layers.54.block_sparse_moe.experts.232.w2", "model.layers.54.block_sparse_moe.experts.233.w2", "model.layers.54.block_sparse_moe.experts.234.w2", "model.layers.54.block_sparse_moe.experts.235.w2", "model.layers.54.block_sparse_moe.experts.236.w2", "model.layers.54.block_sparse_moe.experts.237.w2", "model.layers.54.block_sparse_moe.experts.238.w2", "model.layers.54.block_sparse_moe.experts.239.w2", "model.layers.54.block_sparse_moe.experts.240.w2", "model.layers.54.block_sparse_moe.experts.241.w2", "model.layers.54.block_sparse_moe.experts.242.w2", "model.layers.54.block_sparse_moe.experts.243.w2", "model.layers.54.block_sparse_moe.experts.244.w2", "model.layers.54.block_sparse_moe.experts.245.w2", "model.layers.54.block_sparse_moe.experts.246.w2", "model.layers.54.block_sparse_moe.experts.247.w2", "model.layers.54.block_sparse_moe.experts.248.w2", "model.layers.54.block_sparse_moe.experts.249.w2", "model.layers.54.block_sparse_moe.experts.250.w2", "model.layers.54.block_sparse_moe.experts.251.w2", "model.layers.54.block_sparse_moe.experts.252.w2", "model.layers.54.block_sparse_moe.experts.253.w2", "model.layers.54.block_sparse_moe.experts.254.w2", "model.layers.54.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 3.3633410930627994e-05, "dbits": 1207959552 } ] }, { "idx": 275, "layers": [ "model.layers.55.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00043277312070132135, "dbits": 18874368 } ] }, { "idx": 276, "layers": [ "model.layers.55.self_attn.k_proj", "model.layers.55.self_attn.v_proj" ], "candidates": [ { "dkld": -2.9053725302222166e-05, "dbits": 6291456 } ] }, { "idx": 277, "layers": [ "model.layers.55.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0004355857148766601, "dbits": 18874368 } ] }, { "idx": 278, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w1", "model.layers.55.block_sparse_moe.experts.1.w1", "model.layers.55.block_sparse_moe.experts.2.w1", "model.layers.55.block_sparse_moe.experts.3.w1", "model.layers.55.block_sparse_moe.experts.4.w1", "model.layers.55.block_sparse_moe.experts.5.w1", "model.layers.55.block_sparse_moe.experts.6.w1", "model.layers.55.block_sparse_moe.experts.7.w1", "model.layers.55.block_sparse_moe.experts.8.w1", "model.layers.55.block_sparse_moe.experts.9.w1", "model.layers.55.block_sparse_moe.experts.10.w1", "model.layers.55.block_sparse_moe.experts.11.w1", "model.layers.55.block_sparse_moe.experts.12.w1", "model.layers.55.block_sparse_moe.experts.13.w1", "model.layers.55.block_sparse_moe.experts.14.w1", "model.layers.55.block_sparse_moe.experts.15.w1", "model.layers.55.block_sparse_moe.experts.16.w1", "model.layers.55.block_sparse_moe.experts.17.w1", "model.layers.55.block_sparse_moe.experts.18.w1", "model.layers.55.block_sparse_moe.experts.19.w1", "model.layers.55.block_sparse_moe.experts.20.w1", "model.layers.55.block_sparse_moe.experts.21.w1", "model.layers.55.block_sparse_moe.experts.22.w1", "model.layers.55.block_sparse_moe.experts.23.w1", "model.layers.55.block_sparse_moe.experts.24.w1", "model.layers.55.block_sparse_moe.experts.25.w1", "model.layers.55.block_sparse_moe.experts.26.w1", "model.layers.55.block_sparse_moe.experts.27.w1", "model.layers.55.block_sparse_moe.experts.28.w1", "model.layers.55.block_sparse_moe.experts.29.w1", "model.layers.55.block_sparse_moe.experts.30.w1", "model.layers.55.block_sparse_moe.experts.31.w1", "model.layers.55.block_sparse_moe.experts.32.w1", "model.layers.55.block_sparse_moe.experts.33.w1", "model.layers.55.block_sparse_moe.experts.34.w1", "model.layers.55.block_sparse_moe.experts.35.w1", "model.layers.55.block_sparse_moe.experts.36.w1", "model.layers.55.block_sparse_moe.experts.37.w1", "model.layers.55.block_sparse_moe.experts.38.w1", "model.layers.55.block_sparse_moe.experts.39.w1", "model.layers.55.block_sparse_moe.experts.40.w1", "model.layers.55.block_sparse_moe.experts.41.w1", "model.layers.55.block_sparse_moe.experts.42.w1", "model.layers.55.block_sparse_moe.experts.43.w1", "model.layers.55.block_sparse_moe.experts.44.w1", "model.layers.55.block_sparse_moe.experts.45.w1", "model.layers.55.block_sparse_moe.experts.46.w1", "model.layers.55.block_sparse_moe.experts.47.w1", "model.layers.55.block_sparse_moe.experts.48.w1", "model.layers.55.block_sparse_moe.experts.49.w1", "model.layers.55.block_sparse_moe.experts.50.w1", "model.layers.55.block_sparse_moe.experts.51.w1", "model.layers.55.block_sparse_moe.experts.52.w1", "model.layers.55.block_sparse_moe.experts.53.w1", "model.layers.55.block_sparse_moe.experts.54.w1", "model.layers.55.block_sparse_moe.experts.55.w1", "model.layers.55.block_sparse_moe.experts.56.w1", "model.layers.55.block_sparse_moe.experts.57.w1", "model.layers.55.block_sparse_moe.experts.58.w1", "model.layers.55.block_sparse_moe.experts.59.w1", "model.layers.55.block_sparse_moe.experts.60.w1", "model.layers.55.block_sparse_moe.experts.61.w1", "model.layers.55.block_sparse_moe.experts.62.w1", "model.layers.55.block_sparse_moe.experts.63.w1", "model.layers.55.block_sparse_moe.experts.64.w1", "model.layers.55.block_sparse_moe.experts.65.w1", "model.layers.55.block_sparse_moe.experts.66.w1", "model.layers.55.block_sparse_moe.experts.67.w1", "model.layers.55.block_sparse_moe.experts.68.w1", "model.layers.55.block_sparse_moe.experts.69.w1", "model.layers.55.block_sparse_moe.experts.70.w1", "model.layers.55.block_sparse_moe.experts.71.w1", "model.layers.55.block_sparse_moe.experts.72.w1", "model.layers.55.block_sparse_moe.experts.73.w1", "model.layers.55.block_sparse_moe.experts.74.w1", "model.layers.55.block_sparse_moe.experts.75.w1", "model.layers.55.block_sparse_moe.experts.76.w1", "model.layers.55.block_sparse_moe.experts.77.w1", "model.layers.55.block_sparse_moe.experts.78.w1", "model.layers.55.block_sparse_moe.experts.79.w1", "model.layers.55.block_sparse_moe.experts.80.w1", "model.layers.55.block_sparse_moe.experts.81.w1", "model.layers.55.block_sparse_moe.experts.82.w1", "model.layers.55.block_sparse_moe.experts.83.w1", "model.layers.55.block_sparse_moe.experts.84.w1", "model.layers.55.block_sparse_moe.experts.85.w1", "model.layers.55.block_sparse_moe.experts.86.w1", "model.layers.55.block_sparse_moe.experts.87.w1", "model.layers.55.block_sparse_moe.experts.88.w1", "model.layers.55.block_sparse_moe.experts.89.w1", "model.layers.55.block_sparse_moe.experts.90.w1", "model.layers.55.block_sparse_moe.experts.91.w1", "model.layers.55.block_sparse_moe.experts.92.w1", "model.layers.55.block_sparse_moe.experts.93.w1", "model.layers.55.block_sparse_moe.experts.94.w1", "model.layers.55.block_sparse_moe.experts.95.w1", "model.layers.55.block_sparse_moe.experts.96.w1", "model.layers.55.block_sparse_moe.experts.97.w1", "model.layers.55.block_sparse_moe.experts.98.w1", "model.layers.55.block_sparse_moe.experts.99.w1", "model.layers.55.block_sparse_moe.experts.100.w1", "model.layers.55.block_sparse_moe.experts.101.w1", "model.layers.55.block_sparse_moe.experts.102.w1", "model.layers.55.block_sparse_moe.experts.103.w1", "model.layers.55.block_sparse_moe.experts.104.w1", "model.layers.55.block_sparse_moe.experts.105.w1", "model.layers.55.block_sparse_moe.experts.106.w1", "model.layers.55.block_sparse_moe.experts.107.w1", "model.layers.55.block_sparse_moe.experts.108.w1", "model.layers.55.block_sparse_moe.experts.109.w1", "model.layers.55.block_sparse_moe.experts.110.w1", "model.layers.55.block_sparse_moe.experts.111.w1", "model.layers.55.block_sparse_moe.experts.112.w1", "model.layers.55.block_sparse_moe.experts.113.w1", "model.layers.55.block_sparse_moe.experts.114.w1", "model.layers.55.block_sparse_moe.experts.115.w1", "model.layers.55.block_sparse_moe.experts.116.w1", "model.layers.55.block_sparse_moe.experts.117.w1", "model.layers.55.block_sparse_moe.experts.118.w1", "model.layers.55.block_sparse_moe.experts.119.w1", "model.layers.55.block_sparse_moe.experts.120.w1", "model.layers.55.block_sparse_moe.experts.121.w1", "model.layers.55.block_sparse_moe.experts.122.w1", "model.layers.55.block_sparse_moe.experts.123.w1", "model.layers.55.block_sparse_moe.experts.124.w1", "model.layers.55.block_sparse_moe.experts.125.w1", "model.layers.55.block_sparse_moe.experts.126.w1", "model.layers.55.block_sparse_moe.experts.127.w1", "model.layers.55.block_sparse_moe.experts.128.w1", "model.layers.55.block_sparse_moe.experts.129.w1", "model.layers.55.block_sparse_moe.experts.130.w1", "model.layers.55.block_sparse_moe.experts.131.w1", "model.layers.55.block_sparse_moe.experts.132.w1", "model.layers.55.block_sparse_moe.experts.133.w1", "model.layers.55.block_sparse_moe.experts.134.w1", "model.layers.55.block_sparse_moe.experts.135.w1", "model.layers.55.block_sparse_moe.experts.136.w1", "model.layers.55.block_sparse_moe.experts.137.w1", "model.layers.55.block_sparse_moe.experts.138.w1", "model.layers.55.block_sparse_moe.experts.139.w1", "model.layers.55.block_sparse_moe.experts.140.w1", "model.layers.55.block_sparse_moe.experts.141.w1", "model.layers.55.block_sparse_moe.experts.142.w1", "model.layers.55.block_sparse_moe.experts.143.w1", "model.layers.55.block_sparse_moe.experts.144.w1", "model.layers.55.block_sparse_moe.experts.145.w1", "model.layers.55.block_sparse_moe.experts.146.w1", "model.layers.55.block_sparse_moe.experts.147.w1", "model.layers.55.block_sparse_moe.experts.148.w1", "model.layers.55.block_sparse_moe.experts.149.w1", "model.layers.55.block_sparse_moe.experts.150.w1", "model.layers.55.block_sparse_moe.experts.151.w1", "model.layers.55.block_sparse_moe.experts.152.w1", "model.layers.55.block_sparse_moe.experts.153.w1", "model.layers.55.block_sparse_moe.experts.154.w1", "model.layers.55.block_sparse_moe.experts.155.w1", "model.layers.55.block_sparse_moe.experts.156.w1", "model.layers.55.block_sparse_moe.experts.157.w1", "model.layers.55.block_sparse_moe.experts.158.w1", "model.layers.55.block_sparse_moe.experts.159.w1", "model.layers.55.block_sparse_moe.experts.160.w1", "model.layers.55.block_sparse_moe.experts.161.w1", "model.layers.55.block_sparse_moe.experts.162.w1", "model.layers.55.block_sparse_moe.experts.163.w1", "model.layers.55.block_sparse_moe.experts.164.w1", "model.layers.55.block_sparse_moe.experts.165.w1", "model.layers.55.block_sparse_moe.experts.166.w1", "model.layers.55.block_sparse_moe.experts.167.w1", "model.layers.55.block_sparse_moe.experts.168.w1", "model.layers.55.block_sparse_moe.experts.169.w1", "model.layers.55.block_sparse_moe.experts.170.w1", "model.layers.55.block_sparse_moe.experts.171.w1", "model.layers.55.block_sparse_moe.experts.172.w1", "model.layers.55.block_sparse_moe.experts.173.w1", "model.layers.55.block_sparse_moe.experts.174.w1", "model.layers.55.block_sparse_moe.experts.175.w1", "model.layers.55.block_sparse_moe.experts.176.w1", "model.layers.55.block_sparse_moe.experts.177.w1", "model.layers.55.block_sparse_moe.experts.178.w1", "model.layers.55.block_sparse_moe.experts.179.w1", "model.layers.55.block_sparse_moe.experts.180.w1", "model.layers.55.block_sparse_moe.experts.181.w1", "model.layers.55.block_sparse_moe.experts.182.w1", "model.layers.55.block_sparse_moe.experts.183.w1", "model.layers.55.block_sparse_moe.experts.184.w1", "model.layers.55.block_sparse_moe.experts.185.w1", "model.layers.55.block_sparse_moe.experts.186.w1", "model.layers.55.block_sparse_moe.experts.187.w1", "model.layers.55.block_sparse_moe.experts.188.w1", "model.layers.55.block_sparse_moe.experts.189.w1", "model.layers.55.block_sparse_moe.experts.190.w1", "model.layers.55.block_sparse_moe.experts.191.w1", "model.layers.55.block_sparse_moe.experts.192.w1", "model.layers.55.block_sparse_moe.experts.193.w1", "model.layers.55.block_sparse_moe.experts.194.w1", "model.layers.55.block_sparse_moe.experts.195.w1", "model.layers.55.block_sparse_moe.experts.196.w1", "model.layers.55.block_sparse_moe.experts.197.w1", "model.layers.55.block_sparse_moe.experts.198.w1", "model.layers.55.block_sparse_moe.experts.199.w1", "model.layers.55.block_sparse_moe.experts.200.w1", "model.layers.55.block_sparse_moe.experts.201.w1", "model.layers.55.block_sparse_moe.experts.202.w1", "model.layers.55.block_sparse_moe.experts.203.w1", "model.layers.55.block_sparse_moe.experts.204.w1", "model.layers.55.block_sparse_moe.experts.205.w1", "model.layers.55.block_sparse_moe.experts.206.w1", "model.layers.55.block_sparse_moe.experts.207.w1", "model.layers.55.block_sparse_moe.experts.208.w1", "model.layers.55.block_sparse_moe.experts.209.w1", "model.layers.55.block_sparse_moe.experts.210.w1", "model.layers.55.block_sparse_moe.experts.211.w1", "model.layers.55.block_sparse_moe.experts.212.w1", "model.layers.55.block_sparse_moe.experts.213.w1", "model.layers.55.block_sparse_moe.experts.214.w1", "model.layers.55.block_sparse_moe.experts.215.w1", "model.layers.55.block_sparse_moe.experts.216.w1", "model.layers.55.block_sparse_moe.experts.217.w1", "model.layers.55.block_sparse_moe.experts.218.w1", "model.layers.55.block_sparse_moe.experts.219.w1", "model.layers.55.block_sparse_moe.experts.220.w1", "model.layers.55.block_sparse_moe.experts.221.w1", "model.layers.55.block_sparse_moe.experts.222.w1", "model.layers.55.block_sparse_moe.experts.223.w1", "model.layers.55.block_sparse_moe.experts.224.w1", "model.layers.55.block_sparse_moe.experts.225.w1", "model.layers.55.block_sparse_moe.experts.226.w1", "model.layers.55.block_sparse_moe.experts.227.w1", "model.layers.55.block_sparse_moe.experts.228.w1", "model.layers.55.block_sparse_moe.experts.229.w1", "model.layers.55.block_sparse_moe.experts.230.w1", "model.layers.55.block_sparse_moe.experts.231.w1", "model.layers.55.block_sparse_moe.experts.232.w1", "model.layers.55.block_sparse_moe.experts.233.w1", "model.layers.55.block_sparse_moe.experts.234.w1", "model.layers.55.block_sparse_moe.experts.235.w1", "model.layers.55.block_sparse_moe.experts.236.w1", "model.layers.55.block_sparse_moe.experts.237.w1", "model.layers.55.block_sparse_moe.experts.238.w1", "model.layers.55.block_sparse_moe.experts.239.w1", "model.layers.55.block_sparse_moe.experts.240.w1", "model.layers.55.block_sparse_moe.experts.241.w1", "model.layers.55.block_sparse_moe.experts.242.w1", "model.layers.55.block_sparse_moe.experts.243.w1", "model.layers.55.block_sparse_moe.experts.244.w1", "model.layers.55.block_sparse_moe.experts.245.w1", "model.layers.55.block_sparse_moe.experts.246.w1", "model.layers.55.block_sparse_moe.experts.247.w1", "model.layers.55.block_sparse_moe.experts.248.w1", "model.layers.55.block_sparse_moe.experts.249.w1", "model.layers.55.block_sparse_moe.experts.250.w1", "model.layers.55.block_sparse_moe.experts.251.w1", "model.layers.55.block_sparse_moe.experts.252.w1", "model.layers.55.block_sparse_moe.experts.253.w1", "model.layers.55.block_sparse_moe.experts.254.w1", "model.layers.55.block_sparse_moe.experts.255.w1", "model.layers.55.block_sparse_moe.experts.0.w3", "model.layers.55.block_sparse_moe.experts.1.w3", "model.layers.55.block_sparse_moe.experts.2.w3", "model.layers.55.block_sparse_moe.experts.3.w3", "model.layers.55.block_sparse_moe.experts.4.w3", "model.layers.55.block_sparse_moe.experts.5.w3", "model.layers.55.block_sparse_moe.experts.6.w3", "model.layers.55.block_sparse_moe.experts.7.w3", "model.layers.55.block_sparse_moe.experts.8.w3", "model.layers.55.block_sparse_moe.experts.9.w3", "model.layers.55.block_sparse_moe.experts.10.w3", "model.layers.55.block_sparse_moe.experts.11.w3", "model.layers.55.block_sparse_moe.experts.12.w3", "model.layers.55.block_sparse_moe.experts.13.w3", "model.layers.55.block_sparse_moe.experts.14.w3", "model.layers.55.block_sparse_moe.experts.15.w3", "model.layers.55.block_sparse_moe.experts.16.w3", "model.layers.55.block_sparse_moe.experts.17.w3", "model.layers.55.block_sparse_moe.experts.18.w3", "model.layers.55.block_sparse_moe.experts.19.w3", "model.layers.55.block_sparse_moe.experts.20.w3", "model.layers.55.block_sparse_moe.experts.21.w3", "model.layers.55.block_sparse_moe.experts.22.w3", "model.layers.55.block_sparse_moe.experts.23.w3", "model.layers.55.block_sparse_moe.experts.24.w3", "model.layers.55.block_sparse_moe.experts.25.w3", "model.layers.55.block_sparse_moe.experts.26.w3", "model.layers.55.block_sparse_moe.experts.27.w3", "model.layers.55.block_sparse_moe.experts.28.w3", "model.layers.55.block_sparse_moe.experts.29.w3", "model.layers.55.block_sparse_moe.experts.30.w3", "model.layers.55.block_sparse_moe.experts.31.w3", "model.layers.55.block_sparse_moe.experts.32.w3", "model.layers.55.block_sparse_moe.experts.33.w3", "model.layers.55.block_sparse_moe.experts.34.w3", "model.layers.55.block_sparse_moe.experts.35.w3", "model.layers.55.block_sparse_moe.experts.36.w3", "model.layers.55.block_sparse_moe.experts.37.w3", "model.layers.55.block_sparse_moe.experts.38.w3", "model.layers.55.block_sparse_moe.experts.39.w3", "model.layers.55.block_sparse_moe.experts.40.w3", "model.layers.55.block_sparse_moe.experts.41.w3", "model.layers.55.block_sparse_moe.experts.42.w3", "model.layers.55.block_sparse_moe.experts.43.w3", "model.layers.55.block_sparse_moe.experts.44.w3", "model.layers.55.block_sparse_moe.experts.45.w3", "model.layers.55.block_sparse_moe.experts.46.w3", "model.layers.55.block_sparse_moe.experts.47.w3", "model.layers.55.block_sparse_moe.experts.48.w3", "model.layers.55.block_sparse_moe.experts.49.w3", "model.layers.55.block_sparse_moe.experts.50.w3", "model.layers.55.block_sparse_moe.experts.51.w3", "model.layers.55.block_sparse_moe.experts.52.w3", "model.layers.55.block_sparse_moe.experts.53.w3", "model.layers.55.block_sparse_moe.experts.54.w3", "model.layers.55.block_sparse_moe.experts.55.w3", "model.layers.55.block_sparse_moe.experts.56.w3", "model.layers.55.block_sparse_moe.experts.57.w3", "model.layers.55.block_sparse_moe.experts.58.w3", "model.layers.55.block_sparse_moe.experts.59.w3", "model.layers.55.block_sparse_moe.experts.60.w3", "model.layers.55.block_sparse_moe.experts.61.w3", "model.layers.55.block_sparse_moe.experts.62.w3", "model.layers.55.block_sparse_moe.experts.63.w3", "model.layers.55.block_sparse_moe.experts.64.w3", "model.layers.55.block_sparse_moe.experts.65.w3", "model.layers.55.block_sparse_moe.experts.66.w3", "model.layers.55.block_sparse_moe.experts.67.w3", "model.layers.55.block_sparse_moe.experts.68.w3", "model.layers.55.block_sparse_moe.experts.69.w3", "model.layers.55.block_sparse_moe.experts.70.w3", "model.layers.55.block_sparse_moe.experts.71.w3", "model.layers.55.block_sparse_moe.experts.72.w3", "model.layers.55.block_sparse_moe.experts.73.w3", "model.layers.55.block_sparse_moe.experts.74.w3", "model.layers.55.block_sparse_moe.experts.75.w3", "model.layers.55.block_sparse_moe.experts.76.w3", "model.layers.55.block_sparse_moe.experts.77.w3", "model.layers.55.block_sparse_moe.experts.78.w3", "model.layers.55.block_sparse_moe.experts.79.w3", "model.layers.55.block_sparse_moe.experts.80.w3", "model.layers.55.block_sparse_moe.experts.81.w3", "model.layers.55.block_sparse_moe.experts.82.w3", "model.layers.55.block_sparse_moe.experts.83.w3", "model.layers.55.block_sparse_moe.experts.84.w3", "model.layers.55.block_sparse_moe.experts.85.w3", "model.layers.55.block_sparse_moe.experts.86.w3", "model.layers.55.block_sparse_moe.experts.87.w3", "model.layers.55.block_sparse_moe.experts.88.w3", "model.layers.55.block_sparse_moe.experts.89.w3", "model.layers.55.block_sparse_moe.experts.90.w3", "model.layers.55.block_sparse_moe.experts.91.w3", "model.layers.55.block_sparse_moe.experts.92.w3", "model.layers.55.block_sparse_moe.experts.93.w3", "model.layers.55.block_sparse_moe.experts.94.w3", "model.layers.55.block_sparse_moe.experts.95.w3", "model.layers.55.block_sparse_moe.experts.96.w3", "model.layers.55.block_sparse_moe.experts.97.w3", "model.layers.55.block_sparse_moe.experts.98.w3", "model.layers.55.block_sparse_moe.experts.99.w3", "model.layers.55.block_sparse_moe.experts.100.w3", "model.layers.55.block_sparse_moe.experts.101.w3", "model.layers.55.block_sparse_moe.experts.102.w3", "model.layers.55.block_sparse_moe.experts.103.w3", "model.layers.55.block_sparse_moe.experts.104.w3", "model.layers.55.block_sparse_moe.experts.105.w3", "model.layers.55.block_sparse_moe.experts.106.w3", "model.layers.55.block_sparse_moe.experts.107.w3", "model.layers.55.block_sparse_moe.experts.108.w3", "model.layers.55.block_sparse_moe.experts.109.w3", "model.layers.55.block_sparse_moe.experts.110.w3", "model.layers.55.block_sparse_moe.experts.111.w3", "model.layers.55.block_sparse_moe.experts.112.w3", "model.layers.55.block_sparse_moe.experts.113.w3", "model.layers.55.block_sparse_moe.experts.114.w3", "model.layers.55.block_sparse_moe.experts.115.w3", "model.layers.55.block_sparse_moe.experts.116.w3", "model.layers.55.block_sparse_moe.experts.117.w3", "model.layers.55.block_sparse_moe.experts.118.w3", "model.layers.55.block_sparse_moe.experts.119.w3", "model.layers.55.block_sparse_moe.experts.120.w3", "model.layers.55.block_sparse_moe.experts.121.w3", "model.layers.55.block_sparse_moe.experts.122.w3", "model.layers.55.block_sparse_moe.experts.123.w3", "model.layers.55.block_sparse_moe.experts.124.w3", "model.layers.55.block_sparse_moe.experts.125.w3", "model.layers.55.block_sparse_moe.experts.126.w3", "model.layers.55.block_sparse_moe.experts.127.w3", "model.layers.55.block_sparse_moe.experts.128.w3", "model.layers.55.block_sparse_moe.experts.129.w3", "model.layers.55.block_sparse_moe.experts.130.w3", "model.layers.55.block_sparse_moe.experts.131.w3", "model.layers.55.block_sparse_moe.experts.132.w3", "model.layers.55.block_sparse_moe.experts.133.w3", "model.layers.55.block_sparse_moe.experts.134.w3", "model.layers.55.block_sparse_moe.experts.135.w3", "model.layers.55.block_sparse_moe.experts.136.w3", "model.layers.55.block_sparse_moe.experts.137.w3", "model.layers.55.block_sparse_moe.experts.138.w3", "model.layers.55.block_sparse_moe.experts.139.w3", "model.layers.55.block_sparse_moe.experts.140.w3", "model.layers.55.block_sparse_moe.experts.141.w3", "model.layers.55.block_sparse_moe.experts.142.w3", "model.layers.55.block_sparse_moe.experts.143.w3", "model.layers.55.block_sparse_moe.experts.144.w3", "model.layers.55.block_sparse_moe.experts.145.w3", "model.layers.55.block_sparse_moe.experts.146.w3", "model.layers.55.block_sparse_moe.experts.147.w3", "model.layers.55.block_sparse_moe.experts.148.w3", "model.layers.55.block_sparse_moe.experts.149.w3", "model.layers.55.block_sparse_moe.experts.150.w3", "model.layers.55.block_sparse_moe.experts.151.w3", "model.layers.55.block_sparse_moe.experts.152.w3", "model.layers.55.block_sparse_moe.experts.153.w3", "model.layers.55.block_sparse_moe.experts.154.w3", "model.layers.55.block_sparse_moe.experts.155.w3", "model.layers.55.block_sparse_moe.experts.156.w3", "model.layers.55.block_sparse_moe.experts.157.w3", "model.layers.55.block_sparse_moe.experts.158.w3", "model.layers.55.block_sparse_moe.experts.159.w3", "model.layers.55.block_sparse_moe.experts.160.w3", "model.layers.55.block_sparse_moe.experts.161.w3", "model.layers.55.block_sparse_moe.experts.162.w3", "model.layers.55.block_sparse_moe.experts.163.w3", "model.layers.55.block_sparse_moe.experts.164.w3", "model.layers.55.block_sparse_moe.experts.165.w3", "model.layers.55.block_sparse_moe.experts.166.w3", "model.layers.55.block_sparse_moe.experts.167.w3", "model.layers.55.block_sparse_moe.experts.168.w3", "model.layers.55.block_sparse_moe.experts.169.w3", "model.layers.55.block_sparse_moe.experts.170.w3", "model.layers.55.block_sparse_moe.experts.171.w3", "model.layers.55.block_sparse_moe.experts.172.w3", "model.layers.55.block_sparse_moe.experts.173.w3", "model.layers.55.block_sparse_moe.experts.174.w3", "model.layers.55.block_sparse_moe.experts.175.w3", "model.layers.55.block_sparse_moe.experts.176.w3", "model.layers.55.block_sparse_moe.experts.177.w3", "model.layers.55.block_sparse_moe.experts.178.w3", "model.layers.55.block_sparse_moe.experts.179.w3", "model.layers.55.block_sparse_moe.experts.180.w3", "model.layers.55.block_sparse_moe.experts.181.w3", "model.layers.55.block_sparse_moe.experts.182.w3", "model.layers.55.block_sparse_moe.experts.183.w3", "model.layers.55.block_sparse_moe.experts.184.w3", "model.layers.55.block_sparse_moe.experts.185.w3", "model.layers.55.block_sparse_moe.experts.186.w3", "model.layers.55.block_sparse_moe.experts.187.w3", "model.layers.55.block_sparse_moe.experts.188.w3", "model.layers.55.block_sparse_moe.experts.189.w3", "model.layers.55.block_sparse_moe.experts.190.w3", "model.layers.55.block_sparse_moe.experts.191.w3", "model.layers.55.block_sparse_moe.experts.192.w3", "model.layers.55.block_sparse_moe.experts.193.w3", "model.layers.55.block_sparse_moe.experts.194.w3", "model.layers.55.block_sparse_moe.experts.195.w3", "model.layers.55.block_sparse_moe.experts.196.w3", "model.layers.55.block_sparse_moe.experts.197.w3", "model.layers.55.block_sparse_moe.experts.198.w3", "model.layers.55.block_sparse_moe.experts.199.w3", "model.layers.55.block_sparse_moe.experts.200.w3", "model.layers.55.block_sparse_moe.experts.201.w3", "model.layers.55.block_sparse_moe.experts.202.w3", "model.layers.55.block_sparse_moe.experts.203.w3", "model.layers.55.block_sparse_moe.experts.204.w3", "model.layers.55.block_sparse_moe.experts.205.w3", "model.layers.55.block_sparse_moe.experts.206.w3", "model.layers.55.block_sparse_moe.experts.207.w3", "model.layers.55.block_sparse_moe.experts.208.w3", "model.layers.55.block_sparse_moe.experts.209.w3", "model.layers.55.block_sparse_moe.experts.210.w3", "model.layers.55.block_sparse_moe.experts.211.w3", "model.layers.55.block_sparse_moe.experts.212.w3", "model.layers.55.block_sparse_moe.experts.213.w3", "model.layers.55.block_sparse_moe.experts.214.w3", "model.layers.55.block_sparse_moe.experts.215.w3", "model.layers.55.block_sparse_moe.experts.216.w3", "model.layers.55.block_sparse_moe.experts.217.w3", "model.layers.55.block_sparse_moe.experts.218.w3", "model.layers.55.block_sparse_moe.experts.219.w3", "model.layers.55.block_sparse_moe.experts.220.w3", "model.layers.55.block_sparse_moe.experts.221.w3", "model.layers.55.block_sparse_moe.experts.222.w3", "model.layers.55.block_sparse_moe.experts.223.w3", "model.layers.55.block_sparse_moe.experts.224.w3", "model.layers.55.block_sparse_moe.experts.225.w3", "model.layers.55.block_sparse_moe.experts.226.w3", "model.layers.55.block_sparse_moe.experts.227.w3", "model.layers.55.block_sparse_moe.experts.228.w3", "model.layers.55.block_sparse_moe.experts.229.w3", "model.layers.55.block_sparse_moe.experts.230.w3", "model.layers.55.block_sparse_moe.experts.231.w3", "model.layers.55.block_sparse_moe.experts.232.w3", "model.layers.55.block_sparse_moe.experts.233.w3", "model.layers.55.block_sparse_moe.experts.234.w3", "model.layers.55.block_sparse_moe.experts.235.w3", "model.layers.55.block_sparse_moe.experts.236.w3", "model.layers.55.block_sparse_moe.experts.237.w3", "model.layers.55.block_sparse_moe.experts.238.w3", "model.layers.55.block_sparse_moe.experts.239.w3", "model.layers.55.block_sparse_moe.experts.240.w3", "model.layers.55.block_sparse_moe.experts.241.w3", "model.layers.55.block_sparse_moe.experts.242.w3", "model.layers.55.block_sparse_moe.experts.243.w3", "model.layers.55.block_sparse_moe.experts.244.w3", "model.layers.55.block_sparse_moe.experts.245.w3", "model.layers.55.block_sparse_moe.experts.246.w3", "model.layers.55.block_sparse_moe.experts.247.w3", "model.layers.55.block_sparse_moe.experts.248.w3", "model.layers.55.block_sparse_moe.experts.249.w3", "model.layers.55.block_sparse_moe.experts.250.w3", "model.layers.55.block_sparse_moe.experts.251.w3", "model.layers.55.block_sparse_moe.experts.252.w3", "model.layers.55.block_sparse_moe.experts.253.w3", "model.layers.55.block_sparse_moe.experts.254.w3", "model.layers.55.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 4.8279762268066406e-05, "dbits": 2415919104 } ] }, { "idx": 279, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w2", "model.layers.55.block_sparse_moe.experts.1.w2", "model.layers.55.block_sparse_moe.experts.2.w2", "model.layers.55.block_sparse_moe.experts.3.w2", "model.layers.55.block_sparse_moe.experts.4.w2", "model.layers.55.block_sparse_moe.experts.5.w2", "model.layers.55.block_sparse_moe.experts.6.w2", "model.layers.55.block_sparse_moe.experts.7.w2", "model.layers.55.block_sparse_moe.experts.8.w2", "model.layers.55.block_sparse_moe.experts.9.w2", "model.layers.55.block_sparse_moe.experts.10.w2", "model.layers.55.block_sparse_moe.experts.11.w2", "model.layers.55.block_sparse_moe.experts.12.w2", "model.layers.55.block_sparse_moe.experts.13.w2", "model.layers.55.block_sparse_moe.experts.14.w2", "model.layers.55.block_sparse_moe.experts.15.w2", "model.layers.55.block_sparse_moe.experts.16.w2", "model.layers.55.block_sparse_moe.experts.17.w2", "model.layers.55.block_sparse_moe.experts.18.w2", "model.layers.55.block_sparse_moe.experts.19.w2", "model.layers.55.block_sparse_moe.experts.20.w2", "model.layers.55.block_sparse_moe.experts.21.w2", "model.layers.55.block_sparse_moe.experts.22.w2", "model.layers.55.block_sparse_moe.experts.23.w2", "model.layers.55.block_sparse_moe.experts.24.w2", "model.layers.55.block_sparse_moe.experts.25.w2", "model.layers.55.block_sparse_moe.experts.26.w2", "model.layers.55.block_sparse_moe.experts.27.w2", "model.layers.55.block_sparse_moe.experts.28.w2", "model.layers.55.block_sparse_moe.experts.29.w2", "model.layers.55.block_sparse_moe.experts.30.w2", "model.layers.55.block_sparse_moe.experts.31.w2", "model.layers.55.block_sparse_moe.experts.32.w2", "model.layers.55.block_sparse_moe.experts.33.w2", "model.layers.55.block_sparse_moe.experts.34.w2", "model.layers.55.block_sparse_moe.experts.35.w2", "model.layers.55.block_sparse_moe.experts.36.w2", "model.layers.55.block_sparse_moe.experts.37.w2", "model.layers.55.block_sparse_moe.experts.38.w2", "model.layers.55.block_sparse_moe.experts.39.w2", "model.layers.55.block_sparse_moe.experts.40.w2", "model.layers.55.block_sparse_moe.experts.41.w2", "model.layers.55.block_sparse_moe.experts.42.w2", "model.layers.55.block_sparse_moe.experts.43.w2", "model.layers.55.block_sparse_moe.experts.44.w2", "model.layers.55.block_sparse_moe.experts.45.w2", "model.layers.55.block_sparse_moe.experts.46.w2", "model.layers.55.block_sparse_moe.experts.47.w2", "model.layers.55.block_sparse_moe.experts.48.w2", "model.layers.55.block_sparse_moe.experts.49.w2", "model.layers.55.block_sparse_moe.experts.50.w2", "model.layers.55.block_sparse_moe.experts.51.w2", "model.layers.55.block_sparse_moe.experts.52.w2", "model.layers.55.block_sparse_moe.experts.53.w2", "model.layers.55.block_sparse_moe.experts.54.w2", "model.layers.55.block_sparse_moe.experts.55.w2", "model.layers.55.block_sparse_moe.experts.56.w2", "model.layers.55.block_sparse_moe.experts.57.w2", "model.layers.55.block_sparse_moe.experts.58.w2", "model.layers.55.block_sparse_moe.experts.59.w2", "model.layers.55.block_sparse_moe.experts.60.w2", "model.layers.55.block_sparse_moe.experts.61.w2", "model.layers.55.block_sparse_moe.experts.62.w2", "model.layers.55.block_sparse_moe.experts.63.w2", "model.layers.55.block_sparse_moe.experts.64.w2", "model.layers.55.block_sparse_moe.experts.65.w2", "model.layers.55.block_sparse_moe.experts.66.w2", "model.layers.55.block_sparse_moe.experts.67.w2", "model.layers.55.block_sparse_moe.experts.68.w2", "model.layers.55.block_sparse_moe.experts.69.w2", "model.layers.55.block_sparse_moe.experts.70.w2", "model.layers.55.block_sparse_moe.experts.71.w2", "model.layers.55.block_sparse_moe.experts.72.w2", "model.layers.55.block_sparse_moe.experts.73.w2", "model.layers.55.block_sparse_moe.experts.74.w2", "model.layers.55.block_sparse_moe.experts.75.w2", "model.layers.55.block_sparse_moe.experts.76.w2", "model.layers.55.block_sparse_moe.experts.77.w2", "model.layers.55.block_sparse_moe.experts.78.w2", "model.layers.55.block_sparse_moe.experts.79.w2", "model.layers.55.block_sparse_moe.experts.80.w2", "model.layers.55.block_sparse_moe.experts.81.w2", "model.layers.55.block_sparse_moe.experts.82.w2", "model.layers.55.block_sparse_moe.experts.83.w2", "model.layers.55.block_sparse_moe.experts.84.w2", "model.layers.55.block_sparse_moe.experts.85.w2", "model.layers.55.block_sparse_moe.experts.86.w2", "model.layers.55.block_sparse_moe.experts.87.w2", "model.layers.55.block_sparse_moe.experts.88.w2", "model.layers.55.block_sparse_moe.experts.89.w2", "model.layers.55.block_sparse_moe.experts.90.w2", "model.layers.55.block_sparse_moe.experts.91.w2", "model.layers.55.block_sparse_moe.experts.92.w2", "model.layers.55.block_sparse_moe.experts.93.w2", "model.layers.55.block_sparse_moe.experts.94.w2", "model.layers.55.block_sparse_moe.experts.95.w2", "model.layers.55.block_sparse_moe.experts.96.w2", "model.layers.55.block_sparse_moe.experts.97.w2", "model.layers.55.block_sparse_moe.experts.98.w2", "model.layers.55.block_sparse_moe.experts.99.w2", "model.layers.55.block_sparse_moe.experts.100.w2", "model.layers.55.block_sparse_moe.experts.101.w2", "model.layers.55.block_sparse_moe.experts.102.w2", "model.layers.55.block_sparse_moe.experts.103.w2", "model.layers.55.block_sparse_moe.experts.104.w2", "model.layers.55.block_sparse_moe.experts.105.w2", "model.layers.55.block_sparse_moe.experts.106.w2", "model.layers.55.block_sparse_moe.experts.107.w2", "model.layers.55.block_sparse_moe.experts.108.w2", "model.layers.55.block_sparse_moe.experts.109.w2", "model.layers.55.block_sparse_moe.experts.110.w2", "model.layers.55.block_sparse_moe.experts.111.w2", "model.layers.55.block_sparse_moe.experts.112.w2", "model.layers.55.block_sparse_moe.experts.113.w2", "model.layers.55.block_sparse_moe.experts.114.w2", "model.layers.55.block_sparse_moe.experts.115.w2", "model.layers.55.block_sparse_moe.experts.116.w2", "model.layers.55.block_sparse_moe.experts.117.w2", "model.layers.55.block_sparse_moe.experts.118.w2", "model.layers.55.block_sparse_moe.experts.119.w2", "model.layers.55.block_sparse_moe.experts.120.w2", "model.layers.55.block_sparse_moe.experts.121.w2", "model.layers.55.block_sparse_moe.experts.122.w2", "model.layers.55.block_sparse_moe.experts.123.w2", "model.layers.55.block_sparse_moe.experts.124.w2", "model.layers.55.block_sparse_moe.experts.125.w2", "model.layers.55.block_sparse_moe.experts.126.w2", "model.layers.55.block_sparse_moe.experts.127.w2", "model.layers.55.block_sparse_moe.experts.128.w2", "model.layers.55.block_sparse_moe.experts.129.w2", "model.layers.55.block_sparse_moe.experts.130.w2", "model.layers.55.block_sparse_moe.experts.131.w2", "model.layers.55.block_sparse_moe.experts.132.w2", "model.layers.55.block_sparse_moe.experts.133.w2", "model.layers.55.block_sparse_moe.experts.134.w2", "model.layers.55.block_sparse_moe.experts.135.w2", "model.layers.55.block_sparse_moe.experts.136.w2", "model.layers.55.block_sparse_moe.experts.137.w2", "model.layers.55.block_sparse_moe.experts.138.w2", "model.layers.55.block_sparse_moe.experts.139.w2", "model.layers.55.block_sparse_moe.experts.140.w2", "model.layers.55.block_sparse_moe.experts.141.w2", "model.layers.55.block_sparse_moe.experts.142.w2", "model.layers.55.block_sparse_moe.experts.143.w2", "model.layers.55.block_sparse_moe.experts.144.w2", "model.layers.55.block_sparse_moe.experts.145.w2", "model.layers.55.block_sparse_moe.experts.146.w2", "model.layers.55.block_sparse_moe.experts.147.w2", "model.layers.55.block_sparse_moe.experts.148.w2", "model.layers.55.block_sparse_moe.experts.149.w2", "model.layers.55.block_sparse_moe.experts.150.w2", "model.layers.55.block_sparse_moe.experts.151.w2", "model.layers.55.block_sparse_moe.experts.152.w2", "model.layers.55.block_sparse_moe.experts.153.w2", "model.layers.55.block_sparse_moe.experts.154.w2", "model.layers.55.block_sparse_moe.experts.155.w2", "model.layers.55.block_sparse_moe.experts.156.w2", "model.layers.55.block_sparse_moe.experts.157.w2", "model.layers.55.block_sparse_moe.experts.158.w2", "model.layers.55.block_sparse_moe.experts.159.w2", "model.layers.55.block_sparse_moe.experts.160.w2", "model.layers.55.block_sparse_moe.experts.161.w2", "model.layers.55.block_sparse_moe.experts.162.w2", "model.layers.55.block_sparse_moe.experts.163.w2", "model.layers.55.block_sparse_moe.experts.164.w2", "model.layers.55.block_sparse_moe.experts.165.w2", "model.layers.55.block_sparse_moe.experts.166.w2", "model.layers.55.block_sparse_moe.experts.167.w2", "model.layers.55.block_sparse_moe.experts.168.w2", "model.layers.55.block_sparse_moe.experts.169.w2", "model.layers.55.block_sparse_moe.experts.170.w2", "model.layers.55.block_sparse_moe.experts.171.w2", "model.layers.55.block_sparse_moe.experts.172.w2", "model.layers.55.block_sparse_moe.experts.173.w2", "model.layers.55.block_sparse_moe.experts.174.w2", "model.layers.55.block_sparse_moe.experts.175.w2", "model.layers.55.block_sparse_moe.experts.176.w2", "model.layers.55.block_sparse_moe.experts.177.w2", "model.layers.55.block_sparse_moe.experts.178.w2", "model.layers.55.block_sparse_moe.experts.179.w2", "model.layers.55.block_sparse_moe.experts.180.w2", "model.layers.55.block_sparse_moe.experts.181.w2", "model.layers.55.block_sparse_moe.experts.182.w2", "model.layers.55.block_sparse_moe.experts.183.w2", "model.layers.55.block_sparse_moe.experts.184.w2", "model.layers.55.block_sparse_moe.experts.185.w2", "model.layers.55.block_sparse_moe.experts.186.w2", "model.layers.55.block_sparse_moe.experts.187.w2", "model.layers.55.block_sparse_moe.experts.188.w2", "model.layers.55.block_sparse_moe.experts.189.w2", "model.layers.55.block_sparse_moe.experts.190.w2", "model.layers.55.block_sparse_moe.experts.191.w2", "model.layers.55.block_sparse_moe.experts.192.w2", "model.layers.55.block_sparse_moe.experts.193.w2", "model.layers.55.block_sparse_moe.experts.194.w2", "model.layers.55.block_sparse_moe.experts.195.w2", "model.layers.55.block_sparse_moe.experts.196.w2", "model.layers.55.block_sparse_moe.experts.197.w2", "model.layers.55.block_sparse_moe.experts.198.w2", "model.layers.55.block_sparse_moe.experts.199.w2", "model.layers.55.block_sparse_moe.experts.200.w2", "model.layers.55.block_sparse_moe.experts.201.w2", "model.layers.55.block_sparse_moe.experts.202.w2", "model.layers.55.block_sparse_moe.experts.203.w2", "model.layers.55.block_sparse_moe.experts.204.w2", "model.layers.55.block_sparse_moe.experts.205.w2", "model.layers.55.block_sparse_moe.experts.206.w2", "model.layers.55.block_sparse_moe.experts.207.w2", "model.layers.55.block_sparse_moe.experts.208.w2", "model.layers.55.block_sparse_moe.experts.209.w2", "model.layers.55.block_sparse_moe.experts.210.w2", "model.layers.55.block_sparse_moe.experts.211.w2", "model.layers.55.block_sparse_moe.experts.212.w2", "model.layers.55.block_sparse_moe.experts.213.w2", "model.layers.55.block_sparse_moe.experts.214.w2", "model.layers.55.block_sparse_moe.experts.215.w2", "model.layers.55.block_sparse_moe.experts.216.w2", "model.layers.55.block_sparse_moe.experts.217.w2", "model.layers.55.block_sparse_moe.experts.218.w2", "model.layers.55.block_sparse_moe.experts.219.w2", "model.layers.55.block_sparse_moe.experts.220.w2", "model.layers.55.block_sparse_moe.experts.221.w2", "model.layers.55.block_sparse_moe.experts.222.w2", "model.layers.55.block_sparse_moe.experts.223.w2", "model.layers.55.block_sparse_moe.experts.224.w2", "model.layers.55.block_sparse_moe.experts.225.w2", "model.layers.55.block_sparse_moe.experts.226.w2", "model.layers.55.block_sparse_moe.experts.227.w2", "model.layers.55.block_sparse_moe.experts.228.w2", "model.layers.55.block_sparse_moe.experts.229.w2", "model.layers.55.block_sparse_moe.experts.230.w2", "model.layers.55.block_sparse_moe.experts.231.w2", "model.layers.55.block_sparse_moe.experts.232.w2", "model.layers.55.block_sparse_moe.experts.233.w2", "model.layers.55.block_sparse_moe.experts.234.w2", "model.layers.55.block_sparse_moe.experts.235.w2", "model.layers.55.block_sparse_moe.experts.236.w2", "model.layers.55.block_sparse_moe.experts.237.w2", "model.layers.55.block_sparse_moe.experts.238.w2", "model.layers.55.block_sparse_moe.experts.239.w2", "model.layers.55.block_sparse_moe.experts.240.w2", "model.layers.55.block_sparse_moe.experts.241.w2", "model.layers.55.block_sparse_moe.experts.242.w2", "model.layers.55.block_sparse_moe.experts.243.w2", "model.layers.55.block_sparse_moe.experts.244.w2", "model.layers.55.block_sparse_moe.experts.245.w2", "model.layers.55.block_sparse_moe.experts.246.w2", "model.layers.55.block_sparse_moe.experts.247.w2", "model.layers.55.block_sparse_moe.experts.248.w2", "model.layers.55.block_sparse_moe.experts.249.w2", "model.layers.55.block_sparse_moe.experts.250.w2", "model.layers.55.block_sparse_moe.experts.251.w2", "model.layers.55.block_sparse_moe.experts.252.w2", "model.layers.55.block_sparse_moe.experts.253.w2", "model.layers.55.block_sparse_moe.experts.254.w2", "model.layers.55.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 3.107637166974153e-06, "dbits": 1207959552 } ] }, { "idx": 280, "layers": [ "model.layers.56.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00012015011161566058, "dbits": 18874368 } ] }, { "idx": 281, "layers": [ "model.layers.56.self_attn.k_proj", "model.layers.56.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0003116320818662699, "dbits": 6291456 } ] }, { "idx": 282, "layers": [ "model.layers.56.self_attn.o_proj" ], "candidates": [ { "dkld": -8.339621126651764e-05, "dbits": 18874368 } ] }, { "idx": 283, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w1", "model.layers.56.block_sparse_moe.experts.1.w1", "model.layers.56.block_sparse_moe.experts.2.w1", "model.layers.56.block_sparse_moe.experts.3.w1", "model.layers.56.block_sparse_moe.experts.4.w1", "model.layers.56.block_sparse_moe.experts.5.w1", "model.layers.56.block_sparse_moe.experts.6.w1", "model.layers.56.block_sparse_moe.experts.7.w1", "model.layers.56.block_sparse_moe.experts.8.w1", "model.layers.56.block_sparse_moe.experts.9.w1", "model.layers.56.block_sparse_moe.experts.10.w1", "model.layers.56.block_sparse_moe.experts.11.w1", "model.layers.56.block_sparse_moe.experts.12.w1", "model.layers.56.block_sparse_moe.experts.13.w1", "model.layers.56.block_sparse_moe.experts.14.w1", "model.layers.56.block_sparse_moe.experts.15.w1", "model.layers.56.block_sparse_moe.experts.16.w1", "model.layers.56.block_sparse_moe.experts.17.w1", "model.layers.56.block_sparse_moe.experts.18.w1", "model.layers.56.block_sparse_moe.experts.19.w1", "model.layers.56.block_sparse_moe.experts.20.w1", "model.layers.56.block_sparse_moe.experts.21.w1", "model.layers.56.block_sparse_moe.experts.22.w1", "model.layers.56.block_sparse_moe.experts.23.w1", "model.layers.56.block_sparse_moe.experts.24.w1", "model.layers.56.block_sparse_moe.experts.25.w1", "model.layers.56.block_sparse_moe.experts.26.w1", "model.layers.56.block_sparse_moe.experts.27.w1", "model.layers.56.block_sparse_moe.experts.28.w1", "model.layers.56.block_sparse_moe.experts.29.w1", "model.layers.56.block_sparse_moe.experts.30.w1", "model.layers.56.block_sparse_moe.experts.31.w1", "model.layers.56.block_sparse_moe.experts.32.w1", "model.layers.56.block_sparse_moe.experts.33.w1", "model.layers.56.block_sparse_moe.experts.34.w1", "model.layers.56.block_sparse_moe.experts.35.w1", "model.layers.56.block_sparse_moe.experts.36.w1", "model.layers.56.block_sparse_moe.experts.37.w1", "model.layers.56.block_sparse_moe.experts.38.w1", "model.layers.56.block_sparse_moe.experts.39.w1", "model.layers.56.block_sparse_moe.experts.40.w1", "model.layers.56.block_sparse_moe.experts.41.w1", "model.layers.56.block_sparse_moe.experts.42.w1", "model.layers.56.block_sparse_moe.experts.43.w1", "model.layers.56.block_sparse_moe.experts.44.w1", "model.layers.56.block_sparse_moe.experts.45.w1", "model.layers.56.block_sparse_moe.experts.46.w1", "model.layers.56.block_sparse_moe.experts.47.w1", "model.layers.56.block_sparse_moe.experts.48.w1", "model.layers.56.block_sparse_moe.experts.49.w1", "model.layers.56.block_sparse_moe.experts.50.w1", "model.layers.56.block_sparse_moe.experts.51.w1", "model.layers.56.block_sparse_moe.experts.52.w1", "model.layers.56.block_sparse_moe.experts.53.w1", "model.layers.56.block_sparse_moe.experts.54.w1", "model.layers.56.block_sparse_moe.experts.55.w1", "model.layers.56.block_sparse_moe.experts.56.w1", "model.layers.56.block_sparse_moe.experts.57.w1", "model.layers.56.block_sparse_moe.experts.58.w1", "model.layers.56.block_sparse_moe.experts.59.w1", "model.layers.56.block_sparse_moe.experts.60.w1", "model.layers.56.block_sparse_moe.experts.61.w1", "model.layers.56.block_sparse_moe.experts.62.w1", "model.layers.56.block_sparse_moe.experts.63.w1", "model.layers.56.block_sparse_moe.experts.64.w1", "model.layers.56.block_sparse_moe.experts.65.w1", "model.layers.56.block_sparse_moe.experts.66.w1", "model.layers.56.block_sparse_moe.experts.67.w1", "model.layers.56.block_sparse_moe.experts.68.w1", "model.layers.56.block_sparse_moe.experts.69.w1", "model.layers.56.block_sparse_moe.experts.70.w1", "model.layers.56.block_sparse_moe.experts.71.w1", "model.layers.56.block_sparse_moe.experts.72.w1", "model.layers.56.block_sparse_moe.experts.73.w1", "model.layers.56.block_sparse_moe.experts.74.w1", "model.layers.56.block_sparse_moe.experts.75.w1", "model.layers.56.block_sparse_moe.experts.76.w1", "model.layers.56.block_sparse_moe.experts.77.w1", "model.layers.56.block_sparse_moe.experts.78.w1", "model.layers.56.block_sparse_moe.experts.79.w1", "model.layers.56.block_sparse_moe.experts.80.w1", "model.layers.56.block_sparse_moe.experts.81.w1", "model.layers.56.block_sparse_moe.experts.82.w1", "model.layers.56.block_sparse_moe.experts.83.w1", "model.layers.56.block_sparse_moe.experts.84.w1", "model.layers.56.block_sparse_moe.experts.85.w1", "model.layers.56.block_sparse_moe.experts.86.w1", "model.layers.56.block_sparse_moe.experts.87.w1", "model.layers.56.block_sparse_moe.experts.88.w1", "model.layers.56.block_sparse_moe.experts.89.w1", "model.layers.56.block_sparse_moe.experts.90.w1", "model.layers.56.block_sparse_moe.experts.91.w1", "model.layers.56.block_sparse_moe.experts.92.w1", "model.layers.56.block_sparse_moe.experts.93.w1", "model.layers.56.block_sparse_moe.experts.94.w1", "model.layers.56.block_sparse_moe.experts.95.w1", "model.layers.56.block_sparse_moe.experts.96.w1", "model.layers.56.block_sparse_moe.experts.97.w1", "model.layers.56.block_sparse_moe.experts.98.w1", "model.layers.56.block_sparse_moe.experts.99.w1", "model.layers.56.block_sparse_moe.experts.100.w1", "model.layers.56.block_sparse_moe.experts.101.w1", "model.layers.56.block_sparse_moe.experts.102.w1", "model.layers.56.block_sparse_moe.experts.103.w1", "model.layers.56.block_sparse_moe.experts.104.w1", "model.layers.56.block_sparse_moe.experts.105.w1", "model.layers.56.block_sparse_moe.experts.106.w1", "model.layers.56.block_sparse_moe.experts.107.w1", "model.layers.56.block_sparse_moe.experts.108.w1", "model.layers.56.block_sparse_moe.experts.109.w1", "model.layers.56.block_sparse_moe.experts.110.w1", "model.layers.56.block_sparse_moe.experts.111.w1", "model.layers.56.block_sparse_moe.experts.112.w1", "model.layers.56.block_sparse_moe.experts.113.w1", "model.layers.56.block_sparse_moe.experts.114.w1", "model.layers.56.block_sparse_moe.experts.115.w1", "model.layers.56.block_sparse_moe.experts.116.w1", "model.layers.56.block_sparse_moe.experts.117.w1", "model.layers.56.block_sparse_moe.experts.118.w1", "model.layers.56.block_sparse_moe.experts.119.w1", "model.layers.56.block_sparse_moe.experts.120.w1", "model.layers.56.block_sparse_moe.experts.121.w1", "model.layers.56.block_sparse_moe.experts.122.w1", "model.layers.56.block_sparse_moe.experts.123.w1", "model.layers.56.block_sparse_moe.experts.124.w1", "model.layers.56.block_sparse_moe.experts.125.w1", "model.layers.56.block_sparse_moe.experts.126.w1", "model.layers.56.block_sparse_moe.experts.127.w1", "model.layers.56.block_sparse_moe.experts.128.w1", "model.layers.56.block_sparse_moe.experts.129.w1", "model.layers.56.block_sparse_moe.experts.130.w1", "model.layers.56.block_sparse_moe.experts.131.w1", "model.layers.56.block_sparse_moe.experts.132.w1", "model.layers.56.block_sparse_moe.experts.133.w1", "model.layers.56.block_sparse_moe.experts.134.w1", "model.layers.56.block_sparse_moe.experts.135.w1", "model.layers.56.block_sparse_moe.experts.136.w1", "model.layers.56.block_sparse_moe.experts.137.w1", "model.layers.56.block_sparse_moe.experts.138.w1", "model.layers.56.block_sparse_moe.experts.139.w1", "model.layers.56.block_sparse_moe.experts.140.w1", "model.layers.56.block_sparse_moe.experts.141.w1", "model.layers.56.block_sparse_moe.experts.142.w1", "model.layers.56.block_sparse_moe.experts.143.w1", "model.layers.56.block_sparse_moe.experts.144.w1", "model.layers.56.block_sparse_moe.experts.145.w1", "model.layers.56.block_sparse_moe.experts.146.w1", "model.layers.56.block_sparse_moe.experts.147.w1", "model.layers.56.block_sparse_moe.experts.148.w1", "model.layers.56.block_sparse_moe.experts.149.w1", "model.layers.56.block_sparse_moe.experts.150.w1", "model.layers.56.block_sparse_moe.experts.151.w1", "model.layers.56.block_sparse_moe.experts.152.w1", "model.layers.56.block_sparse_moe.experts.153.w1", "model.layers.56.block_sparse_moe.experts.154.w1", "model.layers.56.block_sparse_moe.experts.155.w1", "model.layers.56.block_sparse_moe.experts.156.w1", "model.layers.56.block_sparse_moe.experts.157.w1", "model.layers.56.block_sparse_moe.experts.158.w1", "model.layers.56.block_sparse_moe.experts.159.w1", "model.layers.56.block_sparse_moe.experts.160.w1", "model.layers.56.block_sparse_moe.experts.161.w1", "model.layers.56.block_sparse_moe.experts.162.w1", "model.layers.56.block_sparse_moe.experts.163.w1", "model.layers.56.block_sparse_moe.experts.164.w1", "model.layers.56.block_sparse_moe.experts.165.w1", "model.layers.56.block_sparse_moe.experts.166.w1", "model.layers.56.block_sparse_moe.experts.167.w1", "model.layers.56.block_sparse_moe.experts.168.w1", "model.layers.56.block_sparse_moe.experts.169.w1", "model.layers.56.block_sparse_moe.experts.170.w1", "model.layers.56.block_sparse_moe.experts.171.w1", "model.layers.56.block_sparse_moe.experts.172.w1", "model.layers.56.block_sparse_moe.experts.173.w1", "model.layers.56.block_sparse_moe.experts.174.w1", "model.layers.56.block_sparse_moe.experts.175.w1", "model.layers.56.block_sparse_moe.experts.176.w1", "model.layers.56.block_sparse_moe.experts.177.w1", "model.layers.56.block_sparse_moe.experts.178.w1", "model.layers.56.block_sparse_moe.experts.179.w1", "model.layers.56.block_sparse_moe.experts.180.w1", "model.layers.56.block_sparse_moe.experts.181.w1", "model.layers.56.block_sparse_moe.experts.182.w1", "model.layers.56.block_sparse_moe.experts.183.w1", "model.layers.56.block_sparse_moe.experts.184.w1", "model.layers.56.block_sparse_moe.experts.185.w1", "model.layers.56.block_sparse_moe.experts.186.w1", "model.layers.56.block_sparse_moe.experts.187.w1", "model.layers.56.block_sparse_moe.experts.188.w1", "model.layers.56.block_sparse_moe.experts.189.w1", "model.layers.56.block_sparse_moe.experts.190.w1", "model.layers.56.block_sparse_moe.experts.191.w1", "model.layers.56.block_sparse_moe.experts.192.w1", "model.layers.56.block_sparse_moe.experts.193.w1", "model.layers.56.block_sparse_moe.experts.194.w1", "model.layers.56.block_sparse_moe.experts.195.w1", "model.layers.56.block_sparse_moe.experts.196.w1", "model.layers.56.block_sparse_moe.experts.197.w1", "model.layers.56.block_sparse_moe.experts.198.w1", "model.layers.56.block_sparse_moe.experts.199.w1", "model.layers.56.block_sparse_moe.experts.200.w1", "model.layers.56.block_sparse_moe.experts.201.w1", "model.layers.56.block_sparse_moe.experts.202.w1", "model.layers.56.block_sparse_moe.experts.203.w1", "model.layers.56.block_sparse_moe.experts.204.w1", "model.layers.56.block_sparse_moe.experts.205.w1", "model.layers.56.block_sparse_moe.experts.206.w1", "model.layers.56.block_sparse_moe.experts.207.w1", "model.layers.56.block_sparse_moe.experts.208.w1", "model.layers.56.block_sparse_moe.experts.209.w1", "model.layers.56.block_sparse_moe.experts.210.w1", "model.layers.56.block_sparse_moe.experts.211.w1", "model.layers.56.block_sparse_moe.experts.212.w1", "model.layers.56.block_sparse_moe.experts.213.w1", "model.layers.56.block_sparse_moe.experts.214.w1", "model.layers.56.block_sparse_moe.experts.215.w1", "model.layers.56.block_sparse_moe.experts.216.w1", "model.layers.56.block_sparse_moe.experts.217.w1", "model.layers.56.block_sparse_moe.experts.218.w1", "model.layers.56.block_sparse_moe.experts.219.w1", "model.layers.56.block_sparse_moe.experts.220.w1", "model.layers.56.block_sparse_moe.experts.221.w1", "model.layers.56.block_sparse_moe.experts.222.w1", "model.layers.56.block_sparse_moe.experts.223.w1", "model.layers.56.block_sparse_moe.experts.224.w1", "model.layers.56.block_sparse_moe.experts.225.w1", "model.layers.56.block_sparse_moe.experts.226.w1", "model.layers.56.block_sparse_moe.experts.227.w1", "model.layers.56.block_sparse_moe.experts.228.w1", "model.layers.56.block_sparse_moe.experts.229.w1", "model.layers.56.block_sparse_moe.experts.230.w1", "model.layers.56.block_sparse_moe.experts.231.w1", "model.layers.56.block_sparse_moe.experts.232.w1", "model.layers.56.block_sparse_moe.experts.233.w1", "model.layers.56.block_sparse_moe.experts.234.w1", "model.layers.56.block_sparse_moe.experts.235.w1", "model.layers.56.block_sparse_moe.experts.236.w1", "model.layers.56.block_sparse_moe.experts.237.w1", "model.layers.56.block_sparse_moe.experts.238.w1", "model.layers.56.block_sparse_moe.experts.239.w1", "model.layers.56.block_sparse_moe.experts.240.w1", "model.layers.56.block_sparse_moe.experts.241.w1", "model.layers.56.block_sparse_moe.experts.242.w1", "model.layers.56.block_sparse_moe.experts.243.w1", "model.layers.56.block_sparse_moe.experts.244.w1", "model.layers.56.block_sparse_moe.experts.245.w1", "model.layers.56.block_sparse_moe.experts.246.w1", "model.layers.56.block_sparse_moe.experts.247.w1", "model.layers.56.block_sparse_moe.experts.248.w1", "model.layers.56.block_sparse_moe.experts.249.w1", "model.layers.56.block_sparse_moe.experts.250.w1", "model.layers.56.block_sparse_moe.experts.251.w1", "model.layers.56.block_sparse_moe.experts.252.w1", "model.layers.56.block_sparse_moe.experts.253.w1", "model.layers.56.block_sparse_moe.experts.254.w1", "model.layers.56.block_sparse_moe.experts.255.w1", "model.layers.56.block_sparse_moe.experts.0.w3", "model.layers.56.block_sparse_moe.experts.1.w3", "model.layers.56.block_sparse_moe.experts.2.w3", "model.layers.56.block_sparse_moe.experts.3.w3", "model.layers.56.block_sparse_moe.experts.4.w3", "model.layers.56.block_sparse_moe.experts.5.w3", "model.layers.56.block_sparse_moe.experts.6.w3", "model.layers.56.block_sparse_moe.experts.7.w3", "model.layers.56.block_sparse_moe.experts.8.w3", "model.layers.56.block_sparse_moe.experts.9.w3", "model.layers.56.block_sparse_moe.experts.10.w3", "model.layers.56.block_sparse_moe.experts.11.w3", "model.layers.56.block_sparse_moe.experts.12.w3", "model.layers.56.block_sparse_moe.experts.13.w3", "model.layers.56.block_sparse_moe.experts.14.w3", "model.layers.56.block_sparse_moe.experts.15.w3", "model.layers.56.block_sparse_moe.experts.16.w3", "model.layers.56.block_sparse_moe.experts.17.w3", "model.layers.56.block_sparse_moe.experts.18.w3", "model.layers.56.block_sparse_moe.experts.19.w3", "model.layers.56.block_sparse_moe.experts.20.w3", "model.layers.56.block_sparse_moe.experts.21.w3", "model.layers.56.block_sparse_moe.experts.22.w3", "model.layers.56.block_sparse_moe.experts.23.w3", "model.layers.56.block_sparse_moe.experts.24.w3", "model.layers.56.block_sparse_moe.experts.25.w3", "model.layers.56.block_sparse_moe.experts.26.w3", "model.layers.56.block_sparse_moe.experts.27.w3", "model.layers.56.block_sparse_moe.experts.28.w3", "model.layers.56.block_sparse_moe.experts.29.w3", "model.layers.56.block_sparse_moe.experts.30.w3", "model.layers.56.block_sparse_moe.experts.31.w3", "model.layers.56.block_sparse_moe.experts.32.w3", "model.layers.56.block_sparse_moe.experts.33.w3", "model.layers.56.block_sparse_moe.experts.34.w3", "model.layers.56.block_sparse_moe.experts.35.w3", "model.layers.56.block_sparse_moe.experts.36.w3", "model.layers.56.block_sparse_moe.experts.37.w3", "model.layers.56.block_sparse_moe.experts.38.w3", "model.layers.56.block_sparse_moe.experts.39.w3", "model.layers.56.block_sparse_moe.experts.40.w3", "model.layers.56.block_sparse_moe.experts.41.w3", "model.layers.56.block_sparse_moe.experts.42.w3", "model.layers.56.block_sparse_moe.experts.43.w3", "model.layers.56.block_sparse_moe.experts.44.w3", "model.layers.56.block_sparse_moe.experts.45.w3", "model.layers.56.block_sparse_moe.experts.46.w3", "model.layers.56.block_sparse_moe.experts.47.w3", "model.layers.56.block_sparse_moe.experts.48.w3", "model.layers.56.block_sparse_moe.experts.49.w3", "model.layers.56.block_sparse_moe.experts.50.w3", "model.layers.56.block_sparse_moe.experts.51.w3", "model.layers.56.block_sparse_moe.experts.52.w3", "model.layers.56.block_sparse_moe.experts.53.w3", "model.layers.56.block_sparse_moe.experts.54.w3", "model.layers.56.block_sparse_moe.experts.55.w3", "model.layers.56.block_sparse_moe.experts.56.w3", "model.layers.56.block_sparse_moe.experts.57.w3", "model.layers.56.block_sparse_moe.experts.58.w3", "model.layers.56.block_sparse_moe.experts.59.w3", "model.layers.56.block_sparse_moe.experts.60.w3", "model.layers.56.block_sparse_moe.experts.61.w3", "model.layers.56.block_sparse_moe.experts.62.w3", "model.layers.56.block_sparse_moe.experts.63.w3", "model.layers.56.block_sparse_moe.experts.64.w3", "model.layers.56.block_sparse_moe.experts.65.w3", "model.layers.56.block_sparse_moe.experts.66.w3", "model.layers.56.block_sparse_moe.experts.67.w3", "model.layers.56.block_sparse_moe.experts.68.w3", "model.layers.56.block_sparse_moe.experts.69.w3", "model.layers.56.block_sparse_moe.experts.70.w3", "model.layers.56.block_sparse_moe.experts.71.w3", "model.layers.56.block_sparse_moe.experts.72.w3", "model.layers.56.block_sparse_moe.experts.73.w3", "model.layers.56.block_sparse_moe.experts.74.w3", "model.layers.56.block_sparse_moe.experts.75.w3", "model.layers.56.block_sparse_moe.experts.76.w3", "model.layers.56.block_sparse_moe.experts.77.w3", "model.layers.56.block_sparse_moe.experts.78.w3", "model.layers.56.block_sparse_moe.experts.79.w3", "model.layers.56.block_sparse_moe.experts.80.w3", "model.layers.56.block_sparse_moe.experts.81.w3", "model.layers.56.block_sparse_moe.experts.82.w3", "model.layers.56.block_sparse_moe.experts.83.w3", "model.layers.56.block_sparse_moe.experts.84.w3", "model.layers.56.block_sparse_moe.experts.85.w3", "model.layers.56.block_sparse_moe.experts.86.w3", "model.layers.56.block_sparse_moe.experts.87.w3", "model.layers.56.block_sparse_moe.experts.88.w3", "model.layers.56.block_sparse_moe.experts.89.w3", "model.layers.56.block_sparse_moe.experts.90.w3", "model.layers.56.block_sparse_moe.experts.91.w3", "model.layers.56.block_sparse_moe.experts.92.w3", "model.layers.56.block_sparse_moe.experts.93.w3", "model.layers.56.block_sparse_moe.experts.94.w3", "model.layers.56.block_sparse_moe.experts.95.w3", "model.layers.56.block_sparse_moe.experts.96.w3", "model.layers.56.block_sparse_moe.experts.97.w3", "model.layers.56.block_sparse_moe.experts.98.w3", "model.layers.56.block_sparse_moe.experts.99.w3", "model.layers.56.block_sparse_moe.experts.100.w3", "model.layers.56.block_sparse_moe.experts.101.w3", "model.layers.56.block_sparse_moe.experts.102.w3", "model.layers.56.block_sparse_moe.experts.103.w3", "model.layers.56.block_sparse_moe.experts.104.w3", "model.layers.56.block_sparse_moe.experts.105.w3", "model.layers.56.block_sparse_moe.experts.106.w3", "model.layers.56.block_sparse_moe.experts.107.w3", "model.layers.56.block_sparse_moe.experts.108.w3", "model.layers.56.block_sparse_moe.experts.109.w3", "model.layers.56.block_sparse_moe.experts.110.w3", "model.layers.56.block_sparse_moe.experts.111.w3", "model.layers.56.block_sparse_moe.experts.112.w3", "model.layers.56.block_sparse_moe.experts.113.w3", "model.layers.56.block_sparse_moe.experts.114.w3", "model.layers.56.block_sparse_moe.experts.115.w3", "model.layers.56.block_sparse_moe.experts.116.w3", "model.layers.56.block_sparse_moe.experts.117.w3", "model.layers.56.block_sparse_moe.experts.118.w3", "model.layers.56.block_sparse_moe.experts.119.w3", "model.layers.56.block_sparse_moe.experts.120.w3", "model.layers.56.block_sparse_moe.experts.121.w3", "model.layers.56.block_sparse_moe.experts.122.w3", "model.layers.56.block_sparse_moe.experts.123.w3", "model.layers.56.block_sparse_moe.experts.124.w3", "model.layers.56.block_sparse_moe.experts.125.w3", "model.layers.56.block_sparse_moe.experts.126.w3", "model.layers.56.block_sparse_moe.experts.127.w3", "model.layers.56.block_sparse_moe.experts.128.w3", "model.layers.56.block_sparse_moe.experts.129.w3", "model.layers.56.block_sparse_moe.experts.130.w3", "model.layers.56.block_sparse_moe.experts.131.w3", "model.layers.56.block_sparse_moe.experts.132.w3", "model.layers.56.block_sparse_moe.experts.133.w3", "model.layers.56.block_sparse_moe.experts.134.w3", "model.layers.56.block_sparse_moe.experts.135.w3", "model.layers.56.block_sparse_moe.experts.136.w3", "model.layers.56.block_sparse_moe.experts.137.w3", "model.layers.56.block_sparse_moe.experts.138.w3", "model.layers.56.block_sparse_moe.experts.139.w3", "model.layers.56.block_sparse_moe.experts.140.w3", "model.layers.56.block_sparse_moe.experts.141.w3", "model.layers.56.block_sparse_moe.experts.142.w3", "model.layers.56.block_sparse_moe.experts.143.w3", "model.layers.56.block_sparse_moe.experts.144.w3", "model.layers.56.block_sparse_moe.experts.145.w3", "model.layers.56.block_sparse_moe.experts.146.w3", "model.layers.56.block_sparse_moe.experts.147.w3", "model.layers.56.block_sparse_moe.experts.148.w3", "model.layers.56.block_sparse_moe.experts.149.w3", "model.layers.56.block_sparse_moe.experts.150.w3", "model.layers.56.block_sparse_moe.experts.151.w3", "model.layers.56.block_sparse_moe.experts.152.w3", "model.layers.56.block_sparse_moe.experts.153.w3", "model.layers.56.block_sparse_moe.experts.154.w3", "model.layers.56.block_sparse_moe.experts.155.w3", "model.layers.56.block_sparse_moe.experts.156.w3", "model.layers.56.block_sparse_moe.experts.157.w3", "model.layers.56.block_sparse_moe.experts.158.w3", "model.layers.56.block_sparse_moe.experts.159.w3", "model.layers.56.block_sparse_moe.experts.160.w3", "model.layers.56.block_sparse_moe.experts.161.w3", "model.layers.56.block_sparse_moe.experts.162.w3", "model.layers.56.block_sparse_moe.experts.163.w3", "model.layers.56.block_sparse_moe.experts.164.w3", "model.layers.56.block_sparse_moe.experts.165.w3", "model.layers.56.block_sparse_moe.experts.166.w3", "model.layers.56.block_sparse_moe.experts.167.w3", "model.layers.56.block_sparse_moe.experts.168.w3", "model.layers.56.block_sparse_moe.experts.169.w3", "model.layers.56.block_sparse_moe.experts.170.w3", "model.layers.56.block_sparse_moe.experts.171.w3", "model.layers.56.block_sparse_moe.experts.172.w3", "model.layers.56.block_sparse_moe.experts.173.w3", "model.layers.56.block_sparse_moe.experts.174.w3", "model.layers.56.block_sparse_moe.experts.175.w3", "model.layers.56.block_sparse_moe.experts.176.w3", "model.layers.56.block_sparse_moe.experts.177.w3", "model.layers.56.block_sparse_moe.experts.178.w3", "model.layers.56.block_sparse_moe.experts.179.w3", "model.layers.56.block_sparse_moe.experts.180.w3", "model.layers.56.block_sparse_moe.experts.181.w3", "model.layers.56.block_sparse_moe.experts.182.w3", "model.layers.56.block_sparse_moe.experts.183.w3", "model.layers.56.block_sparse_moe.experts.184.w3", "model.layers.56.block_sparse_moe.experts.185.w3", "model.layers.56.block_sparse_moe.experts.186.w3", "model.layers.56.block_sparse_moe.experts.187.w3", "model.layers.56.block_sparse_moe.experts.188.w3", "model.layers.56.block_sparse_moe.experts.189.w3", "model.layers.56.block_sparse_moe.experts.190.w3", "model.layers.56.block_sparse_moe.experts.191.w3", "model.layers.56.block_sparse_moe.experts.192.w3", "model.layers.56.block_sparse_moe.experts.193.w3", "model.layers.56.block_sparse_moe.experts.194.w3", "model.layers.56.block_sparse_moe.experts.195.w3", "model.layers.56.block_sparse_moe.experts.196.w3", "model.layers.56.block_sparse_moe.experts.197.w3", "model.layers.56.block_sparse_moe.experts.198.w3", "model.layers.56.block_sparse_moe.experts.199.w3", "model.layers.56.block_sparse_moe.experts.200.w3", "model.layers.56.block_sparse_moe.experts.201.w3", "model.layers.56.block_sparse_moe.experts.202.w3", "model.layers.56.block_sparse_moe.experts.203.w3", "model.layers.56.block_sparse_moe.experts.204.w3", "model.layers.56.block_sparse_moe.experts.205.w3", "model.layers.56.block_sparse_moe.experts.206.w3", "model.layers.56.block_sparse_moe.experts.207.w3", "model.layers.56.block_sparse_moe.experts.208.w3", "model.layers.56.block_sparse_moe.experts.209.w3", "model.layers.56.block_sparse_moe.experts.210.w3", "model.layers.56.block_sparse_moe.experts.211.w3", "model.layers.56.block_sparse_moe.experts.212.w3", "model.layers.56.block_sparse_moe.experts.213.w3", "model.layers.56.block_sparse_moe.experts.214.w3", "model.layers.56.block_sparse_moe.experts.215.w3", "model.layers.56.block_sparse_moe.experts.216.w3", "model.layers.56.block_sparse_moe.experts.217.w3", "model.layers.56.block_sparse_moe.experts.218.w3", "model.layers.56.block_sparse_moe.experts.219.w3", "model.layers.56.block_sparse_moe.experts.220.w3", "model.layers.56.block_sparse_moe.experts.221.w3", "model.layers.56.block_sparse_moe.experts.222.w3", "model.layers.56.block_sparse_moe.experts.223.w3", "model.layers.56.block_sparse_moe.experts.224.w3", "model.layers.56.block_sparse_moe.experts.225.w3", "model.layers.56.block_sparse_moe.experts.226.w3", "model.layers.56.block_sparse_moe.experts.227.w3", "model.layers.56.block_sparse_moe.experts.228.w3", "model.layers.56.block_sparse_moe.experts.229.w3", "model.layers.56.block_sparse_moe.experts.230.w3", "model.layers.56.block_sparse_moe.experts.231.w3", "model.layers.56.block_sparse_moe.experts.232.w3", "model.layers.56.block_sparse_moe.experts.233.w3", "model.layers.56.block_sparse_moe.experts.234.w3", "model.layers.56.block_sparse_moe.experts.235.w3", "model.layers.56.block_sparse_moe.experts.236.w3", "model.layers.56.block_sparse_moe.experts.237.w3", "model.layers.56.block_sparse_moe.experts.238.w3", "model.layers.56.block_sparse_moe.experts.239.w3", "model.layers.56.block_sparse_moe.experts.240.w3", "model.layers.56.block_sparse_moe.experts.241.w3", "model.layers.56.block_sparse_moe.experts.242.w3", "model.layers.56.block_sparse_moe.experts.243.w3", "model.layers.56.block_sparse_moe.experts.244.w3", "model.layers.56.block_sparse_moe.experts.245.w3", "model.layers.56.block_sparse_moe.experts.246.w3", "model.layers.56.block_sparse_moe.experts.247.w3", "model.layers.56.block_sparse_moe.experts.248.w3", "model.layers.56.block_sparse_moe.experts.249.w3", "model.layers.56.block_sparse_moe.experts.250.w3", "model.layers.56.block_sparse_moe.experts.251.w3", "model.layers.56.block_sparse_moe.experts.252.w3", "model.layers.56.block_sparse_moe.experts.253.w3", "model.layers.56.block_sparse_moe.experts.254.w3", "model.layers.56.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 6.052069365977686e-05, "dbits": 2415919104 } ] }, { "idx": 284, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w2", "model.layers.56.block_sparse_moe.experts.1.w2", "model.layers.56.block_sparse_moe.experts.2.w2", "model.layers.56.block_sparse_moe.experts.3.w2", "model.layers.56.block_sparse_moe.experts.4.w2", "model.layers.56.block_sparse_moe.experts.5.w2", "model.layers.56.block_sparse_moe.experts.6.w2", "model.layers.56.block_sparse_moe.experts.7.w2", "model.layers.56.block_sparse_moe.experts.8.w2", "model.layers.56.block_sparse_moe.experts.9.w2", "model.layers.56.block_sparse_moe.experts.10.w2", "model.layers.56.block_sparse_moe.experts.11.w2", "model.layers.56.block_sparse_moe.experts.12.w2", "model.layers.56.block_sparse_moe.experts.13.w2", "model.layers.56.block_sparse_moe.experts.14.w2", "model.layers.56.block_sparse_moe.experts.15.w2", "model.layers.56.block_sparse_moe.experts.16.w2", "model.layers.56.block_sparse_moe.experts.17.w2", "model.layers.56.block_sparse_moe.experts.18.w2", "model.layers.56.block_sparse_moe.experts.19.w2", "model.layers.56.block_sparse_moe.experts.20.w2", "model.layers.56.block_sparse_moe.experts.21.w2", "model.layers.56.block_sparse_moe.experts.22.w2", "model.layers.56.block_sparse_moe.experts.23.w2", "model.layers.56.block_sparse_moe.experts.24.w2", "model.layers.56.block_sparse_moe.experts.25.w2", "model.layers.56.block_sparse_moe.experts.26.w2", "model.layers.56.block_sparse_moe.experts.27.w2", "model.layers.56.block_sparse_moe.experts.28.w2", "model.layers.56.block_sparse_moe.experts.29.w2", "model.layers.56.block_sparse_moe.experts.30.w2", "model.layers.56.block_sparse_moe.experts.31.w2", "model.layers.56.block_sparse_moe.experts.32.w2", "model.layers.56.block_sparse_moe.experts.33.w2", "model.layers.56.block_sparse_moe.experts.34.w2", "model.layers.56.block_sparse_moe.experts.35.w2", "model.layers.56.block_sparse_moe.experts.36.w2", "model.layers.56.block_sparse_moe.experts.37.w2", "model.layers.56.block_sparse_moe.experts.38.w2", "model.layers.56.block_sparse_moe.experts.39.w2", "model.layers.56.block_sparse_moe.experts.40.w2", "model.layers.56.block_sparse_moe.experts.41.w2", "model.layers.56.block_sparse_moe.experts.42.w2", "model.layers.56.block_sparse_moe.experts.43.w2", "model.layers.56.block_sparse_moe.experts.44.w2", "model.layers.56.block_sparse_moe.experts.45.w2", "model.layers.56.block_sparse_moe.experts.46.w2", "model.layers.56.block_sparse_moe.experts.47.w2", "model.layers.56.block_sparse_moe.experts.48.w2", "model.layers.56.block_sparse_moe.experts.49.w2", "model.layers.56.block_sparse_moe.experts.50.w2", "model.layers.56.block_sparse_moe.experts.51.w2", "model.layers.56.block_sparse_moe.experts.52.w2", "model.layers.56.block_sparse_moe.experts.53.w2", "model.layers.56.block_sparse_moe.experts.54.w2", "model.layers.56.block_sparse_moe.experts.55.w2", "model.layers.56.block_sparse_moe.experts.56.w2", "model.layers.56.block_sparse_moe.experts.57.w2", "model.layers.56.block_sparse_moe.experts.58.w2", "model.layers.56.block_sparse_moe.experts.59.w2", "model.layers.56.block_sparse_moe.experts.60.w2", "model.layers.56.block_sparse_moe.experts.61.w2", "model.layers.56.block_sparse_moe.experts.62.w2", "model.layers.56.block_sparse_moe.experts.63.w2", "model.layers.56.block_sparse_moe.experts.64.w2", "model.layers.56.block_sparse_moe.experts.65.w2", "model.layers.56.block_sparse_moe.experts.66.w2", "model.layers.56.block_sparse_moe.experts.67.w2", "model.layers.56.block_sparse_moe.experts.68.w2", "model.layers.56.block_sparse_moe.experts.69.w2", "model.layers.56.block_sparse_moe.experts.70.w2", "model.layers.56.block_sparse_moe.experts.71.w2", "model.layers.56.block_sparse_moe.experts.72.w2", "model.layers.56.block_sparse_moe.experts.73.w2", "model.layers.56.block_sparse_moe.experts.74.w2", "model.layers.56.block_sparse_moe.experts.75.w2", "model.layers.56.block_sparse_moe.experts.76.w2", "model.layers.56.block_sparse_moe.experts.77.w2", "model.layers.56.block_sparse_moe.experts.78.w2", "model.layers.56.block_sparse_moe.experts.79.w2", "model.layers.56.block_sparse_moe.experts.80.w2", "model.layers.56.block_sparse_moe.experts.81.w2", "model.layers.56.block_sparse_moe.experts.82.w2", "model.layers.56.block_sparse_moe.experts.83.w2", "model.layers.56.block_sparse_moe.experts.84.w2", "model.layers.56.block_sparse_moe.experts.85.w2", "model.layers.56.block_sparse_moe.experts.86.w2", "model.layers.56.block_sparse_moe.experts.87.w2", "model.layers.56.block_sparse_moe.experts.88.w2", "model.layers.56.block_sparse_moe.experts.89.w2", "model.layers.56.block_sparse_moe.experts.90.w2", "model.layers.56.block_sparse_moe.experts.91.w2", "model.layers.56.block_sparse_moe.experts.92.w2", "model.layers.56.block_sparse_moe.experts.93.w2", "model.layers.56.block_sparse_moe.experts.94.w2", "model.layers.56.block_sparse_moe.experts.95.w2", "model.layers.56.block_sparse_moe.experts.96.w2", "model.layers.56.block_sparse_moe.experts.97.w2", "model.layers.56.block_sparse_moe.experts.98.w2", "model.layers.56.block_sparse_moe.experts.99.w2", "model.layers.56.block_sparse_moe.experts.100.w2", "model.layers.56.block_sparse_moe.experts.101.w2", "model.layers.56.block_sparse_moe.experts.102.w2", "model.layers.56.block_sparse_moe.experts.103.w2", "model.layers.56.block_sparse_moe.experts.104.w2", "model.layers.56.block_sparse_moe.experts.105.w2", "model.layers.56.block_sparse_moe.experts.106.w2", "model.layers.56.block_sparse_moe.experts.107.w2", "model.layers.56.block_sparse_moe.experts.108.w2", "model.layers.56.block_sparse_moe.experts.109.w2", "model.layers.56.block_sparse_moe.experts.110.w2", "model.layers.56.block_sparse_moe.experts.111.w2", "model.layers.56.block_sparse_moe.experts.112.w2", "model.layers.56.block_sparse_moe.experts.113.w2", "model.layers.56.block_sparse_moe.experts.114.w2", "model.layers.56.block_sparse_moe.experts.115.w2", "model.layers.56.block_sparse_moe.experts.116.w2", "model.layers.56.block_sparse_moe.experts.117.w2", "model.layers.56.block_sparse_moe.experts.118.w2", "model.layers.56.block_sparse_moe.experts.119.w2", "model.layers.56.block_sparse_moe.experts.120.w2", "model.layers.56.block_sparse_moe.experts.121.w2", "model.layers.56.block_sparse_moe.experts.122.w2", "model.layers.56.block_sparse_moe.experts.123.w2", "model.layers.56.block_sparse_moe.experts.124.w2", "model.layers.56.block_sparse_moe.experts.125.w2", "model.layers.56.block_sparse_moe.experts.126.w2", "model.layers.56.block_sparse_moe.experts.127.w2", "model.layers.56.block_sparse_moe.experts.128.w2", "model.layers.56.block_sparse_moe.experts.129.w2", "model.layers.56.block_sparse_moe.experts.130.w2", "model.layers.56.block_sparse_moe.experts.131.w2", "model.layers.56.block_sparse_moe.experts.132.w2", "model.layers.56.block_sparse_moe.experts.133.w2", "model.layers.56.block_sparse_moe.experts.134.w2", "model.layers.56.block_sparse_moe.experts.135.w2", "model.layers.56.block_sparse_moe.experts.136.w2", "model.layers.56.block_sparse_moe.experts.137.w2", "model.layers.56.block_sparse_moe.experts.138.w2", "model.layers.56.block_sparse_moe.experts.139.w2", "model.layers.56.block_sparse_moe.experts.140.w2", "model.layers.56.block_sparse_moe.experts.141.w2", "model.layers.56.block_sparse_moe.experts.142.w2", "model.layers.56.block_sparse_moe.experts.143.w2", "model.layers.56.block_sparse_moe.experts.144.w2", "model.layers.56.block_sparse_moe.experts.145.w2", "model.layers.56.block_sparse_moe.experts.146.w2", "model.layers.56.block_sparse_moe.experts.147.w2", "model.layers.56.block_sparse_moe.experts.148.w2", "model.layers.56.block_sparse_moe.experts.149.w2", "model.layers.56.block_sparse_moe.experts.150.w2", "model.layers.56.block_sparse_moe.experts.151.w2", "model.layers.56.block_sparse_moe.experts.152.w2", "model.layers.56.block_sparse_moe.experts.153.w2", "model.layers.56.block_sparse_moe.experts.154.w2", "model.layers.56.block_sparse_moe.experts.155.w2", "model.layers.56.block_sparse_moe.experts.156.w2", "model.layers.56.block_sparse_moe.experts.157.w2", "model.layers.56.block_sparse_moe.experts.158.w2", "model.layers.56.block_sparse_moe.experts.159.w2", "model.layers.56.block_sparse_moe.experts.160.w2", "model.layers.56.block_sparse_moe.experts.161.w2", "model.layers.56.block_sparse_moe.experts.162.w2", "model.layers.56.block_sparse_moe.experts.163.w2", "model.layers.56.block_sparse_moe.experts.164.w2", "model.layers.56.block_sparse_moe.experts.165.w2", "model.layers.56.block_sparse_moe.experts.166.w2", "model.layers.56.block_sparse_moe.experts.167.w2", "model.layers.56.block_sparse_moe.experts.168.w2", "model.layers.56.block_sparse_moe.experts.169.w2", "model.layers.56.block_sparse_moe.experts.170.w2", "model.layers.56.block_sparse_moe.experts.171.w2", "model.layers.56.block_sparse_moe.experts.172.w2", "model.layers.56.block_sparse_moe.experts.173.w2", "model.layers.56.block_sparse_moe.experts.174.w2", "model.layers.56.block_sparse_moe.experts.175.w2", "model.layers.56.block_sparse_moe.experts.176.w2", "model.layers.56.block_sparse_moe.experts.177.w2", "model.layers.56.block_sparse_moe.experts.178.w2", "model.layers.56.block_sparse_moe.experts.179.w2", "model.layers.56.block_sparse_moe.experts.180.w2", "model.layers.56.block_sparse_moe.experts.181.w2", "model.layers.56.block_sparse_moe.experts.182.w2", "model.layers.56.block_sparse_moe.experts.183.w2", "model.layers.56.block_sparse_moe.experts.184.w2", "model.layers.56.block_sparse_moe.experts.185.w2", "model.layers.56.block_sparse_moe.experts.186.w2", "model.layers.56.block_sparse_moe.experts.187.w2", "model.layers.56.block_sparse_moe.experts.188.w2", "model.layers.56.block_sparse_moe.experts.189.w2", "model.layers.56.block_sparse_moe.experts.190.w2", "model.layers.56.block_sparse_moe.experts.191.w2", "model.layers.56.block_sparse_moe.experts.192.w2", "model.layers.56.block_sparse_moe.experts.193.w2", "model.layers.56.block_sparse_moe.experts.194.w2", "model.layers.56.block_sparse_moe.experts.195.w2", "model.layers.56.block_sparse_moe.experts.196.w2", "model.layers.56.block_sparse_moe.experts.197.w2", "model.layers.56.block_sparse_moe.experts.198.w2", "model.layers.56.block_sparse_moe.experts.199.w2", "model.layers.56.block_sparse_moe.experts.200.w2", "model.layers.56.block_sparse_moe.experts.201.w2", "model.layers.56.block_sparse_moe.experts.202.w2", "model.layers.56.block_sparse_moe.experts.203.w2", "model.layers.56.block_sparse_moe.experts.204.w2", "model.layers.56.block_sparse_moe.experts.205.w2", "model.layers.56.block_sparse_moe.experts.206.w2", "model.layers.56.block_sparse_moe.experts.207.w2", "model.layers.56.block_sparse_moe.experts.208.w2", "model.layers.56.block_sparse_moe.experts.209.w2", "model.layers.56.block_sparse_moe.experts.210.w2", "model.layers.56.block_sparse_moe.experts.211.w2", "model.layers.56.block_sparse_moe.experts.212.w2", "model.layers.56.block_sparse_moe.experts.213.w2", "model.layers.56.block_sparse_moe.experts.214.w2", "model.layers.56.block_sparse_moe.experts.215.w2", "model.layers.56.block_sparse_moe.experts.216.w2", "model.layers.56.block_sparse_moe.experts.217.w2", "model.layers.56.block_sparse_moe.experts.218.w2", "model.layers.56.block_sparse_moe.experts.219.w2", "model.layers.56.block_sparse_moe.experts.220.w2", "model.layers.56.block_sparse_moe.experts.221.w2", "model.layers.56.block_sparse_moe.experts.222.w2", "model.layers.56.block_sparse_moe.experts.223.w2", "model.layers.56.block_sparse_moe.experts.224.w2", "model.layers.56.block_sparse_moe.experts.225.w2", "model.layers.56.block_sparse_moe.experts.226.w2", "model.layers.56.block_sparse_moe.experts.227.w2", "model.layers.56.block_sparse_moe.experts.228.w2", "model.layers.56.block_sparse_moe.experts.229.w2", "model.layers.56.block_sparse_moe.experts.230.w2", "model.layers.56.block_sparse_moe.experts.231.w2", "model.layers.56.block_sparse_moe.experts.232.w2", "model.layers.56.block_sparse_moe.experts.233.w2", "model.layers.56.block_sparse_moe.experts.234.w2", "model.layers.56.block_sparse_moe.experts.235.w2", "model.layers.56.block_sparse_moe.experts.236.w2", "model.layers.56.block_sparse_moe.experts.237.w2", "model.layers.56.block_sparse_moe.experts.238.w2", "model.layers.56.block_sparse_moe.experts.239.w2", "model.layers.56.block_sparse_moe.experts.240.w2", "model.layers.56.block_sparse_moe.experts.241.w2", "model.layers.56.block_sparse_moe.experts.242.w2", "model.layers.56.block_sparse_moe.experts.243.w2", "model.layers.56.block_sparse_moe.experts.244.w2", "model.layers.56.block_sparse_moe.experts.245.w2", "model.layers.56.block_sparse_moe.experts.246.w2", "model.layers.56.block_sparse_moe.experts.247.w2", "model.layers.56.block_sparse_moe.experts.248.w2", "model.layers.56.block_sparse_moe.experts.249.w2", "model.layers.56.block_sparse_moe.experts.250.w2", "model.layers.56.block_sparse_moe.experts.251.w2", "model.layers.56.block_sparse_moe.experts.252.w2", "model.layers.56.block_sparse_moe.experts.253.w2", "model.layers.56.block_sparse_moe.experts.254.w2", "model.layers.56.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -1.2690201401793777e-06, "dbits": 1207959552 } ] }, { "idx": 285, "layers": [ "model.layers.57.self_attn.q_proj" ], "candidates": [ { "dkld": -0.000177606381475931, "dbits": 18874368 } ] }, { "idx": 286, "layers": [ "model.layers.57.self_attn.k_proj", "model.layers.57.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00033254381269216815, "dbits": 6291456 } ] }, { "idx": 287, "layers": [ "model.layers.57.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0004808684810996139, "dbits": 18874368 } ] }, { "idx": 288, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w1", "model.layers.57.block_sparse_moe.experts.1.w1", "model.layers.57.block_sparse_moe.experts.2.w1", "model.layers.57.block_sparse_moe.experts.3.w1", "model.layers.57.block_sparse_moe.experts.4.w1", "model.layers.57.block_sparse_moe.experts.5.w1", "model.layers.57.block_sparse_moe.experts.6.w1", "model.layers.57.block_sparse_moe.experts.7.w1", "model.layers.57.block_sparse_moe.experts.8.w1", "model.layers.57.block_sparse_moe.experts.9.w1", "model.layers.57.block_sparse_moe.experts.10.w1", "model.layers.57.block_sparse_moe.experts.11.w1", "model.layers.57.block_sparse_moe.experts.12.w1", "model.layers.57.block_sparse_moe.experts.13.w1", "model.layers.57.block_sparse_moe.experts.14.w1", "model.layers.57.block_sparse_moe.experts.15.w1", "model.layers.57.block_sparse_moe.experts.16.w1", "model.layers.57.block_sparse_moe.experts.17.w1", "model.layers.57.block_sparse_moe.experts.18.w1", "model.layers.57.block_sparse_moe.experts.19.w1", "model.layers.57.block_sparse_moe.experts.20.w1", "model.layers.57.block_sparse_moe.experts.21.w1", "model.layers.57.block_sparse_moe.experts.22.w1", "model.layers.57.block_sparse_moe.experts.23.w1", "model.layers.57.block_sparse_moe.experts.24.w1", "model.layers.57.block_sparse_moe.experts.25.w1", "model.layers.57.block_sparse_moe.experts.26.w1", "model.layers.57.block_sparse_moe.experts.27.w1", "model.layers.57.block_sparse_moe.experts.28.w1", "model.layers.57.block_sparse_moe.experts.29.w1", "model.layers.57.block_sparse_moe.experts.30.w1", "model.layers.57.block_sparse_moe.experts.31.w1", "model.layers.57.block_sparse_moe.experts.32.w1", "model.layers.57.block_sparse_moe.experts.33.w1", "model.layers.57.block_sparse_moe.experts.34.w1", "model.layers.57.block_sparse_moe.experts.35.w1", "model.layers.57.block_sparse_moe.experts.36.w1", "model.layers.57.block_sparse_moe.experts.37.w1", "model.layers.57.block_sparse_moe.experts.38.w1", "model.layers.57.block_sparse_moe.experts.39.w1", "model.layers.57.block_sparse_moe.experts.40.w1", "model.layers.57.block_sparse_moe.experts.41.w1", "model.layers.57.block_sparse_moe.experts.42.w1", "model.layers.57.block_sparse_moe.experts.43.w1", "model.layers.57.block_sparse_moe.experts.44.w1", "model.layers.57.block_sparse_moe.experts.45.w1", "model.layers.57.block_sparse_moe.experts.46.w1", "model.layers.57.block_sparse_moe.experts.47.w1", "model.layers.57.block_sparse_moe.experts.48.w1", "model.layers.57.block_sparse_moe.experts.49.w1", "model.layers.57.block_sparse_moe.experts.50.w1", "model.layers.57.block_sparse_moe.experts.51.w1", "model.layers.57.block_sparse_moe.experts.52.w1", "model.layers.57.block_sparse_moe.experts.53.w1", "model.layers.57.block_sparse_moe.experts.54.w1", "model.layers.57.block_sparse_moe.experts.55.w1", "model.layers.57.block_sparse_moe.experts.56.w1", "model.layers.57.block_sparse_moe.experts.57.w1", "model.layers.57.block_sparse_moe.experts.58.w1", "model.layers.57.block_sparse_moe.experts.59.w1", "model.layers.57.block_sparse_moe.experts.60.w1", "model.layers.57.block_sparse_moe.experts.61.w1", "model.layers.57.block_sparse_moe.experts.62.w1", "model.layers.57.block_sparse_moe.experts.63.w1", "model.layers.57.block_sparse_moe.experts.64.w1", "model.layers.57.block_sparse_moe.experts.65.w1", "model.layers.57.block_sparse_moe.experts.66.w1", "model.layers.57.block_sparse_moe.experts.67.w1", "model.layers.57.block_sparse_moe.experts.68.w1", "model.layers.57.block_sparse_moe.experts.69.w1", "model.layers.57.block_sparse_moe.experts.70.w1", "model.layers.57.block_sparse_moe.experts.71.w1", "model.layers.57.block_sparse_moe.experts.72.w1", "model.layers.57.block_sparse_moe.experts.73.w1", "model.layers.57.block_sparse_moe.experts.74.w1", "model.layers.57.block_sparse_moe.experts.75.w1", "model.layers.57.block_sparse_moe.experts.76.w1", "model.layers.57.block_sparse_moe.experts.77.w1", "model.layers.57.block_sparse_moe.experts.78.w1", "model.layers.57.block_sparse_moe.experts.79.w1", "model.layers.57.block_sparse_moe.experts.80.w1", "model.layers.57.block_sparse_moe.experts.81.w1", "model.layers.57.block_sparse_moe.experts.82.w1", "model.layers.57.block_sparse_moe.experts.83.w1", "model.layers.57.block_sparse_moe.experts.84.w1", "model.layers.57.block_sparse_moe.experts.85.w1", "model.layers.57.block_sparse_moe.experts.86.w1", "model.layers.57.block_sparse_moe.experts.87.w1", "model.layers.57.block_sparse_moe.experts.88.w1", "model.layers.57.block_sparse_moe.experts.89.w1", "model.layers.57.block_sparse_moe.experts.90.w1", "model.layers.57.block_sparse_moe.experts.91.w1", "model.layers.57.block_sparse_moe.experts.92.w1", "model.layers.57.block_sparse_moe.experts.93.w1", "model.layers.57.block_sparse_moe.experts.94.w1", "model.layers.57.block_sparse_moe.experts.95.w1", "model.layers.57.block_sparse_moe.experts.96.w1", "model.layers.57.block_sparse_moe.experts.97.w1", "model.layers.57.block_sparse_moe.experts.98.w1", "model.layers.57.block_sparse_moe.experts.99.w1", "model.layers.57.block_sparse_moe.experts.100.w1", "model.layers.57.block_sparse_moe.experts.101.w1", "model.layers.57.block_sparse_moe.experts.102.w1", "model.layers.57.block_sparse_moe.experts.103.w1", "model.layers.57.block_sparse_moe.experts.104.w1", "model.layers.57.block_sparse_moe.experts.105.w1", "model.layers.57.block_sparse_moe.experts.106.w1", "model.layers.57.block_sparse_moe.experts.107.w1", "model.layers.57.block_sparse_moe.experts.108.w1", "model.layers.57.block_sparse_moe.experts.109.w1", "model.layers.57.block_sparse_moe.experts.110.w1", "model.layers.57.block_sparse_moe.experts.111.w1", "model.layers.57.block_sparse_moe.experts.112.w1", "model.layers.57.block_sparse_moe.experts.113.w1", "model.layers.57.block_sparse_moe.experts.114.w1", "model.layers.57.block_sparse_moe.experts.115.w1", "model.layers.57.block_sparse_moe.experts.116.w1", "model.layers.57.block_sparse_moe.experts.117.w1", "model.layers.57.block_sparse_moe.experts.118.w1", "model.layers.57.block_sparse_moe.experts.119.w1", "model.layers.57.block_sparse_moe.experts.120.w1", "model.layers.57.block_sparse_moe.experts.121.w1", "model.layers.57.block_sparse_moe.experts.122.w1", "model.layers.57.block_sparse_moe.experts.123.w1", "model.layers.57.block_sparse_moe.experts.124.w1", "model.layers.57.block_sparse_moe.experts.125.w1", "model.layers.57.block_sparse_moe.experts.126.w1", "model.layers.57.block_sparse_moe.experts.127.w1", "model.layers.57.block_sparse_moe.experts.128.w1", "model.layers.57.block_sparse_moe.experts.129.w1", "model.layers.57.block_sparse_moe.experts.130.w1", "model.layers.57.block_sparse_moe.experts.131.w1", "model.layers.57.block_sparse_moe.experts.132.w1", "model.layers.57.block_sparse_moe.experts.133.w1", "model.layers.57.block_sparse_moe.experts.134.w1", "model.layers.57.block_sparse_moe.experts.135.w1", "model.layers.57.block_sparse_moe.experts.136.w1", "model.layers.57.block_sparse_moe.experts.137.w1", "model.layers.57.block_sparse_moe.experts.138.w1", "model.layers.57.block_sparse_moe.experts.139.w1", "model.layers.57.block_sparse_moe.experts.140.w1", "model.layers.57.block_sparse_moe.experts.141.w1", "model.layers.57.block_sparse_moe.experts.142.w1", "model.layers.57.block_sparse_moe.experts.143.w1", "model.layers.57.block_sparse_moe.experts.144.w1", "model.layers.57.block_sparse_moe.experts.145.w1", "model.layers.57.block_sparse_moe.experts.146.w1", "model.layers.57.block_sparse_moe.experts.147.w1", "model.layers.57.block_sparse_moe.experts.148.w1", "model.layers.57.block_sparse_moe.experts.149.w1", "model.layers.57.block_sparse_moe.experts.150.w1", "model.layers.57.block_sparse_moe.experts.151.w1", "model.layers.57.block_sparse_moe.experts.152.w1", "model.layers.57.block_sparse_moe.experts.153.w1", "model.layers.57.block_sparse_moe.experts.154.w1", "model.layers.57.block_sparse_moe.experts.155.w1", "model.layers.57.block_sparse_moe.experts.156.w1", "model.layers.57.block_sparse_moe.experts.157.w1", "model.layers.57.block_sparse_moe.experts.158.w1", "model.layers.57.block_sparse_moe.experts.159.w1", "model.layers.57.block_sparse_moe.experts.160.w1", "model.layers.57.block_sparse_moe.experts.161.w1", "model.layers.57.block_sparse_moe.experts.162.w1", "model.layers.57.block_sparse_moe.experts.163.w1", "model.layers.57.block_sparse_moe.experts.164.w1", "model.layers.57.block_sparse_moe.experts.165.w1", "model.layers.57.block_sparse_moe.experts.166.w1", "model.layers.57.block_sparse_moe.experts.167.w1", "model.layers.57.block_sparse_moe.experts.168.w1", "model.layers.57.block_sparse_moe.experts.169.w1", "model.layers.57.block_sparse_moe.experts.170.w1", "model.layers.57.block_sparse_moe.experts.171.w1", "model.layers.57.block_sparse_moe.experts.172.w1", "model.layers.57.block_sparse_moe.experts.173.w1", "model.layers.57.block_sparse_moe.experts.174.w1", "model.layers.57.block_sparse_moe.experts.175.w1", "model.layers.57.block_sparse_moe.experts.176.w1", "model.layers.57.block_sparse_moe.experts.177.w1", "model.layers.57.block_sparse_moe.experts.178.w1", "model.layers.57.block_sparse_moe.experts.179.w1", "model.layers.57.block_sparse_moe.experts.180.w1", "model.layers.57.block_sparse_moe.experts.181.w1", "model.layers.57.block_sparse_moe.experts.182.w1", "model.layers.57.block_sparse_moe.experts.183.w1", "model.layers.57.block_sparse_moe.experts.184.w1", "model.layers.57.block_sparse_moe.experts.185.w1", "model.layers.57.block_sparse_moe.experts.186.w1", "model.layers.57.block_sparse_moe.experts.187.w1", "model.layers.57.block_sparse_moe.experts.188.w1", "model.layers.57.block_sparse_moe.experts.189.w1", "model.layers.57.block_sparse_moe.experts.190.w1", "model.layers.57.block_sparse_moe.experts.191.w1", "model.layers.57.block_sparse_moe.experts.192.w1", "model.layers.57.block_sparse_moe.experts.193.w1", "model.layers.57.block_sparse_moe.experts.194.w1", "model.layers.57.block_sparse_moe.experts.195.w1", "model.layers.57.block_sparse_moe.experts.196.w1", "model.layers.57.block_sparse_moe.experts.197.w1", "model.layers.57.block_sparse_moe.experts.198.w1", "model.layers.57.block_sparse_moe.experts.199.w1", "model.layers.57.block_sparse_moe.experts.200.w1", "model.layers.57.block_sparse_moe.experts.201.w1", "model.layers.57.block_sparse_moe.experts.202.w1", "model.layers.57.block_sparse_moe.experts.203.w1", "model.layers.57.block_sparse_moe.experts.204.w1", "model.layers.57.block_sparse_moe.experts.205.w1", "model.layers.57.block_sparse_moe.experts.206.w1", "model.layers.57.block_sparse_moe.experts.207.w1", "model.layers.57.block_sparse_moe.experts.208.w1", "model.layers.57.block_sparse_moe.experts.209.w1", "model.layers.57.block_sparse_moe.experts.210.w1", "model.layers.57.block_sparse_moe.experts.211.w1", "model.layers.57.block_sparse_moe.experts.212.w1", "model.layers.57.block_sparse_moe.experts.213.w1", "model.layers.57.block_sparse_moe.experts.214.w1", "model.layers.57.block_sparse_moe.experts.215.w1", "model.layers.57.block_sparse_moe.experts.216.w1", "model.layers.57.block_sparse_moe.experts.217.w1", "model.layers.57.block_sparse_moe.experts.218.w1", "model.layers.57.block_sparse_moe.experts.219.w1", "model.layers.57.block_sparse_moe.experts.220.w1", "model.layers.57.block_sparse_moe.experts.221.w1", "model.layers.57.block_sparse_moe.experts.222.w1", "model.layers.57.block_sparse_moe.experts.223.w1", "model.layers.57.block_sparse_moe.experts.224.w1", "model.layers.57.block_sparse_moe.experts.225.w1", "model.layers.57.block_sparse_moe.experts.226.w1", "model.layers.57.block_sparse_moe.experts.227.w1", "model.layers.57.block_sparse_moe.experts.228.w1", "model.layers.57.block_sparse_moe.experts.229.w1", "model.layers.57.block_sparse_moe.experts.230.w1", "model.layers.57.block_sparse_moe.experts.231.w1", "model.layers.57.block_sparse_moe.experts.232.w1", "model.layers.57.block_sparse_moe.experts.233.w1", "model.layers.57.block_sparse_moe.experts.234.w1", "model.layers.57.block_sparse_moe.experts.235.w1", "model.layers.57.block_sparse_moe.experts.236.w1", "model.layers.57.block_sparse_moe.experts.237.w1", "model.layers.57.block_sparse_moe.experts.238.w1", "model.layers.57.block_sparse_moe.experts.239.w1", "model.layers.57.block_sparse_moe.experts.240.w1", "model.layers.57.block_sparse_moe.experts.241.w1", "model.layers.57.block_sparse_moe.experts.242.w1", "model.layers.57.block_sparse_moe.experts.243.w1", "model.layers.57.block_sparse_moe.experts.244.w1", "model.layers.57.block_sparse_moe.experts.245.w1", "model.layers.57.block_sparse_moe.experts.246.w1", "model.layers.57.block_sparse_moe.experts.247.w1", "model.layers.57.block_sparse_moe.experts.248.w1", "model.layers.57.block_sparse_moe.experts.249.w1", "model.layers.57.block_sparse_moe.experts.250.w1", "model.layers.57.block_sparse_moe.experts.251.w1", "model.layers.57.block_sparse_moe.experts.252.w1", "model.layers.57.block_sparse_moe.experts.253.w1", "model.layers.57.block_sparse_moe.experts.254.w1", "model.layers.57.block_sparse_moe.experts.255.w1", "model.layers.57.block_sparse_moe.experts.0.w3", "model.layers.57.block_sparse_moe.experts.1.w3", "model.layers.57.block_sparse_moe.experts.2.w3", "model.layers.57.block_sparse_moe.experts.3.w3", "model.layers.57.block_sparse_moe.experts.4.w3", "model.layers.57.block_sparse_moe.experts.5.w3", "model.layers.57.block_sparse_moe.experts.6.w3", "model.layers.57.block_sparse_moe.experts.7.w3", "model.layers.57.block_sparse_moe.experts.8.w3", "model.layers.57.block_sparse_moe.experts.9.w3", "model.layers.57.block_sparse_moe.experts.10.w3", "model.layers.57.block_sparse_moe.experts.11.w3", "model.layers.57.block_sparse_moe.experts.12.w3", "model.layers.57.block_sparse_moe.experts.13.w3", "model.layers.57.block_sparse_moe.experts.14.w3", "model.layers.57.block_sparse_moe.experts.15.w3", "model.layers.57.block_sparse_moe.experts.16.w3", "model.layers.57.block_sparse_moe.experts.17.w3", "model.layers.57.block_sparse_moe.experts.18.w3", "model.layers.57.block_sparse_moe.experts.19.w3", "model.layers.57.block_sparse_moe.experts.20.w3", "model.layers.57.block_sparse_moe.experts.21.w3", "model.layers.57.block_sparse_moe.experts.22.w3", "model.layers.57.block_sparse_moe.experts.23.w3", "model.layers.57.block_sparse_moe.experts.24.w3", "model.layers.57.block_sparse_moe.experts.25.w3", "model.layers.57.block_sparse_moe.experts.26.w3", "model.layers.57.block_sparse_moe.experts.27.w3", "model.layers.57.block_sparse_moe.experts.28.w3", "model.layers.57.block_sparse_moe.experts.29.w3", "model.layers.57.block_sparse_moe.experts.30.w3", "model.layers.57.block_sparse_moe.experts.31.w3", "model.layers.57.block_sparse_moe.experts.32.w3", "model.layers.57.block_sparse_moe.experts.33.w3", "model.layers.57.block_sparse_moe.experts.34.w3", "model.layers.57.block_sparse_moe.experts.35.w3", "model.layers.57.block_sparse_moe.experts.36.w3", "model.layers.57.block_sparse_moe.experts.37.w3", "model.layers.57.block_sparse_moe.experts.38.w3", "model.layers.57.block_sparse_moe.experts.39.w3", "model.layers.57.block_sparse_moe.experts.40.w3", "model.layers.57.block_sparse_moe.experts.41.w3", "model.layers.57.block_sparse_moe.experts.42.w3", "model.layers.57.block_sparse_moe.experts.43.w3", "model.layers.57.block_sparse_moe.experts.44.w3", "model.layers.57.block_sparse_moe.experts.45.w3", "model.layers.57.block_sparse_moe.experts.46.w3", "model.layers.57.block_sparse_moe.experts.47.w3", "model.layers.57.block_sparse_moe.experts.48.w3", "model.layers.57.block_sparse_moe.experts.49.w3", "model.layers.57.block_sparse_moe.experts.50.w3", "model.layers.57.block_sparse_moe.experts.51.w3", "model.layers.57.block_sparse_moe.experts.52.w3", "model.layers.57.block_sparse_moe.experts.53.w3", "model.layers.57.block_sparse_moe.experts.54.w3", "model.layers.57.block_sparse_moe.experts.55.w3", "model.layers.57.block_sparse_moe.experts.56.w3", "model.layers.57.block_sparse_moe.experts.57.w3", "model.layers.57.block_sparse_moe.experts.58.w3", "model.layers.57.block_sparse_moe.experts.59.w3", "model.layers.57.block_sparse_moe.experts.60.w3", "model.layers.57.block_sparse_moe.experts.61.w3", "model.layers.57.block_sparse_moe.experts.62.w3", "model.layers.57.block_sparse_moe.experts.63.w3", "model.layers.57.block_sparse_moe.experts.64.w3", "model.layers.57.block_sparse_moe.experts.65.w3", "model.layers.57.block_sparse_moe.experts.66.w3", "model.layers.57.block_sparse_moe.experts.67.w3", "model.layers.57.block_sparse_moe.experts.68.w3", "model.layers.57.block_sparse_moe.experts.69.w3", "model.layers.57.block_sparse_moe.experts.70.w3", "model.layers.57.block_sparse_moe.experts.71.w3", "model.layers.57.block_sparse_moe.experts.72.w3", "model.layers.57.block_sparse_moe.experts.73.w3", "model.layers.57.block_sparse_moe.experts.74.w3", "model.layers.57.block_sparse_moe.experts.75.w3", "model.layers.57.block_sparse_moe.experts.76.w3", "model.layers.57.block_sparse_moe.experts.77.w3", "model.layers.57.block_sparse_moe.experts.78.w3", "model.layers.57.block_sparse_moe.experts.79.w3", "model.layers.57.block_sparse_moe.experts.80.w3", "model.layers.57.block_sparse_moe.experts.81.w3", "model.layers.57.block_sparse_moe.experts.82.w3", "model.layers.57.block_sparse_moe.experts.83.w3", "model.layers.57.block_sparse_moe.experts.84.w3", "model.layers.57.block_sparse_moe.experts.85.w3", "model.layers.57.block_sparse_moe.experts.86.w3", "model.layers.57.block_sparse_moe.experts.87.w3", "model.layers.57.block_sparse_moe.experts.88.w3", "model.layers.57.block_sparse_moe.experts.89.w3", "model.layers.57.block_sparse_moe.experts.90.w3", "model.layers.57.block_sparse_moe.experts.91.w3", "model.layers.57.block_sparse_moe.experts.92.w3", "model.layers.57.block_sparse_moe.experts.93.w3", "model.layers.57.block_sparse_moe.experts.94.w3", "model.layers.57.block_sparse_moe.experts.95.w3", "model.layers.57.block_sparse_moe.experts.96.w3", "model.layers.57.block_sparse_moe.experts.97.w3", "model.layers.57.block_sparse_moe.experts.98.w3", "model.layers.57.block_sparse_moe.experts.99.w3", "model.layers.57.block_sparse_moe.experts.100.w3", "model.layers.57.block_sparse_moe.experts.101.w3", "model.layers.57.block_sparse_moe.experts.102.w3", "model.layers.57.block_sparse_moe.experts.103.w3", "model.layers.57.block_sparse_moe.experts.104.w3", "model.layers.57.block_sparse_moe.experts.105.w3", "model.layers.57.block_sparse_moe.experts.106.w3", "model.layers.57.block_sparse_moe.experts.107.w3", "model.layers.57.block_sparse_moe.experts.108.w3", "model.layers.57.block_sparse_moe.experts.109.w3", "model.layers.57.block_sparse_moe.experts.110.w3", "model.layers.57.block_sparse_moe.experts.111.w3", "model.layers.57.block_sparse_moe.experts.112.w3", "model.layers.57.block_sparse_moe.experts.113.w3", "model.layers.57.block_sparse_moe.experts.114.w3", "model.layers.57.block_sparse_moe.experts.115.w3", "model.layers.57.block_sparse_moe.experts.116.w3", "model.layers.57.block_sparse_moe.experts.117.w3", "model.layers.57.block_sparse_moe.experts.118.w3", "model.layers.57.block_sparse_moe.experts.119.w3", "model.layers.57.block_sparse_moe.experts.120.w3", "model.layers.57.block_sparse_moe.experts.121.w3", "model.layers.57.block_sparse_moe.experts.122.w3", "model.layers.57.block_sparse_moe.experts.123.w3", "model.layers.57.block_sparse_moe.experts.124.w3", "model.layers.57.block_sparse_moe.experts.125.w3", "model.layers.57.block_sparse_moe.experts.126.w3", "model.layers.57.block_sparse_moe.experts.127.w3", "model.layers.57.block_sparse_moe.experts.128.w3", "model.layers.57.block_sparse_moe.experts.129.w3", "model.layers.57.block_sparse_moe.experts.130.w3", "model.layers.57.block_sparse_moe.experts.131.w3", "model.layers.57.block_sparse_moe.experts.132.w3", "model.layers.57.block_sparse_moe.experts.133.w3", "model.layers.57.block_sparse_moe.experts.134.w3", "model.layers.57.block_sparse_moe.experts.135.w3", "model.layers.57.block_sparse_moe.experts.136.w3", "model.layers.57.block_sparse_moe.experts.137.w3", "model.layers.57.block_sparse_moe.experts.138.w3", "model.layers.57.block_sparse_moe.experts.139.w3", "model.layers.57.block_sparse_moe.experts.140.w3", "model.layers.57.block_sparse_moe.experts.141.w3", "model.layers.57.block_sparse_moe.experts.142.w3", "model.layers.57.block_sparse_moe.experts.143.w3", "model.layers.57.block_sparse_moe.experts.144.w3", "model.layers.57.block_sparse_moe.experts.145.w3", "model.layers.57.block_sparse_moe.experts.146.w3", "model.layers.57.block_sparse_moe.experts.147.w3", "model.layers.57.block_sparse_moe.experts.148.w3", "model.layers.57.block_sparse_moe.experts.149.w3", "model.layers.57.block_sparse_moe.experts.150.w3", "model.layers.57.block_sparse_moe.experts.151.w3", "model.layers.57.block_sparse_moe.experts.152.w3", "model.layers.57.block_sparse_moe.experts.153.w3", "model.layers.57.block_sparse_moe.experts.154.w3", "model.layers.57.block_sparse_moe.experts.155.w3", "model.layers.57.block_sparse_moe.experts.156.w3", "model.layers.57.block_sparse_moe.experts.157.w3", "model.layers.57.block_sparse_moe.experts.158.w3", "model.layers.57.block_sparse_moe.experts.159.w3", "model.layers.57.block_sparse_moe.experts.160.w3", "model.layers.57.block_sparse_moe.experts.161.w3", "model.layers.57.block_sparse_moe.experts.162.w3", "model.layers.57.block_sparse_moe.experts.163.w3", "model.layers.57.block_sparse_moe.experts.164.w3", "model.layers.57.block_sparse_moe.experts.165.w3", "model.layers.57.block_sparse_moe.experts.166.w3", "model.layers.57.block_sparse_moe.experts.167.w3", "model.layers.57.block_sparse_moe.experts.168.w3", "model.layers.57.block_sparse_moe.experts.169.w3", "model.layers.57.block_sparse_moe.experts.170.w3", "model.layers.57.block_sparse_moe.experts.171.w3", "model.layers.57.block_sparse_moe.experts.172.w3", "model.layers.57.block_sparse_moe.experts.173.w3", "model.layers.57.block_sparse_moe.experts.174.w3", "model.layers.57.block_sparse_moe.experts.175.w3", "model.layers.57.block_sparse_moe.experts.176.w3", "model.layers.57.block_sparse_moe.experts.177.w3", "model.layers.57.block_sparse_moe.experts.178.w3", "model.layers.57.block_sparse_moe.experts.179.w3", "model.layers.57.block_sparse_moe.experts.180.w3", "model.layers.57.block_sparse_moe.experts.181.w3", "model.layers.57.block_sparse_moe.experts.182.w3", "model.layers.57.block_sparse_moe.experts.183.w3", "model.layers.57.block_sparse_moe.experts.184.w3", "model.layers.57.block_sparse_moe.experts.185.w3", "model.layers.57.block_sparse_moe.experts.186.w3", "model.layers.57.block_sparse_moe.experts.187.w3", "model.layers.57.block_sparse_moe.experts.188.w3", "model.layers.57.block_sparse_moe.experts.189.w3", "model.layers.57.block_sparse_moe.experts.190.w3", "model.layers.57.block_sparse_moe.experts.191.w3", "model.layers.57.block_sparse_moe.experts.192.w3", "model.layers.57.block_sparse_moe.experts.193.w3", "model.layers.57.block_sparse_moe.experts.194.w3", "model.layers.57.block_sparse_moe.experts.195.w3", "model.layers.57.block_sparse_moe.experts.196.w3", "model.layers.57.block_sparse_moe.experts.197.w3", "model.layers.57.block_sparse_moe.experts.198.w3", "model.layers.57.block_sparse_moe.experts.199.w3", "model.layers.57.block_sparse_moe.experts.200.w3", "model.layers.57.block_sparse_moe.experts.201.w3", "model.layers.57.block_sparse_moe.experts.202.w3", "model.layers.57.block_sparse_moe.experts.203.w3", "model.layers.57.block_sparse_moe.experts.204.w3", "model.layers.57.block_sparse_moe.experts.205.w3", "model.layers.57.block_sparse_moe.experts.206.w3", "model.layers.57.block_sparse_moe.experts.207.w3", "model.layers.57.block_sparse_moe.experts.208.w3", "model.layers.57.block_sparse_moe.experts.209.w3", "model.layers.57.block_sparse_moe.experts.210.w3", "model.layers.57.block_sparse_moe.experts.211.w3", "model.layers.57.block_sparse_moe.experts.212.w3", "model.layers.57.block_sparse_moe.experts.213.w3", "model.layers.57.block_sparse_moe.experts.214.w3", "model.layers.57.block_sparse_moe.experts.215.w3", "model.layers.57.block_sparse_moe.experts.216.w3", "model.layers.57.block_sparse_moe.experts.217.w3", "model.layers.57.block_sparse_moe.experts.218.w3", "model.layers.57.block_sparse_moe.experts.219.w3", "model.layers.57.block_sparse_moe.experts.220.w3", "model.layers.57.block_sparse_moe.experts.221.w3", "model.layers.57.block_sparse_moe.experts.222.w3", "model.layers.57.block_sparse_moe.experts.223.w3", "model.layers.57.block_sparse_moe.experts.224.w3", "model.layers.57.block_sparse_moe.experts.225.w3", "model.layers.57.block_sparse_moe.experts.226.w3", "model.layers.57.block_sparse_moe.experts.227.w3", "model.layers.57.block_sparse_moe.experts.228.w3", "model.layers.57.block_sparse_moe.experts.229.w3", "model.layers.57.block_sparse_moe.experts.230.w3", "model.layers.57.block_sparse_moe.experts.231.w3", "model.layers.57.block_sparse_moe.experts.232.w3", "model.layers.57.block_sparse_moe.experts.233.w3", "model.layers.57.block_sparse_moe.experts.234.w3", "model.layers.57.block_sparse_moe.experts.235.w3", "model.layers.57.block_sparse_moe.experts.236.w3", "model.layers.57.block_sparse_moe.experts.237.w3", "model.layers.57.block_sparse_moe.experts.238.w3", "model.layers.57.block_sparse_moe.experts.239.w3", "model.layers.57.block_sparse_moe.experts.240.w3", "model.layers.57.block_sparse_moe.experts.241.w3", "model.layers.57.block_sparse_moe.experts.242.w3", "model.layers.57.block_sparse_moe.experts.243.w3", "model.layers.57.block_sparse_moe.experts.244.w3", "model.layers.57.block_sparse_moe.experts.245.w3", "model.layers.57.block_sparse_moe.experts.246.w3", "model.layers.57.block_sparse_moe.experts.247.w3", "model.layers.57.block_sparse_moe.experts.248.w3", "model.layers.57.block_sparse_moe.experts.249.w3", "model.layers.57.block_sparse_moe.experts.250.w3", "model.layers.57.block_sparse_moe.experts.251.w3", "model.layers.57.block_sparse_moe.experts.252.w3", "model.layers.57.block_sparse_moe.experts.253.w3", "model.layers.57.block_sparse_moe.experts.254.w3", "model.layers.57.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 7.927417755154709e-07, "dbits": 2415919104 } ] }, { "idx": 289, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w2", "model.layers.57.block_sparse_moe.experts.1.w2", "model.layers.57.block_sparse_moe.experts.2.w2", "model.layers.57.block_sparse_moe.experts.3.w2", "model.layers.57.block_sparse_moe.experts.4.w2", "model.layers.57.block_sparse_moe.experts.5.w2", "model.layers.57.block_sparse_moe.experts.6.w2", "model.layers.57.block_sparse_moe.experts.7.w2", "model.layers.57.block_sparse_moe.experts.8.w2", "model.layers.57.block_sparse_moe.experts.9.w2", "model.layers.57.block_sparse_moe.experts.10.w2", "model.layers.57.block_sparse_moe.experts.11.w2", "model.layers.57.block_sparse_moe.experts.12.w2", "model.layers.57.block_sparse_moe.experts.13.w2", "model.layers.57.block_sparse_moe.experts.14.w2", "model.layers.57.block_sparse_moe.experts.15.w2", "model.layers.57.block_sparse_moe.experts.16.w2", "model.layers.57.block_sparse_moe.experts.17.w2", "model.layers.57.block_sparse_moe.experts.18.w2", "model.layers.57.block_sparse_moe.experts.19.w2", "model.layers.57.block_sparse_moe.experts.20.w2", "model.layers.57.block_sparse_moe.experts.21.w2", "model.layers.57.block_sparse_moe.experts.22.w2", "model.layers.57.block_sparse_moe.experts.23.w2", "model.layers.57.block_sparse_moe.experts.24.w2", "model.layers.57.block_sparse_moe.experts.25.w2", "model.layers.57.block_sparse_moe.experts.26.w2", "model.layers.57.block_sparse_moe.experts.27.w2", "model.layers.57.block_sparse_moe.experts.28.w2", "model.layers.57.block_sparse_moe.experts.29.w2", "model.layers.57.block_sparse_moe.experts.30.w2", "model.layers.57.block_sparse_moe.experts.31.w2", "model.layers.57.block_sparse_moe.experts.32.w2", "model.layers.57.block_sparse_moe.experts.33.w2", "model.layers.57.block_sparse_moe.experts.34.w2", "model.layers.57.block_sparse_moe.experts.35.w2", "model.layers.57.block_sparse_moe.experts.36.w2", "model.layers.57.block_sparse_moe.experts.37.w2", "model.layers.57.block_sparse_moe.experts.38.w2", "model.layers.57.block_sparse_moe.experts.39.w2", "model.layers.57.block_sparse_moe.experts.40.w2", "model.layers.57.block_sparse_moe.experts.41.w2", "model.layers.57.block_sparse_moe.experts.42.w2", "model.layers.57.block_sparse_moe.experts.43.w2", "model.layers.57.block_sparse_moe.experts.44.w2", "model.layers.57.block_sparse_moe.experts.45.w2", "model.layers.57.block_sparse_moe.experts.46.w2", "model.layers.57.block_sparse_moe.experts.47.w2", "model.layers.57.block_sparse_moe.experts.48.w2", "model.layers.57.block_sparse_moe.experts.49.w2", "model.layers.57.block_sparse_moe.experts.50.w2", "model.layers.57.block_sparse_moe.experts.51.w2", "model.layers.57.block_sparse_moe.experts.52.w2", "model.layers.57.block_sparse_moe.experts.53.w2", "model.layers.57.block_sparse_moe.experts.54.w2", "model.layers.57.block_sparse_moe.experts.55.w2", "model.layers.57.block_sparse_moe.experts.56.w2", "model.layers.57.block_sparse_moe.experts.57.w2", "model.layers.57.block_sparse_moe.experts.58.w2", "model.layers.57.block_sparse_moe.experts.59.w2", "model.layers.57.block_sparse_moe.experts.60.w2", "model.layers.57.block_sparse_moe.experts.61.w2", "model.layers.57.block_sparse_moe.experts.62.w2", "model.layers.57.block_sparse_moe.experts.63.w2", "model.layers.57.block_sparse_moe.experts.64.w2", "model.layers.57.block_sparse_moe.experts.65.w2", "model.layers.57.block_sparse_moe.experts.66.w2", "model.layers.57.block_sparse_moe.experts.67.w2", "model.layers.57.block_sparse_moe.experts.68.w2", "model.layers.57.block_sparse_moe.experts.69.w2", "model.layers.57.block_sparse_moe.experts.70.w2", "model.layers.57.block_sparse_moe.experts.71.w2", "model.layers.57.block_sparse_moe.experts.72.w2", "model.layers.57.block_sparse_moe.experts.73.w2", "model.layers.57.block_sparse_moe.experts.74.w2", "model.layers.57.block_sparse_moe.experts.75.w2", "model.layers.57.block_sparse_moe.experts.76.w2", "model.layers.57.block_sparse_moe.experts.77.w2", "model.layers.57.block_sparse_moe.experts.78.w2", "model.layers.57.block_sparse_moe.experts.79.w2", "model.layers.57.block_sparse_moe.experts.80.w2", "model.layers.57.block_sparse_moe.experts.81.w2", "model.layers.57.block_sparse_moe.experts.82.w2", "model.layers.57.block_sparse_moe.experts.83.w2", "model.layers.57.block_sparse_moe.experts.84.w2", "model.layers.57.block_sparse_moe.experts.85.w2", "model.layers.57.block_sparse_moe.experts.86.w2", "model.layers.57.block_sparse_moe.experts.87.w2", "model.layers.57.block_sparse_moe.experts.88.w2", "model.layers.57.block_sparse_moe.experts.89.w2", "model.layers.57.block_sparse_moe.experts.90.w2", "model.layers.57.block_sparse_moe.experts.91.w2", "model.layers.57.block_sparse_moe.experts.92.w2", "model.layers.57.block_sparse_moe.experts.93.w2", "model.layers.57.block_sparse_moe.experts.94.w2", "model.layers.57.block_sparse_moe.experts.95.w2", "model.layers.57.block_sparse_moe.experts.96.w2", "model.layers.57.block_sparse_moe.experts.97.w2", "model.layers.57.block_sparse_moe.experts.98.w2", "model.layers.57.block_sparse_moe.experts.99.w2", "model.layers.57.block_sparse_moe.experts.100.w2", "model.layers.57.block_sparse_moe.experts.101.w2", "model.layers.57.block_sparse_moe.experts.102.w2", "model.layers.57.block_sparse_moe.experts.103.w2", "model.layers.57.block_sparse_moe.experts.104.w2", "model.layers.57.block_sparse_moe.experts.105.w2", "model.layers.57.block_sparse_moe.experts.106.w2", "model.layers.57.block_sparse_moe.experts.107.w2", "model.layers.57.block_sparse_moe.experts.108.w2", "model.layers.57.block_sparse_moe.experts.109.w2", "model.layers.57.block_sparse_moe.experts.110.w2", "model.layers.57.block_sparse_moe.experts.111.w2", "model.layers.57.block_sparse_moe.experts.112.w2", "model.layers.57.block_sparse_moe.experts.113.w2", "model.layers.57.block_sparse_moe.experts.114.w2", "model.layers.57.block_sparse_moe.experts.115.w2", "model.layers.57.block_sparse_moe.experts.116.w2", "model.layers.57.block_sparse_moe.experts.117.w2", "model.layers.57.block_sparse_moe.experts.118.w2", "model.layers.57.block_sparse_moe.experts.119.w2", "model.layers.57.block_sparse_moe.experts.120.w2", "model.layers.57.block_sparse_moe.experts.121.w2", "model.layers.57.block_sparse_moe.experts.122.w2", "model.layers.57.block_sparse_moe.experts.123.w2", "model.layers.57.block_sparse_moe.experts.124.w2", "model.layers.57.block_sparse_moe.experts.125.w2", "model.layers.57.block_sparse_moe.experts.126.w2", "model.layers.57.block_sparse_moe.experts.127.w2", "model.layers.57.block_sparse_moe.experts.128.w2", "model.layers.57.block_sparse_moe.experts.129.w2", "model.layers.57.block_sparse_moe.experts.130.w2", "model.layers.57.block_sparse_moe.experts.131.w2", "model.layers.57.block_sparse_moe.experts.132.w2", "model.layers.57.block_sparse_moe.experts.133.w2", "model.layers.57.block_sparse_moe.experts.134.w2", "model.layers.57.block_sparse_moe.experts.135.w2", "model.layers.57.block_sparse_moe.experts.136.w2", "model.layers.57.block_sparse_moe.experts.137.w2", "model.layers.57.block_sparse_moe.experts.138.w2", "model.layers.57.block_sparse_moe.experts.139.w2", "model.layers.57.block_sparse_moe.experts.140.w2", "model.layers.57.block_sparse_moe.experts.141.w2", "model.layers.57.block_sparse_moe.experts.142.w2", "model.layers.57.block_sparse_moe.experts.143.w2", "model.layers.57.block_sparse_moe.experts.144.w2", "model.layers.57.block_sparse_moe.experts.145.w2", "model.layers.57.block_sparse_moe.experts.146.w2", "model.layers.57.block_sparse_moe.experts.147.w2", "model.layers.57.block_sparse_moe.experts.148.w2", "model.layers.57.block_sparse_moe.experts.149.w2", "model.layers.57.block_sparse_moe.experts.150.w2", "model.layers.57.block_sparse_moe.experts.151.w2", "model.layers.57.block_sparse_moe.experts.152.w2", "model.layers.57.block_sparse_moe.experts.153.w2", "model.layers.57.block_sparse_moe.experts.154.w2", "model.layers.57.block_sparse_moe.experts.155.w2", "model.layers.57.block_sparse_moe.experts.156.w2", "model.layers.57.block_sparse_moe.experts.157.w2", "model.layers.57.block_sparse_moe.experts.158.w2", "model.layers.57.block_sparse_moe.experts.159.w2", "model.layers.57.block_sparse_moe.experts.160.w2", "model.layers.57.block_sparse_moe.experts.161.w2", "model.layers.57.block_sparse_moe.experts.162.w2", "model.layers.57.block_sparse_moe.experts.163.w2", "model.layers.57.block_sparse_moe.experts.164.w2", "model.layers.57.block_sparse_moe.experts.165.w2", "model.layers.57.block_sparse_moe.experts.166.w2", "model.layers.57.block_sparse_moe.experts.167.w2", "model.layers.57.block_sparse_moe.experts.168.w2", "model.layers.57.block_sparse_moe.experts.169.w2", "model.layers.57.block_sparse_moe.experts.170.w2", "model.layers.57.block_sparse_moe.experts.171.w2", "model.layers.57.block_sparse_moe.experts.172.w2", "model.layers.57.block_sparse_moe.experts.173.w2", "model.layers.57.block_sparse_moe.experts.174.w2", "model.layers.57.block_sparse_moe.experts.175.w2", "model.layers.57.block_sparse_moe.experts.176.w2", "model.layers.57.block_sparse_moe.experts.177.w2", "model.layers.57.block_sparse_moe.experts.178.w2", "model.layers.57.block_sparse_moe.experts.179.w2", "model.layers.57.block_sparse_moe.experts.180.w2", "model.layers.57.block_sparse_moe.experts.181.w2", "model.layers.57.block_sparse_moe.experts.182.w2", "model.layers.57.block_sparse_moe.experts.183.w2", "model.layers.57.block_sparse_moe.experts.184.w2", "model.layers.57.block_sparse_moe.experts.185.w2", "model.layers.57.block_sparse_moe.experts.186.w2", "model.layers.57.block_sparse_moe.experts.187.w2", "model.layers.57.block_sparse_moe.experts.188.w2", "model.layers.57.block_sparse_moe.experts.189.w2", "model.layers.57.block_sparse_moe.experts.190.w2", "model.layers.57.block_sparse_moe.experts.191.w2", "model.layers.57.block_sparse_moe.experts.192.w2", "model.layers.57.block_sparse_moe.experts.193.w2", "model.layers.57.block_sparse_moe.experts.194.w2", "model.layers.57.block_sparse_moe.experts.195.w2", "model.layers.57.block_sparse_moe.experts.196.w2", "model.layers.57.block_sparse_moe.experts.197.w2", "model.layers.57.block_sparse_moe.experts.198.w2", "model.layers.57.block_sparse_moe.experts.199.w2", "model.layers.57.block_sparse_moe.experts.200.w2", "model.layers.57.block_sparse_moe.experts.201.w2", "model.layers.57.block_sparse_moe.experts.202.w2", "model.layers.57.block_sparse_moe.experts.203.w2", "model.layers.57.block_sparse_moe.experts.204.w2", "model.layers.57.block_sparse_moe.experts.205.w2", "model.layers.57.block_sparse_moe.experts.206.w2", "model.layers.57.block_sparse_moe.experts.207.w2", "model.layers.57.block_sparse_moe.experts.208.w2", "model.layers.57.block_sparse_moe.experts.209.w2", "model.layers.57.block_sparse_moe.experts.210.w2", "model.layers.57.block_sparse_moe.experts.211.w2", "model.layers.57.block_sparse_moe.experts.212.w2", "model.layers.57.block_sparse_moe.experts.213.w2", "model.layers.57.block_sparse_moe.experts.214.w2", "model.layers.57.block_sparse_moe.experts.215.w2", "model.layers.57.block_sparse_moe.experts.216.w2", "model.layers.57.block_sparse_moe.experts.217.w2", "model.layers.57.block_sparse_moe.experts.218.w2", "model.layers.57.block_sparse_moe.experts.219.w2", "model.layers.57.block_sparse_moe.experts.220.w2", "model.layers.57.block_sparse_moe.experts.221.w2", "model.layers.57.block_sparse_moe.experts.222.w2", "model.layers.57.block_sparse_moe.experts.223.w2", "model.layers.57.block_sparse_moe.experts.224.w2", "model.layers.57.block_sparse_moe.experts.225.w2", "model.layers.57.block_sparse_moe.experts.226.w2", "model.layers.57.block_sparse_moe.experts.227.w2", "model.layers.57.block_sparse_moe.experts.228.w2", "model.layers.57.block_sparse_moe.experts.229.w2", "model.layers.57.block_sparse_moe.experts.230.w2", "model.layers.57.block_sparse_moe.experts.231.w2", "model.layers.57.block_sparse_moe.experts.232.w2", "model.layers.57.block_sparse_moe.experts.233.w2", "model.layers.57.block_sparse_moe.experts.234.w2", "model.layers.57.block_sparse_moe.experts.235.w2", "model.layers.57.block_sparse_moe.experts.236.w2", "model.layers.57.block_sparse_moe.experts.237.w2", "model.layers.57.block_sparse_moe.experts.238.w2", "model.layers.57.block_sparse_moe.experts.239.w2", "model.layers.57.block_sparse_moe.experts.240.w2", "model.layers.57.block_sparse_moe.experts.241.w2", "model.layers.57.block_sparse_moe.experts.242.w2", "model.layers.57.block_sparse_moe.experts.243.w2", "model.layers.57.block_sparse_moe.experts.244.w2", "model.layers.57.block_sparse_moe.experts.245.w2", "model.layers.57.block_sparse_moe.experts.246.w2", "model.layers.57.block_sparse_moe.experts.247.w2", "model.layers.57.block_sparse_moe.experts.248.w2", "model.layers.57.block_sparse_moe.experts.249.w2", "model.layers.57.block_sparse_moe.experts.250.w2", "model.layers.57.block_sparse_moe.experts.251.w2", "model.layers.57.block_sparse_moe.experts.252.w2", "model.layers.57.block_sparse_moe.experts.253.w2", "model.layers.57.block_sparse_moe.experts.254.w2", "model.layers.57.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 1.4890544116491489e-05, "dbits": 1207959552 } ] }, { "idx": 290, "layers": [ "model.layers.58.self_attn.q_proj" ], "candidates": [ { "dkld": -2.9677897691732236e-05, "dbits": 18874368 } ] }, { "idx": 291, "layers": [ "model.layers.58.self_attn.k_proj", "model.layers.58.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0008646221831440926, "dbits": 6291456 } ] }, { "idx": 292, "layers": [ "model.layers.58.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00014551673084498007, "dbits": 18874368 } ] }, { "idx": 293, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w1", "model.layers.58.block_sparse_moe.experts.1.w1", "model.layers.58.block_sparse_moe.experts.2.w1", "model.layers.58.block_sparse_moe.experts.3.w1", "model.layers.58.block_sparse_moe.experts.4.w1", "model.layers.58.block_sparse_moe.experts.5.w1", "model.layers.58.block_sparse_moe.experts.6.w1", "model.layers.58.block_sparse_moe.experts.7.w1", "model.layers.58.block_sparse_moe.experts.8.w1", "model.layers.58.block_sparse_moe.experts.9.w1", "model.layers.58.block_sparse_moe.experts.10.w1", "model.layers.58.block_sparse_moe.experts.11.w1", "model.layers.58.block_sparse_moe.experts.12.w1", "model.layers.58.block_sparse_moe.experts.13.w1", "model.layers.58.block_sparse_moe.experts.14.w1", "model.layers.58.block_sparse_moe.experts.15.w1", "model.layers.58.block_sparse_moe.experts.16.w1", "model.layers.58.block_sparse_moe.experts.17.w1", "model.layers.58.block_sparse_moe.experts.18.w1", "model.layers.58.block_sparse_moe.experts.19.w1", "model.layers.58.block_sparse_moe.experts.20.w1", "model.layers.58.block_sparse_moe.experts.21.w1", "model.layers.58.block_sparse_moe.experts.22.w1", "model.layers.58.block_sparse_moe.experts.23.w1", "model.layers.58.block_sparse_moe.experts.24.w1", "model.layers.58.block_sparse_moe.experts.25.w1", "model.layers.58.block_sparse_moe.experts.26.w1", "model.layers.58.block_sparse_moe.experts.27.w1", "model.layers.58.block_sparse_moe.experts.28.w1", "model.layers.58.block_sparse_moe.experts.29.w1", "model.layers.58.block_sparse_moe.experts.30.w1", "model.layers.58.block_sparse_moe.experts.31.w1", "model.layers.58.block_sparse_moe.experts.32.w1", "model.layers.58.block_sparse_moe.experts.33.w1", "model.layers.58.block_sparse_moe.experts.34.w1", "model.layers.58.block_sparse_moe.experts.35.w1", "model.layers.58.block_sparse_moe.experts.36.w1", "model.layers.58.block_sparse_moe.experts.37.w1", "model.layers.58.block_sparse_moe.experts.38.w1", "model.layers.58.block_sparse_moe.experts.39.w1", "model.layers.58.block_sparse_moe.experts.40.w1", "model.layers.58.block_sparse_moe.experts.41.w1", "model.layers.58.block_sparse_moe.experts.42.w1", "model.layers.58.block_sparse_moe.experts.43.w1", "model.layers.58.block_sparse_moe.experts.44.w1", "model.layers.58.block_sparse_moe.experts.45.w1", "model.layers.58.block_sparse_moe.experts.46.w1", "model.layers.58.block_sparse_moe.experts.47.w1", "model.layers.58.block_sparse_moe.experts.48.w1", "model.layers.58.block_sparse_moe.experts.49.w1", "model.layers.58.block_sparse_moe.experts.50.w1", "model.layers.58.block_sparse_moe.experts.51.w1", "model.layers.58.block_sparse_moe.experts.52.w1", "model.layers.58.block_sparse_moe.experts.53.w1", "model.layers.58.block_sparse_moe.experts.54.w1", "model.layers.58.block_sparse_moe.experts.55.w1", "model.layers.58.block_sparse_moe.experts.56.w1", "model.layers.58.block_sparse_moe.experts.57.w1", "model.layers.58.block_sparse_moe.experts.58.w1", "model.layers.58.block_sparse_moe.experts.59.w1", "model.layers.58.block_sparse_moe.experts.60.w1", "model.layers.58.block_sparse_moe.experts.61.w1", "model.layers.58.block_sparse_moe.experts.62.w1", "model.layers.58.block_sparse_moe.experts.63.w1", "model.layers.58.block_sparse_moe.experts.64.w1", "model.layers.58.block_sparse_moe.experts.65.w1", "model.layers.58.block_sparse_moe.experts.66.w1", "model.layers.58.block_sparse_moe.experts.67.w1", "model.layers.58.block_sparse_moe.experts.68.w1", "model.layers.58.block_sparse_moe.experts.69.w1", "model.layers.58.block_sparse_moe.experts.70.w1", "model.layers.58.block_sparse_moe.experts.71.w1", "model.layers.58.block_sparse_moe.experts.72.w1", "model.layers.58.block_sparse_moe.experts.73.w1", "model.layers.58.block_sparse_moe.experts.74.w1", "model.layers.58.block_sparse_moe.experts.75.w1", "model.layers.58.block_sparse_moe.experts.76.w1", "model.layers.58.block_sparse_moe.experts.77.w1", "model.layers.58.block_sparse_moe.experts.78.w1", "model.layers.58.block_sparse_moe.experts.79.w1", "model.layers.58.block_sparse_moe.experts.80.w1", "model.layers.58.block_sparse_moe.experts.81.w1", "model.layers.58.block_sparse_moe.experts.82.w1", "model.layers.58.block_sparse_moe.experts.83.w1", "model.layers.58.block_sparse_moe.experts.84.w1", "model.layers.58.block_sparse_moe.experts.85.w1", "model.layers.58.block_sparse_moe.experts.86.w1", "model.layers.58.block_sparse_moe.experts.87.w1", "model.layers.58.block_sparse_moe.experts.88.w1", "model.layers.58.block_sparse_moe.experts.89.w1", "model.layers.58.block_sparse_moe.experts.90.w1", "model.layers.58.block_sparse_moe.experts.91.w1", "model.layers.58.block_sparse_moe.experts.92.w1", "model.layers.58.block_sparse_moe.experts.93.w1", "model.layers.58.block_sparse_moe.experts.94.w1", "model.layers.58.block_sparse_moe.experts.95.w1", "model.layers.58.block_sparse_moe.experts.96.w1", "model.layers.58.block_sparse_moe.experts.97.w1", "model.layers.58.block_sparse_moe.experts.98.w1", "model.layers.58.block_sparse_moe.experts.99.w1", "model.layers.58.block_sparse_moe.experts.100.w1", "model.layers.58.block_sparse_moe.experts.101.w1", "model.layers.58.block_sparse_moe.experts.102.w1", "model.layers.58.block_sparse_moe.experts.103.w1", "model.layers.58.block_sparse_moe.experts.104.w1", "model.layers.58.block_sparse_moe.experts.105.w1", "model.layers.58.block_sparse_moe.experts.106.w1", "model.layers.58.block_sparse_moe.experts.107.w1", "model.layers.58.block_sparse_moe.experts.108.w1", "model.layers.58.block_sparse_moe.experts.109.w1", "model.layers.58.block_sparse_moe.experts.110.w1", "model.layers.58.block_sparse_moe.experts.111.w1", "model.layers.58.block_sparse_moe.experts.112.w1", "model.layers.58.block_sparse_moe.experts.113.w1", "model.layers.58.block_sparse_moe.experts.114.w1", "model.layers.58.block_sparse_moe.experts.115.w1", "model.layers.58.block_sparse_moe.experts.116.w1", "model.layers.58.block_sparse_moe.experts.117.w1", "model.layers.58.block_sparse_moe.experts.118.w1", "model.layers.58.block_sparse_moe.experts.119.w1", "model.layers.58.block_sparse_moe.experts.120.w1", "model.layers.58.block_sparse_moe.experts.121.w1", "model.layers.58.block_sparse_moe.experts.122.w1", "model.layers.58.block_sparse_moe.experts.123.w1", "model.layers.58.block_sparse_moe.experts.124.w1", "model.layers.58.block_sparse_moe.experts.125.w1", "model.layers.58.block_sparse_moe.experts.126.w1", "model.layers.58.block_sparse_moe.experts.127.w1", "model.layers.58.block_sparse_moe.experts.128.w1", "model.layers.58.block_sparse_moe.experts.129.w1", "model.layers.58.block_sparse_moe.experts.130.w1", "model.layers.58.block_sparse_moe.experts.131.w1", "model.layers.58.block_sparse_moe.experts.132.w1", "model.layers.58.block_sparse_moe.experts.133.w1", "model.layers.58.block_sparse_moe.experts.134.w1", "model.layers.58.block_sparse_moe.experts.135.w1", "model.layers.58.block_sparse_moe.experts.136.w1", "model.layers.58.block_sparse_moe.experts.137.w1", "model.layers.58.block_sparse_moe.experts.138.w1", "model.layers.58.block_sparse_moe.experts.139.w1", "model.layers.58.block_sparse_moe.experts.140.w1", "model.layers.58.block_sparse_moe.experts.141.w1", "model.layers.58.block_sparse_moe.experts.142.w1", "model.layers.58.block_sparse_moe.experts.143.w1", "model.layers.58.block_sparse_moe.experts.144.w1", "model.layers.58.block_sparse_moe.experts.145.w1", "model.layers.58.block_sparse_moe.experts.146.w1", "model.layers.58.block_sparse_moe.experts.147.w1", "model.layers.58.block_sparse_moe.experts.148.w1", "model.layers.58.block_sparse_moe.experts.149.w1", "model.layers.58.block_sparse_moe.experts.150.w1", "model.layers.58.block_sparse_moe.experts.151.w1", "model.layers.58.block_sparse_moe.experts.152.w1", "model.layers.58.block_sparse_moe.experts.153.w1", "model.layers.58.block_sparse_moe.experts.154.w1", "model.layers.58.block_sparse_moe.experts.155.w1", "model.layers.58.block_sparse_moe.experts.156.w1", "model.layers.58.block_sparse_moe.experts.157.w1", "model.layers.58.block_sparse_moe.experts.158.w1", "model.layers.58.block_sparse_moe.experts.159.w1", "model.layers.58.block_sparse_moe.experts.160.w1", "model.layers.58.block_sparse_moe.experts.161.w1", "model.layers.58.block_sparse_moe.experts.162.w1", "model.layers.58.block_sparse_moe.experts.163.w1", "model.layers.58.block_sparse_moe.experts.164.w1", "model.layers.58.block_sparse_moe.experts.165.w1", "model.layers.58.block_sparse_moe.experts.166.w1", "model.layers.58.block_sparse_moe.experts.167.w1", "model.layers.58.block_sparse_moe.experts.168.w1", "model.layers.58.block_sparse_moe.experts.169.w1", "model.layers.58.block_sparse_moe.experts.170.w1", "model.layers.58.block_sparse_moe.experts.171.w1", "model.layers.58.block_sparse_moe.experts.172.w1", "model.layers.58.block_sparse_moe.experts.173.w1", "model.layers.58.block_sparse_moe.experts.174.w1", "model.layers.58.block_sparse_moe.experts.175.w1", "model.layers.58.block_sparse_moe.experts.176.w1", "model.layers.58.block_sparse_moe.experts.177.w1", "model.layers.58.block_sparse_moe.experts.178.w1", "model.layers.58.block_sparse_moe.experts.179.w1", "model.layers.58.block_sparse_moe.experts.180.w1", "model.layers.58.block_sparse_moe.experts.181.w1", "model.layers.58.block_sparse_moe.experts.182.w1", "model.layers.58.block_sparse_moe.experts.183.w1", "model.layers.58.block_sparse_moe.experts.184.w1", "model.layers.58.block_sparse_moe.experts.185.w1", "model.layers.58.block_sparse_moe.experts.186.w1", "model.layers.58.block_sparse_moe.experts.187.w1", "model.layers.58.block_sparse_moe.experts.188.w1", "model.layers.58.block_sparse_moe.experts.189.w1", "model.layers.58.block_sparse_moe.experts.190.w1", "model.layers.58.block_sparse_moe.experts.191.w1", "model.layers.58.block_sparse_moe.experts.192.w1", "model.layers.58.block_sparse_moe.experts.193.w1", "model.layers.58.block_sparse_moe.experts.194.w1", "model.layers.58.block_sparse_moe.experts.195.w1", "model.layers.58.block_sparse_moe.experts.196.w1", "model.layers.58.block_sparse_moe.experts.197.w1", "model.layers.58.block_sparse_moe.experts.198.w1", "model.layers.58.block_sparse_moe.experts.199.w1", "model.layers.58.block_sparse_moe.experts.200.w1", "model.layers.58.block_sparse_moe.experts.201.w1", "model.layers.58.block_sparse_moe.experts.202.w1", "model.layers.58.block_sparse_moe.experts.203.w1", "model.layers.58.block_sparse_moe.experts.204.w1", "model.layers.58.block_sparse_moe.experts.205.w1", "model.layers.58.block_sparse_moe.experts.206.w1", "model.layers.58.block_sparse_moe.experts.207.w1", "model.layers.58.block_sparse_moe.experts.208.w1", "model.layers.58.block_sparse_moe.experts.209.w1", "model.layers.58.block_sparse_moe.experts.210.w1", "model.layers.58.block_sparse_moe.experts.211.w1", "model.layers.58.block_sparse_moe.experts.212.w1", "model.layers.58.block_sparse_moe.experts.213.w1", "model.layers.58.block_sparse_moe.experts.214.w1", "model.layers.58.block_sparse_moe.experts.215.w1", "model.layers.58.block_sparse_moe.experts.216.w1", "model.layers.58.block_sparse_moe.experts.217.w1", "model.layers.58.block_sparse_moe.experts.218.w1", "model.layers.58.block_sparse_moe.experts.219.w1", "model.layers.58.block_sparse_moe.experts.220.w1", "model.layers.58.block_sparse_moe.experts.221.w1", "model.layers.58.block_sparse_moe.experts.222.w1", "model.layers.58.block_sparse_moe.experts.223.w1", "model.layers.58.block_sparse_moe.experts.224.w1", "model.layers.58.block_sparse_moe.experts.225.w1", "model.layers.58.block_sparse_moe.experts.226.w1", "model.layers.58.block_sparse_moe.experts.227.w1", "model.layers.58.block_sparse_moe.experts.228.w1", "model.layers.58.block_sparse_moe.experts.229.w1", "model.layers.58.block_sparse_moe.experts.230.w1", "model.layers.58.block_sparse_moe.experts.231.w1", "model.layers.58.block_sparse_moe.experts.232.w1", "model.layers.58.block_sparse_moe.experts.233.w1", "model.layers.58.block_sparse_moe.experts.234.w1", "model.layers.58.block_sparse_moe.experts.235.w1", "model.layers.58.block_sparse_moe.experts.236.w1", "model.layers.58.block_sparse_moe.experts.237.w1", "model.layers.58.block_sparse_moe.experts.238.w1", "model.layers.58.block_sparse_moe.experts.239.w1", "model.layers.58.block_sparse_moe.experts.240.w1", "model.layers.58.block_sparse_moe.experts.241.w1", "model.layers.58.block_sparse_moe.experts.242.w1", "model.layers.58.block_sparse_moe.experts.243.w1", "model.layers.58.block_sparse_moe.experts.244.w1", "model.layers.58.block_sparse_moe.experts.245.w1", "model.layers.58.block_sparse_moe.experts.246.w1", "model.layers.58.block_sparse_moe.experts.247.w1", "model.layers.58.block_sparse_moe.experts.248.w1", "model.layers.58.block_sparse_moe.experts.249.w1", "model.layers.58.block_sparse_moe.experts.250.w1", "model.layers.58.block_sparse_moe.experts.251.w1", "model.layers.58.block_sparse_moe.experts.252.w1", "model.layers.58.block_sparse_moe.experts.253.w1", "model.layers.58.block_sparse_moe.experts.254.w1", "model.layers.58.block_sparse_moe.experts.255.w1", "model.layers.58.block_sparse_moe.experts.0.w3", "model.layers.58.block_sparse_moe.experts.1.w3", "model.layers.58.block_sparse_moe.experts.2.w3", "model.layers.58.block_sparse_moe.experts.3.w3", "model.layers.58.block_sparse_moe.experts.4.w3", "model.layers.58.block_sparse_moe.experts.5.w3", "model.layers.58.block_sparse_moe.experts.6.w3", "model.layers.58.block_sparse_moe.experts.7.w3", "model.layers.58.block_sparse_moe.experts.8.w3", "model.layers.58.block_sparse_moe.experts.9.w3", "model.layers.58.block_sparse_moe.experts.10.w3", "model.layers.58.block_sparse_moe.experts.11.w3", "model.layers.58.block_sparse_moe.experts.12.w3", "model.layers.58.block_sparse_moe.experts.13.w3", "model.layers.58.block_sparse_moe.experts.14.w3", "model.layers.58.block_sparse_moe.experts.15.w3", "model.layers.58.block_sparse_moe.experts.16.w3", "model.layers.58.block_sparse_moe.experts.17.w3", "model.layers.58.block_sparse_moe.experts.18.w3", "model.layers.58.block_sparse_moe.experts.19.w3", "model.layers.58.block_sparse_moe.experts.20.w3", "model.layers.58.block_sparse_moe.experts.21.w3", "model.layers.58.block_sparse_moe.experts.22.w3", "model.layers.58.block_sparse_moe.experts.23.w3", "model.layers.58.block_sparse_moe.experts.24.w3", "model.layers.58.block_sparse_moe.experts.25.w3", "model.layers.58.block_sparse_moe.experts.26.w3", "model.layers.58.block_sparse_moe.experts.27.w3", "model.layers.58.block_sparse_moe.experts.28.w3", "model.layers.58.block_sparse_moe.experts.29.w3", "model.layers.58.block_sparse_moe.experts.30.w3", "model.layers.58.block_sparse_moe.experts.31.w3", "model.layers.58.block_sparse_moe.experts.32.w3", "model.layers.58.block_sparse_moe.experts.33.w3", "model.layers.58.block_sparse_moe.experts.34.w3", "model.layers.58.block_sparse_moe.experts.35.w3", "model.layers.58.block_sparse_moe.experts.36.w3", "model.layers.58.block_sparse_moe.experts.37.w3", "model.layers.58.block_sparse_moe.experts.38.w3", "model.layers.58.block_sparse_moe.experts.39.w3", "model.layers.58.block_sparse_moe.experts.40.w3", "model.layers.58.block_sparse_moe.experts.41.w3", "model.layers.58.block_sparse_moe.experts.42.w3", "model.layers.58.block_sparse_moe.experts.43.w3", "model.layers.58.block_sparse_moe.experts.44.w3", "model.layers.58.block_sparse_moe.experts.45.w3", "model.layers.58.block_sparse_moe.experts.46.w3", "model.layers.58.block_sparse_moe.experts.47.w3", "model.layers.58.block_sparse_moe.experts.48.w3", "model.layers.58.block_sparse_moe.experts.49.w3", "model.layers.58.block_sparse_moe.experts.50.w3", "model.layers.58.block_sparse_moe.experts.51.w3", "model.layers.58.block_sparse_moe.experts.52.w3", "model.layers.58.block_sparse_moe.experts.53.w3", "model.layers.58.block_sparse_moe.experts.54.w3", "model.layers.58.block_sparse_moe.experts.55.w3", "model.layers.58.block_sparse_moe.experts.56.w3", "model.layers.58.block_sparse_moe.experts.57.w3", "model.layers.58.block_sparse_moe.experts.58.w3", "model.layers.58.block_sparse_moe.experts.59.w3", "model.layers.58.block_sparse_moe.experts.60.w3", "model.layers.58.block_sparse_moe.experts.61.w3", "model.layers.58.block_sparse_moe.experts.62.w3", "model.layers.58.block_sparse_moe.experts.63.w3", "model.layers.58.block_sparse_moe.experts.64.w3", "model.layers.58.block_sparse_moe.experts.65.w3", "model.layers.58.block_sparse_moe.experts.66.w3", "model.layers.58.block_sparse_moe.experts.67.w3", "model.layers.58.block_sparse_moe.experts.68.w3", "model.layers.58.block_sparse_moe.experts.69.w3", "model.layers.58.block_sparse_moe.experts.70.w3", "model.layers.58.block_sparse_moe.experts.71.w3", "model.layers.58.block_sparse_moe.experts.72.w3", "model.layers.58.block_sparse_moe.experts.73.w3", "model.layers.58.block_sparse_moe.experts.74.w3", "model.layers.58.block_sparse_moe.experts.75.w3", "model.layers.58.block_sparse_moe.experts.76.w3", "model.layers.58.block_sparse_moe.experts.77.w3", "model.layers.58.block_sparse_moe.experts.78.w3", "model.layers.58.block_sparse_moe.experts.79.w3", "model.layers.58.block_sparse_moe.experts.80.w3", "model.layers.58.block_sparse_moe.experts.81.w3", "model.layers.58.block_sparse_moe.experts.82.w3", "model.layers.58.block_sparse_moe.experts.83.w3", "model.layers.58.block_sparse_moe.experts.84.w3", "model.layers.58.block_sparse_moe.experts.85.w3", "model.layers.58.block_sparse_moe.experts.86.w3", "model.layers.58.block_sparse_moe.experts.87.w3", "model.layers.58.block_sparse_moe.experts.88.w3", "model.layers.58.block_sparse_moe.experts.89.w3", "model.layers.58.block_sparse_moe.experts.90.w3", "model.layers.58.block_sparse_moe.experts.91.w3", "model.layers.58.block_sparse_moe.experts.92.w3", "model.layers.58.block_sparse_moe.experts.93.w3", "model.layers.58.block_sparse_moe.experts.94.w3", "model.layers.58.block_sparse_moe.experts.95.w3", "model.layers.58.block_sparse_moe.experts.96.w3", "model.layers.58.block_sparse_moe.experts.97.w3", "model.layers.58.block_sparse_moe.experts.98.w3", "model.layers.58.block_sparse_moe.experts.99.w3", "model.layers.58.block_sparse_moe.experts.100.w3", "model.layers.58.block_sparse_moe.experts.101.w3", "model.layers.58.block_sparse_moe.experts.102.w3", "model.layers.58.block_sparse_moe.experts.103.w3", "model.layers.58.block_sparse_moe.experts.104.w3", "model.layers.58.block_sparse_moe.experts.105.w3", "model.layers.58.block_sparse_moe.experts.106.w3", "model.layers.58.block_sparse_moe.experts.107.w3", "model.layers.58.block_sparse_moe.experts.108.w3", "model.layers.58.block_sparse_moe.experts.109.w3", "model.layers.58.block_sparse_moe.experts.110.w3", "model.layers.58.block_sparse_moe.experts.111.w3", "model.layers.58.block_sparse_moe.experts.112.w3", "model.layers.58.block_sparse_moe.experts.113.w3", "model.layers.58.block_sparse_moe.experts.114.w3", "model.layers.58.block_sparse_moe.experts.115.w3", "model.layers.58.block_sparse_moe.experts.116.w3", "model.layers.58.block_sparse_moe.experts.117.w3", "model.layers.58.block_sparse_moe.experts.118.w3", "model.layers.58.block_sparse_moe.experts.119.w3", "model.layers.58.block_sparse_moe.experts.120.w3", "model.layers.58.block_sparse_moe.experts.121.w3", "model.layers.58.block_sparse_moe.experts.122.w3", "model.layers.58.block_sparse_moe.experts.123.w3", "model.layers.58.block_sparse_moe.experts.124.w3", "model.layers.58.block_sparse_moe.experts.125.w3", "model.layers.58.block_sparse_moe.experts.126.w3", "model.layers.58.block_sparse_moe.experts.127.w3", "model.layers.58.block_sparse_moe.experts.128.w3", "model.layers.58.block_sparse_moe.experts.129.w3", "model.layers.58.block_sparse_moe.experts.130.w3", "model.layers.58.block_sparse_moe.experts.131.w3", "model.layers.58.block_sparse_moe.experts.132.w3", "model.layers.58.block_sparse_moe.experts.133.w3", "model.layers.58.block_sparse_moe.experts.134.w3", "model.layers.58.block_sparse_moe.experts.135.w3", "model.layers.58.block_sparse_moe.experts.136.w3", "model.layers.58.block_sparse_moe.experts.137.w3", "model.layers.58.block_sparse_moe.experts.138.w3", "model.layers.58.block_sparse_moe.experts.139.w3", "model.layers.58.block_sparse_moe.experts.140.w3", "model.layers.58.block_sparse_moe.experts.141.w3", "model.layers.58.block_sparse_moe.experts.142.w3", "model.layers.58.block_sparse_moe.experts.143.w3", "model.layers.58.block_sparse_moe.experts.144.w3", "model.layers.58.block_sparse_moe.experts.145.w3", "model.layers.58.block_sparse_moe.experts.146.w3", "model.layers.58.block_sparse_moe.experts.147.w3", "model.layers.58.block_sparse_moe.experts.148.w3", "model.layers.58.block_sparse_moe.experts.149.w3", "model.layers.58.block_sparse_moe.experts.150.w3", "model.layers.58.block_sparse_moe.experts.151.w3", "model.layers.58.block_sparse_moe.experts.152.w3", "model.layers.58.block_sparse_moe.experts.153.w3", "model.layers.58.block_sparse_moe.experts.154.w3", "model.layers.58.block_sparse_moe.experts.155.w3", "model.layers.58.block_sparse_moe.experts.156.w3", "model.layers.58.block_sparse_moe.experts.157.w3", "model.layers.58.block_sparse_moe.experts.158.w3", "model.layers.58.block_sparse_moe.experts.159.w3", "model.layers.58.block_sparse_moe.experts.160.w3", "model.layers.58.block_sparse_moe.experts.161.w3", "model.layers.58.block_sparse_moe.experts.162.w3", "model.layers.58.block_sparse_moe.experts.163.w3", "model.layers.58.block_sparse_moe.experts.164.w3", "model.layers.58.block_sparse_moe.experts.165.w3", "model.layers.58.block_sparse_moe.experts.166.w3", "model.layers.58.block_sparse_moe.experts.167.w3", "model.layers.58.block_sparse_moe.experts.168.w3", "model.layers.58.block_sparse_moe.experts.169.w3", "model.layers.58.block_sparse_moe.experts.170.w3", "model.layers.58.block_sparse_moe.experts.171.w3", "model.layers.58.block_sparse_moe.experts.172.w3", "model.layers.58.block_sparse_moe.experts.173.w3", "model.layers.58.block_sparse_moe.experts.174.w3", "model.layers.58.block_sparse_moe.experts.175.w3", "model.layers.58.block_sparse_moe.experts.176.w3", "model.layers.58.block_sparse_moe.experts.177.w3", "model.layers.58.block_sparse_moe.experts.178.w3", "model.layers.58.block_sparse_moe.experts.179.w3", "model.layers.58.block_sparse_moe.experts.180.w3", "model.layers.58.block_sparse_moe.experts.181.w3", "model.layers.58.block_sparse_moe.experts.182.w3", "model.layers.58.block_sparse_moe.experts.183.w3", "model.layers.58.block_sparse_moe.experts.184.w3", "model.layers.58.block_sparse_moe.experts.185.w3", "model.layers.58.block_sparse_moe.experts.186.w3", "model.layers.58.block_sparse_moe.experts.187.w3", "model.layers.58.block_sparse_moe.experts.188.w3", "model.layers.58.block_sparse_moe.experts.189.w3", "model.layers.58.block_sparse_moe.experts.190.w3", "model.layers.58.block_sparse_moe.experts.191.w3", "model.layers.58.block_sparse_moe.experts.192.w3", "model.layers.58.block_sparse_moe.experts.193.w3", "model.layers.58.block_sparse_moe.experts.194.w3", "model.layers.58.block_sparse_moe.experts.195.w3", "model.layers.58.block_sparse_moe.experts.196.w3", "model.layers.58.block_sparse_moe.experts.197.w3", "model.layers.58.block_sparse_moe.experts.198.w3", "model.layers.58.block_sparse_moe.experts.199.w3", "model.layers.58.block_sparse_moe.experts.200.w3", "model.layers.58.block_sparse_moe.experts.201.w3", "model.layers.58.block_sparse_moe.experts.202.w3", "model.layers.58.block_sparse_moe.experts.203.w3", "model.layers.58.block_sparse_moe.experts.204.w3", "model.layers.58.block_sparse_moe.experts.205.w3", "model.layers.58.block_sparse_moe.experts.206.w3", "model.layers.58.block_sparse_moe.experts.207.w3", "model.layers.58.block_sparse_moe.experts.208.w3", "model.layers.58.block_sparse_moe.experts.209.w3", "model.layers.58.block_sparse_moe.experts.210.w3", "model.layers.58.block_sparse_moe.experts.211.w3", "model.layers.58.block_sparse_moe.experts.212.w3", "model.layers.58.block_sparse_moe.experts.213.w3", "model.layers.58.block_sparse_moe.experts.214.w3", "model.layers.58.block_sparse_moe.experts.215.w3", "model.layers.58.block_sparse_moe.experts.216.w3", "model.layers.58.block_sparse_moe.experts.217.w3", "model.layers.58.block_sparse_moe.experts.218.w3", "model.layers.58.block_sparse_moe.experts.219.w3", "model.layers.58.block_sparse_moe.experts.220.w3", "model.layers.58.block_sparse_moe.experts.221.w3", "model.layers.58.block_sparse_moe.experts.222.w3", "model.layers.58.block_sparse_moe.experts.223.w3", "model.layers.58.block_sparse_moe.experts.224.w3", "model.layers.58.block_sparse_moe.experts.225.w3", "model.layers.58.block_sparse_moe.experts.226.w3", "model.layers.58.block_sparse_moe.experts.227.w3", "model.layers.58.block_sparse_moe.experts.228.w3", "model.layers.58.block_sparse_moe.experts.229.w3", "model.layers.58.block_sparse_moe.experts.230.w3", "model.layers.58.block_sparse_moe.experts.231.w3", "model.layers.58.block_sparse_moe.experts.232.w3", "model.layers.58.block_sparse_moe.experts.233.w3", "model.layers.58.block_sparse_moe.experts.234.w3", "model.layers.58.block_sparse_moe.experts.235.w3", "model.layers.58.block_sparse_moe.experts.236.w3", "model.layers.58.block_sparse_moe.experts.237.w3", "model.layers.58.block_sparse_moe.experts.238.w3", "model.layers.58.block_sparse_moe.experts.239.w3", "model.layers.58.block_sparse_moe.experts.240.w3", "model.layers.58.block_sparse_moe.experts.241.w3", "model.layers.58.block_sparse_moe.experts.242.w3", "model.layers.58.block_sparse_moe.experts.243.w3", "model.layers.58.block_sparse_moe.experts.244.w3", "model.layers.58.block_sparse_moe.experts.245.w3", "model.layers.58.block_sparse_moe.experts.246.w3", "model.layers.58.block_sparse_moe.experts.247.w3", "model.layers.58.block_sparse_moe.experts.248.w3", "model.layers.58.block_sparse_moe.experts.249.w3", "model.layers.58.block_sparse_moe.experts.250.w3", "model.layers.58.block_sparse_moe.experts.251.w3", "model.layers.58.block_sparse_moe.experts.252.w3", "model.layers.58.block_sparse_moe.experts.253.w3", "model.layers.58.block_sparse_moe.experts.254.w3", "model.layers.58.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 7.391888648271283e-05, "dbits": 2415919104 } ] }, { "idx": 294, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w2", "model.layers.58.block_sparse_moe.experts.1.w2", "model.layers.58.block_sparse_moe.experts.2.w2", "model.layers.58.block_sparse_moe.experts.3.w2", "model.layers.58.block_sparse_moe.experts.4.w2", "model.layers.58.block_sparse_moe.experts.5.w2", "model.layers.58.block_sparse_moe.experts.6.w2", "model.layers.58.block_sparse_moe.experts.7.w2", "model.layers.58.block_sparse_moe.experts.8.w2", "model.layers.58.block_sparse_moe.experts.9.w2", "model.layers.58.block_sparse_moe.experts.10.w2", "model.layers.58.block_sparse_moe.experts.11.w2", "model.layers.58.block_sparse_moe.experts.12.w2", "model.layers.58.block_sparse_moe.experts.13.w2", "model.layers.58.block_sparse_moe.experts.14.w2", "model.layers.58.block_sparse_moe.experts.15.w2", "model.layers.58.block_sparse_moe.experts.16.w2", "model.layers.58.block_sparse_moe.experts.17.w2", "model.layers.58.block_sparse_moe.experts.18.w2", "model.layers.58.block_sparse_moe.experts.19.w2", "model.layers.58.block_sparse_moe.experts.20.w2", "model.layers.58.block_sparse_moe.experts.21.w2", "model.layers.58.block_sparse_moe.experts.22.w2", "model.layers.58.block_sparse_moe.experts.23.w2", "model.layers.58.block_sparse_moe.experts.24.w2", "model.layers.58.block_sparse_moe.experts.25.w2", "model.layers.58.block_sparse_moe.experts.26.w2", "model.layers.58.block_sparse_moe.experts.27.w2", "model.layers.58.block_sparse_moe.experts.28.w2", "model.layers.58.block_sparse_moe.experts.29.w2", "model.layers.58.block_sparse_moe.experts.30.w2", "model.layers.58.block_sparse_moe.experts.31.w2", "model.layers.58.block_sparse_moe.experts.32.w2", "model.layers.58.block_sparse_moe.experts.33.w2", "model.layers.58.block_sparse_moe.experts.34.w2", "model.layers.58.block_sparse_moe.experts.35.w2", "model.layers.58.block_sparse_moe.experts.36.w2", "model.layers.58.block_sparse_moe.experts.37.w2", "model.layers.58.block_sparse_moe.experts.38.w2", "model.layers.58.block_sparse_moe.experts.39.w2", "model.layers.58.block_sparse_moe.experts.40.w2", "model.layers.58.block_sparse_moe.experts.41.w2", "model.layers.58.block_sparse_moe.experts.42.w2", "model.layers.58.block_sparse_moe.experts.43.w2", "model.layers.58.block_sparse_moe.experts.44.w2", "model.layers.58.block_sparse_moe.experts.45.w2", "model.layers.58.block_sparse_moe.experts.46.w2", "model.layers.58.block_sparse_moe.experts.47.w2", "model.layers.58.block_sparse_moe.experts.48.w2", "model.layers.58.block_sparse_moe.experts.49.w2", "model.layers.58.block_sparse_moe.experts.50.w2", "model.layers.58.block_sparse_moe.experts.51.w2", "model.layers.58.block_sparse_moe.experts.52.w2", "model.layers.58.block_sparse_moe.experts.53.w2", "model.layers.58.block_sparse_moe.experts.54.w2", "model.layers.58.block_sparse_moe.experts.55.w2", "model.layers.58.block_sparse_moe.experts.56.w2", "model.layers.58.block_sparse_moe.experts.57.w2", "model.layers.58.block_sparse_moe.experts.58.w2", "model.layers.58.block_sparse_moe.experts.59.w2", "model.layers.58.block_sparse_moe.experts.60.w2", "model.layers.58.block_sparse_moe.experts.61.w2", "model.layers.58.block_sparse_moe.experts.62.w2", "model.layers.58.block_sparse_moe.experts.63.w2", "model.layers.58.block_sparse_moe.experts.64.w2", "model.layers.58.block_sparse_moe.experts.65.w2", "model.layers.58.block_sparse_moe.experts.66.w2", "model.layers.58.block_sparse_moe.experts.67.w2", "model.layers.58.block_sparse_moe.experts.68.w2", "model.layers.58.block_sparse_moe.experts.69.w2", "model.layers.58.block_sparse_moe.experts.70.w2", "model.layers.58.block_sparse_moe.experts.71.w2", "model.layers.58.block_sparse_moe.experts.72.w2", "model.layers.58.block_sparse_moe.experts.73.w2", "model.layers.58.block_sparse_moe.experts.74.w2", "model.layers.58.block_sparse_moe.experts.75.w2", "model.layers.58.block_sparse_moe.experts.76.w2", "model.layers.58.block_sparse_moe.experts.77.w2", "model.layers.58.block_sparse_moe.experts.78.w2", "model.layers.58.block_sparse_moe.experts.79.w2", "model.layers.58.block_sparse_moe.experts.80.w2", "model.layers.58.block_sparse_moe.experts.81.w2", "model.layers.58.block_sparse_moe.experts.82.w2", "model.layers.58.block_sparse_moe.experts.83.w2", "model.layers.58.block_sparse_moe.experts.84.w2", "model.layers.58.block_sparse_moe.experts.85.w2", "model.layers.58.block_sparse_moe.experts.86.w2", "model.layers.58.block_sparse_moe.experts.87.w2", "model.layers.58.block_sparse_moe.experts.88.w2", "model.layers.58.block_sparse_moe.experts.89.w2", "model.layers.58.block_sparse_moe.experts.90.w2", "model.layers.58.block_sparse_moe.experts.91.w2", "model.layers.58.block_sparse_moe.experts.92.w2", "model.layers.58.block_sparse_moe.experts.93.w2", "model.layers.58.block_sparse_moe.experts.94.w2", "model.layers.58.block_sparse_moe.experts.95.w2", "model.layers.58.block_sparse_moe.experts.96.w2", "model.layers.58.block_sparse_moe.experts.97.w2", "model.layers.58.block_sparse_moe.experts.98.w2", "model.layers.58.block_sparse_moe.experts.99.w2", "model.layers.58.block_sparse_moe.experts.100.w2", "model.layers.58.block_sparse_moe.experts.101.w2", "model.layers.58.block_sparse_moe.experts.102.w2", "model.layers.58.block_sparse_moe.experts.103.w2", "model.layers.58.block_sparse_moe.experts.104.w2", "model.layers.58.block_sparse_moe.experts.105.w2", "model.layers.58.block_sparse_moe.experts.106.w2", "model.layers.58.block_sparse_moe.experts.107.w2", "model.layers.58.block_sparse_moe.experts.108.w2", "model.layers.58.block_sparse_moe.experts.109.w2", "model.layers.58.block_sparse_moe.experts.110.w2", "model.layers.58.block_sparse_moe.experts.111.w2", "model.layers.58.block_sparse_moe.experts.112.w2", "model.layers.58.block_sparse_moe.experts.113.w2", "model.layers.58.block_sparse_moe.experts.114.w2", "model.layers.58.block_sparse_moe.experts.115.w2", "model.layers.58.block_sparse_moe.experts.116.w2", "model.layers.58.block_sparse_moe.experts.117.w2", "model.layers.58.block_sparse_moe.experts.118.w2", "model.layers.58.block_sparse_moe.experts.119.w2", "model.layers.58.block_sparse_moe.experts.120.w2", "model.layers.58.block_sparse_moe.experts.121.w2", "model.layers.58.block_sparse_moe.experts.122.w2", "model.layers.58.block_sparse_moe.experts.123.w2", "model.layers.58.block_sparse_moe.experts.124.w2", "model.layers.58.block_sparse_moe.experts.125.w2", "model.layers.58.block_sparse_moe.experts.126.w2", "model.layers.58.block_sparse_moe.experts.127.w2", "model.layers.58.block_sparse_moe.experts.128.w2", "model.layers.58.block_sparse_moe.experts.129.w2", "model.layers.58.block_sparse_moe.experts.130.w2", "model.layers.58.block_sparse_moe.experts.131.w2", "model.layers.58.block_sparse_moe.experts.132.w2", "model.layers.58.block_sparse_moe.experts.133.w2", "model.layers.58.block_sparse_moe.experts.134.w2", "model.layers.58.block_sparse_moe.experts.135.w2", "model.layers.58.block_sparse_moe.experts.136.w2", "model.layers.58.block_sparse_moe.experts.137.w2", "model.layers.58.block_sparse_moe.experts.138.w2", "model.layers.58.block_sparse_moe.experts.139.w2", "model.layers.58.block_sparse_moe.experts.140.w2", "model.layers.58.block_sparse_moe.experts.141.w2", "model.layers.58.block_sparse_moe.experts.142.w2", "model.layers.58.block_sparse_moe.experts.143.w2", "model.layers.58.block_sparse_moe.experts.144.w2", "model.layers.58.block_sparse_moe.experts.145.w2", "model.layers.58.block_sparse_moe.experts.146.w2", "model.layers.58.block_sparse_moe.experts.147.w2", "model.layers.58.block_sparse_moe.experts.148.w2", "model.layers.58.block_sparse_moe.experts.149.w2", "model.layers.58.block_sparse_moe.experts.150.w2", "model.layers.58.block_sparse_moe.experts.151.w2", "model.layers.58.block_sparse_moe.experts.152.w2", "model.layers.58.block_sparse_moe.experts.153.w2", "model.layers.58.block_sparse_moe.experts.154.w2", "model.layers.58.block_sparse_moe.experts.155.w2", "model.layers.58.block_sparse_moe.experts.156.w2", "model.layers.58.block_sparse_moe.experts.157.w2", "model.layers.58.block_sparse_moe.experts.158.w2", "model.layers.58.block_sparse_moe.experts.159.w2", "model.layers.58.block_sparse_moe.experts.160.w2", "model.layers.58.block_sparse_moe.experts.161.w2", "model.layers.58.block_sparse_moe.experts.162.w2", "model.layers.58.block_sparse_moe.experts.163.w2", "model.layers.58.block_sparse_moe.experts.164.w2", "model.layers.58.block_sparse_moe.experts.165.w2", "model.layers.58.block_sparse_moe.experts.166.w2", "model.layers.58.block_sparse_moe.experts.167.w2", "model.layers.58.block_sparse_moe.experts.168.w2", "model.layers.58.block_sparse_moe.experts.169.w2", "model.layers.58.block_sparse_moe.experts.170.w2", "model.layers.58.block_sparse_moe.experts.171.w2", "model.layers.58.block_sparse_moe.experts.172.w2", "model.layers.58.block_sparse_moe.experts.173.w2", "model.layers.58.block_sparse_moe.experts.174.w2", "model.layers.58.block_sparse_moe.experts.175.w2", "model.layers.58.block_sparse_moe.experts.176.w2", "model.layers.58.block_sparse_moe.experts.177.w2", "model.layers.58.block_sparse_moe.experts.178.w2", "model.layers.58.block_sparse_moe.experts.179.w2", "model.layers.58.block_sparse_moe.experts.180.w2", "model.layers.58.block_sparse_moe.experts.181.w2", "model.layers.58.block_sparse_moe.experts.182.w2", "model.layers.58.block_sparse_moe.experts.183.w2", "model.layers.58.block_sparse_moe.experts.184.w2", "model.layers.58.block_sparse_moe.experts.185.w2", "model.layers.58.block_sparse_moe.experts.186.w2", "model.layers.58.block_sparse_moe.experts.187.w2", "model.layers.58.block_sparse_moe.experts.188.w2", "model.layers.58.block_sparse_moe.experts.189.w2", "model.layers.58.block_sparse_moe.experts.190.w2", "model.layers.58.block_sparse_moe.experts.191.w2", "model.layers.58.block_sparse_moe.experts.192.w2", "model.layers.58.block_sparse_moe.experts.193.w2", "model.layers.58.block_sparse_moe.experts.194.w2", "model.layers.58.block_sparse_moe.experts.195.w2", "model.layers.58.block_sparse_moe.experts.196.w2", "model.layers.58.block_sparse_moe.experts.197.w2", "model.layers.58.block_sparse_moe.experts.198.w2", "model.layers.58.block_sparse_moe.experts.199.w2", "model.layers.58.block_sparse_moe.experts.200.w2", "model.layers.58.block_sparse_moe.experts.201.w2", "model.layers.58.block_sparse_moe.experts.202.w2", "model.layers.58.block_sparse_moe.experts.203.w2", "model.layers.58.block_sparse_moe.experts.204.w2", "model.layers.58.block_sparse_moe.experts.205.w2", "model.layers.58.block_sparse_moe.experts.206.w2", "model.layers.58.block_sparse_moe.experts.207.w2", "model.layers.58.block_sparse_moe.experts.208.w2", "model.layers.58.block_sparse_moe.experts.209.w2", "model.layers.58.block_sparse_moe.experts.210.w2", "model.layers.58.block_sparse_moe.experts.211.w2", "model.layers.58.block_sparse_moe.experts.212.w2", "model.layers.58.block_sparse_moe.experts.213.w2", "model.layers.58.block_sparse_moe.experts.214.w2", "model.layers.58.block_sparse_moe.experts.215.w2", "model.layers.58.block_sparse_moe.experts.216.w2", "model.layers.58.block_sparse_moe.experts.217.w2", "model.layers.58.block_sparse_moe.experts.218.w2", "model.layers.58.block_sparse_moe.experts.219.w2", "model.layers.58.block_sparse_moe.experts.220.w2", "model.layers.58.block_sparse_moe.experts.221.w2", "model.layers.58.block_sparse_moe.experts.222.w2", "model.layers.58.block_sparse_moe.experts.223.w2", "model.layers.58.block_sparse_moe.experts.224.w2", "model.layers.58.block_sparse_moe.experts.225.w2", "model.layers.58.block_sparse_moe.experts.226.w2", "model.layers.58.block_sparse_moe.experts.227.w2", "model.layers.58.block_sparse_moe.experts.228.w2", "model.layers.58.block_sparse_moe.experts.229.w2", "model.layers.58.block_sparse_moe.experts.230.w2", "model.layers.58.block_sparse_moe.experts.231.w2", "model.layers.58.block_sparse_moe.experts.232.w2", "model.layers.58.block_sparse_moe.experts.233.w2", "model.layers.58.block_sparse_moe.experts.234.w2", "model.layers.58.block_sparse_moe.experts.235.w2", "model.layers.58.block_sparse_moe.experts.236.w2", "model.layers.58.block_sparse_moe.experts.237.w2", "model.layers.58.block_sparse_moe.experts.238.w2", "model.layers.58.block_sparse_moe.experts.239.w2", "model.layers.58.block_sparse_moe.experts.240.w2", "model.layers.58.block_sparse_moe.experts.241.w2", "model.layers.58.block_sparse_moe.experts.242.w2", "model.layers.58.block_sparse_moe.experts.243.w2", "model.layers.58.block_sparse_moe.experts.244.w2", "model.layers.58.block_sparse_moe.experts.245.w2", "model.layers.58.block_sparse_moe.experts.246.w2", "model.layers.58.block_sparse_moe.experts.247.w2", "model.layers.58.block_sparse_moe.experts.248.w2", "model.layers.58.block_sparse_moe.experts.249.w2", "model.layers.58.block_sparse_moe.experts.250.w2", "model.layers.58.block_sparse_moe.experts.251.w2", "model.layers.58.block_sparse_moe.experts.252.w2", "model.layers.58.block_sparse_moe.experts.253.w2", "model.layers.58.block_sparse_moe.experts.254.w2", "model.layers.58.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 2.180878072977066e-05, "dbits": 1207959552 } ] }, { "idx": 295, "layers": [ "model.layers.59.self_attn.q_proj" ], "candidates": [ { "dkld": -2.294778823852539e-05, "dbits": 18874368 } ] }, { "idx": 296, "layers": [ "model.layers.59.self_attn.k_proj", "model.layers.59.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0001233825460076332, "dbits": 6291456 } ] }, { "idx": 297, "layers": [ "model.layers.59.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00016507599502801895, "dbits": 18874368 } ] }, { "idx": 298, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w1", "model.layers.59.block_sparse_moe.experts.1.w1", "model.layers.59.block_sparse_moe.experts.2.w1", "model.layers.59.block_sparse_moe.experts.3.w1", "model.layers.59.block_sparse_moe.experts.4.w1", "model.layers.59.block_sparse_moe.experts.5.w1", "model.layers.59.block_sparse_moe.experts.6.w1", "model.layers.59.block_sparse_moe.experts.7.w1", "model.layers.59.block_sparse_moe.experts.8.w1", "model.layers.59.block_sparse_moe.experts.9.w1", "model.layers.59.block_sparse_moe.experts.10.w1", "model.layers.59.block_sparse_moe.experts.11.w1", "model.layers.59.block_sparse_moe.experts.12.w1", "model.layers.59.block_sparse_moe.experts.13.w1", "model.layers.59.block_sparse_moe.experts.14.w1", "model.layers.59.block_sparse_moe.experts.15.w1", "model.layers.59.block_sparse_moe.experts.16.w1", "model.layers.59.block_sparse_moe.experts.17.w1", "model.layers.59.block_sparse_moe.experts.18.w1", "model.layers.59.block_sparse_moe.experts.19.w1", "model.layers.59.block_sparse_moe.experts.20.w1", "model.layers.59.block_sparse_moe.experts.21.w1", "model.layers.59.block_sparse_moe.experts.22.w1", "model.layers.59.block_sparse_moe.experts.23.w1", "model.layers.59.block_sparse_moe.experts.24.w1", "model.layers.59.block_sparse_moe.experts.25.w1", "model.layers.59.block_sparse_moe.experts.26.w1", "model.layers.59.block_sparse_moe.experts.27.w1", "model.layers.59.block_sparse_moe.experts.28.w1", "model.layers.59.block_sparse_moe.experts.29.w1", "model.layers.59.block_sparse_moe.experts.30.w1", "model.layers.59.block_sparse_moe.experts.31.w1", "model.layers.59.block_sparse_moe.experts.32.w1", "model.layers.59.block_sparse_moe.experts.33.w1", "model.layers.59.block_sparse_moe.experts.34.w1", "model.layers.59.block_sparse_moe.experts.35.w1", "model.layers.59.block_sparse_moe.experts.36.w1", "model.layers.59.block_sparse_moe.experts.37.w1", "model.layers.59.block_sparse_moe.experts.38.w1", "model.layers.59.block_sparse_moe.experts.39.w1", "model.layers.59.block_sparse_moe.experts.40.w1", "model.layers.59.block_sparse_moe.experts.41.w1", "model.layers.59.block_sparse_moe.experts.42.w1", "model.layers.59.block_sparse_moe.experts.43.w1", "model.layers.59.block_sparse_moe.experts.44.w1", "model.layers.59.block_sparse_moe.experts.45.w1", "model.layers.59.block_sparse_moe.experts.46.w1", "model.layers.59.block_sparse_moe.experts.47.w1", "model.layers.59.block_sparse_moe.experts.48.w1", "model.layers.59.block_sparse_moe.experts.49.w1", "model.layers.59.block_sparse_moe.experts.50.w1", "model.layers.59.block_sparse_moe.experts.51.w1", "model.layers.59.block_sparse_moe.experts.52.w1", "model.layers.59.block_sparse_moe.experts.53.w1", "model.layers.59.block_sparse_moe.experts.54.w1", "model.layers.59.block_sparse_moe.experts.55.w1", "model.layers.59.block_sparse_moe.experts.56.w1", "model.layers.59.block_sparse_moe.experts.57.w1", "model.layers.59.block_sparse_moe.experts.58.w1", "model.layers.59.block_sparse_moe.experts.59.w1", "model.layers.59.block_sparse_moe.experts.60.w1", "model.layers.59.block_sparse_moe.experts.61.w1", "model.layers.59.block_sparse_moe.experts.62.w1", "model.layers.59.block_sparse_moe.experts.63.w1", "model.layers.59.block_sparse_moe.experts.64.w1", "model.layers.59.block_sparse_moe.experts.65.w1", "model.layers.59.block_sparse_moe.experts.66.w1", "model.layers.59.block_sparse_moe.experts.67.w1", "model.layers.59.block_sparse_moe.experts.68.w1", "model.layers.59.block_sparse_moe.experts.69.w1", "model.layers.59.block_sparse_moe.experts.70.w1", "model.layers.59.block_sparse_moe.experts.71.w1", "model.layers.59.block_sparse_moe.experts.72.w1", "model.layers.59.block_sparse_moe.experts.73.w1", "model.layers.59.block_sparse_moe.experts.74.w1", "model.layers.59.block_sparse_moe.experts.75.w1", "model.layers.59.block_sparse_moe.experts.76.w1", "model.layers.59.block_sparse_moe.experts.77.w1", "model.layers.59.block_sparse_moe.experts.78.w1", "model.layers.59.block_sparse_moe.experts.79.w1", "model.layers.59.block_sparse_moe.experts.80.w1", "model.layers.59.block_sparse_moe.experts.81.w1", "model.layers.59.block_sparse_moe.experts.82.w1", "model.layers.59.block_sparse_moe.experts.83.w1", "model.layers.59.block_sparse_moe.experts.84.w1", "model.layers.59.block_sparse_moe.experts.85.w1", "model.layers.59.block_sparse_moe.experts.86.w1", "model.layers.59.block_sparse_moe.experts.87.w1", "model.layers.59.block_sparse_moe.experts.88.w1", "model.layers.59.block_sparse_moe.experts.89.w1", "model.layers.59.block_sparse_moe.experts.90.w1", "model.layers.59.block_sparse_moe.experts.91.w1", "model.layers.59.block_sparse_moe.experts.92.w1", "model.layers.59.block_sparse_moe.experts.93.w1", "model.layers.59.block_sparse_moe.experts.94.w1", "model.layers.59.block_sparse_moe.experts.95.w1", "model.layers.59.block_sparse_moe.experts.96.w1", "model.layers.59.block_sparse_moe.experts.97.w1", "model.layers.59.block_sparse_moe.experts.98.w1", "model.layers.59.block_sparse_moe.experts.99.w1", "model.layers.59.block_sparse_moe.experts.100.w1", "model.layers.59.block_sparse_moe.experts.101.w1", "model.layers.59.block_sparse_moe.experts.102.w1", "model.layers.59.block_sparse_moe.experts.103.w1", "model.layers.59.block_sparse_moe.experts.104.w1", "model.layers.59.block_sparse_moe.experts.105.w1", "model.layers.59.block_sparse_moe.experts.106.w1", "model.layers.59.block_sparse_moe.experts.107.w1", "model.layers.59.block_sparse_moe.experts.108.w1", "model.layers.59.block_sparse_moe.experts.109.w1", "model.layers.59.block_sparse_moe.experts.110.w1", "model.layers.59.block_sparse_moe.experts.111.w1", "model.layers.59.block_sparse_moe.experts.112.w1", "model.layers.59.block_sparse_moe.experts.113.w1", "model.layers.59.block_sparse_moe.experts.114.w1", "model.layers.59.block_sparse_moe.experts.115.w1", "model.layers.59.block_sparse_moe.experts.116.w1", "model.layers.59.block_sparse_moe.experts.117.w1", "model.layers.59.block_sparse_moe.experts.118.w1", "model.layers.59.block_sparse_moe.experts.119.w1", "model.layers.59.block_sparse_moe.experts.120.w1", "model.layers.59.block_sparse_moe.experts.121.w1", "model.layers.59.block_sparse_moe.experts.122.w1", "model.layers.59.block_sparse_moe.experts.123.w1", "model.layers.59.block_sparse_moe.experts.124.w1", "model.layers.59.block_sparse_moe.experts.125.w1", "model.layers.59.block_sparse_moe.experts.126.w1", "model.layers.59.block_sparse_moe.experts.127.w1", "model.layers.59.block_sparse_moe.experts.128.w1", "model.layers.59.block_sparse_moe.experts.129.w1", "model.layers.59.block_sparse_moe.experts.130.w1", "model.layers.59.block_sparse_moe.experts.131.w1", "model.layers.59.block_sparse_moe.experts.132.w1", "model.layers.59.block_sparse_moe.experts.133.w1", "model.layers.59.block_sparse_moe.experts.134.w1", "model.layers.59.block_sparse_moe.experts.135.w1", "model.layers.59.block_sparse_moe.experts.136.w1", "model.layers.59.block_sparse_moe.experts.137.w1", "model.layers.59.block_sparse_moe.experts.138.w1", "model.layers.59.block_sparse_moe.experts.139.w1", "model.layers.59.block_sparse_moe.experts.140.w1", "model.layers.59.block_sparse_moe.experts.141.w1", "model.layers.59.block_sparse_moe.experts.142.w1", "model.layers.59.block_sparse_moe.experts.143.w1", "model.layers.59.block_sparse_moe.experts.144.w1", "model.layers.59.block_sparse_moe.experts.145.w1", "model.layers.59.block_sparse_moe.experts.146.w1", "model.layers.59.block_sparse_moe.experts.147.w1", "model.layers.59.block_sparse_moe.experts.148.w1", "model.layers.59.block_sparse_moe.experts.149.w1", "model.layers.59.block_sparse_moe.experts.150.w1", "model.layers.59.block_sparse_moe.experts.151.w1", "model.layers.59.block_sparse_moe.experts.152.w1", "model.layers.59.block_sparse_moe.experts.153.w1", "model.layers.59.block_sparse_moe.experts.154.w1", "model.layers.59.block_sparse_moe.experts.155.w1", "model.layers.59.block_sparse_moe.experts.156.w1", "model.layers.59.block_sparse_moe.experts.157.w1", "model.layers.59.block_sparse_moe.experts.158.w1", "model.layers.59.block_sparse_moe.experts.159.w1", "model.layers.59.block_sparse_moe.experts.160.w1", "model.layers.59.block_sparse_moe.experts.161.w1", "model.layers.59.block_sparse_moe.experts.162.w1", "model.layers.59.block_sparse_moe.experts.163.w1", "model.layers.59.block_sparse_moe.experts.164.w1", "model.layers.59.block_sparse_moe.experts.165.w1", "model.layers.59.block_sparse_moe.experts.166.w1", "model.layers.59.block_sparse_moe.experts.167.w1", "model.layers.59.block_sparse_moe.experts.168.w1", "model.layers.59.block_sparse_moe.experts.169.w1", "model.layers.59.block_sparse_moe.experts.170.w1", "model.layers.59.block_sparse_moe.experts.171.w1", "model.layers.59.block_sparse_moe.experts.172.w1", "model.layers.59.block_sparse_moe.experts.173.w1", "model.layers.59.block_sparse_moe.experts.174.w1", "model.layers.59.block_sparse_moe.experts.175.w1", "model.layers.59.block_sparse_moe.experts.176.w1", "model.layers.59.block_sparse_moe.experts.177.w1", "model.layers.59.block_sparse_moe.experts.178.w1", "model.layers.59.block_sparse_moe.experts.179.w1", "model.layers.59.block_sparse_moe.experts.180.w1", "model.layers.59.block_sparse_moe.experts.181.w1", "model.layers.59.block_sparse_moe.experts.182.w1", "model.layers.59.block_sparse_moe.experts.183.w1", "model.layers.59.block_sparse_moe.experts.184.w1", "model.layers.59.block_sparse_moe.experts.185.w1", "model.layers.59.block_sparse_moe.experts.186.w1", "model.layers.59.block_sparse_moe.experts.187.w1", "model.layers.59.block_sparse_moe.experts.188.w1", "model.layers.59.block_sparse_moe.experts.189.w1", "model.layers.59.block_sparse_moe.experts.190.w1", "model.layers.59.block_sparse_moe.experts.191.w1", "model.layers.59.block_sparse_moe.experts.192.w1", "model.layers.59.block_sparse_moe.experts.193.w1", "model.layers.59.block_sparse_moe.experts.194.w1", "model.layers.59.block_sparse_moe.experts.195.w1", "model.layers.59.block_sparse_moe.experts.196.w1", "model.layers.59.block_sparse_moe.experts.197.w1", "model.layers.59.block_sparse_moe.experts.198.w1", "model.layers.59.block_sparse_moe.experts.199.w1", "model.layers.59.block_sparse_moe.experts.200.w1", "model.layers.59.block_sparse_moe.experts.201.w1", "model.layers.59.block_sparse_moe.experts.202.w1", "model.layers.59.block_sparse_moe.experts.203.w1", "model.layers.59.block_sparse_moe.experts.204.w1", "model.layers.59.block_sparse_moe.experts.205.w1", "model.layers.59.block_sparse_moe.experts.206.w1", "model.layers.59.block_sparse_moe.experts.207.w1", "model.layers.59.block_sparse_moe.experts.208.w1", "model.layers.59.block_sparse_moe.experts.209.w1", "model.layers.59.block_sparse_moe.experts.210.w1", "model.layers.59.block_sparse_moe.experts.211.w1", "model.layers.59.block_sparse_moe.experts.212.w1", "model.layers.59.block_sparse_moe.experts.213.w1", "model.layers.59.block_sparse_moe.experts.214.w1", "model.layers.59.block_sparse_moe.experts.215.w1", "model.layers.59.block_sparse_moe.experts.216.w1", "model.layers.59.block_sparse_moe.experts.217.w1", "model.layers.59.block_sparse_moe.experts.218.w1", "model.layers.59.block_sparse_moe.experts.219.w1", "model.layers.59.block_sparse_moe.experts.220.w1", "model.layers.59.block_sparse_moe.experts.221.w1", "model.layers.59.block_sparse_moe.experts.222.w1", "model.layers.59.block_sparse_moe.experts.223.w1", "model.layers.59.block_sparse_moe.experts.224.w1", "model.layers.59.block_sparse_moe.experts.225.w1", "model.layers.59.block_sparse_moe.experts.226.w1", "model.layers.59.block_sparse_moe.experts.227.w1", "model.layers.59.block_sparse_moe.experts.228.w1", "model.layers.59.block_sparse_moe.experts.229.w1", "model.layers.59.block_sparse_moe.experts.230.w1", "model.layers.59.block_sparse_moe.experts.231.w1", "model.layers.59.block_sparse_moe.experts.232.w1", "model.layers.59.block_sparse_moe.experts.233.w1", "model.layers.59.block_sparse_moe.experts.234.w1", "model.layers.59.block_sparse_moe.experts.235.w1", "model.layers.59.block_sparse_moe.experts.236.w1", "model.layers.59.block_sparse_moe.experts.237.w1", "model.layers.59.block_sparse_moe.experts.238.w1", "model.layers.59.block_sparse_moe.experts.239.w1", "model.layers.59.block_sparse_moe.experts.240.w1", "model.layers.59.block_sparse_moe.experts.241.w1", "model.layers.59.block_sparse_moe.experts.242.w1", "model.layers.59.block_sparse_moe.experts.243.w1", "model.layers.59.block_sparse_moe.experts.244.w1", "model.layers.59.block_sparse_moe.experts.245.w1", "model.layers.59.block_sparse_moe.experts.246.w1", "model.layers.59.block_sparse_moe.experts.247.w1", "model.layers.59.block_sparse_moe.experts.248.w1", "model.layers.59.block_sparse_moe.experts.249.w1", "model.layers.59.block_sparse_moe.experts.250.w1", "model.layers.59.block_sparse_moe.experts.251.w1", "model.layers.59.block_sparse_moe.experts.252.w1", "model.layers.59.block_sparse_moe.experts.253.w1", "model.layers.59.block_sparse_moe.experts.254.w1", "model.layers.59.block_sparse_moe.experts.255.w1", "model.layers.59.block_sparse_moe.experts.0.w3", "model.layers.59.block_sparse_moe.experts.1.w3", "model.layers.59.block_sparse_moe.experts.2.w3", "model.layers.59.block_sparse_moe.experts.3.w3", "model.layers.59.block_sparse_moe.experts.4.w3", "model.layers.59.block_sparse_moe.experts.5.w3", "model.layers.59.block_sparse_moe.experts.6.w3", "model.layers.59.block_sparse_moe.experts.7.w3", "model.layers.59.block_sparse_moe.experts.8.w3", "model.layers.59.block_sparse_moe.experts.9.w3", "model.layers.59.block_sparse_moe.experts.10.w3", "model.layers.59.block_sparse_moe.experts.11.w3", "model.layers.59.block_sparse_moe.experts.12.w3", "model.layers.59.block_sparse_moe.experts.13.w3", "model.layers.59.block_sparse_moe.experts.14.w3", "model.layers.59.block_sparse_moe.experts.15.w3", "model.layers.59.block_sparse_moe.experts.16.w3", "model.layers.59.block_sparse_moe.experts.17.w3", "model.layers.59.block_sparse_moe.experts.18.w3", "model.layers.59.block_sparse_moe.experts.19.w3", "model.layers.59.block_sparse_moe.experts.20.w3", "model.layers.59.block_sparse_moe.experts.21.w3", "model.layers.59.block_sparse_moe.experts.22.w3", "model.layers.59.block_sparse_moe.experts.23.w3", "model.layers.59.block_sparse_moe.experts.24.w3", "model.layers.59.block_sparse_moe.experts.25.w3", "model.layers.59.block_sparse_moe.experts.26.w3", "model.layers.59.block_sparse_moe.experts.27.w3", "model.layers.59.block_sparse_moe.experts.28.w3", "model.layers.59.block_sparse_moe.experts.29.w3", "model.layers.59.block_sparse_moe.experts.30.w3", "model.layers.59.block_sparse_moe.experts.31.w3", "model.layers.59.block_sparse_moe.experts.32.w3", "model.layers.59.block_sparse_moe.experts.33.w3", "model.layers.59.block_sparse_moe.experts.34.w3", "model.layers.59.block_sparse_moe.experts.35.w3", "model.layers.59.block_sparse_moe.experts.36.w3", "model.layers.59.block_sparse_moe.experts.37.w3", "model.layers.59.block_sparse_moe.experts.38.w3", "model.layers.59.block_sparse_moe.experts.39.w3", "model.layers.59.block_sparse_moe.experts.40.w3", "model.layers.59.block_sparse_moe.experts.41.w3", "model.layers.59.block_sparse_moe.experts.42.w3", "model.layers.59.block_sparse_moe.experts.43.w3", "model.layers.59.block_sparse_moe.experts.44.w3", "model.layers.59.block_sparse_moe.experts.45.w3", "model.layers.59.block_sparse_moe.experts.46.w3", "model.layers.59.block_sparse_moe.experts.47.w3", "model.layers.59.block_sparse_moe.experts.48.w3", "model.layers.59.block_sparse_moe.experts.49.w3", "model.layers.59.block_sparse_moe.experts.50.w3", "model.layers.59.block_sparse_moe.experts.51.w3", "model.layers.59.block_sparse_moe.experts.52.w3", "model.layers.59.block_sparse_moe.experts.53.w3", "model.layers.59.block_sparse_moe.experts.54.w3", "model.layers.59.block_sparse_moe.experts.55.w3", "model.layers.59.block_sparse_moe.experts.56.w3", "model.layers.59.block_sparse_moe.experts.57.w3", "model.layers.59.block_sparse_moe.experts.58.w3", "model.layers.59.block_sparse_moe.experts.59.w3", "model.layers.59.block_sparse_moe.experts.60.w3", "model.layers.59.block_sparse_moe.experts.61.w3", "model.layers.59.block_sparse_moe.experts.62.w3", "model.layers.59.block_sparse_moe.experts.63.w3", "model.layers.59.block_sparse_moe.experts.64.w3", "model.layers.59.block_sparse_moe.experts.65.w3", "model.layers.59.block_sparse_moe.experts.66.w3", "model.layers.59.block_sparse_moe.experts.67.w3", "model.layers.59.block_sparse_moe.experts.68.w3", "model.layers.59.block_sparse_moe.experts.69.w3", "model.layers.59.block_sparse_moe.experts.70.w3", "model.layers.59.block_sparse_moe.experts.71.w3", "model.layers.59.block_sparse_moe.experts.72.w3", "model.layers.59.block_sparse_moe.experts.73.w3", "model.layers.59.block_sparse_moe.experts.74.w3", "model.layers.59.block_sparse_moe.experts.75.w3", "model.layers.59.block_sparse_moe.experts.76.w3", "model.layers.59.block_sparse_moe.experts.77.w3", "model.layers.59.block_sparse_moe.experts.78.w3", "model.layers.59.block_sparse_moe.experts.79.w3", "model.layers.59.block_sparse_moe.experts.80.w3", "model.layers.59.block_sparse_moe.experts.81.w3", "model.layers.59.block_sparse_moe.experts.82.w3", "model.layers.59.block_sparse_moe.experts.83.w3", "model.layers.59.block_sparse_moe.experts.84.w3", "model.layers.59.block_sparse_moe.experts.85.w3", "model.layers.59.block_sparse_moe.experts.86.w3", "model.layers.59.block_sparse_moe.experts.87.w3", "model.layers.59.block_sparse_moe.experts.88.w3", "model.layers.59.block_sparse_moe.experts.89.w3", "model.layers.59.block_sparse_moe.experts.90.w3", "model.layers.59.block_sparse_moe.experts.91.w3", "model.layers.59.block_sparse_moe.experts.92.w3", "model.layers.59.block_sparse_moe.experts.93.w3", "model.layers.59.block_sparse_moe.experts.94.w3", "model.layers.59.block_sparse_moe.experts.95.w3", "model.layers.59.block_sparse_moe.experts.96.w3", "model.layers.59.block_sparse_moe.experts.97.w3", "model.layers.59.block_sparse_moe.experts.98.w3", "model.layers.59.block_sparse_moe.experts.99.w3", "model.layers.59.block_sparse_moe.experts.100.w3", "model.layers.59.block_sparse_moe.experts.101.w3", "model.layers.59.block_sparse_moe.experts.102.w3", "model.layers.59.block_sparse_moe.experts.103.w3", "model.layers.59.block_sparse_moe.experts.104.w3", "model.layers.59.block_sparse_moe.experts.105.w3", "model.layers.59.block_sparse_moe.experts.106.w3", "model.layers.59.block_sparse_moe.experts.107.w3", "model.layers.59.block_sparse_moe.experts.108.w3", "model.layers.59.block_sparse_moe.experts.109.w3", "model.layers.59.block_sparse_moe.experts.110.w3", "model.layers.59.block_sparse_moe.experts.111.w3", "model.layers.59.block_sparse_moe.experts.112.w3", "model.layers.59.block_sparse_moe.experts.113.w3", "model.layers.59.block_sparse_moe.experts.114.w3", "model.layers.59.block_sparse_moe.experts.115.w3", "model.layers.59.block_sparse_moe.experts.116.w3", "model.layers.59.block_sparse_moe.experts.117.w3", "model.layers.59.block_sparse_moe.experts.118.w3", "model.layers.59.block_sparse_moe.experts.119.w3", "model.layers.59.block_sparse_moe.experts.120.w3", "model.layers.59.block_sparse_moe.experts.121.w3", "model.layers.59.block_sparse_moe.experts.122.w3", "model.layers.59.block_sparse_moe.experts.123.w3", "model.layers.59.block_sparse_moe.experts.124.w3", "model.layers.59.block_sparse_moe.experts.125.w3", "model.layers.59.block_sparse_moe.experts.126.w3", "model.layers.59.block_sparse_moe.experts.127.w3", "model.layers.59.block_sparse_moe.experts.128.w3", "model.layers.59.block_sparse_moe.experts.129.w3", "model.layers.59.block_sparse_moe.experts.130.w3", "model.layers.59.block_sparse_moe.experts.131.w3", "model.layers.59.block_sparse_moe.experts.132.w3", "model.layers.59.block_sparse_moe.experts.133.w3", "model.layers.59.block_sparse_moe.experts.134.w3", "model.layers.59.block_sparse_moe.experts.135.w3", "model.layers.59.block_sparse_moe.experts.136.w3", "model.layers.59.block_sparse_moe.experts.137.w3", "model.layers.59.block_sparse_moe.experts.138.w3", "model.layers.59.block_sparse_moe.experts.139.w3", "model.layers.59.block_sparse_moe.experts.140.w3", "model.layers.59.block_sparse_moe.experts.141.w3", "model.layers.59.block_sparse_moe.experts.142.w3", "model.layers.59.block_sparse_moe.experts.143.w3", "model.layers.59.block_sparse_moe.experts.144.w3", "model.layers.59.block_sparse_moe.experts.145.w3", "model.layers.59.block_sparse_moe.experts.146.w3", "model.layers.59.block_sparse_moe.experts.147.w3", "model.layers.59.block_sparse_moe.experts.148.w3", "model.layers.59.block_sparse_moe.experts.149.w3", "model.layers.59.block_sparse_moe.experts.150.w3", "model.layers.59.block_sparse_moe.experts.151.w3", "model.layers.59.block_sparse_moe.experts.152.w3", "model.layers.59.block_sparse_moe.experts.153.w3", "model.layers.59.block_sparse_moe.experts.154.w3", "model.layers.59.block_sparse_moe.experts.155.w3", "model.layers.59.block_sparse_moe.experts.156.w3", "model.layers.59.block_sparse_moe.experts.157.w3", "model.layers.59.block_sparse_moe.experts.158.w3", "model.layers.59.block_sparse_moe.experts.159.w3", "model.layers.59.block_sparse_moe.experts.160.w3", "model.layers.59.block_sparse_moe.experts.161.w3", "model.layers.59.block_sparse_moe.experts.162.w3", "model.layers.59.block_sparse_moe.experts.163.w3", "model.layers.59.block_sparse_moe.experts.164.w3", "model.layers.59.block_sparse_moe.experts.165.w3", "model.layers.59.block_sparse_moe.experts.166.w3", "model.layers.59.block_sparse_moe.experts.167.w3", "model.layers.59.block_sparse_moe.experts.168.w3", "model.layers.59.block_sparse_moe.experts.169.w3", "model.layers.59.block_sparse_moe.experts.170.w3", "model.layers.59.block_sparse_moe.experts.171.w3", "model.layers.59.block_sparse_moe.experts.172.w3", "model.layers.59.block_sparse_moe.experts.173.w3", "model.layers.59.block_sparse_moe.experts.174.w3", "model.layers.59.block_sparse_moe.experts.175.w3", "model.layers.59.block_sparse_moe.experts.176.w3", "model.layers.59.block_sparse_moe.experts.177.w3", "model.layers.59.block_sparse_moe.experts.178.w3", "model.layers.59.block_sparse_moe.experts.179.w3", "model.layers.59.block_sparse_moe.experts.180.w3", "model.layers.59.block_sparse_moe.experts.181.w3", "model.layers.59.block_sparse_moe.experts.182.w3", "model.layers.59.block_sparse_moe.experts.183.w3", "model.layers.59.block_sparse_moe.experts.184.w3", "model.layers.59.block_sparse_moe.experts.185.w3", "model.layers.59.block_sparse_moe.experts.186.w3", "model.layers.59.block_sparse_moe.experts.187.w3", "model.layers.59.block_sparse_moe.experts.188.w3", "model.layers.59.block_sparse_moe.experts.189.w3", "model.layers.59.block_sparse_moe.experts.190.w3", "model.layers.59.block_sparse_moe.experts.191.w3", "model.layers.59.block_sparse_moe.experts.192.w3", "model.layers.59.block_sparse_moe.experts.193.w3", "model.layers.59.block_sparse_moe.experts.194.w3", "model.layers.59.block_sparse_moe.experts.195.w3", "model.layers.59.block_sparse_moe.experts.196.w3", "model.layers.59.block_sparse_moe.experts.197.w3", "model.layers.59.block_sparse_moe.experts.198.w3", "model.layers.59.block_sparse_moe.experts.199.w3", "model.layers.59.block_sparse_moe.experts.200.w3", "model.layers.59.block_sparse_moe.experts.201.w3", "model.layers.59.block_sparse_moe.experts.202.w3", "model.layers.59.block_sparse_moe.experts.203.w3", "model.layers.59.block_sparse_moe.experts.204.w3", "model.layers.59.block_sparse_moe.experts.205.w3", "model.layers.59.block_sparse_moe.experts.206.w3", "model.layers.59.block_sparse_moe.experts.207.w3", "model.layers.59.block_sparse_moe.experts.208.w3", "model.layers.59.block_sparse_moe.experts.209.w3", "model.layers.59.block_sparse_moe.experts.210.w3", "model.layers.59.block_sparse_moe.experts.211.w3", "model.layers.59.block_sparse_moe.experts.212.w3", "model.layers.59.block_sparse_moe.experts.213.w3", "model.layers.59.block_sparse_moe.experts.214.w3", "model.layers.59.block_sparse_moe.experts.215.w3", "model.layers.59.block_sparse_moe.experts.216.w3", "model.layers.59.block_sparse_moe.experts.217.w3", "model.layers.59.block_sparse_moe.experts.218.w3", "model.layers.59.block_sparse_moe.experts.219.w3", "model.layers.59.block_sparse_moe.experts.220.w3", "model.layers.59.block_sparse_moe.experts.221.w3", "model.layers.59.block_sparse_moe.experts.222.w3", "model.layers.59.block_sparse_moe.experts.223.w3", "model.layers.59.block_sparse_moe.experts.224.w3", "model.layers.59.block_sparse_moe.experts.225.w3", "model.layers.59.block_sparse_moe.experts.226.w3", "model.layers.59.block_sparse_moe.experts.227.w3", "model.layers.59.block_sparse_moe.experts.228.w3", "model.layers.59.block_sparse_moe.experts.229.w3", "model.layers.59.block_sparse_moe.experts.230.w3", "model.layers.59.block_sparse_moe.experts.231.w3", "model.layers.59.block_sparse_moe.experts.232.w3", "model.layers.59.block_sparse_moe.experts.233.w3", "model.layers.59.block_sparse_moe.experts.234.w3", "model.layers.59.block_sparse_moe.experts.235.w3", "model.layers.59.block_sparse_moe.experts.236.w3", "model.layers.59.block_sparse_moe.experts.237.w3", "model.layers.59.block_sparse_moe.experts.238.w3", "model.layers.59.block_sparse_moe.experts.239.w3", "model.layers.59.block_sparse_moe.experts.240.w3", "model.layers.59.block_sparse_moe.experts.241.w3", "model.layers.59.block_sparse_moe.experts.242.w3", "model.layers.59.block_sparse_moe.experts.243.w3", "model.layers.59.block_sparse_moe.experts.244.w3", "model.layers.59.block_sparse_moe.experts.245.w3", "model.layers.59.block_sparse_moe.experts.246.w3", "model.layers.59.block_sparse_moe.experts.247.w3", "model.layers.59.block_sparse_moe.experts.248.w3", "model.layers.59.block_sparse_moe.experts.249.w3", "model.layers.59.block_sparse_moe.experts.250.w3", "model.layers.59.block_sparse_moe.experts.251.w3", "model.layers.59.block_sparse_moe.experts.252.w3", "model.layers.59.block_sparse_moe.experts.253.w3", "model.layers.59.block_sparse_moe.experts.254.w3", "model.layers.59.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00017227325588464737, "dbits": 2415919104 } ] }, { "idx": 299, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w2", "model.layers.59.block_sparse_moe.experts.1.w2", "model.layers.59.block_sparse_moe.experts.2.w2", "model.layers.59.block_sparse_moe.experts.3.w2", "model.layers.59.block_sparse_moe.experts.4.w2", "model.layers.59.block_sparse_moe.experts.5.w2", "model.layers.59.block_sparse_moe.experts.6.w2", "model.layers.59.block_sparse_moe.experts.7.w2", "model.layers.59.block_sparse_moe.experts.8.w2", "model.layers.59.block_sparse_moe.experts.9.w2", "model.layers.59.block_sparse_moe.experts.10.w2", "model.layers.59.block_sparse_moe.experts.11.w2", "model.layers.59.block_sparse_moe.experts.12.w2", "model.layers.59.block_sparse_moe.experts.13.w2", "model.layers.59.block_sparse_moe.experts.14.w2", "model.layers.59.block_sparse_moe.experts.15.w2", "model.layers.59.block_sparse_moe.experts.16.w2", "model.layers.59.block_sparse_moe.experts.17.w2", "model.layers.59.block_sparse_moe.experts.18.w2", "model.layers.59.block_sparse_moe.experts.19.w2", "model.layers.59.block_sparse_moe.experts.20.w2", "model.layers.59.block_sparse_moe.experts.21.w2", "model.layers.59.block_sparse_moe.experts.22.w2", "model.layers.59.block_sparse_moe.experts.23.w2", "model.layers.59.block_sparse_moe.experts.24.w2", "model.layers.59.block_sparse_moe.experts.25.w2", "model.layers.59.block_sparse_moe.experts.26.w2", "model.layers.59.block_sparse_moe.experts.27.w2", "model.layers.59.block_sparse_moe.experts.28.w2", "model.layers.59.block_sparse_moe.experts.29.w2", "model.layers.59.block_sparse_moe.experts.30.w2", "model.layers.59.block_sparse_moe.experts.31.w2", "model.layers.59.block_sparse_moe.experts.32.w2", "model.layers.59.block_sparse_moe.experts.33.w2", "model.layers.59.block_sparse_moe.experts.34.w2", "model.layers.59.block_sparse_moe.experts.35.w2", "model.layers.59.block_sparse_moe.experts.36.w2", "model.layers.59.block_sparse_moe.experts.37.w2", "model.layers.59.block_sparse_moe.experts.38.w2", "model.layers.59.block_sparse_moe.experts.39.w2", "model.layers.59.block_sparse_moe.experts.40.w2", "model.layers.59.block_sparse_moe.experts.41.w2", "model.layers.59.block_sparse_moe.experts.42.w2", "model.layers.59.block_sparse_moe.experts.43.w2", "model.layers.59.block_sparse_moe.experts.44.w2", "model.layers.59.block_sparse_moe.experts.45.w2", "model.layers.59.block_sparse_moe.experts.46.w2", "model.layers.59.block_sparse_moe.experts.47.w2", "model.layers.59.block_sparse_moe.experts.48.w2", "model.layers.59.block_sparse_moe.experts.49.w2", "model.layers.59.block_sparse_moe.experts.50.w2", "model.layers.59.block_sparse_moe.experts.51.w2", "model.layers.59.block_sparse_moe.experts.52.w2", "model.layers.59.block_sparse_moe.experts.53.w2", "model.layers.59.block_sparse_moe.experts.54.w2", "model.layers.59.block_sparse_moe.experts.55.w2", "model.layers.59.block_sparse_moe.experts.56.w2", "model.layers.59.block_sparse_moe.experts.57.w2", "model.layers.59.block_sparse_moe.experts.58.w2", "model.layers.59.block_sparse_moe.experts.59.w2", "model.layers.59.block_sparse_moe.experts.60.w2", "model.layers.59.block_sparse_moe.experts.61.w2", "model.layers.59.block_sparse_moe.experts.62.w2", "model.layers.59.block_sparse_moe.experts.63.w2", "model.layers.59.block_sparse_moe.experts.64.w2", "model.layers.59.block_sparse_moe.experts.65.w2", "model.layers.59.block_sparse_moe.experts.66.w2", "model.layers.59.block_sparse_moe.experts.67.w2", "model.layers.59.block_sparse_moe.experts.68.w2", "model.layers.59.block_sparse_moe.experts.69.w2", "model.layers.59.block_sparse_moe.experts.70.w2", "model.layers.59.block_sparse_moe.experts.71.w2", "model.layers.59.block_sparse_moe.experts.72.w2", "model.layers.59.block_sparse_moe.experts.73.w2", "model.layers.59.block_sparse_moe.experts.74.w2", "model.layers.59.block_sparse_moe.experts.75.w2", "model.layers.59.block_sparse_moe.experts.76.w2", "model.layers.59.block_sparse_moe.experts.77.w2", "model.layers.59.block_sparse_moe.experts.78.w2", "model.layers.59.block_sparse_moe.experts.79.w2", "model.layers.59.block_sparse_moe.experts.80.w2", "model.layers.59.block_sparse_moe.experts.81.w2", "model.layers.59.block_sparse_moe.experts.82.w2", "model.layers.59.block_sparse_moe.experts.83.w2", "model.layers.59.block_sparse_moe.experts.84.w2", "model.layers.59.block_sparse_moe.experts.85.w2", "model.layers.59.block_sparse_moe.experts.86.w2", "model.layers.59.block_sparse_moe.experts.87.w2", "model.layers.59.block_sparse_moe.experts.88.w2", "model.layers.59.block_sparse_moe.experts.89.w2", "model.layers.59.block_sparse_moe.experts.90.w2", "model.layers.59.block_sparse_moe.experts.91.w2", "model.layers.59.block_sparse_moe.experts.92.w2", "model.layers.59.block_sparse_moe.experts.93.w2", "model.layers.59.block_sparse_moe.experts.94.w2", "model.layers.59.block_sparse_moe.experts.95.w2", "model.layers.59.block_sparse_moe.experts.96.w2", "model.layers.59.block_sparse_moe.experts.97.w2", "model.layers.59.block_sparse_moe.experts.98.w2", "model.layers.59.block_sparse_moe.experts.99.w2", "model.layers.59.block_sparse_moe.experts.100.w2", "model.layers.59.block_sparse_moe.experts.101.w2", "model.layers.59.block_sparse_moe.experts.102.w2", "model.layers.59.block_sparse_moe.experts.103.w2", "model.layers.59.block_sparse_moe.experts.104.w2", "model.layers.59.block_sparse_moe.experts.105.w2", "model.layers.59.block_sparse_moe.experts.106.w2", "model.layers.59.block_sparse_moe.experts.107.w2", "model.layers.59.block_sparse_moe.experts.108.w2", "model.layers.59.block_sparse_moe.experts.109.w2", "model.layers.59.block_sparse_moe.experts.110.w2", "model.layers.59.block_sparse_moe.experts.111.w2", "model.layers.59.block_sparse_moe.experts.112.w2", "model.layers.59.block_sparse_moe.experts.113.w2", "model.layers.59.block_sparse_moe.experts.114.w2", "model.layers.59.block_sparse_moe.experts.115.w2", "model.layers.59.block_sparse_moe.experts.116.w2", "model.layers.59.block_sparse_moe.experts.117.w2", "model.layers.59.block_sparse_moe.experts.118.w2", "model.layers.59.block_sparse_moe.experts.119.w2", "model.layers.59.block_sparse_moe.experts.120.w2", "model.layers.59.block_sparse_moe.experts.121.w2", "model.layers.59.block_sparse_moe.experts.122.w2", "model.layers.59.block_sparse_moe.experts.123.w2", "model.layers.59.block_sparse_moe.experts.124.w2", "model.layers.59.block_sparse_moe.experts.125.w2", "model.layers.59.block_sparse_moe.experts.126.w2", "model.layers.59.block_sparse_moe.experts.127.w2", "model.layers.59.block_sparse_moe.experts.128.w2", "model.layers.59.block_sparse_moe.experts.129.w2", "model.layers.59.block_sparse_moe.experts.130.w2", "model.layers.59.block_sparse_moe.experts.131.w2", "model.layers.59.block_sparse_moe.experts.132.w2", "model.layers.59.block_sparse_moe.experts.133.w2", "model.layers.59.block_sparse_moe.experts.134.w2", "model.layers.59.block_sparse_moe.experts.135.w2", "model.layers.59.block_sparse_moe.experts.136.w2", "model.layers.59.block_sparse_moe.experts.137.w2", "model.layers.59.block_sparse_moe.experts.138.w2", "model.layers.59.block_sparse_moe.experts.139.w2", "model.layers.59.block_sparse_moe.experts.140.w2", "model.layers.59.block_sparse_moe.experts.141.w2", "model.layers.59.block_sparse_moe.experts.142.w2", "model.layers.59.block_sparse_moe.experts.143.w2", "model.layers.59.block_sparse_moe.experts.144.w2", "model.layers.59.block_sparse_moe.experts.145.w2", "model.layers.59.block_sparse_moe.experts.146.w2", "model.layers.59.block_sparse_moe.experts.147.w2", "model.layers.59.block_sparse_moe.experts.148.w2", "model.layers.59.block_sparse_moe.experts.149.w2", "model.layers.59.block_sparse_moe.experts.150.w2", "model.layers.59.block_sparse_moe.experts.151.w2", "model.layers.59.block_sparse_moe.experts.152.w2", "model.layers.59.block_sparse_moe.experts.153.w2", "model.layers.59.block_sparse_moe.experts.154.w2", "model.layers.59.block_sparse_moe.experts.155.w2", "model.layers.59.block_sparse_moe.experts.156.w2", "model.layers.59.block_sparse_moe.experts.157.w2", "model.layers.59.block_sparse_moe.experts.158.w2", "model.layers.59.block_sparse_moe.experts.159.w2", "model.layers.59.block_sparse_moe.experts.160.w2", "model.layers.59.block_sparse_moe.experts.161.w2", "model.layers.59.block_sparse_moe.experts.162.w2", "model.layers.59.block_sparse_moe.experts.163.w2", "model.layers.59.block_sparse_moe.experts.164.w2", "model.layers.59.block_sparse_moe.experts.165.w2", "model.layers.59.block_sparse_moe.experts.166.w2", "model.layers.59.block_sparse_moe.experts.167.w2", "model.layers.59.block_sparse_moe.experts.168.w2", "model.layers.59.block_sparse_moe.experts.169.w2", "model.layers.59.block_sparse_moe.experts.170.w2", "model.layers.59.block_sparse_moe.experts.171.w2", "model.layers.59.block_sparse_moe.experts.172.w2", "model.layers.59.block_sparse_moe.experts.173.w2", "model.layers.59.block_sparse_moe.experts.174.w2", "model.layers.59.block_sparse_moe.experts.175.w2", "model.layers.59.block_sparse_moe.experts.176.w2", "model.layers.59.block_sparse_moe.experts.177.w2", "model.layers.59.block_sparse_moe.experts.178.w2", "model.layers.59.block_sparse_moe.experts.179.w2", "model.layers.59.block_sparse_moe.experts.180.w2", "model.layers.59.block_sparse_moe.experts.181.w2", "model.layers.59.block_sparse_moe.experts.182.w2", "model.layers.59.block_sparse_moe.experts.183.w2", "model.layers.59.block_sparse_moe.experts.184.w2", "model.layers.59.block_sparse_moe.experts.185.w2", "model.layers.59.block_sparse_moe.experts.186.w2", "model.layers.59.block_sparse_moe.experts.187.w2", "model.layers.59.block_sparse_moe.experts.188.w2", "model.layers.59.block_sparse_moe.experts.189.w2", "model.layers.59.block_sparse_moe.experts.190.w2", "model.layers.59.block_sparse_moe.experts.191.w2", "model.layers.59.block_sparse_moe.experts.192.w2", "model.layers.59.block_sparse_moe.experts.193.w2", "model.layers.59.block_sparse_moe.experts.194.w2", "model.layers.59.block_sparse_moe.experts.195.w2", "model.layers.59.block_sparse_moe.experts.196.w2", "model.layers.59.block_sparse_moe.experts.197.w2", "model.layers.59.block_sparse_moe.experts.198.w2", "model.layers.59.block_sparse_moe.experts.199.w2", "model.layers.59.block_sparse_moe.experts.200.w2", "model.layers.59.block_sparse_moe.experts.201.w2", "model.layers.59.block_sparse_moe.experts.202.w2", "model.layers.59.block_sparse_moe.experts.203.w2", "model.layers.59.block_sparse_moe.experts.204.w2", "model.layers.59.block_sparse_moe.experts.205.w2", "model.layers.59.block_sparse_moe.experts.206.w2", "model.layers.59.block_sparse_moe.experts.207.w2", "model.layers.59.block_sparse_moe.experts.208.w2", "model.layers.59.block_sparse_moe.experts.209.w2", "model.layers.59.block_sparse_moe.experts.210.w2", "model.layers.59.block_sparse_moe.experts.211.w2", "model.layers.59.block_sparse_moe.experts.212.w2", "model.layers.59.block_sparse_moe.experts.213.w2", "model.layers.59.block_sparse_moe.experts.214.w2", "model.layers.59.block_sparse_moe.experts.215.w2", "model.layers.59.block_sparse_moe.experts.216.w2", "model.layers.59.block_sparse_moe.experts.217.w2", "model.layers.59.block_sparse_moe.experts.218.w2", "model.layers.59.block_sparse_moe.experts.219.w2", "model.layers.59.block_sparse_moe.experts.220.w2", "model.layers.59.block_sparse_moe.experts.221.w2", "model.layers.59.block_sparse_moe.experts.222.w2", "model.layers.59.block_sparse_moe.experts.223.w2", "model.layers.59.block_sparse_moe.experts.224.w2", "model.layers.59.block_sparse_moe.experts.225.w2", "model.layers.59.block_sparse_moe.experts.226.w2", "model.layers.59.block_sparse_moe.experts.227.w2", "model.layers.59.block_sparse_moe.experts.228.w2", "model.layers.59.block_sparse_moe.experts.229.w2", "model.layers.59.block_sparse_moe.experts.230.w2", "model.layers.59.block_sparse_moe.experts.231.w2", "model.layers.59.block_sparse_moe.experts.232.w2", "model.layers.59.block_sparse_moe.experts.233.w2", "model.layers.59.block_sparse_moe.experts.234.w2", "model.layers.59.block_sparse_moe.experts.235.w2", "model.layers.59.block_sparse_moe.experts.236.w2", "model.layers.59.block_sparse_moe.experts.237.w2", "model.layers.59.block_sparse_moe.experts.238.w2", "model.layers.59.block_sparse_moe.experts.239.w2", "model.layers.59.block_sparse_moe.experts.240.w2", "model.layers.59.block_sparse_moe.experts.241.w2", "model.layers.59.block_sparse_moe.experts.242.w2", "model.layers.59.block_sparse_moe.experts.243.w2", "model.layers.59.block_sparse_moe.experts.244.w2", "model.layers.59.block_sparse_moe.experts.245.w2", "model.layers.59.block_sparse_moe.experts.246.w2", "model.layers.59.block_sparse_moe.experts.247.w2", "model.layers.59.block_sparse_moe.experts.248.w2", "model.layers.59.block_sparse_moe.experts.249.w2", "model.layers.59.block_sparse_moe.experts.250.w2", "model.layers.59.block_sparse_moe.experts.251.w2", "model.layers.59.block_sparse_moe.experts.252.w2", "model.layers.59.block_sparse_moe.experts.253.w2", "model.layers.59.block_sparse_moe.experts.254.w2", "model.layers.59.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 3.6876648664418976e-06, "dbits": 1207959552 } ] }, { "idx": 300, "layers": [ "model.layers.60.self_attn.q_proj" ], "candidates": [ { "dkld": -6.984751671552936e-05, "dbits": 18874368 } ] }, { "idx": 301, "layers": [ "model.layers.60.self_attn.k_proj", "model.layers.60.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0007828202098607989, "dbits": 6291456 } ] }, { "idx": 302, "layers": [ "model.layers.60.self_attn.o_proj" ], "candidates": [ { "dkld": 3.4619867801663484e-05, "dbits": 18874368 } ] }, { "idx": 303, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w1", "model.layers.60.block_sparse_moe.experts.1.w1", "model.layers.60.block_sparse_moe.experts.2.w1", "model.layers.60.block_sparse_moe.experts.3.w1", "model.layers.60.block_sparse_moe.experts.4.w1", "model.layers.60.block_sparse_moe.experts.5.w1", "model.layers.60.block_sparse_moe.experts.6.w1", "model.layers.60.block_sparse_moe.experts.7.w1", "model.layers.60.block_sparse_moe.experts.8.w1", "model.layers.60.block_sparse_moe.experts.9.w1", "model.layers.60.block_sparse_moe.experts.10.w1", "model.layers.60.block_sparse_moe.experts.11.w1", "model.layers.60.block_sparse_moe.experts.12.w1", "model.layers.60.block_sparse_moe.experts.13.w1", "model.layers.60.block_sparse_moe.experts.14.w1", "model.layers.60.block_sparse_moe.experts.15.w1", "model.layers.60.block_sparse_moe.experts.16.w1", "model.layers.60.block_sparse_moe.experts.17.w1", "model.layers.60.block_sparse_moe.experts.18.w1", "model.layers.60.block_sparse_moe.experts.19.w1", "model.layers.60.block_sparse_moe.experts.20.w1", "model.layers.60.block_sparse_moe.experts.21.w1", "model.layers.60.block_sparse_moe.experts.22.w1", "model.layers.60.block_sparse_moe.experts.23.w1", "model.layers.60.block_sparse_moe.experts.24.w1", "model.layers.60.block_sparse_moe.experts.25.w1", "model.layers.60.block_sparse_moe.experts.26.w1", "model.layers.60.block_sparse_moe.experts.27.w1", "model.layers.60.block_sparse_moe.experts.28.w1", "model.layers.60.block_sparse_moe.experts.29.w1", "model.layers.60.block_sparse_moe.experts.30.w1", "model.layers.60.block_sparse_moe.experts.31.w1", "model.layers.60.block_sparse_moe.experts.32.w1", "model.layers.60.block_sparse_moe.experts.33.w1", "model.layers.60.block_sparse_moe.experts.34.w1", "model.layers.60.block_sparse_moe.experts.35.w1", "model.layers.60.block_sparse_moe.experts.36.w1", "model.layers.60.block_sparse_moe.experts.37.w1", "model.layers.60.block_sparse_moe.experts.38.w1", "model.layers.60.block_sparse_moe.experts.39.w1", "model.layers.60.block_sparse_moe.experts.40.w1", "model.layers.60.block_sparse_moe.experts.41.w1", "model.layers.60.block_sparse_moe.experts.42.w1", "model.layers.60.block_sparse_moe.experts.43.w1", "model.layers.60.block_sparse_moe.experts.44.w1", "model.layers.60.block_sparse_moe.experts.45.w1", "model.layers.60.block_sparse_moe.experts.46.w1", "model.layers.60.block_sparse_moe.experts.47.w1", "model.layers.60.block_sparse_moe.experts.48.w1", "model.layers.60.block_sparse_moe.experts.49.w1", "model.layers.60.block_sparse_moe.experts.50.w1", "model.layers.60.block_sparse_moe.experts.51.w1", "model.layers.60.block_sparse_moe.experts.52.w1", "model.layers.60.block_sparse_moe.experts.53.w1", "model.layers.60.block_sparse_moe.experts.54.w1", "model.layers.60.block_sparse_moe.experts.55.w1", "model.layers.60.block_sparse_moe.experts.56.w1", "model.layers.60.block_sparse_moe.experts.57.w1", "model.layers.60.block_sparse_moe.experts.58.w1", "model.layers.60.block_sparse_moe.experts.59.w1", "model.layers.60.block_sparse_moe.experts.60.w1", "model.layers.60.block_sparse_moe.experts.61.w1", "model.layers.60.block_sparse_moe.experts.62.w1", "model.layers.60.block_sparse_moe.experts.63.w1", "model.layers.60.block_sparse_moe.experts.64.w1", "model.layers.60.block_sparse_moe.experts.65.w1", "model.layers.60.block_sparse_moe.experts.66.w1", "model.layers.60.block_sparse_moe.experts.67.w1", "model.layers.60.block_sparse_moe.experts.68.w1", "model.layers.60.block_sparse_moe.experts.69.w1", "model.layers.60.block_sparse_moe.experts.70.w1", "model.layers.60.block_sparse_moe.experts.71.w1", "model.layers.60.block_sparse_moe.experts.72.w1", "model.layers.60.block_sparse_moe.experts.73.w1", "model.layers.60.block_sparse_moe.experts.74.w1", "model.layers.60.block_sparse_moe.experts.75.w1", "model.layers.60.block_sparse_moe.experts.76.w1", "model.layers.60.block_sparse_moe.experts.77.w1", "model.layers.60.block_sparse_moe.experts.78.w1", "model.layers.60.block_sparse_moe.experts.79.w1", "model.layers.60.block_sparse_moe.experts.80.w1", "model.layers.60.block_sparse_moe.experts.81.w1", "model.layers.60.block_sparse_moe.experts.82.w1", "model.layers.60.block_sparse_moe.experts.83.w1", "model.layers.60.block_sparse_moe.experts.84.w1", "model.layers.60.block_sparse_moe.experts.85.w1", "model.layers.60.block_sparse_moe.experts.86.w1", "model.layers.60.block_sparse_moe.experts.87.w1", "model.layers.60.block_sparse_moe.experts.88.w1", "model.layers.60.block_sparse_moe.experts.89.w1", "model.layers.60.block_sparse_moe.experts.90.w1", "model.layers.60.block_sparse_moe.experts.91.w1", "model.layers.60.block_sparse_moe.experts.92.w1", "model.layers.60.block_sparse_moe.experts.93.w1", "model.layers.60.block_sparse_moe.experts.94.w1", "model.layers.60.block_sparse_moe.experts.95.w1", "model.layers.60.block_sparse_moe.experts.96.w1", "model.layers.60.block_sparse_moe.experts.97.w1", "model.layers.60.block_sparse_moe.experts.98.w1", "model.layers.60.block_sparse_moe.experts.99.w1", "model.layers.60.block_sparse_moe.experts.100.w1", "model.layers.60.block_sparse_moe.experts.101.w1", "model.layers.60.block_sparse_moe.experts.102.w1", "model.layers.60.block_sparse_moe.experts.103.w1", "model.layers.60.block_sparse_moe.experts.104.w1", "model.layers.60.block_sparse_moe.experts.105.w1", "model.layers.60.block_sparse_moe.experts.106.w1", "model.layers.60.block_sparse_moe.experts.107.w1", "model.layers.60.block_sparse_moe.experts.108.w1", "model.layers.60.block_sparse_moe.experts.109.w1", "model.layers.60.block_sparse_moe.experts.110.w1", "model.layers.60.block_sparse_moe.experts.111.w1", "model.layers.60.block_sparse_moe.experts.112.w1", "model.layers.60.block_sparse_moe.experts.113.w1", "model.layers.60.block_sparse_moe.experts.114.w1", "model.layers.60.block_sparse_moe.experts.115.w1", "model.layers.60.block_sparse_moe.experts.116.w1", "model.layers.60.block_sparse_moe.experts.117.w1", "model.layers.60.block_sparse_moe.experts.118.w1", "model.layers.60.block_sparse_moe.experts.119.w1", "model.layers.60.block_sparse_moe.experts.120.w1", "model.layers.60.block_sparse_moe.experts.121.w1", "model.layers.60.block_sparse_moe.experts.122.w1", "model.layers.60.block_sparse_moe.experts.123.w1", "model.layers.60.block_sparse_moe.experts.124.w1", "model.layers.60.block_sparse_moe.experts.125.w1", "model.layers.60.block_sparse_moe.experts.126.w1", "model.layers.60.block_sparse_moe.experts.127.w1", "model.layers.60.block_sparse_moe.experts.128.w1", "model.layers.60.block_sparse_moe.experts.129.w1", "model.layers.60.block_sparse_moe.experts.130.w1", "model.layers.60.block_sparse_moe.experts.131.w1", "model.layers.60.block_sparse_moe.experts.132.w1", "model.layers.60.block_sparse_moe.experts.133.w1", "model.layers.60.block_sparse_moe.experts.134.w1", "model.layers.60.block_sparse_moe.experts.135.w1", "model.layers.60.block_sparse_moe.experts.136.w1", "model.layers.60.block_sparse_moe.experts.137.w1", "model.layers.60.block_sparse_moe.experts.138.w1", "model.layers.60.block_sparse_moe.experts.139.w1", "model.layers.60.block_sparse_moe.experts.140.w1", "model.layers.60.block_sparse_moe.experts.141.w1", "model.layers.60.block_sparse_moe.experts.142.w1", "model.layers.60.block_sparse_moe.experts.143.w1", "model.layers.60.block_sparse_moe.experts.144.w1", "model.layers.60.block_sparse_moe.experts.145.w1", "model.layers.60.block_sparse_moe.experts.146.w1", "model.layers.60.block_sparse_moe.experts.147.w1", "model.layers.60.block_sparse_moe.experts.148.w1", "model.layers.60.block_sparse_moe.experts.149.w1", "model.layers.60.block_sparse_moe.experts.150.w1", "model.layers.60.block_sparse_moe.experts.151.w1", "model.layers.60.block_sparse_moe.experts.152.w1", "model.layers.60.block_sparse_moe.experts.153.w1", "model.layers.60.block_sparse_moe.experts.154.w1", "model.layers.60.block_sparse_moe.experts.155.w1", "model.layers.60.block_sparse_moe.experts.156.w1", "model.layers.60.block_sparse_moe.experts.157.w1", "model.layers.60.block_sparse_moe.experts.158.w1", "model.layers.60.block_sparse_moe.experts.159.w1", "model.layers.60.block_sparse_moe.experts.160.w1", "model.layers.60.block_sparse_moe.experts.161.w1", "model.layers.60.block_sparse_moe.experts.162.w1", "model.layers.60.block_sparse_moe.experts.163.w1", "model.layers.60.block_sparse_moe.experts.164.w1", "model.layers.60.block_sparse_moe.experts.165.w1", "model.layers.60.block_sparse_moe.experts.166.w1", "model.layers.60.block_sparse_moe.experts.167.w1", "model.layers.60.block_sparse_moe.experts.168.w1", "model.layers.60.block_sparse_moe.experts.169.w1", "model.layers.60.block_sparse_moe.experts.170.w1", "model.layers.60.block_sparse_moe.experts.171.w1", "model.layers.60.block_sparse_moe.experts.172.w1", "model.layers.60.block_sparse_moe.experts.173.w1", "model.layers.60.block_sparse_moe.experts.174.w1", "model.layers.60.block_sparse_moe.experts.175.w1", "model.layers.60.block_sparse_moe.experts.176.w1", "model.layers.60.block_sparse_moe.experts.177.w1", "model.layers.60.block_sparse_moe.experts.178.w1", "model.layers.60.block_sparse_moe.experts.179.w1", "model.layers.60.block_sparse_moe.experts.180.w1", "model.layers.60.block_sparse_moe.experts.181.w1", "model.layers.60.block_sparse_moe.experts.182.w1", "model.layers.60.block_sparse_moe.experts.183.w1", "model.layers.60.block_sparse_moe.experts.184.w1", "model.layers.60.block_sparse_moe.experts.185.w1", "model.layers.60.block_sparse_moe.experts.186.w1", "model.layers.60.block_sparse_moe.experts.187.w1", "model.layers.60.block_sparse_moe.experts.188.w1", "model.layers.60.block_sparse_moe.experts.189.w1", "model.layers.60.block_sparse_moe.experts.190.w1", "model.layers.60.block_sparse_moe.experts.191.w1", "model.layers.60.block_sparse_moe.experts.192.w1", "model.layers.60.block_sparse_moe.experts.193.w1", "model.layers.60.block_sparse_moe.experts.194.w1", "model.layers.60.block_sparse_moe.experts.195.w1", "model.layers.60.block_sparse_moe.experts.196.w1", "model.layers.60.block_sparse_moe.experts.197.w1", "model.layers.60.block_sparse_moe.experts.198.w1", "model.layers.60.block_sparse_moe.experts.199.w1", "model.layers.60.block_sparse_moe.experts.200.w1", "model.layers.60.block_sparse_moe.experts.201.w1", "model.layers.60.block_sparse_moe.experts.202.w1", "model.layers.60.block_sparse_moe.experts.203.w1", "model.layers.60.block_sparse_moe.experts.204.w1", "model.layers.60.block_sparse_moe.experts.205.w1", "model.layers.60.block_sparse_moe.experts.206.w1", "model.layers.60.block_sparse_moe.experts.207.w1", "model.layers.60.block_sparse_moe.experts.208.w1", "model.layers.60.block_sparse_moe.experts.209.w1", "model.layers.60.block_sparse_moe.experts.210.w1", "model.layers.60.block_sparse_moe.experts.211.w1", "model.layers.60.block_sparse_moe.experts.212.w1", "model.layers.60.block_sparse_moe.experts.213.w1", "model.layers.60.block_sparse_moe.experts.214.w1", "model.layers.60.block_sparse_moe.experts.215.w1", "model.layers.60.block_sparse_moe.experts.216.w1", "model.layers.60.block_sparse_moe.experts.217.w1", "model.layers.60.block_sparse_moe.experts.218.w1", "model.layers.60.block_sparse_moe.experts.219.w1", "model.layers.60.block_sparse_moe.experts.220.w1", "model.layers.60.block_sparse_moe.experts.221.w1", "model.layers.60.block_sparse_moe.experts.222.w1", "model.layers.60.block_sparse_moe.experts.223.w1", "model.layers.60.block_sparse_moe.experts.224.w1", "model.layers.60.block_sparse_moe.experts.225.w1", "model.layers.60.block_sparse_moe.experts.226.w1", "model.layers.60.block_sparse_moe.experts.227.w1", "model.layers.60.block_sparse_moe.experts.228.w1", "model.layers.60.block_sparse_moe.experts.229.w1", "model.layers.60.block_sparse_moe.experts.230.w1", "model.layers.60.block_sparse_moe.experts.231.w1", "model.layers.60.block_sparse_moe.experts.232.w1", "model.layers.60.block_sparse_moe.experts.233.w1", "model.layers.60.block_sparse_moe.experts.234.w1", "model.layers.60.block_sparse_moe.experts.235.w1", "model.layers.60.block_sparse_moe.experts.236.w1", "model.layers.60.block_sparse_moe.experts.237.w1", "model.layers.60.block_sparse_moe.experts.238.w1", "model.layers.60.block_sparse_moe.experts.239.w1", "model.layers.60.block_sparse_moe.experts.240.w1", "model.layers.60.block_sparse_moe.experts.241.w1", "model.layers.60.block_sparse_moe.experts.242.w1", "model.layers.60.block_sparse_moe.experts.243.w1", "model.layers.60.block_sparse_moe.experts.244.w1", "model.layers.60.block_sparse_moe.experts.245.w1", "model.layers.60.block_sparse_moe.experts.246.w1", "model.layers.60.block_sparse_moe.experts.247.w1", "model.layers.60.block_sparse_moe.experts.248.w1", "model.layers.60.block_sparse_moe.experts.249.w1", "model.layers.60.block_sparse_moe.experts.250.w1", "model.layers.60.block_sparse_moe.experts.251.w1", "model.layers.60.block_sparse_moe.experts.252.w1", "model.layers.60.block_sparse_moe.experts.253.w1", "model.layers.60.block_sparse_moe.experts.254.w1", "model.layers.60.block_sparse_moe.experts.255.w1", "model.layers.60.block_sparse_moe.experts.0.w3", "model.layers.60.block_sparse_moe.experts.1.w3", "model.layers.60.block_sparse_moe.experts.2.w3", "model.layers.60.block_sparse_moe.experts.3.w3", "model.layers.60.block_sparse_moe.experts.4.w3", "model.layers.60.block_sparse_moe.experts.5.w3", "model.layers.60.block_sparse_moe.experts.6.w3", "model.layers.60.block_sparse_moe.experts.7.w3", "model.layers.60.block_sparse_moe.experts.8.w3", "model.layers.60.block_sparse_moe.experts.9.w3", "model.layers.60.block_sparse_moe.experts.10.w3", "model.layers.60.block_sparse_moe.experts.11.w3", "model.layers.60.block_sparse_moe.experts.12.w3", "model.layers.60.block_sparse_moe.experts.13.w3", "model.layers.60.block_sparse_moe.experts.14.w3", "model.layers.60.block_sparse_moe.experts.15.w3", "model.layers.60.block_sparse_moe.experts.16.w3", "model.layers.60.block_sparse_moe.experts.17.w3", "model.layers.60.block_sparse_moe.experts.18.w3", "model.layers.60.block_sparse_moe.experts.19.w3", "model.layers.60.block_sparse_moe.experts.20.w3", "model.layers.60.block_sparse_moe.experts.21.w3", "model.layers.60.block_sparse_moe.experts.22.w3", "model.layers.60.block_sparse_moe.experts.23.w3", "model.layers.60.block_sparse_moe.experts.24.w3", "model.layers.60.block_sparse_moe.experts.25.w3", "model.layers.60.block_sparse_moe.experts.26.w3", "model.layers.60.block_sparse_moe.experts.27.w3", "model.layers.60.block_sparse_moe.experts.28.w3", "model.layers.60.block_sparse_moe.experts.29.w3", "model.layers.60.block_sparse_moe.experts.30.w3", "model.layers.60.block_sparse_moe.experts.31.w3", "model.layers.60.block_sparse_moe.experts.32.w3", "model.layers.60.block_sparse_moe.experts.33.w3", "model.layers.60.block_sparse_moe.experts.34.w3", "model.layers.60.block_sparse_moe.experts.35.w3", "model.layers.60.block_sparse_moe.experts.36.w3", "model.layers.60.block_sparse_moe.experts.37.w3", "model.layers.60.block_sparse_moe.experts.38.w3", "model.layers.60.block_sparse_moe.experts.39.w3", "model.layers.60.block_sparse_moe.experts.40.w3", "model.layers.60.block_sparse_moe.experts.41.w3", "model.layers.60.block_sparse_moe.experts.42.w3", "model.layers.60.block_sparse_moe.experts.43.w3", "model.layers.60.block_sparse_moe.experts.44.w3", "model.layers.60.block_sparse_moe.experts.45.w3", "model.layers.60.block_sparse_moe.experts.46.w3", "model.layers.60.block_sparse_moe.experts.47.w3", "model.layers.60.block_sparse_moe.experts.48.w3", "model.layers.60.block_sparse_moe.experts.49.w3", "model.layers.60.block_sparse_moe.experts.50.w3", "model.layers.60.block_sparse_moe.experts.51.w3", "model.layers.60.block_sparse_moe.experts.52.w3", "model.layers.60.block_sparse_moe.experts.53.w3", "model.layers.60.block_sparse_moe.experts.54.w3", "model.layers.60.block_sparse_moe.experts.55.w3", "model.layers.60.block_sparse_moe.experts.56.w3", "model.layers.60.block_sparse_moe.experts.57.w3", "model.layers.60.block_sparse_moe.experts.58.w3", "model.layers.60.block_sparse_moe.experts.59.w3", "model.layers.60.block_sparse_moe.experts.60.w3", "model.layers.60.block_sparse_moe.experts.61.w3", "model.layers.60.block_sparse_moe.experts.62.w3", "model.layers.60.block_sparse_moe.experts.63.w3", "model.layers.60.block_sparse_moe.experts.64.w3", "model.layers.60.block_sparse_moe.experts.65.w3", "model.layers.60.block_sparse_moe.experts.66.w3", "model.layers.60.block_sparse_moe.experts.67.w3", "model.layers.60.block_sparse_moe.experts.68.w3", "model.layers.60.block_sparse_moe.experts.69.w3", "model.layers.60.block_sparse_moe.experts.70.w3", "model.layers.60.block_sparse_moe.experts.71.w3", "model.layers.60.block_sparse_moe.experts.72.w3", "model.layers.60.block_sparse_moe.experts.73.w3", "model.layers.60.block_sparse_moe.experts.74.w3", "model.layers.60.block_sparse_moe.experts.75.w3", "model.layers.60.block_sparse_moe.experts.76.w3", "model.layers.60.block_sparse_moe.experts.77.w3", "model.layers.60.block_sparse_moe.experts.78.w3", "model.layers.60.block_sparse_moe.experts.79.w3", "model.layers.60.block_sparse_moe.experts.80.w3", "model.layers.60.block_sparse_moe.experts.81.w3", "model.layers.60.block_sparse_moe.experts.82.w3", "model.layers.60.block_sparse_moe.experts.83.w3", "model.layers.60.block_sparse_moe.experts.84.w3", "model.layers.60.block_sparse_moe.experts.85.w3", "model.layers.60.block_sparse_moe.experts.86.w3", "model.layers.60.block_sparse_moe.experts.87.w3", "model.layers.60.block_sparse_moe.experts.88.w3", "model.layers.60.block_sparse_moe.experts.89.w3", "model.layers.60.block_sparse_moe.experts.90.w3", "model.layers.60.block_sparse_moe.experts.91.w3", "model.layers.60.block_sparse_moe.experts.92.w3", "model.layers.60.block_sparse_moe.experts.93.w3", "model.layers.60.block_sparse_moe.experts.94.w3", "model.layers.60.block_sparse_moe.experts.95.w3", "model.layers.60.block_sparse_moe.experts.96.w3", "model.layers.60.block_sparse_moe.experts.97.w3", "model.layers.60.block_sparse_moe.experts.98.w3", "model.layers.60.block_sparse_moe.experts.99.w3", "model.layers.60.block_sparse_moe.experts.100.w3", "model.layers.60.block_sparse_moe.experts.101.w3", "model.layers.60.block_sparse_moe.experts.102.w3", "model.layers.60.block_sparse_moe.experts.103.w3", "model.layers.60.block_sparse_moe.experts.104.w3", "model.layers.60.block_sparse_moe.experts.105.w3", "model.layers.60.block_sparse_moe.experts.106.w3", "model.layers.60.block_sparse_moe.experts.107.w3", "model.layers.60.block_sparse_moe.experts.108.w3", "model.layers.60.block_sparse_moe.experts.109.w3", "model.layers.60.block_sparse_moe.experts.110.w3", "model.layers.60.block_sparse_moe.experts.111.w3", "model.layers.60.block_sparse_moe.experts.112.w3", "model.layers.60.block_sparse_moe.experts.113.w3", "model.layers.60.block_sparse_moe.experts.114.w3", "model.layers.60.block_sparse_moe.experts.115.w3", "model.layers.60.block_sparse_moe.experts.116.w3", "model.layers.60.block_sparse_moe.experts.117.w3", "model.layers.60.block_sparse_moe.experts.118.w3", "model.layers.60.block_sparse_moe.experts.119.w3", "model.layers.60.block_sparse_moe.experts.120.w3", "model.layers.60.block_sparse_moe.experts.121.w3", "model.layers.60.block_sparse_moe.experts.122.w3", "model.layers.60.block_sparse_moe.experts.123.w3", "model.layers.60.block_sparse_moe.experts.124.w3", "model.layers.60.block_sparse_moe.experts.125.w3", "model.layers.60.block_sparse_moe.experts.126.w3", "model.layers.60.block_sparse_moe.experts.127.w3", "model.layers.60.block_sparse_moe.experts.128.w3", "model.layers.60.block_sparse_moe.experts.129.w3", "model.layers.60.block_sparse_moe.experts.130.w3", "model.layers.60.block_sparse_moe.experts.131.w3", "model.layers.60.block_sparse_moe.experts.132.w3", "model.layers.60.block_sparse_moe.experts.133.w3", "model.layers.60.block_sparse_moe.experts.134.w3", "model.layers.60.block_sparse_moe.experts.135.w3", "model.layers.60.block_sparse_moe.experts.136.w3", "model.layers.60.block_sparse_moe.experts.137.w3", "model.layers.60.block_sparse_moe.experts.138.w3", "model.layers.60.block_sparse_moe.experts.139.w3", "model.layers.60.block_sparse_moe.experts.140.w3", "model.layers.60.block_sparse_moe.experts.141.w3", "model.layers.60.block_sparse_moe.experts.142.w3", "model.layers.60.block_sparse_moe.experts.143.w3", "model.layers.60.block_sparse_moe.experts.144.w3", "model.layers.60.block_sparse_moe.experts.145.w3", "model.layers.60.block_sparse_moe.experts.146.w3", "model.layers.60.block_sparse_moe.experts.147.w3", "model.layers.60.block_sparse_moe.experts.148.w3", "model.layers.60.block_sparse_moe.experts.149.w3", "model.layers.60.block_sparse_moe.experts.150.w3", "model.layers.60.block_sparse_moe.experts.151.w3", "model.layers.60.block_sparse_moe.experts.152.w3", "model.layers.60.block_sparse_moe.experts.153.w3", "model.layers.60.block_sparse_moe.experts.154.w3", "model.layers.60.block_sparse_moe.experts.155.w3", "model.layers.60.block_sparse_moe.experts.156.w3", "model.layers.60.block_sparse_moe.experts.157.w3", "model.layers.60.block_sparse_moe.experts.158.w3", "model.layers.60.block_sparse_moe.experts.159.w3", "model.layers.60.block_sparse_moe.experts.160.w3", "model.layers.60.block_sparse_moe.experts.161.w3", "model.layers.60.block_sparse_moe.experts.162.w3", "model.layers.60.block_sparse_moe.experts.163.w3", "model.layers.60.block_sparse_moe.experts.164.w3", "model.layers.60.block_sparse_moe.experts.165.w3", "model.layers.60.block_sparse_moe.experts.166.w3", "model.layers.60.block_sparse_moe.experts.167.w3", "model.layers.60.block_sparse_moe.experts.168.w3", "model.layers.60.block_sparse_moe.experts.169.w3", "model.layers.60.block_sparse_moe.experts.170.w3", "model.layers.60.block_sparse_moe.experts.171.w3", "model.layers.60.block_sparse_moe.experts.172.w3", "model.layers.60.block_sparse_moe.experts.173.w3", "model.layers.60.block_sparse_moe.experts.174.w3", "model.layers.60.block_sparse_moe.experts.175.w3", "model.layers.60.block_sparse_moe.experts.176.w3", "model.layers.60.block_sparse_moe.experts.177.w3", "model.layers.60.block_sparse_moe.experts.178.w3", "model.layers.60.block_sparse_moe.experts.179.w3", "model.layers.60.block_sparse_moe.experts.180.w3", "model.layers.60.block_sparse_moe.experts.181.w3", "model.layers.60.block_sparse_moe.experts.182.w3", "model.layers.60.block_sparse_moe.experts.183.w3", "model.layers.60.block_sparse_moe.experts.184.w3", "model.layers.60.block_sparse_moe.experts.185.w3", "model.layers.60.block_sparse_moe.experts.186.w3", "model.layers.60.block_sparse_moe.experts.187.w3", "model.layers.60.block_sparse_moe.experts.188.w3", "model.layers.60.block_sparse_moe.experts.189.w3", "model.layers.60.block_sparse_moe.experts.190.w3", "model.layers.60.block_sparse_moe.experts.191.w3", "model.layers.60.block_sparse_moe.experts.192.w3", "model.layers.60.block_sparse_moe.experts.193.w3", "model.layers.60.block_sparse_moe.experts.194.w3", "model.layers.60.block_sparse_moe.experts.195.w3", "model.layers.60.block_sparse_moe.experts.196.w3", "model.layers.60.block_sparse_moe.experts.197.w3", "model.layers.60.block_sparse_moe.experts.198.w3", "model.layers.60.block_sparse_moe.experts.199.w3", "model.layers.60.block_sparse_moe.experts.200.w3", "model.layers.60.block_sparse_moe.experts.201.w3", "model.layers.60.block_sparse_moe.experts.202.w3", "model.layers.60.block_sparse_moe.experts.203.w3", "model.layers.60.block_sparse_moe.experts.204.w3", "model.layers.60.block_sparse_moe.experts.205.w3", "model.layers.60.block_sparse_moe.experts.206.w3", "model.layers.60.block_sparse_moe.experts.207.w3", "model.layers.60.block_sparse_moe.experts.208.w3", "model.layers.60.block_sparse_moe.experts.209.w3", "model.layers.60.block_sparse_moe.experts.210.w3", "model.layers.60.block_sparse_moe.experts.211.w3", "model.layers.60.block_sparse_moe.experts.212.w3", "model.layers.60.block_sparse_moe.experts.213.w3", "model.layers.60.block_sparse_moe.experts.214.w3", "model.layers.60.block_sparse_moe.experts.215.w3", "model.layers.60.block_sparse_moe.experts.216.w3", "model.layers.60.block_sparse_moe.experts.217.w3", "model.layers.60.block_sparse_moe.experts.218.w3", "model.layers.60.block_sparse_moe.experts.219.w3", "model.layers.60.block_sparse_moe.experts.220.w3", "model.layers.60.block_sparse_moe.experts.221.w3", "model.layers.60.block_sparse_moe.experts.222.w3", "model.layers.60.block_sparse_moe.experts.223.w3", "model.layers.60.block_sparse_moe.experts.224.w3", "model.layers.60.block_sparse_moe.experts.225.w3", "model.layers.60.block_sparse_moe.experts.226.w3", "model.layers.60.block_sparse_moe.experts.227.w3", "model.layers.60.block_sparse_moe.experts.228.w3", "model.layers.60.block_sparse_moe.experts.229.w3", "model.layers.60.block_sparse_moe.experts.230.w3", "model.layers.60.block_sparse_moe.experts.231.w3", "model.layers.60.block_sparse_moe.experts.232.w3", "model.layers.60.block_sparse_moe.experts.233.w3", "model.layers.60.block_sparse_moe.experts.234.w3", "model.layers.60.block_sparse_moe.experts.235.w3", "model.layers.60.block_sparse_moe.experts.236.w3", "model.layers.60.block_sparse_moe.experts.237.w3", "model.layers.60.block_sparse_moe.experts.238.w3", "model.layers.60.block_sparse_moe.experts.239.w3", "model.layers.60.block_sparse_moe.experts.240.w3", "model.layers.60.block_sparse_moe.experts.241.w3", "model.layers.60.block_sparse_moe.experts.242.w3", "model.layers.60.block_sparse_moe.experts.243.w3", "model.layers.60.block_sparse_moe.experts.244.w3", "model.layers.60.block_sparse_moe.experts.245.w3", "model.layers.60.block_sparse_moe.experts.246.w3", "model.layers.60.block_sparse_moe.experts.247.w3", "model.layers.60.block_sparse_moe.experts.248.w3", "model.layers.60.block_sparse_moe.experts.249.w3", "model.layers.60.block_sparse_moe.experts.250.w3", "model.layers.60.block_sparse_moe.experts.251.w3", "model.layers.60.block_sparse_moe.experts.252.w3", "model.layers.60.block_sparse_moe.experts.253.w3", "model.layers.60.block_sparse_moe.experts.254.w3", "model.layers.60.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 8.813608437775733e-05, "dbits": 2415919104 } ] }, { "idx": 304, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w2", "model.layers.60.block_sparse_moe.experts.1.w2", "model.layers.60.block_sparse_moe.experts.2.w2", "model.layers.60.block_sparse_moe.experts.3.w2", "model.layers.60.block_sparse_moe.experts.4.w2", "model.layers.60.block_sparse_moe.experts.5.w2", "model.layers.60.block_sparse_moe.experts.6.w2", "model.layers.60.block_sparse_moe.experts.7.w2", "model.layers.60.block_sparse_moe.experts.8.w2", "model.layers.60.block_sparse_moe.experts.9.w2", "model.layers.60.block_sparse_moe.experts.10.w2", "model.layers.60.block_sparse_moe.experts.11.w2", "model.layers.60.block_sparse_moe.experts.12.w2", "model.layers.60.block_sparse_moe.experts.13.w2", "model.layers.60.block_sparse_moe.experts.14.w2", "model.layers.60.block_sparse_moe.experts.15.w2", "model.layers.60.block_sparse_moe.experts.16.w2", "model.layers.60.block_sparse_moe.experts.17.w2", "model.layers.60.block_sparse_moe.experts.18.w2", "model.layers.60.block_sparse_moe.experts.19.w2", "model.layers.60.block_sparse_moe.experts.20.w2", "model.layers.60.block_sparse_moe.experts.21.w2", "model.layers.60.block_sparse_moe.experts.22.w2", "model.layers.60.block_sparse_moe.experts.23.w2", "model.layers.60.block_sparse_moe.experts.24.w2", "model.layers.60.block_sparse_moe.experts.25.w2", "model.layers.60.block_sparse_moe.experts.26.w2", "model.layers.60.block_sparse_moe.experts.27.w2", "model.layers.60.block_sparse_moe.experts.28.w2", "model.layers.60.block_sparse_moe.experts.29.w2", "model.layers.60.block_sparse_moe.experts.30.w2", "model.layers.60.block_sparse_moe.experts.31.w2", "model.layers.60.block_sparse_moe.experts.32.w2", "model.layers.60.block_sparse_moe.experts.33.w2", "model.layers.60.block_sparse_moe.experts.34.w2", "model.layers.60.block_sparse_moe.experts.35.w2", "model.layers.60.block_sparse_moe.experts.36.w2", "model.layers.60.block_sparse_moe.experts.37.w2", "model.layers.60.block_sparse_moe.experts.38.w2", "model.layers.60.block_sparse_moe.experts.39.w2", "model.layers.60.block_sparse_moe.experts.40.w2", "model.layers.60.block_sparse_moe.experts.41.w2", "model.layers.60.block_sparse_moe.experts.42.w2", "model.layers.60.block_sparse_moe.experts.43.w2", "model.layers.60.block_sparse_moe.experts.44.w2", "model.layers.60.block_sparse_moe.experts.45.w2", "model.layers.60.block_sparse_moe.experts.46.w2", "model.layers.60.block_sparse_moe.experts.47.w2", "model.layers.60.block_sparse_moe.experts.48.w2", "model.layers.60.block_sparse_moe.experts.49.w2", "model.layers.60.block_sparse_moe.experts.50.w2", "model.layers.60.block_sparse_moe.experts.51.w2", "model.layers.60.block_sparse_moe.experts.52.w2", "model.layers.60.block_sparse_moe.experts.53.w2", "model.layers.60.block_sparse_moe.experts.54.w2", "model.layers.60.block_sparse_moe.experts.55.w2", "model.layers.60.block_sparse_moe.experts.56.w2", "model.layers.60.block_sparse_moe.experts.57.w2", "model.layers.60.block_sparse_moe.experts.58.w2", "model.layers.60.block_sparse_moe.experts.59.w2", "model.layers.60.block_sparse_moe.experts.60.w2", "model.layers.60.block_sparse_moe.experts.61.w2", "model.layers.60.block_sparse_moe.experts.62.w2", "model.layers.60.block_sparse_moe.experts.63.w2", "model.layers.60.block_sparse_moe.experts.64.w2", "model.layers.60.block_sparse_moe.experts.65.w2", "model.layers.60.block_sparse_moe.experts.66.w2", "model.layers.60.block_sparse_moe.experts.67.w2", "model.layers.60.block_sparse_moe.experts.68.w2", "model.layers.60.block_sparse_moe.experts.69.w2", "model.layers.60.block_sparse_moe.experts.70.w2", "model.layers.60.block_sparse_moe.experts.71.w2", "model.layers.60.block_sparse_moe.experts.72.w2", "model.layers.60.block_sparse_moe.experts.73.w2", "model.layers.60.block_sparse_moe.experts.74.w2", "model.layers.60.block_sparse_moe.experts.75.w2", "model.layers.60.block_sparse_moe.experts.76.w2", "model.layers.60.block_sparse_moe.experts.77.w2", "model.layers.60.block_sparse_moe.experts.78.w2", "model.layers.60.block_sparse_moe.experts.79.w2", "model.layers.60.block_sparse_moe.experts.80.w2", "model.layers.60.block_sparse_moe.experts.81.w2", "model.layers.60.block_sparse_moe.experts.82.w2", "model.layers.60.block_sparse_moe.experts.83.w2", "model.layers.60.block_sparse_moe.experts.84.w2", "model.layers.60.block_sparse_moe.experts.85.w2", "model.layers.60.block_sparse_moe.experts.86.w2", "model.layers.60.block_sparse_moe.experts.87.w2", "model.layers.60.block_sparse_moe.experts.88.w2", "model.layers.60.block_sparse_moe.experts.89.w2", "model.layers.60.block_sparse_moe.experts.90.w2", "model.layers.60.block_sparse_moe.experts.91.w2", "model.layers.60.block_sparse_moe.experts.92.w2", "model.layers.60.block_sparse_moe.experts.93.w2", "model.layers.60.block_sparse_moe.experts.94.w2", "model.layers.60.block_sparse_moe.experts.95.w2", "model.layers.60.block_sparse_moe.experts.96.w2", "model.layers.60.block_sparse_moe.experts.97.w2", "model.layers.60.block_sparse_moe.experts.98.w2", "model.layers.60.block_sparse_moe.experts.99.w2", "model.layers.60.block_sparse_moe.experts.100.w2", "model.layers.60.block_sparse_moe.experts.101.w2", "model.layers.60.block_sparse_moe.experts.102.w2", "model.layers.60.block_sparse_moe.experts.103.w2", "model.layers.60.block_sparse_moe.experts.104.w2", "model.layers.60.block_sparse_moe.experts.105.w2", "model.layers.60.block_sparse_moe.experts.106.w2", "model.layers.60.block_sparse_moe.experts.107.w2", "model.layers.60.block_sparse_moe.experts.108.w2", "model.layers.60.block_sparse_moe.experts.109.w2", "model.layers.60.block_sparse_moe.experts.110.w2", "model.layers.60.block_sparse_moe.experts.111.w2", "model.layers.60.block_sparse_moe.experts.112.w2", "model.layers.60.block_sparse_moe.experts.113.w2", "model.layers.60.block_sparse_moe.experts.114.w2", "model.layers.60.block_sparse_moe.experts.115.w2", "model.layers.60.block_sparse_moe.experts.116.w2", "model.layers.60.block_sparse_moe.experts.117.w2", "model.layers.60.block_sparse_moe.experts.118.w2", "model.layers.60.block_sparse_moe.experts.119.w2", "model.layers.60.block_sparse_moe.experts.120.w2", "model.layers.60.block_sparse_moe.experts.121.w2", "model.layers.60.block_sparse_moe.experts.122.w2", "model.layers.60.block_sparse_moe.experts.123.w2", "model.layers.60.block_sparse_moe.experts.124.w2", "model.layers.60.block_sparse_moe.experts.125.w2", "model.layers.60.block_sparse_moe.experts.126.w2", "model.layers.60.block_sparse_moe.experts.127.w2", "model.layers.60.block_sparse_moe.experts.128.w2", "model.layers.60.block_sparse_moe.experts.129.w2", "model.layers.60.block_sparse_moe.experts.130.w2", "model.layers.60.block_sparse_moe.experts.131.w2", "model.layers.60.block_sparse_moe.experts.132.w2", "model.layers.60.block_sparse_moe.experts.133.w2", "model.layers.60.block_sparse_moe.experts.134.w2", "model.layers.60.block_sparse_moe.experts.135.w2", "model.layers.60.block_sparse_moe.experts.136.w2", "model.layers.60.block_sparse_moe.experts.137.w2", "model.layers.60.block_sparse_moe.experts.138.w2", "model.layers.60.block_sparse_moe.experts.139.w2", "model.layers.60.block_sparse_moe.experts.140.w2", "model.layers.60.block_sparse_moe.experts.141.w2", "model.layers.60.block_sparse_moe.experts.142.w2", "model.layers.60.block_sparse_moe.experts.143.w2", "model.layers.60.block_sparse_moe.experts.144.w2", "model.layers.60.block_sparse_moe.experts.145.w2", "model.layers.60.block_sparse_moe.experts.146.w2", "model.layers.60.block_sparse_moe.experts.147.w2", "model.layers.60.block_sparse_moe.experts.148.w2", "model.layers.60.block_sparse_moe.experts.149.w2", "model.layers.60.block_sparse_moe.experts.150.w2", "model.layers.60.block_sparse_moe.experts.151.w2", "model.layers.60.block_sparse_moe.experts.152.w2", "model.layers.60.block_sparse_moe.experts.153.w2", "model.layers.60.block_sparse_moe.experts.154.w2", "model.layers.60.block_sparse_moe.experts.155.w2", "model.layers.60.block_sparse_moe.experts.156.w2", "model.layers.60.block_sparse_moe.experts.157.w2", "model.layers.60.block_sparse_moe.experts.158.w2", "model.layers.60.block_sparse_moe.experts.159.w2", "model.layers.60.block_sparse_moe.experts.160.w2", "model.layers.60.block_sparse_moe.experts.161.w2", "model.layers.60.block_sparse_moe.experts.162.w2", "model.layers.60.block_sparse_moe.experts.163.w2", "model.layers.60.block_sparse_moe.experts.164.w2", "model.layers.60.block_sparse_moe.experts.165.w2", "model.layers.60.block_sparse_moe.experts.166.w2", "model.layers.60.block_sparse_moe.experts.167.w2", "model.layers.60.block_sparse_moe.experts.168.w2", "model.layers.60.block_sparse_moe.experts.169.w2", "model.layers.60.block_sparse_moe.experts.170.w2", "model.layers.60.block_sparse_moe.experts.171.w2", "model.layers.60.block_sparse_moe.experts.172.w2", "model.layers.60.block_sparse_moe.experts.173.w2", "model.layers.60.block_sparse_moe.experts.174.w2", "model.layers.60.block_sparse_moe.experts.175.w2", "model.layers.60.block_sparse_moe.experts.176.w2", "model.layers.60.block_sparse_moe.experts.177.w2", "model.layers.60.block_sparse_moe.experts.178.w2", "model.layers.60.block_sparse_moe.experts.179.w2", "model.layers.60.block_sparse_moe.experts.180.w2", "model.layers.60.block_sparse_moe.experts.181.w2", "model.layers.60.block_sparse_moe.experts.182.w2", "model.layers.60.block_sparse_moe.experts.183.w2", "model.layers.60.block_sparse_moe.experts.184.w2", "model.layers.60.block_sparse_moe.experts.185.w2", "model.layers.60.block_sparse_moe.experts.186.w2", "model.layers.60.block_sparse_moe.experts.187.w2", "model.layers.60.block_sparse_moe.experts.188.w2", "model.layers.60.block_sparse_moe.experts.189.w2", "model.layers.60.block_sparse_moe.experts.190.w2", "model.layers.60.block_sparse_moe.experts.191.w2", "model.layers.60.block_sparse_moe.experts.192.w2", "model.layers.60.block_sparse_moe.experts.193.w2", "model.layers.60.block_sparse_moe.experts.194.w2", "model.layers.60.block_sparse_moe.experts.195.w2", "model.layers.60.block_sparse_moe.experts.196.w2", "model.layers.60.block_sparse_moe.experts.197.w2", "model.layers.60.block_sparse_moe.experts.198.w2", "model.layers.60.block_sparse_moe.experts.199.w2", "model.layers.60.block_sparse_moe.experts.200.w2", "model.layers.60.block_sparse_moe.experts.201.w2", "model.layers.60.block_sparse_moe.experts.202.w2", "model.layers.60.block_sparse_moe.experts.203.w2", "model.layers.60.block_sparse_moe.experts.204.w2", "model.layers.60.block_sparse_moe.experts.205.w2", "model.layers.60.block_sparse_moe.experts.206.w2", "model.layers.60.block_sparse_moe.experts.207.w2", "model.layers.60.block_sparse_moe.experts.208.w2", "model.layers.60.block_sparse_moe.experts.209.w2", "model.layers.60.block_sparse_moe.experts.210.w2", "model.layers.60.block_sparse_moe.experts.211.w2", "model.layers.60.block_sparse_moe.experts.212.w2", "model.layers.60.block_sparse_moe.experts.213.w2", "model.layers.60.block_sparse_moe.experts.214.w2", "model.layers.60.block_sparse_moe.experts.215.w2", "model.layers.60.block_sparse_moe.experts.216.w2", "model.layers.60.block_sparse_moe.experts.217.w2", "model.layers.60.block_sparse_moe.experts.218.w2", "model.layers.60.block_sparse_moe.experts.219.w2", "model.layers.60.block_sparse_moe.experts.220.w2", "model.layers.60.block_sparse_moe.experts.221.w2", "model.layers.60.block_sparse_moe.experts.222.w2", "model.layers.60.block_sparse_moe.experts.223.w2", "model.layers.60.block_sparse_moe.experts.224.w2", "model.layers.60.block_sparse_moe.experts.225.w2", "model.layers.60.block_sparse_moe.experts.226.w2", "model.layers.60.block_sparse_moe.experts.227.w2", "model.layers.60.block_sparse_moe.experts.228.w2", "model.layers.60.block_sparse_moe.experts.229.w2", "model.layers.60.block_sparse_moe.experts.230.w2", "model.layers.60.block_sparse_moe.experts.231.w2", "model.layers.60.block_sparse_moe.experts.232.w2", "model.layers.60.block_sparse_moe.experts.233.w2", "model.layers.60.block_sparse_moe.experts.234.w2", "model.layers.60.block_sparse_moe.experts.235.w2", "model.layers.60.block_sparse_moe.experts.236.w2", "model.layers.60.block_sparse_moe.experts.237.w2", "model.layers.60.block_sparse_moe.experts.238.w2", "model.layers.60.block_sparse_moe.experts.239.w2", "model.layers.60.block_sparse_moe.experts.240.w2", "model.layers.60.block_sparse_moe.experts.241.w2", "model.layers.60.block_sparse_moe.experts.242.w2", "model.layers.60.block_sparse_moe.experts.243.w2", "model.layers.60.block_sparse_moe.experts.244.w2", "model.layers.60.block_sparse_moe.experts.245.w2", "model.layers.60.block_sparse_moe.experts.246.w2", "model.layers.60.block_sparse_moe.experts.247.w2", "model.layers.60.block_sparse_moe.experts.248.w2", "model.layers.60.block_sparse_moe.experts.249.w2", "model.layers.60.block_sparse_moe.experts.250.w2", "model.layers.60.block_sparse_moe.experts.251.w2", "model.layers.60.block_sparse_moe.experts.252.w2", "model.layers.60.block_sparse_moe.experts.253.w2", "model.layers.60.block_sparse_moe.experts.254.w2", "model.layers.60.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 3.0333176255226135e-05, "dbits": 1207959552 } ] }, { "idx": 305, "layers": [ "model.layers.61.self_attn.q_proj" ], "candidates": [ { "dkld": 1.9433349370948094e-05, "dbits": 18874368 } ] }, { "idx": 306, "layers": [ "model.layers.61.self_attn.k_proj", "model.layers.61.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00017458293586969376, "dbits": 6291456 } ] }, { "idx": 307, "layers": [ "model.layers.61.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0002414634451270159, "dbits": 18874368 } ] }, { "idx": 308, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w1", "model.layers.61.block_sparse_moe.experts.1.w1", "model.layers.61.block_sparse_moe.experts.2.w1", "model.layers.61.block_sparse_moe.experts.3.w1", "model.layers.61.block_sparse_moe.experts.4.w1", "model.layers.61.block_sparse_moe.experts.5.w1", "model.layers.61.block_sparse_moe.experts.6.w1", "model.layers.61.block_sparse_moe.experts.7.w1", "model.layers.61.block_sparse_moe.experts.8.w1", "model.layers.61.block_sparse_moe.experts.9.w1", "model.layers.61.block_sparse_moe.experts.10.w1", "model.layers.61.block_sparse_moe.experts.11.w1", "model.layers.61.block_sparse_moe.experts.12.w1", "model.layers.61.block_sparse_moe.experts.13.w1", "model.layers.61.block_sparse_moe.experts.14.w1", "model.layers.61.block_sparse_moe.experts.15.w1", "model.layers.61.block_sparse_moe.experts.16.w1", "model.layers.61.block_sparse_moe.experts.17.w1", "model.layers.61.block_sparse_moe.experts.18.w1", "model.layers.61.block_sparse_moe.experts.19.w1", "model.layers.61.block_sparse_moe.experts.20.w1", "model.layers.61.block_sparse_moe.experts.21.w1", "model.layers.61.block_sparse_moe.experts.22.w1", "model.layers.61.block_sparse_moe.experts.23.w1", "model.layers.61.block_sparse_moe.experts.24.w1", "model.layers.61.block_sparse_moe.experts.25.w1", "model.layers.61.block_sparse_moe.experts.26.w1", "model.layers.61.block_sparse_moe.experts.27.w1", "model.layers.61.block_sparse_moe.experts.28.w1", "model.layers.61.block_sparse_moe.experts.29.w1", "model.layers.61.block_sparse_moe.experts.30.w1", "model.layers.61.block_sparse_moe.experts.31.w1", "model.layers.61.block_sparse_moe.experts.32.w1", "model.layers.61.block_sparse_moe.experts.33.w1", "model.layers.61.block_sparse_moe.experts.34.w1", "model.layers.61.block_sparse_moe.experts.35.w1", "model.layers.61.block_sparse_moe.experts.36.w1", "model.layers.61.block_sparse_moe.experts.37.w1", "model.layers.61.block_sparse_moe.experts.38.w1", "model.layers.61.block_sparse_moe.experts.39.w1", "model.layers.61.block_sparse_moe.experts.40.w1", "model.layers.61.block_sparse_moe.experts.41.w1", "model.layers.61.block_sparse_moe.experts.42.w1", "model.layers.61.block_sparse_moe.experts.43.w1", "model.layers.61.block_sparse_moe.experts.44.w1", "model.layers.61.block_sparse_moe.experts.45.w1", "model.layers.61.block_sparse_moe.experts.46.w1", "model.layers.61.block_sparse_moe.experts.47.w1", "model.layers.61.block_sparse_moe.experts.48.w1", "model.layers.61.block_sparse_moe.experts.49.w1", "model.layers.61.block_sparse_moe.experts.50.w1", "model.layers.61.block_sparse_moe.experts.51.w1", "model.layers.61.block_sparse_moe.experts.52.w1", "model.layers.61.block_sparse_moe.experts.53.w1", "model.layers.61.block_sparse_moe.experts.54.w1", "model.layers.61.block_sparse_moe.experts.55.w1", "model.layers.61.block_sparse_moe.experts.56.w1", "model.layers.61.block_sparse_moe.experts.57.w1", "model.layers.61.block_sparse_moe.experts.58.w1", "model.layers.61.block_sparse_moe.experts.59.w1", "model.layers.61.block_sparse_moe.experts.60.w1", "model.layers.61.block_sparse_moe.experts.61.w1", "model.layers.61.block_sparse_moe.experts.62.w1", "model.layers.61.block_sparse_moe.experts.63.w1", "model.layers.61.block_sparse_moe.experts.64.w1", "model.layers.61.block_sparse_moe.experts.65.w1", "model.layers.61.block_sparse_moe.experts.66.w1", "model.layers.61.block_sparse_moe.experts.67.w1", "model.layers.61.block_sparse_moe.experts.68.w1", "model.layers.61.block_sparse_moe.experts.69.w1", "model.layers.61.block_sparse_moe.experts.70.w1", "model.layers.61.block_sparse_moe.experts.71.w1", "model.layers.61.block_sparse_moe.experts.72.w1", "model.layers.61.block_sparse_moe.experts.73.w1", "model.layers.61.block_sparse_moe.experts.74.w1", "model.layers.61.block_sparse_moe.experts.75.w1", "model.layers.61.block_sparse_moe.experts.76.w1", "model.layers.61.block_sparse_moe.experts.77.w1", "model.layers.61.block_sparse_moe.experts.78.w1", "model.layers.61.block_sparse_moe.experts.79.w1", "model.layers.61.block_sparse_moe.experts.80.w1", "model.layers.61.block_sparse_moe.experts.81.w1", "model.layers.61.block_sparse_moe.experts.82.w1", "model.layers.61.block_sparse_moe.experts.83.w1", "model.layers.61.block_sparse_moe.experts.84.w1", "model.layers.61.block_sparse_moe.experts.85.w1", "model.layers.61.block_sparse_moe.experts.86.w1", "model.layers.61.block_sparse_moe.experts.87.w1", "model.layers.61.block_sparse_moe.experts.88.w1", "model.layers.61.block_sparse_moe.experts.89.w1", "model.layers.61.block_sparse_moe.experts.90.w1", "model.layers.61.block_sparse_moe.experts.91.w1", "model.layers.61.block_sparse_moe.experts.92.w1", "model.layers.61.block_sparse_moe.experts.93.w1", "model.layers.61.block_sparse_moe.experts.94.w1", "model.layers.61.block_sparse_moe.experts.95.w1", "model.layers.61.block_sparse_moe.experts.96.w1", "model.layers.61.block_sparse_moe.experts.97.w1", "model.layers.61.block_sparse_moe.experts.98.w1", "model.layers.61.block_sparse_moe.experts.99.w1", "model.layers.61.block_sparse_moe.experts.100.w1", "model.layers.61.block_sparse_moe.experts.101.w1", "model.layers.61.block_sparse_moe.experts.102.w1", "model.layers.61.block_sparse_moe.experts.103.w1", "model.layers.61.block_sparse_moe.experts.104.w1", "model.layers.61.block_sparse_moe.experts.105.w1", "model.layers.61.block_sparse_moe.experts.106.w1", "model.layers.61.block_sparse_moe.experts.107.w1", "model.layers.61.block_sparse_moe.experts.108.w1", "model.layers.61.block_sparse_moe.experts.109.w1", "model.layers.61.block_sparse_moe.experts.110.w1", "model.layers.61.block_sparse_moe.experts.111.w1", "model.layers.61.block_sparse_moe.experts.112.w1", "model.layers.61.block_sparse_moe.experts.113.w1", "model.layers.61.block_sparse_moe.experts.114.w1", "model.layers.61.block_sparse_moe.experts.115.w1", "model.layers.61.block_sparse_moe.experts.116.w1", "model.layers.61.block_sparse_moe.experts.117.w1", "model.layers.61.block_sparse_moe.experts.118.w1", "model.layers.61.block_sparse_moe.experts.119.w1", "model.layers.61.block_sparse_moe.experts.120.w1", "model.layers.61.block_sparse_moe.experts.121.w1", "model.layers.61.block_sparse_moe.experts.122.w1", "model.layers.61.block_sparse_moe.experts.123.w1", "model.layers.61.block_sparse_moe.experts.124.w1", "model.layers.61.block_sparse_moe.experts.125.w1", "model.layers.61.block_sparse_moe.experts.126.w1", "model.layers.61.block_sparse_moe.experts.127.w1", "model.layers.61.block_sparse_moe.experts.128.w1", "model.layers.61.block_sparse_moe.experts.129.w1", "model.layers.61.block_sparse_moe.experts.130.w1", "model.layers.61.block_sparse_moe.experts.131.w1", "model.layers.61.block_sparse_moe.experts.132.w1", "model.layers.61.block_sparse_moe.experts.133.w1", "model.layers.61.block_sparse_moe.experts.134.w1", "model.layers.61.block_sparse_moe.experts.135.w1", "model.layers.61.block_sparse_moe.experts.136.w1", "model.layers.61.block_sparse_moe.experts.137.w1", "model.layers.61.block_sparse_moe.experts.138.w1", "model.layers.61.block_sparse_moe.experts.139.w1", "model.layers.61.block_sparse_moe.experts.140.w1", "model.layers.61.block_sparse_moe.experts.141.w1", "model.layers.61.block_sparse_moe.experts.142.w1", "model.layers.61.block_sparse_moe.experts.143.w1", "model.layers.61.block_sparse_moe.experts.144.w1", "model.layers.61.block_sparse_moe.experts.145.w1", "model.layers.61.block_sparse_moe.experts.146.w1", "model.layers.61.block_sparse_moe.experts.147.w1", "model.layers.61.block_sparse_moe.experts.148.w1", "model.layers.61.block_sparse_moe.experts.149.w1", "model.layers.61.block_sparse_moe.experts.150.w1", "model.layers.61.block_sparse_moe.experts.151.w1", "model.layers.61.block_sparse_moe.experts.152.w1", "model.layers.61.block_sparse_moe.experts.153.w1", "model.layers.61.block_sparse_moe.experts.154.w1", "model.layers.61.block_sparse_moe.experts.155.w1", "model.layers.61.block_sparse_moe.experts.156.w1", "model.layers.61.block_sparse_moe.experts.157.w1", "model.layers.61.block_sparse_moe.experts.158.w1", "model.layers.61.block_sparse_moe.experts.159.w1", "model.layers.61.block_sparse_moe.experts.160.w1", "model.layers.61.block_sparse_moe.experts.161.w1", "model.layers.61.block_sparse_moe.experts.162.w1", "model.layers.61.block_sparse_moe.experts.163.w1", "model.layers.61.block_sparse_moe.experts.164.w1", "model.layers.61.block_sparse_moe.experts.165.w1", "model.layers.61.block_sparse_moe.experts.166.w1", "model.layers.61.block_sparse_moe.experts.167.w1", "model.layers.61.block_sparse_moe.experts.168.w1", "model.layers.61.block_sparse_moe.experts.169.w1", "model.layers.61.block_sparse_moe.experts.170.w1", "model.layers.61.block_sparse_moe.experts.171.w1", "model.layers.61.block_sparse_moe.experts.172.w1", "model.layers.61.block_sparse_moe.experts.173.w1", "model.layers.61.block_sparse_moe.experts.174.w1", "model.layers.61.block_sparse_moe.experts.175.w1", "model.layers.61.block_sparse_moe.experts.176.w1", "model.layers.61.block_sparse_moe.experts.177.w1", "model.layers.61.block_sparse_moe.experts.178.w1", "model.layers.61.block_sparse_moe.experts.179.w1", "model.layers.61.block_sparse_moe.experts.180.w1", "model.layers.61.block_sparse_moe.experts.181.w1", "model.layers.61.block_sparse_moe.experts.182.w1", "model.layers.61.block_sparse_moe.experts.183.w1", "model.layers.61.block_sparse_moe.experts.184.w1", "model.layers.61.block_sparse_moe.experts.185.w1", "model.layers.61.block_sparse_moe.experts.186.w1", "model.layers.61.block_sparse_moe.experts.187.w1", "model.layers.61.block_sparse_moe.experts.188.w1", "model.layers.61.block_sparse_moe.experts.189.w1", "model.layers.61.block_sparse_moe.experts.190.w1", "model.layers.61.block_sparse_moe.experts.191.w1", "model.layers.61.block_sparse_moe.experts.192.w1", "model.layers.61.block_sparse_moe.experts.193.w1", "model.layers.61.block_sparse_moe.experts.194.w1", "model.layers.61.block_sparse_moe.experts.195.w1", "model.layers.61.block_sparse_moe.experts.196.w1", "model.layers.61.block_sparse_moe.experts.197.w1", "model.layers.61.block_sparse_moe.experts.198.w1", "model.layers.61.block_sparse_moe.experts.199.w1", "model.layers.61.block_sparse_moe.experts.200.w1", "model.layers.61.block_sparse_moe.experts.201.w1", "model.layers.61.block_sparse_moe.experts.202.w1", "model.layers.61.block_sparse_moe.experts.203.w1", "model.layers.61.block_sparse_moe.experts.204.w1", "model.layers.61.block_sparse_moe.experts.205.w1", "model.layers.61.block_sparse_moe.experts.206.w1", "model.layers.61.block_sparse_moe.experts.207.w1", "model.layers.61.block_sparse_moe.experts.208.w1", "model.layers.61.block_sparse_moe.experts.209.w1", "model.layers.61.block_sparse_moe.experts.210.w1", "model.layers.61.block_sparse_moe.experts.211.w1", "model.layers.61.block_sparse_moe.experts.212.w1", "model.layers.61.block_sparse_moe.experts.213.w1", "model.layers.61.block_sparse_moe.experts.214.w1", "model.layers.61.block_sparse_moe.experts.215.w1", "model.layers.61.block_sparse_moe.experts.216.w1", "model.layers.61.block_sparse_moe.experts.217.w1", "model.layers.61.block_sparse_moe.experts.218.w1", "model.layers.61.block_sparse_moe.experts.219.w1", "model.layers.61.block_sparse_moe.experts.220.w1", "model.layers.61.block_sparse_moe.experts.221.w1", "model.layers.61.block_sparse_moe.experts.222.w1", "model.layers.61.block_sparse_moe.experts.223.w1", "model.layers.61.block_sparse_moe.experts.224.w1", "model.layers.61.block_sparse_moe.experts.225.w1", "model.layers.61.block_sparse_moe.experts.226.w1", "model.layers.61.block_sparse_moe.experts.227.w1", "model.layers.61.block_sparse_moe.experts.228.w1", "model.layers.61.block_sparse_moe.experts.229.w1", "model.layers.61.block_sparse_moe.experts.230.w1", "model.layers.61.block_sparse_moe.experts.231.w1", "model.layers.61.block_sparse_moe.experts.232.w1", "model.layers.61.block_sparse_moe.experts.233.w1", "model.layers.61.block_sparse_moe.experts.234.w1", "model.layers.61.block_sparse_moe.experts.235.w1", "model.layers.61.block_sparse_moe.experts.236.w1", "model.layers.61.block_sparse_moe.experts.237.w1", "model.layers.61.block_sparse_moe.experts.238.w1", "model.layers.61.block_sparse_moe.experts.239.w1", "model.layers.61.block_sparse_moe.experts.240.w1", "model.layers.61.block_sparse_moe.experts.241.w1", "model.layers.61.block_sparse_moe.experts.242.w1", "model.layers.61.block_sparse_moe.experts.243.w1", "model.layers.61.block_sparse_moe.experts.244.w1", "model.layers.61.block_sparse_moe.experts.245.w1", "model.layers.61.block_sparse_moe.experts.246.w1", "model.layers.61.block_sparse_moe.experts.247.w1", "model.layers.61.block_sparse_moe.experts.248.w1", "model.layers.61.block_sparse_moe.experts.249.w1", "model.layers.61.block_sparse_moe.experts.250.w1", "model.layers.61.block_sparse_moe.experts.251.w1", "model.layers.61.block_sparse_moe.experts.252.w1", "model.layers.61.block_sparse_moe.experts.253.w1", "model.layers.61.block_sparse_moe.experts.254.w1", "model.layers.61.block_sparse_moe.experts.255.w1", "model.layers.61.block_sparse_moe.experts.0.w3", "model.layers.61.block_sparse_moe.experts.1.w3", "model.layers.61.block_sparse_moe.experts.2.w3", "model.layers.61.block_sparse_moe.experts.3.w3", "model.layers.61.block_sparse_moe.experts.4.w3", "model.layers.61.block_sparse_moe.experts.5.w3", "model.layers.61.block_sparse_moe.experts.6.w3", "model.layers.61.block_sparse_moe.experts.7.w3", "model.layers.61.block_sparse_moe.experts.8.w3", "model.layers.61.block_sparse_moe.experts.9.w3", "model.layers.61.block_sparse_moe.experts.10.w3", "model.layers.61.block_sparse_moe.experts.11.w3", "model.layers.61.block_sparse_moe.experts.12.w3", "model.layers.61.block_sparse_moe.experts.13.w3", "model.layers.61.block_sparse_moe.experts.14.w3", "model.layers.61.block_sparse_moe.experts.15.w3", "model.layers.61.block_sparse_moe.experts.16.w3", "model.layers.61.block_sparse_moe.experts.17.w3", "model.layers.61.block_sparse_moe.experts.18.w3", "model.layers.61.block_sparse_moe.experts.19.w3", "model.layers.61.block_sparse_moe.experts.20.w3", "model.layers.61.block_sparse_moe.experts.21.w3", "model.layers.61.block_sparse_moe.experts.22.w3", "model.layers.61.block_sparse_moe.experts.23.w3", "model.layers.61.block_sparse_moe.experts.24.w3", "model.layers.61.block_sparse_moe.experts.25.w3", "model.layers.61.block_sparse_moe.experts.26.w3", "model.layers.61.block_sparse_moe.experts.27.w3", "model.layers.61.block_sparse_moe.experts.28.w3", "model.layers.61.block_sparse_moe.experts.29.w3", "model.layers.61.block_sparse_moe.experts.30.w3", "model.layers.61.block_sparse_moe.experts.31.w3", "model.layers.61.block_sparse_moe.experts.32.w3", "model.layers.61.block_sparse_moe.experts.33.w3", "model.layers.61.block_sparse_moe.experts.34.w3", "model.layers.61.block_sparse_moe.experts.35.w3", "model.layers.61.block_sparse_moe.experts.36.w3", "model.layers.61.block_sparse_moe.experts.37.w3", "model.layers.61.block_sparse_moe.experts.38.w3", "model.layers.61.block_sparse_moe.experts.39.w3", "model.layers.61.block_sparse_moe.experts.40.w3", "model.layers.61.block_sparse_moe.experts.41.w3", "model.layers.61.block_sparse_moe.experts.42.w3", "model.layers.61.block_sparse_moe.experts.43.w3", "model.layers.61.block_sparse_moe.experts.44.w3", "model.layers.61.block_sparse_moe.experts.45.w3", "model.layers.61.block_sparse_moe.experts.46.w3", "model.layers.61.block_sparse_moe.experts.47.w3", "model.layers.61.block_sparse_moe.experts.48.w3", "model.layers.61.block_sparse_moe.experts.49.w3", "model.layers.61.block_sparse_moe.experts.50.w3", "model.layers.61.block_sparse_moe.experts.51.w3", "model.layers.61.block_sparse_moe.experts.52.w3", "model.layers.61.block_sparse_moe.experts.53.w3", "model.layers.61.block_sparse_moe.experts.54.w3", "model.layers.61.block_sparse_moe.experts.55.w3", "model.layers.61.block_sparse_moe.experts.56.w3", "model.layers.61.block_sparse_moe.experts.57.w3", "model.layers.61.block_sparse_moe.experts.58.w3", "model.layers.61.block_sparse_moe.experts.59.w3", "model.layers.61.block_sparse_moe.experts.60.w3", "model.layers.61.block_sparse_moe.experts.61.w3", "model.layers.61.block_sparse_moe.experts.62.w3", "model.layers.61.block_sparse_moe.experts.63.w3", "model.layers.61.block_sparse_moe.experts.64.w3", "model.layers.61.block_sparse_moe.experts.65.w3", "model.layers.61.block_sparse_moe.experts.66.w3", "model.layers.61.block_sparse_moe.experts.67.w3", "model.layers.61.block_sparse_moe.experts.68.w3", "model.layers.61.block_sparse_moe.experts.69.w3", "model.layers.61.block_sparse_moe.experts.70.w3", "model.layers.61.block_sparse_moe.experts.71.w3", "model.layers.61.block_sparse_moe.experts.72.w3", "model.layers.61.block_sparse_moe.experts.73.w3", "model.layers.61.block_sparse_moe.experts.74.w3", "model.layers.61.block_sparse_moe.experts.75.w3", "model.layers.61.block_sparse_moe.experts.76.w3", "model.layers.61.block_sparse_moe.experts.77.w3", "model.layers.61.block_sparse_moe.experts.78.w3", "model.layers.61.block_sparse_moe.experts.79.w3", "model.layers.61.block_sparse_moe.experts.80.w3", "model.layers.61.block_sparse_moe.experts.81.w3", "model.layers.61.block_sparse_moe.experts.82.w3", "model.layers.61.block_sparse_moe.experts.83.w3", "model.layers.61.block_sparse_moe.experts.84.w3", "model.layers.61.block_sparse_moe.experts.85.w3", "model.layers.61.block_sparse_moe.experts.86.w3", "model.layers.61.block_sparse_moe.experts.87.w3", "model.layers.61.block_sparse_moe.experts.88.w3", "model.layers.61.block_sparse_moe.experts.89.w3", "model.layers.61.block_sparse_moe.experts.90.w3", "model.layers.61.block_sparse_moe.experts.91.w3", "model.layers.61.block_sparse_moe.experts.92.w3", "model.layers.61.block_sparse_moe.experts.93.w3", "model.layers.61.block_sparse_moe.experts.94.w3", "model.layers.61.block_sparse_moe.experts.95.w3", "model.layers.61.block_sparse_moe.experts.96.w3", "model.layers.61.block_sparse_moe.experts.97.w3", "model.layers.61.block_sparse_moe.experts.98.w3", "model.layers.61.block_sparse_moe.experts.99.w3", "model.layers.61.block_sparse_moe.experts.100.w3", "model.layers.61.block_sparse_moe.experts.101.w3", "model.layers.61.block_sparse_moe.experts.102.w3", "model.layers.61.block_sparse_moe.experts.103.w3", "model.layers.61.block_sparse_moe.experts.104.w3", "model.layers.61.block_sparse_moe.experts.105.w3", "model.layers.61.block_sparse_moe.experts.106.w3", "model.layers.61.block_sparse_moe.experts.107.w3", "model.layers.61.block_sparse_moe.experts.108.w3", "model.layers.61.block_sparse_moe.experts.109.w3", "model.layers.61.block_sparse_moe.experts.110.w3", "model.layers.61.block_sparse_moe.experts.111.w3", "model.layers.61.block_sparse_moe.experts.112.w3", "model.layers.61.block_sparse_moe.experts.113.w3", "model.layers.61.block_sparse_moe.experts.114.w3", "model.layers.61.block_sparse_moe.experts.115.w3", "model.layers.61.block_sparse_moe.experts.116.w3", "model.layers.61.block_sparse_moe.experts.117.w3", "model.layers.61.block_sparse_moe.experts.118.w3", "model.layers.61.block_sparse_moe.experts.119.w3", "model.layers.61.block_sparse_moe.experts.120.w3", "model.layers.61.block_sparse_moe.experts.121.w3", "model.layers.61.block_sparse_moe.experts.122.w3", "model.layers.61.block_sparse_moe.experts.123.w3", "model.layers.61.block_sparse_moe.experts.124.w3", "model.layers.61.block_sparse_moe.experts.125.w3", "model.layers.61.block_sparse_moe.experts.126.w3", "model.layers.61.block_sparse_moe.experts.127.w3", "model.layers.61.block_sparse_moe.experts.128.w3", "model.layers.61.block_sparse_moe.experts.129.w3", "model.layers.61.block_sparse_moe.experts.130.w3", "model.layers.61.block_sparse_moe.experts.131.w3", "model.layers.61.block_sparse_moe.experts.132.w3", "model.layers.61.block_sparse_moe.experts.133.w3", "model.layers.61.block_sparse_moe.experts.134.w3", "model.layers.61.block_sparse_moe.experts.135.w3", "model.layers.61.block_sparse_moe.experts.136.w3", "model.layers.61.block_sparse_moe.experts.137.w3", "model.layers.61.block_sparse_moe.experts.138.w3", "model.layers.61.block_sparse_moe.experts.139.w3", "model.layers.61.block_sparse_moe.experts.140.w3", "model.layers.61.block_sparse_moe.experts.141.w3", "model.layers.61.block_sparse_moe.experts.142.w3", "model.layers.61.block_sparse_moe.experts.143.w3", "model.layers.61.block_sparse_moe.experts.144.w3", "model.layers.61.block_sparse_moe.experts.145.w3", "model.layers.61.block_sparse_moe.experts.146.w3", "model.layers.61.block_sparse_moe.experts.147.w3", "model.layers.61.block_sparse_moe.experts.148.w3", "model.layers.61.block_sparse_moe.experts.149.w3", "model.layers.61.block_sparse_moe.experts.150.w3", "model.layers.61.block_sparse_moe.experts.151.w3", "model.layers.61.block_sparse_moe.experts.152.w3", "model.layers.61.block_sparse_moe.experts.153.w3", "model.layers.61.block_sparse_moe.experts.154.w3", "model.layers.61.block_sparse_moe.experts.155.w3", "model.layers.61.block_sparse_moe.experts.156.w3", "model.layers.61.block_sparse_moe.experts.157.w3", "model.layers.61.block_sparse_moe.experts.158.w3", "model.layers.61.block_sparse_moe.experts.159.w3", "model.layers.61.block_sparse_moe.experts.160.w3", "model.layers.61.block_sparse_moe.experts.161.w3", "model.layers.61.block_sparse_moe.experts.162.w3", "model.layers.61.block_sparse_moe.experts.163.w3", "model.layers.61.block_sparse_moe.experts.164.w3", "model.layers.61.block_sparse_moe.experts.165.w3", "model.layers.61.block_sparse_moe.experts.166.w3", "model.layers.61.block_sparse_moe.experts.167.w3", "model.layers.61.block_sparse_moe.experts.168.w3", "model.layers.61.block_sparse_moe.experts.169.w3", "model.layers.61.block_sparse_moe.experts.170.w3", "model.layers.61.block_sparse_moe.experts.171.w3", "model.layers.61.block_sparse_moe.experts.172.w3", "model.layers.61.block_sparse_moe.experts.173.w3", "model.layers.61.block_sparse_moe.experts.174.w3", "model.layers.61.block_sparse_moe.experts.175.w3", "model.layers.61.block_sparse_moe.experts.176.w3", "model.layers.61.block_sparse_moe.experts.177.w3", "model.layers.61.block_sparse_moe.experts.178.w3", "model.layers.61.block_sparse_moe.experts.179.w3", "model.layers.61.block_sparse_moe.experts.180.w3", "model.layers.61.block_sparse_moe.experts.181.w3", "model.layers.61.block_sparse_moe.experts.182.w3", "model.layers.61.block_sparse_moe.experts.183.w3", "model.layers.61.block_sparse_moe.experts.184.w3", "model.layers.61.block_sparse_moe.experts.185.w3", "model.layers.61.block_sparse_moe.experts.186.w3", "model.layers.61.block_sparse_moe.experts.187.w3", "model.layers.61.block_sparse_moe.experts.188.w3", "model.layers.61.block_sparse_moe.experts.189.w3", "model.layers.61.block_sparse_moe.experts.190.w3", "model.layers.61.block_sparse_moe.experts.191.w3", "model.layers.61.block_sparse_moe.experts.192.w3", "model.layers.61.block_sparse_moe.experts.193.w3", "model.layers.61.block_sparse_moe.experts.194.w3", "model.layers.61.block_sparse_moe.experts.195.w3", "model.layers.61.block_sparse_moe.experts.196.w3", "model.layers.61.block_sparse_moe.experts.197.w3", "model.layers.61.block_sparse_moe.experts.198.w3", "model.layers.61.block_sparse_moe.experts.199.w3", "model.layers.61.block_sparse_moe.experts.200.w3", "model.layers.61.block_sparse_moe.experts.201.w3", "model.layers.61.block_sparse_moe.experts.202.w3", "model.layers.61.block_sparse_moe.experts.203.w3", "model.layers.61.block_sparse_moe.experts.204.w3", "model.layers.61.block_sparse_moe.experts.205.w3", "model.layers.61.block_sparse_moe.experts.206.w3", "model.layers.61.block_sparse_moe.experts.207.w3", "model.layers.61.block_sparse_moe.experts.208.w3", "model.layers.61.block_sparse_moe.experts.209.w3", "model.layers.61.block_sparse_moe.experts.210.w3", "model.layers.61.block_sparse_moe.experts.211.w3", "model.layers.61.block_sparse_moe.experts.212.w3", "model.layers.61.block_sparse_moe.experts.213.w3", "model.layers.61.block_sparse_moe.experts.214.w3", "model.layers.61.block_sparse_moe.experts.215.w3", "model.layers.61.block_sparse_moe.experts.216.w3", "model.layers.61.block_sparse_moe.experts.217.w3", "model.layers.61.block_sparse_moe.experts.218.w3", "model.layers.61.block_sparse_moe.experts.219.w3", "model.layers.61.block_sparse_moe.experts.220.w3", "model.layers.61.block_sparse_moe.experts.221.w3", "model.layers.61.block_sparse_moe.experts.222.w3", "model.layers.61.block_sparse_moe.experts.223.w3", "model.layers.61.block_sparse_moe.experts.224.w3", "model.layers.61.block_sparse_moe.experts.225.w3", "model.layers.61.block_sparse_moe.experts.226.w3", "model.layers.61.block_sparse_moe.experts.227.w3", "model.layers.61.block_sparse_moe.experts.228.w3", "model.layers.61.block_sparse_moe.experts.229.w3", "model.layers.61.block_sparse_moe.experts.230.w3", "model.layers.61.block_sparse_moe.experts.231.w3", "model.layers.61.block_sparse_moe.experts.232.w3", "model.layers.61.block_sparse_moe.experts.233.w3", "model.layers.61.block_sparse_moe.experts.234.w3", "model.layers.61.block_sparse_moe.experts.235.w3", "model.layers.61.block_sparse_moe.experts.236.w3", "model.layers.61.block_sparse_moe.experts.237.w3", "model.layers.61.block_sparse_moe.experts.238.w3", "model.layers.61.block_sparse_moe.experts.239.w3", "model.layers.61.block_sparse_moe.experts.240.w3", "model.layers.61.block_sparse_moe.experts.241.w3", "model.layers.61.block_sparse_moe.experts.242.w3", "model.layers.61.block_sparse_moe.experts.243.w3", "model.layers.61.block_sparse_moe.experts.244.w3", "model.layers.61.block_sparse_moe.experts.245.w3", "model.layers.61.block_sparse_moe.experts.246.w3", "model.layers.61.block_sparse_moe.experts.247.w3", "model.layers.61.block_sparse_moe.experts.248.w3", "model.layers.61.block_sparse_moe.experts.249.w3", "model.layers.61.block_sparse_moe.experts.250.w3", "model.layers.61.block_sparse_moe.experts.251.w3", "model.layers.61.block_sparse_moe.experts.252.w3", "model.layers.61.block_sparse_moe.experts.253.w3", "model.layers.61.block_sparse_moe.experts.254.w3", "model.layers.61.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00016367193311452033, "dbits": 2415919104 } ] }, { "idx": 309, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w2", "model.layers.61.block_sparse_moe.experts.1.w2", "model.layers.61.block_sparse_moe.experts.2.w2", "model.layers.61.block_sparse_moe.experts.3.w2", "model.layers.61.block_sparse_moe.experts.4.w2", "model.layers.61.block_sparse_moe.experts.5.w2", "model.layers.61.block_sparse_moe.experts.6.w2", "model.layers.61.block_sparse_moe.experts.7.w2", "model.layers.61.block_sparse_moe.experts.8.w2", "model.layers.61.block_sparse_moe.experts.9.w2", "model.layers.61.block_sparse_moe.experts.10.w2", "model.layers.61.block_sparse_moe.experts.11.w2", "model.layers.61.block_sparse_moe.experts.12.w2", "model.layers.61.block_sparse_moe.experts.13.w2", "model.layers.61.block_sparse_moe.experts.14.w2", "model.layers.61.block_sparse_moe.experts.15.w2", "model.layers.61.block_sparse_moe.experts.16.w2", "model.layers.61.block_sparse_moe.experts.17.w2", "model.layers.61.block_sparse_moe.experts.18.w2", "model.layers.61.block_sparse_moe.experts.19.w2", "model.layers.61.block_sparse_moe.experts.20.w2", "model.layers.61.block_sparse_moe.experts.21.w2", "model.layers.61.block_sparse_moe.experts.22.w2", "model.layers.61.block_sparse_moe.experts.23.w2", "model.layers.61.block_sparse_moe.experts.24.w2", "model.layers.61.block_sparse_moe.experts.25.w2", "model.layers.61.block_sparse_moe.experts.26.w2", "model.layers.61.block_sparse_moe.experts.27.w2", "model.layers.61.block_sparse_moe.experts.28.w2", "model.layers.61.block_sparse_moe.experts.29.w2", "model.layers.61.block_sparse_moe.experts.30.w2", "model.layers.61.block_sparse_moe.experts.31.w2", "model.layers.61.block_sparse_moe.experts.32.w2", "model.layers.61.block_sparse_moe.experts.33.w2", "model.layers.61.block_sparse_moe.experts.34.w2", "model.layers.61.block_sparse_moe.experts.35.w2", "model.layers.61.block_sparse_moe.experts.36.w2", "model.layers.61.block_sparse_moe.experts.37.w2", "model.layers.61.block_sparse_moe.experts.38.w2", "model.layers.61.block_sparse_moe.experts.39.w2", "model.layers.61.block_sparse_moe.experts.40.w2", "model.layers.61.block_sparse_moe.experts.41.w2", "model.layers.61.block_sparse_moe.experts.42.w2", "model.layers.61.block_sparse_moe.experts.43.w2", "model.layers.61.block_sparse_moe.experts.44.w2", "model.layers.61.block_sparse_moe.experts.45.w2", "model.layers.61.block_sparse_moe.experts.46.w2", "model.layers.61.block_sparse_moe.experts.47.w2", "model.layers.61.block_sparse_moe.experts.48.w2", "model.layers.61.block_sparse_moe.experts.49.w2", "model.layers.61.block_sparse_moe.experts.50.w2", "model.layers.61.block_sparse_moe.experts.51.w2", "model.layers.61.block_sparse_moe.experts.52.w2", "model.layers.61.block_sparse_moe.experts.53.w2", "model.layers.61.block_sparse_moe.experts.54.w2", "model.layers.61.block_sparse_moe.experts.55.w2", "model.layers.61.block_sparse_moe.experts.56.w2", "model.layers.61.block_sparse_moe.experts.57.w2", "model.layers.61.block_sparse_moe.experts.58.w2", "model.layers.61.block_sparse_moe.experts.59.w2", "model.layers.61.block_sparse_moe.experts.60.w2", "model.layers.61.block_sparse_moe.experts.61.w2", "model.layers.61.block_sparse_moe.experts.62.w2", "model.layers.61.block_sparse_moe.experts.63.w2", "model.layers.61.block_sparse_moe.experts.64.w2", "model.layers.61.block_sparse_moe.experts.65.w2", "model.layers.61.block_sparse_moe.experts.66.w2", "model.layers.61.block_sparse_moe.experts.67.w2", "model.layers.61.block_sparse_moe.experts.68.w2", "model.layers.61.block_sparse_moe.experts.69.w2", "model.layers.61.block_sparse_moe.experts.70.w2", "model.layers.61.block_sparse_moe.experts.71.w2", "model.layers.61.block_sparse_moe.experts.72.w2", "model.layers.61.block_sparse_moe.experts.73.w2", "model.layers.61.block_sparse_moe.experts.74.w2", "model.layers.61.block_sparse_moe.experts.75.w2", "model.layers.61.block_sparse_moe.experts.76.w2", "model.layers.61.block_sparse_moe.experts.77.w2", "model.layers.61.block_sparse_moe.experts.78.w2", "model.layers.61.block_sparse_moe.experts.79.w2", "model.layers.61.block_sparse_moe.experts.80.w2", "model.layers.61.block_sparse_moe.experts.81.w2", "model.layers.61.block_sparse_moe.experts.82.w2", "model.layers.61.block_sparse_moe.experts.83.w2", "model.layers.61.block_sparse_moe.experts.84.w2", "model.layers.61.block_sparse_moe.experts.85.w2", "model.layers.61.block_sparse_moe.experts.86.w2", "model.layers.61.block_sparse_moe.experts.87.w2", "model.layers.61.block_sparse_moe.experts.88.w2", "model.layers.61.block_sparse_moe.experts.89.w2", "model.layers.61.block_sparse_moe.experts.90.w2", "model.layers.61.block_sparse_moe.experts.91.w2", "model.layers.61.block_sparse_moe.experts.92.w2", "model.layers.61.block_sparse_moe.experts.93.w2", "model.layers.61.block_sparse_moe.experts.94.w2", "model.layers.61.block_sparse_moe.experts.95.w2", "model.layers.61.block_sparse_moe.experts.96.w2", "model.layers.61.block_sparse_moe.experts.97.w2", "model.layers.61.block_sparse_moe.experts.98.w2", "model.layers.61.block_sparse_moe.experts.99.w2", "model.layers.61.block_sparse_moe.experts.100.w2", "model.layers.61.block_sparse_moe.experts.101.w2", "model.layers.61.block_sparse_moe.experts.102.w2", "model.layers.61.block_sparse_moe.experts.103.w2", "model.layers.61.block_sparse_moe.experts.104.w2", "model.layers.61.block_sparse_moe.experts.105.w2", "model.layers.61.block_sparse_moe.experts.106.w2", "model.layers.61.block_sparse_moe.experts.107.w2", "model.layers.61.block_sparse_moe.experts.108.w2", "model.layers.61.block_sparse_moe.experts.109.w2", "model.layers.61.block_sparse_moe.experts.110.w2", "model.layers.61.block_sparse_moe.experts.111.w2", "model.layers.61.block_sparse_moe.experts.112.w2", "model.layers.61.block_sparse_moe.experts.113.w2", "model.layers.61.block_sparse_moe.experts.114.w2", "model.layers.61.block_sparse_moe.experts.115.w2", "model.layers.61.block_sparse_moe.experts.116.w2", "model.layers.61.block_sparse_moe.experts.117.w2", "model.layers.61.block_sparse_moe.experts.118.w2", "model.layers.61.block_sparse_moe.experts.119.w2", "model.layers.61.block_sparse_moe.experts.120.w2", "model.layers.61.block_sparse_moe.experts.121.w2", "model.layers.61.block_sparse_moe.experts.122.w2", "model.layers.61.block_sparse_moe.experts.123.w2", "model.layers.61.block_sparse_moe.experts.124.w2", "model.layers.61.block_sparse_moe.experts.125.w2", "model.layers.61.block_sparse_moe.experts.126.w2", "model.layers.61.block_sparse_moe.experts.127.w2", "model.layers.61.block_sparse_moe.experts.128.w2", "model.layers.61.block_sparse_moe.experts.129.w2", "model.layers.61.block_sparse_moe.experts.130.w2", "model.layers.61.block_sparse_moe.experts.131.w2", "model.layers.61.block_sparse_moe.experts.132.w2", "model.layers.61.block_sparse_moe.experts.133.w2", "model.layers.61.block_sparse_moe.experts.134.w2", "model.layers.61.block_sparse_moe.experts.135.w2", "model.layers.61.block_sparse_moe.experts.136.w2", "model.layers.61.block_sparse_moe.experts.137.w2", "model.layers.61.block_sparse_moe.experts.138.w2", "model.layers.61.block_sparse_moe.experts.139.w2", "model.layers.61.block_sparse_moe.experts.140.w2", "model.layers.61.block_sparse_moe.experts.141.w2", "model.layers.61.block_sparse_moe.experts.142.w2", "model.layers.61.block_sparse_moe.experts.143.w2", "model.layers.61.block_sparse_moe.experts.144.w2", "model.layers.61.block_sparse_moe.experts.145.w2", "model.layers.61.block_sparse_moe.experts.146.w2", "model.layers.61.block_sparse_moe.experts.147.w2", "model.layers.61.block_sparse_moe.experts.148.w2", "model.layers.61.block_sparse_moe.experts.149.w2", "model.layers.61.block_sparse_moe.experts.150.w2", "model.layers.61.block_sparse_moe.experts.151.w2", "model.layers.61.block_sparse_moe.experts.152.w2", "model.layers.61.block_sparse_moe.experts.153.w2", "model.layers.61.block_sparse_moe.experts.154.w2", "model.layers.61.block_sparse_moe.experts.155.w2", "model.layers.61.block_sparse_moe.experts.156.w2", "model.layers.61.block_sparse_moe.experts.157.w2", "model.layers.61.block_sparse_moe.experts.158.w2", "model.layers.61.block_sparse_moe.experts.159.w2", "model.layers.61.block_sparse_moe.experts.160.w2", "model.layers.61.block_sparse_moe.experts.161.w2", "model.layers.61.block_sparse_moe.experts.162.w2", "model.layers.61.block_sparse_moe.experts.163.w2", "model.layers.61.block_sparse_moe.experts.164.w2", "model.layers.61.block_sparse_moe.experts.165.w2", "model.layers.61.block_sparse_moe.experts.166.w2", "model.layers.61.block_sparse_moe.experts.167.w2", "model.layers.61.block_sparse_moe.experts.168.w2", "model.layers.61.block_sparse_moe.experts.169.w2", "model.layers.61.block_sparse_moe.experts.170.w2", "model.layers.61.block_sparse_moe.experts.171.w2", "model.layers.61.block_sparse_moe.experts.172.w2", "model.layers.61.block_sparse_moe.experts.173.w2", "model.layers.61.block_sparse_moe.experts.174.w2", "model.layers.61.block_sparse_moe.experts.175.w2", "model.layers.61.block_sparse_moe.experts.176.w2", "model.layers.61.block_sparse_moe.experts.177.w2", "model.layers.61.block_sparse_moe.experts.178.w2", "model.layers.61.block_sparse_moe.experts.179.w2", "model.layers.61.block_sparse_moe.experts.180.w2", "model.layers.61.block_sparse_moe.experts.181.w2", "model.layers.61.block_sparse_moe.experts.182.w2", "model.layers.61.block_sparse_moe.experts.183.w2", "model.layers.61.block_sparse_moe.experts.184.w2", "model.layers.61.block_sparse_moe.experts.185.w2", "model.layers.61.block_sparse_moe.experts.186.w2", "model.layers.61.block_sparse_moe.experts.187.w2", "model.layers.61.block_sparse_moe.experts.188.w2", "model.layers.61.block_sparse_moe.experts.189.w2", "model.layers.61.block_sparse_moe.experts.190.w2", "model.layers.61.block_sparse_moe.experts.191.w2", "model.layers.61.block_sparse_moe.experts.192.w2", "model.layers.61.block_sparse_moe.experts.193.w2", "model.layers.61.block_sparse_moe.experts.194.w2", "model.layers.61.block_sparse_moe.experts.195.w2", "model.layers.61.block_sparse_moe.experts.196.w2", "model.layers.61.block_sparse_moe.experts.197.w2", "model.layers.61.block_sparse_moe.experts.198.w2", "model.layers.61.block_sparse_moe.experts.199.w2", "model.layers.61.block_sparse_moe.experts.200.w2", "model.layers.61.block_sparse_moe.experts.201.w2", "model.layers.61.block_sparse_moe.experts.202.w2", "model.layers.61.block_sparse_moe.experts.203.w2", "model.layers.61.block_sparse_moe.experts.204.w2", "model.layers.61.block_sparse_moe.experts.205.w2", "model.layers.61.block_sparse_moe.experts.206.w2", "model.layers.61.block_sparse_moe.experts.207.w2", "model.layers.61.block_sparse_moe.experts.208.w2", "model.layers.61.block_sparse_moe.experts.209.w2", "model.layers.61.block_sparse_moe.experts.210.w2", "model.layers.61.block_sparse_moe.experts.211.w2", "model.layers.61.block_sparse_moe.experts.212.w2", "model.layers.61.block_sparse_moe.experts.213.w2", "model.layers.61.block_sparse_moe.experts.214.w2", "model.layers.61.block_sparse_moe.experts.215.w2", "model.layers.61.block_sparse_moe.experts.216.w2", "model.layers.61.block_sparse_moe.experts.217.w2", "model.layers.61.block_sparse_moe.experts.218.w2", "model.layers.61.block_sparse_moe.experts.219.w2", "model.layers.61.block_sparse_moe.experts.220.w2", "model.layers.61.block_sparse_moe.experts.221.w2", "model.layers.61.block_sparse_moe.experts.222.w2", "model.layers.61.block_sparse_moe.experts.223.w2", "model.layers.61.block_sparse_moe.experts.224.w2", "model.layers.61.block_sparse_moe.experts.225.w2", "model.layers.61.block_sparse_moe.experts.226.w2", "model.layers.61.block_sparse_moe.experts.227.w2", "model.layers.61.block_sparse_moe.experts.228.w2", "model.layers.61.block_sparse_moe.experts.229.w2", "model.layers.61.block_sparse_moe.experts.230.w2", "model.layers.61.block_sparse_moe.experts.231.w2", "model.layers.61.block_sparse_moe.experts.232.w2", "model.layers.61.block_sparse_moe.experts.233.w2", "model.layers.61.block_sparse_moe.experts.234.w2", "model.layers.61.block_sparse_moe.experts.235.w2", "model.layers.61.block_sparse_moe.experts.236.w2", "model.layers.61.block_sparse_moe.experts.237.w2", "model.layers.61.block_sparse_moe.experts.238.w2", "model.layers.61.block_sparse_moe.experts.239.w2", "model.layers.61.block_sparse_moe.experts.240.w2", "model.layers.61.block_sparse_moe.experts.241.w2", "model.layers.61.block_sparse_moe.experts.242.w2", "model.layers.61.block_sparse_moe.experts.243.w2", "model.layers.61.block_sparse_moe.experts.244.w2", "model.layers.61.block_sparse_moe.experts.245.w2", "model.layers.61.block_sparse_moe.experts.246.w2", "model.layers.61.block_sparse_moe.experts.247.w2", "model.layers.61.block_sparse_moe.experts.248.w2", "model.layers.61.block_sparse_moe.experts.249.w2", "model.layers.61.block_sparse_moe.experts.250.w2", "model.layers.61.block_sparse_moe.experts.251.w2", "model.layers.61.block_sparse_moe.experts.252.w2", "model.layers.61.block_sparse_moe.experts.253.w2", "model.layers.61.block_sparse_moe.experts.254.w2", "model.layers.61.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -3.895536065098848e-06, "dbits": 1207959552 } ] } ], "base_kld": 0.10264693032950163, "arch_string": "MiniMaxM2ForCausalLM" }