Upload folder using huggingface_hub
Browse files
checkpoint-200/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249323242
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7c9f6209c224a23b708f315f04453b7cc4803a7764fe291f14e159899ba995f
|
| 3 |
size 249323242
|
checkpoint-225/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249323242
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b35188d753a2de13f875f831c22747d631bd20d4c77f0f268580cbfadfb5acf
|
| 3 |
size 249323242
|
choice_distribution.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"gpt-4o": 0.0,
|
| 3 |
-
"DeepSeek-V3-0324": 0.
|
| 4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
| 5 |
-
"qwen25-coder-32b-instruct": 0.
|
| 6 |
-
"gpt-4.1-mini": 0.
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"gpt-4o": 0.0,
|
| 3 |
+
"DeepSeek-V3-0324": 0.325,
|
| 4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
| 5 |
+
"qwen25-coder-32b-instruct": 0.55,
|
| 6 |
+
"gpt-4.1-mini": 0.125
|
| 7 |
}
|
logs/events.out.tfevents.1754579550.209-20-159-47.43998.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71eb240364939c43e03aeb2ee1e2e35a30862d5f1efe81f4e7685311d8321d43
|
| 3 |
+
size 7091
|
logs/events.out.tfevents.1754579550.209-20-159-47.43998.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2ddee3604959657ecb44aec848f678c3c0019e16150326096c9f34635fa3d6e
|
| 3 |
+
size 7091
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249323242
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7c9f6209c224a23b708f315f04453b7cc4803a7764fe291f14e159899ba995f
|
| 3 |
size 249323242
|
per_sample_predictions.csv
CHANGED
|
@@ -1,81 +1,84 @@
|
|
| 1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
| 2 |
-
-0.
|
| 3 |
-
-0.
|
| 4 |
-
-0.
|
| 5 |
-
-0.
|
| 6 |
-
-0.
|
| 7 |
-
-0.
|
| 8 |
-
-0.91015625,-0.
|
| 9 |
-
-0.
|
| 10 |
-
-0.
|
| 11 |
-
-0.
|
| 12 |
-
-0.
|
| 13 |
-
-0.
|
| 14 |
-
-0.
|
| 15 |
-
-0.
|
| 16 |
-
-0.
|
| 17 |
-
-0.
|
| 18 |
-
-0.
|
| 19 |
-
-0.
|
| 20 |
-
-0.
|
| 21 |
-
-
|
| 22 |
-
-0.
|
| 23 |
-
-0.
|
| 24 |
-
-0.
|
| 25 |
-
-0.90625,-0.
|
| 26 |
-
-0.
|
| 27 |
-
-0.
|
| 28 |
-
-0.
|
| 29 |
-
-0.
|
| 30 |
-
-0.
|
| 31 |
-
-0.
|
| 32 |
-
-0.
|
| 33 |
-
-0.
|
| 34 |
-
-0.
|
| 35 |
-
-0.
|
| 36 |
-
-
|
| 37 |
-
-0.
|
| 38 |
-
-0.
|
| 39 |
-
-0.
|
| 40 |
-
-0.
|
| 41 |
-
-0.
|
| 42 |
-
-0.
|
| 43 |
-
-0.
|
| 44 |
-
-
|
| 45 |
-
-0.
|
| 46 |
-
-0.
|
| 47 |
-
-0.
|
| 48 |
-
-0.
|
| 49 |
-
-0.
|
| 50 |
-
-
|
| 51 |
-
-0.
|
| 52 |
-
-0.
|
| 53 |
-
-0.
|
| 54 |
-
-0.
|
| 55 |
-
-0.
|
| 56 |
-
-0.
|
| 57 |
-
-0.97265625,-0.
|
| 58 |
-
-
|
| 59 |
-
-0.
|
| 60 |
-
-0.
|
| 61 |
-
-0.
|
| 62 |
-
-0.
|
| 63 |
-
-0.
|
| 64 |
-
-
|
| 65 |
-
-0.
|
| 66 |
-
-0.
|
| 67 |
-
-0.
|
| 68 |
-
-0.89453125,-0.
|
| 69 |
-
-0.
|
| 70 |
-
-0.
|
| 71 |
-
-0.
|
| 72 |
-
-0.921875,-0.
|
| 73 |
-
-0.
|
| 74 |
-
-0.
|
| 75 |
-
-0.
|
| 76 |
-
-0.
|
| 77 |
-
-0.
|
| 78 |
-
-0.
|
| 79 |
-
-0.
|
| 80 |
-
-0.
|
| 81 |
-
-0.
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
| 2 |
+
-0.8203125,-0.73046875,-0.734375,-0.6015625,-0.66796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 3 |
+
-0.8671875,-0.7890625,-0.7890625,-0.72265625,-0.8125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 4 |
+
-0.9375,-0.7734375,-0.83203125,-0.7578125,-0.7890625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 5 |
+
-0.8984375,-0.7734375,-0.81640625,-0.75390625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 6 |
+
-0.8984375,-0.7734375,-0.796875,-0.7109375,-0.78125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 7 |
+
-0.89453125,-0.7734375,-0.84765625,-0.78515625,-0.8125,DeepSeek-V3-0324,0.0,0.0,1.0,0.0,1.0
|
| 8 |
+
-0.91015625,-0.74609375,-0.8203125,-0.71875,-0.72265625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 9 |
+
-0.90625,-0.80078125,-0.78125,-0.7109375,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 10 |
+
-0.8359375,-0.73828125,-0.7421875,-0.5703125,-0.6328125,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
|
| 11 |
+
-0.859375,-0.71875,-0.78125,-0.70703125,-0.75,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 12 |
+
-0.953125,-0.78515625,-0.83984375,-0.8203125,-0.796875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 13 |
+
-0.89453125,-0.7421875,-0.8046875,-0.74609375,-0.76171875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 14 |
+
-0.91796875,-0.79296875,-0.8125,-0.75,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 15 |
+
-0.8828125,-0.7578125,-0.76953125,-0.73046875,-0.7421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 16 |
+
-0.953125,-0.7421875,-0.859375,-0.80078125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 17 |
+
-0.91796875,-0.78125,-0.81640625,-0.74609375,-0.83203125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 18 |
+
-0.94140625,-0.796875,-0.796875,-0.80859375,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 19 |
+
-0.90234375,-0.79296875,-0.79296875,-0.78125,-0.79296875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 20 |
+
-0.9375,-0.75,-0.8046875,-0.77734375,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 21 |
+
-0.88671875,-0.72265625,-0.81640625,-0.7578125,-0.76171875,DeepSeek-V3-0324,1.0,1.0,0.0,1.0,1.0
|
| 22 |
+
-0.90234375,-0.75390625,-0.84375,-0.734375,-0.8125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 23 |
+
-0.8515625,-0.66796875,-0.6875,-0.65234375,-0.64453125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 24 |
+
-0.92578125,-0.7890625,-0.8203125,-0.81640625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 25 |
+
-0.90625,-0.78125,-0.8046875,-0.7578125,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 26 |
+
-0.94140625,-0.82421875,-0.88671875,-0.80859375,-0.85546875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 27 |
+
-0.80078125,-0.7109375,-0.67578125,-0.6953125,-0.61328125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 28 |
+
-0.90234375,-0.78515625,-0.84765625,-0.70703125,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 29 |
+
-0.94140625,-0.73046875,-0.83984375,-0.76171875,-0.78125,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 30 |
+
-0.90234375,-0.73046875,-0.78515625,-0.73828125,-0.74609375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 31 |
+
-0.83203125,-0.71484375,-0.734375,-0.66796875,-0.69140625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 32 |
+
-0.90234375,-0.7578125,-0.80859375,-0.73046875,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 33 |
+
-0.90234375,-0.80078125,-0.80078125,-0.72265625,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 34 |
+
-0.859375,-0.65234375,-0.78515625,-0.703125,-0.71875,DeepSeek-V3-0324,1.0,0.0,1.0,1.0,1.0
|
| 35 |
+
-0.875,-0.7578125,-0.796875,-0.7109375,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 36 |
+
-0.9375,-0.8046875,-0.8203125,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 37 |
+
-0.9375,-0.78125,-0.828125,-0.79296875,-0.81640625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 38 |
+
-0.921875,-0.7421875,-0.81640625,-0.72265625,-0.7578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 39 |
+
-0.9375,-0.75390625,-0.84375,-0.765625,-0.796875,DeepSeek-V3-0324,1.0,1.0,0.0,1.0,1.0
|
| 40 |
+
-0.93359375,-0.78125,-0.78125,-0.76171875,-0.7890625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 41 |
+
-0.890625,-0.75390625,-0.78515625,-0.765625,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 42 |
+
-0.91015625,-0.79296875,-0.87109375,-0.7578125,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 43 |
+
-0.921875,-0.75390625,-0.8125,-0.765625,-0.78515625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 44 |
+
-0.94921875,-0.7734375,-0.90234375,-0.7578125,-0.82421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 45 |
+
-0.921875,-0.796875,-0.8125,-0.76953125,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 46 |
+
-0.91796875,-0.7890625,-0.85546875,-0.78125,-0.7890625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 47 |
+
-0.90234375,-0.80078125,-0.82421875,-0.7265625,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 48 |
+
-0.921875,-0.75,-0.7890625,-0.7265625,-0.76953125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 49 |
+
-0.91796875,-0.7578125,-0.8203125,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 50 |
+
-0.95703125,-0.76953125,-0.83984375,-0.8203125,-0.78515625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 51 |
+
-0.9453125,-0.76953125,-0.76953125,-0.68359375,-0.70703125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 52 |
+
-0.88671875,-0.7890625,-0.8046875,-0.76953125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 53 |
+
-0.875,-0.68359375,-0.81640625,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 54 |
+
-0.93359375,-0.7890625,-0.7890625,-0.78125,-0.78515625,qwen25-coder-32b-instruct,1.0,1.0,1.0,0.0,1.0
|
| 55 |
+
-0.82421875,-0.7265625,-0.71875,-0.65234375,-0.64453125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 56 |
+
-0.92578125,-0.75390625,-0.828125,-0.80859375,-0.8046875,DeepSeek-V3-0324,1.0,0.0,0.0,0.0,0.0
|
| 57 |
+
-0.97265625,-0.734375,-0.8515625,-0.859375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 58 |
+
-0.9453125,-0.765625,-0.86328125,-0.78125,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 59 |
+
-0.8359375,-0.7109375,-0.73046875,-0.70703125,-0.6640625,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
|
| 60 |
+
-0.9140625,-0.71484375,-0.83984375,-0.68359375,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 61 |
+
-0.9296875,-0.7734375,-0.8359375,-0.78125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 62 |
+
-0.96484375,-0.75390625,-0.8203125,-0.76171875,-0.734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 63 |
+
-0.87109375,-0.71875,-0.83984375,-0.734375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 64 |
+
-0.94140625,-0.765625,-0.859375,-0.7578125,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 65 |
+
-0.90234375,-0.74609375,-0.83203125,-0.72265625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 66 |
+
-0.9453125,-0.72265625,-0.83203125,-0.71484375,-0.78125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,0.0
|
| 67 |
+
-0.86328125,-0.71875,-0.75,-0.6796875,-0.7265625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 68 |
+
-0.89453125,-0.7421875,-0.78515625,-0.7421875,-0.76953125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 69 |
+
-0.90625,-0.73046875,-0.78125,-0.734375,-0.74609375,DeepSeek-V3-0324,1.0,0.0,0.0,1.0,1.0
|
| 70 |
+
-0.88671875,-0.76953125,-0.82421875,-0.7109375,-0.765625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 71 |
+
-0.8828125,-0.765625,-0.8046875,-0.75,-0.75,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 72 |
+
-0.921875,-0.7734375,-0.8046875,-0.78515625,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 73 |
+
-0.89453125,-0.7109375,-0.8203125,-0.7265625,-0.71875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 74 |
+
-0.91015625,-0.74609375,-0.78125,-0.71484375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 75 |
+
-0.8828125,-0.734375,-0.796875,-0.71875,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 76 |
+
-0.9609375,-0.765625,-0.85546875,-0.84375,-0.8046875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 77 |
+
-0.96875,-0.796875,-0.86328125,-0.79296875,-0.80859375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 78 |
+
-0.85546875,-0.70703125,-0.78515625,-0.6875,-0.734375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 79 |
+
-0.95703125,-0.76953125,-0.87109375,-0.71875,-0.828125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 80 |
+
-0.99609375,-0.73046875,-0.8515625,-0.80078125,-0.7734375,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 81 |
+
-0.90625,-0.78125,-0.8671875,-0.76171875,-0.85546875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 82 |
+
|
| 83 |
+
predicted_proportions,0.0000,0.3250,0.0000,0.5500,0.1250
|
| 84 |
+
true_proportions,0.9625,0.0250,0.0125,0.0000,0.0000
|