Upload folder using huggingface_hub
Browse files
checkpoint-200/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249323242
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c6e09a21dcb26a17d03fe73b4a58156f53f84d3abb5fb3dc9877b9cb348a8dc
|
| 3 |
size 249323242
|
checkpoint-225/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249323242
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85afa3559924d8e598e87b57f11617f919bec98cfca85e983d0b7184ed264e80
|
| 3 |
size 249323242
|
choice_distribution.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"gpt-4o": 0.0,
|
| 3 |
-
"DeepSeek-V3-0324": 0.
|
| 4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
| 5 |
-
"qwen25-coder-32b-instruct": 0.
|
| 6 |
-
"gpt-4.1-mini": 0.
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"gpt-4o": 0.0,
|
| 3 |
+
"DeepSeek-V3-0324": 0.05,
|
| 4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
| 5 |
+
"qwen25-coder-32b-instruct": 0.15,
|
| 6 |
+
"gpt-4.1-mini": 0.8
|
| 7 |
}
|
logs/events.out.tfevents.1754579928.209-20-159-47.44655.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac1ad2e67b689a5d930e272625f2c782899025a1fa6640256a8a435b8572e2e9
|
| 3 |
+
size 7091
|
logs/events.out.tfevents.1754579928.209-20-159-47.44655.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7557e3aefe35343339020f4d613dced2aec601871122b7ec4b8de505093bf924
|
| 3 |
+
size 7091
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249323242
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c6e09a21dcb26a17d03fe73b4a58156f53f84d3abb5fb3dc9877b9cb348a8dc
|
| 3 |
size 249323242
|
per_sample_predictions.csv
CHANGED
|
@@ -1,83 +1,84 @@
|
|
| 1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
| 2 |
-
-0.
|
| 3 |
-
-0.
|
| 4 |
-
-0.
|
| 5 |
-
-0.
|
| 6 |
-
-0.
|
| 7 |
-
-0.
|
| 8 |
-
-0.
|
| 9 |
-
-0.
|
| 10 |
-
-0.
|
| 11 |
-
-0.
|
| 12 |
-
-
|
| 13 |
-
-0.
|
| 14 |
-
-0.9609375,-0.
|
| 15 |
-
-0.
|
| 16 |
-
-1.0078125,-0.
|
| 17 |
-
-0.
|
| 18 |
-
-0
|
| 19 |
-
-0.
|
| 20 |
-
-0
|
| 21 |
-
-
|
| 22 |
-
-0.
|
| 23 |
-
-0.
|
| 24 |
-
-0
|
| 25 |
-
-0.
|
| 26 |
-
-0.
|
| 27 |
-
-0.
|
| 28 |
-
-0.
|
| 29 |
-
-0.
|
| 30 |
-
-
|
| 31 |
-
-0.
|
| 32 |
-
-0.
|
| 33 |
-
-0.
|
| 34 |
-
-0.
|
| 35 |
-
-0.
|
| 36 |
-
-
|
| 37 |
-
-0.
|
| 38 |
-
-0.
|
| 39 |
-
-0.
|
| 40 |
-
-0.
|
| 41 |
-
-0.
|
| 42 |
-
-0.
|
| 43 |
-
-0
|
| 44 |
-
-
|
| 45 |
-
-0.
|
| 46 |
-
-
|
| 47 |
-
-
|
| 48 |
-
-0.
|
| 49 |
-
-0.
|
| 50 |
-
-1.
|
| 51 |
-
-0.
|
| 52 |
-
-0.
|
| 53 |
-
-0.
|
| 54 |
-
-0.
|
| 55 |
-
-0.
|
| 56 |
-
-0.984375,-0.
|
| 57 |
-
-0.
|
| 58 |
-
-
|
| 59 |
-
-0.
|
| 60 |
-
-0.
|
| 61 |
-
-
|
| 62 |
-
-0.
|
| 63 |
-
-
|
| 64 |
-
-0.
|
| 65 |
-
-0.
|
| 66 |
-
-0.
|
| 67 |
-
-0.
|
| 68 |
-
-0.
|
| 69 |
-
-0.
|
| 70 |
-
-0.
|
| 71 |
-
-0.
|
| 72 |
-
-0.
|
| 73 |
-
-0.
|
| 74 |
-
-0.96875,-0.
|
| 75 |
-
-0.
|
| 76 |
-
-0.
|
| 77 |
-
-0
|
| 78 |
-
-0.
|
| 79 |
-
-0.
|
| 80 |
-
-
|
| 81 |
-
-0.
|
| 82 |
|
| 83 |
-
predicted_proportions,0.0000,0.
|
|
|
|
|
|
| 1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
| 2 |
+
-0.84765625,-0.69140625,-0.76171875,-0.7265625,-0.68359375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 3 |
+
-0.9296875,-0.8125,-0.84375,-0.80078125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 4 |
+
-0.9609375,-0.8515625,-0.8671875,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 5 |
+
-0.953125,-0.8046875,-0.83984375,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 6 |
+
-0.953125,-0.8125,-0.8359375,-0.8046875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 7 |
+
-0.95703125,-0.8046875,-0.83984375,-0.8125,-0.78515625,gpt-4.1-mini,0.0,0.0,1.0,0.0,1.0
|
| 8 |
+
-0.88671875,-0.7578125,-0.83984375,-0.77734375,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 9 |
+
-0.9453125,-0.796875,-0.828125,-0.7890625,-0.7578125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 10 |
+
-0.8671875,-0.78515625,-0.72265625,-0.69140625,-0.75,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
|
| 11 |
+
-0.93359375,-0.79296875,-0.80859375,-0.8046875,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 12 |
+
-0.97265625,-0.85546875,-0.89453125,-0.82421875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 13 |
+
-0.94140625,-0.80078125,-0.8203125,-0.80078125,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 14 |
+
-0.9609375,-0.8515625,-0.84765625,-0.81640625,-0.8125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 15 |
+
-0.96875,-0.7421875,-0.8671875,-0.78515625,-0.69921875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 16 |
+
-1.0078125,-0.890625,-0.88671875,-0.8359375,-0.8203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 17 |
+
-0.94921875,-0.80078125,-0.8125,-0.7890625,-0.75,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 18 |
+
-1.0,-0.8203125,-0.91015625,-0.80078125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 19 |
+
-0.91796875,-0.8125,-0.85546875,-0.78515625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 20 |
+
-1.0,-0.90625,-0.88671875,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 21 |
+
-0.96875,-0.90625,-0.890625,-0.8125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
|
| 22 |
+
-0.9296875,-0.83203125,-0.82421875,-0.78515625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 23 |
+
-0.90625,-0.73046875,-0.81640625,-0.8046875,-0.71484375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 24 |
+
-1.0,-0.875,-0.95703125,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 25 |
+
-0.9609375,-0.859375,-0.84765625,-0.8125,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 26 |
+
-0.9609375,-0.86328125,-0.86328125,-0.828125,-0.828125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 27 |
+
-0.85546875,-0.71875,-0.81640625,-0.76171875,-0.62109375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 28 |
+
-0.96484375,-0.8125,-0.84765625,-0.7890625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 29 |
+
-0.98046875,-0.83984375,-0.87109375,-0.828125,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 30 |
+
-0.984375,-0.91015625,-0.88671875,-0.78125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 31 |
+
-0.9296875,-0.74609375,-0.7890625,-0.75,-0.703125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 32 |
+
-0.91796875,-0.7890625,-0.8125,-0.8046875,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
| 33 |
+
-0.92578125,-0.8046875,-0.84375,-0.796875,-0.78125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 34 |
+
-0.97265625,-0.76953125,-0.85546875,-0.7734375,-0.73046875,gpt-4.1-mini,1.0,0.0,1.0,1.0,1.0
|
| 35 |
+
-0.9609375,-0.78515625,-0.8515625,-0.78515625,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 36 |
+
-0.98046875,-0.87109375,-0.921875,-0.8125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 37 |
+
-0.953125,-0.84375,-0.8359375,-0.8125,-0.7890625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 38 |
+
-0.9609375,-0.81640625,-0.85546875,-0.83203125,-0.80078125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 39 |
+
-0.96484375,-0.84765625,-0.84375,-0.80859375,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
|
| 40 |
+
-0.95703125,-0.81640625,-0.84375,-0.80859375,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 41 |
+
-0.98046875,-0.8125,-0.87109375,-0.81640625,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 42 |
+
-0.921875,-0.8359375,-0.828125,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 43 |
+
-1.0,-0.83984375,-0.83984375,-0.79296875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 44 |
+
-0.97265625,-0.890625,-0.875,-0.80859375,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 45 |
+
-0.984375,-0.875,-0.8515625,-0.8203125,-0.82421875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 46 |
+
-0.97265625,-0.85546875,-0.859375,-0.828125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 47 |
+
-0.9296875,-0.79296875,-0.79296875,-0.7734375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 48 |
+
-0.94140625,-0.78515625,-0.80859375,-0.78125,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 49 |
+
-0.97265625,-0.90234375,-0.91015625,-0.8125,-0.8046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 50 |
+
-1.0078125,-0.91015625,-0.94140625,-0.82421875,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 51 |
+
-0.92578125,-0.82421875,-0.828125,-0.81640625,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 52 |
+
-0.95703125,-0.765625,-0.85546875,-0.8046875,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 53 |
+
-0.99609375,-0.77734375,-0.87890625,-0.765625,-0.73046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 54 |
+
-0.96875,-0.84375,-0.8203125,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,0.0,1.0
|
| 55 |
+
-0.85546875,-0.6640625,-0.7109375,-0.7421875,-0.63671875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 56 |
+
-0.984375,-0.87890625,-0.8828125,-0.8125,-0.796875,gpt-4.1-mini,1.0,0.0,0.0,0.0,0.0
|
| 57 |
+
-0.984375,-0.8515625,-0.87890625,-0.84375,-0.828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 58 |
+
-0.98046875,-0.90625,-0.85546875,-0.79296875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 59 |
+
-0.9609375,-0.79296875,-0.84765625,-0.765625,-0.703125,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
|
| 60 |
+
-0.9296875,-0.78125,-0.8515625,-0.8125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 61 |
+
-0.9609375,-0.89453125,-0.87109375,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 62 |
+
-0.9765625,-0.84765625,-0.890625,-0.8203125,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 63 |
+
-0.98828125,-0.90234375,-0.87890625,-0.81640625,-0.78515625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 64 |
+
-0.98828125,-0.890625,-0.86328125,-0.8359375,-0.82421875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 65 |
+
-0.9453125,-0.82421875,-0.828125,-0.828125,-0.796875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 66 |
+
-0.96484375,-0.796875,-0.828125,-0.8203125,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,0.0
|
| 67 |
+
-0.91796875,-0.734375,-0.796875,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 68 |
+
-0.9296875,-0.76171875,-0.83203125,-0.79296875,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 69 |
+
-0.96875,-0.82421875,-0.8671875,-0.76953125,-0.75390625,gpt-4.1-mini,1.0,0.0,0.0,1.0,1.0
|
| 70 |
+
-0.87109375,-0.71484375,-0.79296875,-0.78125,-0.72265625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
| 71 |
+
-0.9609375,-0.90234375,-0.87109375,-0.8046875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
| 72 |
+
-0.953125,-0.8359375,-0.859375,-0.80078125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
| 73 |
+
-0.96875,-0.8046875,-0.84765625,-0.78515625,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 74 |
+
-0.96875,-0.82421875,-0.8515625,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 75 |
+
-0.94921875,-0.8515625,-0.88671875,-0.859375,-0.83203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 76 |
+
-0.9765625,-0.84375,-0.90625,-0.796875,-0.79296875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 77 |
+
-1.0,-0.828125,-0.87109375,-0.82421875,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
| 78 |
+
-0.921875,-0.828125,-0.84375,-0.796875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 79 |
+
-0.97265625,-0.83984375,-0.85546875,-0.83203125,-0.80859375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 80 |
+
-1.015625,-0.8828125,-0.94140625,-0.859375,-0.828125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 81 |
+
-0.94140625,-0.8203125,-0.8515625,-0.82421875,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
| 82 |
|
| 83 |
+
predicted_proportions,0.0000,0.0500,0.0000,0.1500,0.8000
|
| 84 |
+
true_proportions,0.0848,0.0806,0.0713,0.0723,0.0785
|