Update calibrate_software_engineer.yaml with new multilingual calibration
Browse files- calibrate_software_engineer.yaml +24 -18
calibrate_software_engineer.yaml
CHANGED
|
@@ -1,29 +1,29 @@
|
|
| 1 |
calibration_set:
|
| 2 |
_templates:
|
| 3 |
programming_languages: &programming_languages "Solve the following problem using {{ ['Zephyr', 'Prolog', 'Cobol', 'Apex', 'Crystal', 'Fortran', 'Nim', 'Delphi', 'Ada', 'Objective-C', 'VBA', 'Perl', 'Groovy', 'MATLAB', 'Solidity', 'Visual Basic', 'OCaml', 'Erlang', 'Julia', 'Lisp', 'F#', 'Clojure', 'GDScript', 'Scala', 'R', 'Haskell', 'Ruby', 'Elixir', 'Lua', 'Zig', 'Dart', 'Swift', 'Metal', 'PowerShell', 'PHP', 'Kotlin', 'C', 'Java', 'C++', 'C#', 'Bash/Shell', 'Go', 'Rust', 'TypeScript', 'HTML/CSS', 'SQL', 'JavaScript', 'Python', 'Lean', 'Coq', 'Pony', 'D', 'Racket', 'Haxe', 'x86-64 ASM', 'ARM-64 ASM', 'LLVM IR', 'GLSL', 'CUDA', 'Vulkan'][hash(row|string) % 60] }}\n***\n"
|
| 4 |
-
spoken_languages: &spoken_languages "Answer in {{ ['Arabic', 'Chinese', 'French', 'German', 'Hebrew', 'Hindi', 'Japanese', 'Korean', 'Portuguese', 'Russian', 'Spanish', 'Turkish'][hash(row|string) % 12] }}\n***\n"
|
| 5 |
max_seq_length: 8192
|
| 6 |
shuffle: true
|
| 7 |
seed: 42
|
| 8 |
datasets:
|
| 9 |
|
| 10 |
-
# Category Summary (Total: 590 samples)
|
| 11 |
# =====================================================
|
| 12 |
-
# General chat (24 samples - 4.07%)
|
| 13 |
-
# Instruction and Reasoning tuning (14 samples - 2.37%)
|
| 14 |
-
# Multilingual (36 samples - 6.10%)
|
| 15 |
-
# Tool use (100 samples - 16.95%)
|
| 16 |
-
# Code / Programming / Software Engineering / Devops (328 samples - 55.59%)
|
| 17 |
-
# Math (12 samples - 2.03%)
|
| 18 |
-
# Sciences (16 samples - 2.71%)
|
| 19 |
-
# Medical (8 samples - 1.36%)
|
| 20 |
-
# Finance (8 samples - 1.36%)
|
| 21 |
-
# Business (16 samples - 2.71%)
|
| 22 |
-
# Humanities and Philosophy (8 samples - 1.36%)
|
| 23 |
-
# Creative Writing, Adventure, Roleplay (13 samples - 2.20%)
|
| 24 |
-
# General Knowledge and Pop Culture (2 samples - 0.34%)
|
| 25 |
-
# Behavioral skills (4 samples - 0.68%)
|
| 26 |
-
# Misc (1 sample - 0.17%)
|
| 27 |
# =====================================================
|
| 28 |
|
| 29 |
# Research
|
|
@@ -90,7 +90,7 @@ calibration_set:
|
|
| 90 |
formatter: sharegpt
|
| 91 |
num_samples: 4
|
| 92 |
|
| 93 |
-
# Multilingual (36 samples)
|
| 94 |
# ---------------------------------------------------------------------------
|
| 95 |
- dataset: HuggingFaceH4/Multilingual-Thinking
|
| 96 |
split: train
|
|
@@ -108,6 +108,12 @@ calibration_set:
|
|
| 108 |
num_samples: 4
|
| 109 |
streaming: true
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
# Tool use (include commented out ToolAce) (100 samples)
|
| 112 |
# ---------------------------------------------------------------------------
|
| 113 |
|
|
|
|
| 1 |
calibration_set:
|
| 2 |
_templates:
|
| 3 |
programming_languages: &programming_languages "Solve the following problem using {{ ['Zephyr', 'Prolog', 'Cobol', 'Apex', 'Crystal', 'Fortran', 'Nim', 'Delphi', 'Ada', 'Objective-C', 'VBA', 'Perl', 'Groovy', 'MATLAB', 'Solidity', 'Visual Basic', 'OCaml', 'Erlang', 'Julia', 'Lisp', 'F#', 'Clojure', 'GDScript', 'Scala', 'R', 'Haskell', 'Ruby', 'Elixir', 'Lua', 'Zig', 'Dart', 'Swift', 'Metal', 'PowerShell', 'PHP', 'Kotlin', 'C', 'Java', 'C++', 'C#', 'Bash/Shell', 'Go', 'Rust', 'TypeScript', 'HTML/CSS', 'SQL', 'JavaScript', 'Python', 'Lean', 'Coq', 'Pony', 'D', 'Racket', 'Haxe', 'x86-64 ASM', 'ARM-64 ASM', 'LLVM IR', 'GLSL', 'CUDA', 'Vulkan'][hash(row|string) % 60] }}\n***\n"
|
| 4 |
+
spoken_languages: &spoken_languages "Answer in {{ ['Arabic', 'Chinese', 'French', 'German', 'Greek', 'Hebrew', 'Hindi', 'Japanese', 'Korean', 'Portuguese', 'Russian', 'Spanish', 'Turkish'][hash(row|string) % 13] }}\n***\n"
|
| 5 |
max_seq_length: 8192
|
| 6 |
shuffle: true
|
| 7 |
seed: 42
|
| 8 |
datasets:
|
| 9 |
|
| 10 |
+
# Category Summary (Total: 624 samples)
|
| 11 |
# =====================================================
|
| 12 |
+
# General chat (24 samples - 3.85%)
|
| 13 |
+
# Instruction and Reasoning tuning (14 samples - 2.24%)
|
| 14 |
+
# Multilingual (70 samples - 11.22%)
|
| 15 |
+
# Tool use (100 samples - 16.03%)
|
| 16 |
+
# Code / Programming / Software Engineering / Devops (328 samples - 52.56%)
|
| 17 |
+
# Math (12 samples - 1.92%)
|
| 18 |
+
# Sciences (16 samples - 2.56%)
|
| 19 |
+
# Medical (8 samples - 1.28%)
|
| 20 |
+
# Finance (8 samples - 1.28%)
|
| 21 |
+
# Business (16 samples - 2.56%)
|
| 22 |
+
# Humanities and Philosophy (8 samples - 1.28%)
|
| 23 |
+
# Creative Writing, Adventure, Roleplay (13 samples - 2.08%)
|
| 24 |
+
# General Knowledge and Pop Culture (2 samples - 0.32%)
|
| 25 |
+
# Behavioral skills (4 samples - 0.64%)
|
| 26 |
+
# Misc (1 sample - 0.16%)
|
| 27 |
# =====================================================
|
| 28 |
|
| 29 |
# Research
|
|
|
|
| 90 |
formatter: sharegpt
|
| 91 |
num_samples: 4
|
| 92 |
|
| 93 |
+
# Multilingual (70 samples)
|
| 94 |
# ---------------------------------------------------------------------------
|
| 95 |
- dataset: HuggingFaceH4/Multilingual-Thinking
|
| 96 |
split: train
|
|
|
|
| 108 |
num_samples: 4
|
| 109 |
streaming: true
|
| 110 |
|
| 111 |
+
- dataset: droussis/euroblocks_sft_1sample_per_lang
|
| 112 |
+
split: train
|
| 113 |
+
columns: [conversations]
|
| 114 |
+
formatter: chat_completion
|
| 115 |
+
num_samples: 34
|
| 116 |
+
|
| 117 |
# Tool use (include commented out ToolAce) (100 samples)
|
| 118 |
# ---------------------------------------------------------------------------
|
| 119 |
|