webxos committed on
Commit
6253d52
·
verified ·
1 Parent(s): e60a42b

Upload 12 files

Browse files
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Micro-Distilled GRPO+VAE Model
2
+
3
+ ## Model Description
4
+ This is a distilled language model trained using Group Relative Policy Optimization (GRPO) with VAE filtering.
5
+
6
+ ## Model Details
7
+ - **Model type**: micro-distill-grpo-vae
8
+ - **Model size**: 42M parameters
9
+ - **Language**: English
10
+ - **License**: Apache 2.0
11
+
12
+ ## Training Methodology
13
+ - **GRPO (Group Relative Policy Optimization)**: group size of 8
14
+ - **VAE Filtering**: 32D latent space
15
+ - **KV-Cache Reuse**: 512 cache size
16
+
17
+ ## Architecture Details
18
+ - Hidden size: 512
19
+ - Number of layers: 8
20
+ - Attention heads: 8
21
+ - Vocabulary size: 50257
22
+ - Maximum sequence length: 1024
23
+
24
+ ## Usage
25
+
26
+ ### Using Transformers
27
+ ```python
28
+ from transformers import AutoModelForCausalLM, AutoTokenizer
29
+
30
+ model = AutoModelForCausalLM.from_pretrained("micro-distill-grpo-vae")
31
+ tokenizer = AutoTokenizer.from_pretrained("micro-distill-grpo-vae")
32
+
33
+ inputs = tokenizer("Hello, world!", return_tensors="pt")
34
+ outputs = model.generate(**inputs, max_length=50)
35
+ print(tokenizer.decode(outputs[0]))
36
+ ```
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GPT2LMHeadModel"
4
+ ],
5
+ "model_type": "gpt2",
6
+ "vocab_size": 50257,
7
+ "n_positions": 1024,
8
+ "n_embd": 512,
9
+ "n_layer": 8,
10
+ "n_head": 8,
11
+ "n_inner": 2048,
12
+ "activation_function": "gelu_new",
13
+ "resid_pdrop": 0.1,
14
+ "embd_pdrop": 0.1,
15
+ "attn_pdrop": 0.1,
16
+ "layer_norm_epsilon": 0.00001,
17
+ "initializer_range": 0.02,
18
+ "summary_type": "cls_index",
19
+ "summary_use_proj": true,
20
+ "summary_activation": null,
21
+ "summary_proj_to_labels": true,
22
+ "summary_first_dropout": 0.1,
23
+ "scale_attn_weights": true,
24
+ "use_cache": true,
25
+ "bos_token_id": 50256,
26
+ "eos_token_id": 50256,
27
+ "transformers_version": "4.36.0",
28
+ "grpo_config": {
29
+ "group_size": 8,
30
+ "kl_penalty": 0.1,
31
+ "advantage_clip": 2,
32
+ "mask_intensity": 0.8,
33
+ "feedback_window": 50
34
+ },
35
+ "vae_config": {
36
+ "latent_dim": 32,
37
+ "beta": 0.01,
38
+ "filter_threshold": 0.7
39
+ },
40
+ "cache_config": {
41
+ "cache_size": 512,
42
+ "reuse_threshold": 0.9
43
+ }
44
+ }
generation_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "pad_token_id": 50256,
6
+ "transformers_version": "4.36.0",
7
+ "max_length": 1024,
8
+ "min_length": 1,
9
+ "do_sample": true,
10
+ "early_stopping": false,
11
+ "num_beams": 1,
12
+ "temperature": 0.7,
13
+ "top_k": 50,
14
+ "top_p": 0.9,
15
+ "repetition_penalty": 1.2,
16
+ "length_penalty": 1,
17
+ "no_repeat_ngram_size": 3,
18
+ "num_return_sequences": 1,
19
+ "output_scores": false,
20
+ "return_dict_in_generate": true
21
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.onnx ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ONNX Model Export
2
+ Generated: 2025-12-28T17:48:01.525Z
3
+ Model: micro-distill-grpo-vae
4
+ Framework: PyTorch 2.0+
5
+ ONNX Version: 1.14.0
6
+
7
+ Graph Structure:
8
+ - Inputs:
9
+ - input_ids: int64[batch_size, sequence_length]
10
+ - attention_mask: int64[batch_size, sequence_length]
11
+ - position_ids: int64[batch_size, sequence_length] (optional)
12
+
13
+ - Outputs:
14
+ - logits: float32[batch_size, sequence_length, 50257]
15
+
16
+ Layers:
17
+ - Embedding: 512 dimensions
18
+ - 8 Transformer blocks
19
+ - Layer normalization
20
+ - Language modeling head
21
+
22
+ Optimizations:
23
+ - Constant folding: enabled
24
+ - Shape inference: enabled
25
+ - Dynamic axes: sequence_length
26
+ - Opset: 17
27
+ - IR version: 9
28
+
29
+ Quantization: Q4 (4-bit)
model.safetensors ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ SafeTensors binary structure:
2
+
3
+ Header (128 bytes):
4
+ - Magic number: 0x73 0x61 0x66 0x65
5
+ - Version: 1.0
6
+ - Num tensors: 143
7
+ - Metadata length: 354
8
+
modeling_micro_distill.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modeling_micro_distill.py
2
+ # Custom model architecture for micro-distill-grpo-vae
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from transformers import GPT2PreTrainedModel, GPT2Config
7
+
8
+ class MicroDistillForCausalLM(GPT2PreTrainedModel):
9
+ def __init__(self, config):
10
+ super().__init__(config)
11
+ self.config = config
12
+
13
+ # ... modeling code ...
14
+
15
+ def forward(self, input_ids=None, attention_mask=None, **kwargs):
16
+ # Forward pass implementation
17
+ pass
pytorch_model.bin ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "format": "torch",
4
+ "version": "1.0",
5
+ "model": "micro-distill-grpo-vae",
6
+ "hidden_size": 512,
7
+ "num_layers": 8,
8
+ "num_heads": 8,
9
+ "vocab_size": 50257,
10
+ "training_steps": 100
11
+ },
12
+ "tensors": {
13
+ "transformer.wte.weight": {
14
+ "shape": [
15
+ 50257,
16
+ 512
17
+ ],
18
+ "dtype": "float32",
19
+ "size": "98.2 MB"
20
+ },
21
+ "transformer.wpe.weight": {
22
+ "shape": [
23
+ 1024,
24
+ 512
25
+ ],
26
+ "dtype": "float32",
27
+ "size": "2.0 MB"
28
+ },
29
+ "transformer.h.0.ln_1.weight": {
30
+ "shape": [
31
+ 512
32
+ ],
33
+ "dtype": "float32",
34
+ "size": "2.0 KB"
35
+ },
36
+ "transformer.h.0.attn.c_attn.weight": {
37
+ "shape": [
38
+ 512,
39
+ 1536
40
+ ],
41
+ "dtype": "float32",
42
+ "size": "3.0 MB"
43
+ },
44
+ "transformer.h.0.mlp.c_fc.weight": {
45
+ "shape": [
46
+ 512,
47
+ 2048
48
+ ],
49
+ "dtype": "float32",
50
+ "size": "4.0 MB"
51
+ },
52
+ "transformer.h.1.ln_1.weight": {
53
+ "shape": [
54
+ 512
55
+ ],
56
+ "dtype": "float32",
57
+ "size": "2.0 KB"
58
+ },
59
+ "transformer.h.1.attn.c_attn.weight": {
60
+ "shape": [
61
+ 512,
62
+ 1536
63
+ ],
64
+ "dtype": "float32",
65
+ "size": "3.0 MB"
66
+ },
67
+ "transformer.h.1.mlp.c_fc.weight": {
68
+ "shape": [
69
+ 512,
70
+ 2048
71
+ ],
72
+ "dtype": "float32",
73
+ "size": "4.0 MB"
74
+ },
75
+ "transformer.h.2.ln_1.weight": {
76
+ "shape": [
77
+ 512
78
+ ],
79
+ "dtype": "float32",
80
+ "size": "2.0 KB"
81
+ },
82
+ "transformer.h.2.attn.c_attn.weight": {
83
+ "shape": [
84
+ 512,
85
+ 1536
86
+ ],
87
+ "dtype": "float32",
88
+ "size": "3.0 MB"
89
+ },
90
+ "transformer.h.2.mlp.c_fc.weight": {
91
+ "shape": [
92
+ 512,
93
+ 2048
94
+ ],
95
+ "dtype": "float32",
96
+ "size": "4.0 MB"
97
+ },
98
+ "transformer.h.3.ln_1.weight": {
99
+ "shape": [
100
+ 512
101
+ ],
102
+ "dtype": "float32",
103
+ "size": "2.0 KB"
104
+ },
105
+ "transformer.h.3.attn.c_attn.weight": {
106
+ "shape": [
107
+ 512,
108
+ 1536
109
+ ],
110
+ "dtype": "float32",
111
+ "size": "3.0 MB"
112
+ },
113
+ "transformer.h.3.mlp.c_fc.weight": {
114
+ "shape": [
115
+ 512,
116
+ 2048
117
+ ],
118
+ "dtype": "float32",
119
+ "size": "4.0 MB"
120
+ },
121
+ "transformer.h.4.ln_1.weight": {
122
+ "shape": [
123
+ 512
124
+ ],
125
+ "dtype": "float32",
126
+ "size": "2.0 KB"
127
+ },
128
+ "transformer.h.4.attn.c_attn.weight": {
129
+ "shape": [
130
+ 512,
131
+ 1536
132
+ ],
133
+ "dtype": "float32",
134
+ "size": "3.0 MB"
135
+ },
136
+ "transformer.h.4.mlp.c_fc.weight": {
137
+ "shape": [
138
+ 512,
139
+ 2048
140
+ ],
141
+ "dtype": "float32",
142
+ "size": "4.0 MB"
143
+ },
144
+ "transformer.h.5.ln_1.weight": {
145
+ "shape": [
146
+ 512
147
+ ],
148
+ "dtype": "float32",
149
+ "size": "2.0 KB"
150
+ },
151
+ "transformer.h.5.attn.c_attn.weight": {
152
+ "shape": [
153
+ 512,
154
+ 1536
155
+ ],
156
+ "dtype": "float32",
157
+ "size": "3.0 MB"
158
+ },
159
+ "transformer.h.5.mlp.c_fc.weight": {
160
+ "shape": [
161
+ 512,
162
+ 2048
163
+ ],
164
+ "dtype": "float32",
165
+ "size": "4.0 MB"
166
+ },
167
+ "transformer.h.6.ln_1.weight": {
168
+ "shape": [
169
+ 512
170
+ ],
171
+ "dtype": "float32",
172
+ "size": "2.0 KB"
173
+ },
174
+ "transformer.h.6.attn.c_attn.weight": {
175
+ "shape": [
176
+ 512,
177
+ 1536
178
+ ],
179
+ "dtype": "float32",
180
+ "size": "3.0 MB"
181
+ },
182
+ "transformer.h.6.mlp.c_fc.weight": {
183
+ "shape": [
184
+ 512,
185
+ 2048
186
+ ],
187
+ "dtype": "float32",
188
+ "size": "4.0 MB"
189
+ },
190
+ "transformer.h.7.ln_1.weight": {
191
+ "shape": [
192
+ 512
193
+ ],
194
+ "dtype": "float32",
195
+ "size": "2.0 KB"
196
+ },
197
+ "transformer.h.7.attn.c_attn.weight": {
198
+ "shape": [
199
+ 512,
200
+ 1536
201
+ ],
202
+ "dtype": "float32",
203
+ "size": "3.0 MB"
204
+ },
205
+ "transformer.h.7.mlp.c_fc.weight": {
206
+ "shape": [
207
+ 512,
208
+ 2048
209
+ ],
210
+ "dtype": "float32",
211
+ "size": "4.0 MB"
212
+ },
213
+ "lm_head.weight": {
214
+ "shape": [
215
+ 50257,
216
+ 512
217
+ ],
218
+ "dtype": "float32",
219
+ "size": "98.2 MB"
220
+ }
221
+ }
222
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "pad_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 50256,
8
+ "content": "<|endoftext|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ }
15
+ ],
16
+ "normalizer": {
17
+ "type": "Sequence",
18
+ "normalizers": [
19
+ {
20
+ "type": "NFC"
21
+ }
22
+ ]
23
+ },
24
+ "pre_tokenizer": {
25
+ "type": "Split",
26
+ "pattern": {
27
+ "Regex": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+"
28
+ },
29
+ "behavior": "removed"
30
+ },
31
+ "post_processor": {
32
+ "type": "TemplateProcessing",
33
+ "single": [
34
+ {
35
+ "Sequence": {
36
+ "id": "A",
37
+ "type_id": 0
38
+ }
39
+ }
40
+ ],
41
+ "pair": [
42
+ {
43
+ "Sequence": {
44
+ "id": "A",
45
+ "type_id": 0
46
+ }
47
+ },
48
+ {
49
+ "Sequence": {
50
+ "id": "B",
51
+ "type_id": 0
52
+ }
53
+ }
54
+ ],
55
+ "special_tokens": {
56
+ "<|endoftext|>": {
57
+ "id": "<|endoftext|>",
58
+ "ids": [
59
+ 50256
60
+ ]
61
+ }
62
+ }
63
+ },
64
+ "decoder": {
65
+ "type": "ByteLevel",
66
+ "add_prefix_space": false,
67
+ "trim_offsets": false,
68
+ "use_regex": true
69
+ },
70
+ "model": {
71
+ "type": "BPE",
72
+ "dropout": null,
73
+ "unk_token": "<|endoftext|>",
74
+ "continuing_subword_prefix": "",
75
+ "end_of_word_suffix": "",
76
+ "fuse_unk": false,
77
+ "byte_fallback": true,
78
+ "vocab": {
79
+ "<0x00>": 0,
80
+ "<0x01>": 1,
81
+ "<0x02>": 2,
82
+ "<0x03>": 3,
83
+ "<0x04>": 4,
84
+ "<0x05>": 5,
85
+ "<0x06>": 6,
86
+ "<0x07>": 7,
87
+ "<0x08>": 8,
88
+ "<0x09>": 9,
89
+ "<0x0a>": 10,
90
+ "<0x0b>": 11,
91
+ "<0x0c>": 12,
92
+ "<0x0d>": 13,
93
+ "<0x0e>": 14,
94
+ "<0x0f>": 15,
95
+ "<0x10>": 16,
96
+ "<0x11>": 17,
97
+ "<0x12>": 18,
98
+ "<0x13>": 19,
99
+ "<0x14>": 20,
100
+ "<0x15>": 21,
101
+ "<0x16>": 22,
102
+ "<0x17>": 23,
103
+ "<0x18>": 24,
104
+ "<0x19>": 25,
105
+ "<0x1a>": 26,
106
+ "<0x1b>": 27,
107
+ "<0x1c>": 28,
108
+ "<0x1d>": 29,
109
+ "<0x1e>": 30,
110
+ "<0x1f>": 31,
111
+ "<0x20>": 32,
112
+ "<0x21>": 33,
113
+ "<0x22>": 34,
114
+ "<0x23>": 35,
115
+ "<0x24>": 36,
116
+ "<0x25>": 37,
117
+ "<0x26>": 38,
118
+ "<0x27>": 39,
119
+ "<0x28>": 40,
120
+ "<0x29>": 41,
121
+ "<0x2a>": 42,
122
+ "<0x2b>": 43,
123
+ "<0x2c>": 44,
124
+ "<0x2d>": 45,
125
+ "<0x2e>": 46,
126
+ "<0x2f>": 47,
127
+ "<0x30>": 48,
128
+ "<0x31>": 49,
129
+ "<0x32>": 50,
130
+ "<0x33>": 51,
131
+ "<0x34>": 52,
132
+ "<0x35>": 53,
133
+ "<0x36>": 54,
134
+ "<0x37>": 55,
135
+ "<0x38>": 56,
136
+ "<0x39>": 57,
137
+ "<0x3a>": 58,
138
+ "<0x3b>": 59,
139
+ "<0x3c>": 60,
140
+ "<0x3d>": 61,
141
+ "<0x3e>": 62,
142
+ "<0x3f>": 63,
143
+ "<0x40>": 64,
144
+ "<0x41>": 65,
145
+ "<0x42>": 66,
146
+ "<0x43>": 67,
147
+ "<0x44>": 68,
148
+ "<0x45>": 69,
149
+ "<0x46>": 70,
150
+ "<0x47>": 71,
151
+ "<0x48>": 72,
152
+ "<0x49>": 73,
153
+ "<0x4a>": 74,
154
+ "<0x4b>": 75,
155
+ "<0x4c>": 76,
156
+ "<0x4d>": 77,
157
+ "<0x4e>": 78,
158
+ "<0x4f>": 79,
159
+ "<0x50>": 80,
160
+ "<0x51>": 81,
161
+ "<0x52>": 82,
162
+ "<0x53>": 83,
163
+ "<0x54>": 84,
164
+ "<0x55>": 85,
165
+ "<0x56>": 86,
166
+ "<0x57>": 87,
167
+ "<0x58>": 88,
168
+ "<0x59>": 89,
169
+ "<0x5a>": 90,
170
+ "<0x5b>": 91,
171
+ "<0x5c>": 92,
172
+ "<0x5d>": 93,
173
+ "<0x5e>": 94,
174
+ "<0x5f>": 95,
175
+ "<0x60>": 96,
176
+ "<0x61>": 97,
177
+ "<0x62>": 98,
178
+ "<0x63>": 99,
179
+ "<0x64>": 100,
180
+ "<0x65>": 101,
181
+ "<0x66>": 102,
182
+ "<0x67>": 103,
183
+ "<0x68>": 104,
184
+ "<0x69>": 105,
185
+ "<0x6a>": 106,
186
+ "<0x6b>": 107,
187
+ "<0x6c>": 108,
188
+ "<0x6d>": 109,
189
+ "<0x6e>": 110,
190
+ "<0x6f>": 111,
191
+ "<0x70>": 112,
192
+ "<0x71>": 113,
193
+ "<0x72>": 114,
194
+ "<0x73>": 115,
195
+ "<0x74>": 116,
196
+ "<0x75>": 117,
197
+ "<0x76>": 118,
198
+ "<0x77>": 119,
199
+ "<0x78>": 120,
200
+ "<0x79>": 121,
201
+ "<0x7a>": 122,
202
+ "<0x7b>": 123,
203
+ "<0x7c>": 124,
204
+ "<0x7d>": 125,
205
+ "<0x7e>": 126,
206
+ "<0x7f>": 127,
207
+ "<0x80>": 128,
208
+ "<0x81>": 129,
209
+ "<0x82>": 130,
210
+ "<0x83>": 131,
211
+ "<0x84>": 132,
212
+ "<0x85>": 133,
213
+ "<0x86>": 134,
214
+ "<0x87>": 135,
215
+ "<0x88>": 136,
216
+ "<0x89>": 137,
217
+ "<0x8a>": 138,
218
+ "<0x8b>": 139,
219
+ "<0x8c>": 140,
220
+ "<0x8d>": 141,
221
+ "<0x8e>": 142,
222
+ "<0x8f>": 143,
223
+ "<0x90>": 144,
224
+ "<0x91>": 145,
225
+ "<0x92>": 146,
226
+ "<0x93>": 147,
227
+ "<0x94>": 148,
228
+ "<0x95>": 149,
229
+ "<0x96>": 150,
230
+ "<0x97>": 151,
231
+ "<0x98>": 152,
232
+ "<0x99>": 153,
233
+ "<0x9a>": 154,
234
+ "<0x9b>": 155,
235
+ "<0x9c>": 156,
236
+ "<0x9d>": 157,
237
+ "<0x9e>": 158,
238
+ "<0x9f>": 159,
239
+ "<0xa0>": 160,
240
+ "<0xa1>": 161,
241
+ "<0xa2>": 162,
242
+ "<0xa3>": 163,
243
+ "<0xa4>": 164,
244
+ "<0xa5>": 165,
245
+ "<0xa6>": 166,
246
+ "<0xa7>": 167,
247
+ "<0xa8>": 168,
248
+ "<0xa9>": 169,
249
+ "<0xaa>": 170,
250
+ "<0xab>": 171,
251
+ "<0xac>": 172,
252
+ "<0xad>": 173,
253
+ "<0xae>": 174,
254
+ "<0xaf>": 175,
255
+ "<0xb0>": 176,
256
+ "<0xb1>": 177,
257
+ "<0xb2>": 178,
258
+ "<0xb3>": 179,
259
+ "<0xb4>": 180,
260
+ "<0xb5>": 181,
261
+ "<0xb6>": 182,
262
+ "<0xb7>": 183,
263
+ "<0xb8>": 184,
264
+ "<0xb9>": 185,
265
+ "<0xba>": 186,
266
+ "<0xbb>": 187,
267
+ "<0xbc>": 188,
268
+ "<0xbd>": 189,
269
+ "<0xbe>": 190,
270
+ "<0xbf>": 191,
271
+ "<0xc0>": 192,
272
+ "<0xc1>": 193,
273
+ "<0xc2>": 194,
274
+ "<0xc3>": 195,
275
+ "<0xc4>": 196,
276
+ "<0xc5>": 197,
277
+ "<0xc6>": 198,
278
+ "<0xc7>": 199,
279
+ "<0xc8>": 200,
280
+ "<0xc9>": 201,
281
+ "<0xca>": 202,
282
+ "<0xcb>": 203,
283
+ "<0xcc>": 204,
284
+ "<0xcd>": 205,
285
+ "<0xce>": 206,
286
+ "<0xcf>": 207,
287
+ "<0xd0>": 208,
288
+ "<0xd1>": 209,
289
+ "<0xd2>": 210,
290
+ "<0xd3>": 211,
291
+ "<0xd4>": 212,
292
+ "<0xd5>": 213,
293
+ "<0xd6>": 214,
294
+ "<0xd7>": 215,
295
+ "<0xd8>": 216,
296
+ "<0xd9>": 217,
297
+ "<0xda>": 218,
298
+ "<0xdb>": 219,
299
+ "<0xdc>": 220,
300
+ "<0xdd>": 221,
301
+ "<0xde>": 222,
302
+ "<0xdf>": 223,
303
+ "<0xe0>": 224,
304
+ "<0xe1>": 225,
305
+ "<0xe2>": 226,
306
+ "<0xe3>": 227,
307
+ "<0xe4>": 228,
308
+ "<0xe5>": 229,
309
+ "<0xe6>": 230,
310
+ "<0xe7>": 231,
311
+ "<0xe8>": 232,
312
+ "<0xe9>": 233,
313
+ "<0xea>": 234,
314
+ "<0xeb>": 235,
315
+ "<0xec>": 236,
316
+ "<0xed>": 237,
317
+ "<0xee>": 238,
318
+ "<0xef>": 239,
319
+ "<0xf0>": 240,
320
+ "<0xf1>": 241,
321
+ "<0xf2>": 242,
322
+ "<0xf3>": 243,
323
+ "<0xf4>": 244,
324
+ "<0xf5>": 245,
325
+ "<0xf6>": 246,
326
+ "<0xf7>": 247,
327
+ "<0xf8>": 248,
328
+ "<0xf9>": 249,
329
+ "<0xfa>": 250,
330
+ "<0xfb>": 251,
331
+ "<0xfc>": 252,
332
+ "<0xfd>": 253,
333
+ "<0xfe>": 254,
334
+ "<0xff>": 255,
335
+ "<|endoftext|>": 50256
336
+ },
337
+ "merges": [
338
+ "a0 b0",
339
+ "a1 b1",
340
+ "a2 b2",
341
+ "a3 b3",
342
+ "a4 b4",
343
+ "a5 b5",
344
+ "a6 b6",
345
+ "a7 b7",
346
+ "a8 b8",
347
+ "a9 b9",
348
+ "a10 b10",
349
+ "a11 b11",
350
+ "a12 b12",
351
+ "a13 b13",
352
+ "a14 b14",
353
+ "a15 b15",
354
+ "a16 b16",
355
+ "a17 b17",
356
+ "a18 b18",
357
+ "a19 b19",
358
+ "a20 b20",
359
+ "a21 b21",
360
+ "a22 b22",
361
+ "a23 b23",
362
+ "a24 b24",
363
+ "a25 b25",
364
+ "a26 b26",
365
+ "a27 b27",
366
+ "a28 b28",
367
+ "a29 b29",
368
+ "a30 b30",
369
+ "a31 b31",
370
+ "a32 b32",
371
+ "a33 b33",
372
+ "a34 b34",
373
+ "a35 b35",
374
+ "a36 b36",
375
+ "a37 b37",
376
+ "a38 b38",
377
+ "a39 b39",
378
+ "a40 b40",
379
+ "a41 b41",
380
+ "a42 b42",
381
+ "a43 b43",
382
+ "a44 b44",
383
+ "a45 b45",
384
+ "a46 b46",
385
+ "a47 b47",
386
+ "a48 b48",
387
+ "a49 b49",
388
+ "a50 b50",
389
+ "a51 b51",
390
+ "a52 b52",
391
+ "a53 b53",
392
+ "a54 b54",
393
+ "a55 b55",
394
+ "a56 b56",
395
+ "a57 b57",
396
+ "a58 b58",
397
+ "a59 b59",
398
+ "a60 b60",
399
+ "a61 b61",
400
+ "a62 b62",
401
+ "a63 b63",
402
+ "a64 b64",
403
+ "a65 b65",
404
+ "a66 b66",
405
+ "a67 b67",
406
+ "a68 b68",
407
+ "a69 b69",
408
+ "a70 b70",
409
+ "a71 b71",
410
+ "a72 b72",
411
+ "a73 b73",
412
+ "a74 b74",
413
+ "a75 b75",
414
+ "a76 b76",
415
+ "a77 b77",
416
+ "a78 b78",
417
+ "a79 b79",
418
+ "a80 b80",
419
+ "a81 b81",
420
+ "a82 b82",
421
+ "a83 b83",
422
+ "a84 b84",
423
+ "a85 b85",
424
+ "a86 b86",
425
+ "a87 b87",
426
+ "a88 b88",
427
+ "a89 b89",
428
+ "a90 b90",
429
+ "a91 b91",
430
+ "a92 b92",
431
+ "a93 b93",
432
+ "a94 b94",
433
+ "a95 b95",
434
+ "a96 b96",
435
+ "a97 b97",
436
+ "a98 b98",
437
+ "a99 b99"
438
+ ]
439
+ }
440
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "GPT2Tokenizer",
3
+ "bos_token": "<|endoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>",
6
+ "pad_token": "<|endoftext|>",
7
+ "add_prefix_space": false,
8
+ "model_max_length": 1024,
9
+ "special_tokens_map_file": "special_tokens_map.json",
10
+ "name_or_path": "micro-distill-grpo-vae"
11
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff