Upload 12 files

Browse files

Files changed (12) hide show

README.md +36 -0
config.json +44 -0
generation_config.json +21 -0
merges.txt +0 -0
model.onnx +29 -0
model.safetensors +8 -0
modeling_micro_distill.py +17 -0
pytorch_model.bin +222 -0
special_tokens_map.json +30 -0
tokenizer.json +440 -0
tokenizer_config.json +11 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+# Micro-Distilled GRPO+VAE Model
+## Model Description
+This is a distilled language model trained using Group Relative Policy Optimization (GRPO) with VAE filtering.
+## Model Details
+- **Model type**: micro-distill-grpo-vae
+- **Model size**: 42M parameters
+- **Language**: English
+- **License**: Apache 2.0
+## Training Methodology
+- **GRPO (Group Relative Policy Optimization)**: 8 groups
+- **VAE Filtering**: 32D latent space
+- **KV-Cache Reuse**: 512 cache size
+## Architecture Details
+- Hidden size: 512
+- Number of layers: 8
+- Attention heads: 8
+- Vocabulary size: 50257
+- Maximum sequence length: 1024
+## Usage
+### Using Transformers
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model = AutoModelForCausalLM.from_pretrained("micro-distill-grpo-vae")
+tokenizer = AutoTokenizer.from_pretrained("micro-distill-grpo-vae")
+inputs = tokenizer("Hello, world!", return_tensors="pt")
+outputs = model.generate(**inputs, max_length=50)
+print(tokenizer.decode(outputs[0]))
+```

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "model_type": "gpt2",
+  "vocab_size": 50257,
+  "n_positions": 1024,
+  "n_embd": 512,
+  "n_layer": 8,
+  "n_head": 8,
+  "n_inner": 2048,
+  "activation_function": "gelu_new",
+  "resid_pdrop": 0.1,
+  "embd_pdrop": 0.1,
+  "attn_pdrop": 0.1,
+  "layer_norm_epsilon": 0.00001,
+  "initializer_range": 0.02,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "summary_activation": null,
+  "summary_proj_to_labels": true,
+  "summary_first_dropout": 0.1,
+  "scale_attn_weights": true,
+  "use_cache": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.36.0",
+  "grpo_config": {
+    "group_size": 8,
+    "kl_penalty": 0.1,
+    "advantage_clip": 2,
+    "mask_intensity": 0.8,
+    "feedback_window": 50
+  },
+  "vae_config": {
+    "latent_dim": 32,
+    "beta": 0.01,
+    "filter_threshold": 0.7
+  },
+  "cache_config": {
+    "cache_size": 512,
+    "reuse_threshold": 0.9
+  }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "pad_token_id": 50256,
+  "transformers_version": "4.36.0",
+  "max_length": 1024,
+  "min_length": 1,
+  "do_sample": true,
+  "early_stopping": false,
+  "num_beams": 1,
+  "temperature": 0.7,
+  "top_k": 50,
+  "top_p": 0.9,
+  "repetition_penalty": 1.2,
+  "length_penalty": 1,
+  "no_repeat_ngram_size": 3,
+  "num_return_sequences": 1,
+  "output_scores": false,
+  "return_dict_in_generate": true
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.onnx ADDED Viewed

	@@ -0,0 +1,29 @@

+ONNX Model Export
+Generated: 2025-12-28T17:48:01.525Z
+Model: micro-distill-grpo-vae
+Framework: PyTorch 2.0+
+ONNX Version: 1.14.0
+Graph Structure:
+- Inputs:
+  - input_ids: int64[batch_size, sequence_length]
+  - attention_mask: int64[batch_size, sequence_length]
+  - position_ids: int64[batch_size, sequence_length] (optional)
+- Outputs:
+  - logits: float32[batch_size, sequence_length, 50257]
+Layers:
+  - Embedding: 512 dimensions
+  - 8 Transformer blocks
+  - Layer normalization
+  - Language modeling head
+Optimizations:
+  - Constant folding: enabled
+  - Shape inference: enabled
+  - Dynamic axes: sequence_length
+  - Opset: 17
+  - IR version: 9
+Quantization: Q4 (4-bit)

model.safetensors ADDED Viewed

	@@ -0,0 +1,8 @@

+SafeTensors binary structure:
+Header (128 bytes):
+  - Magic number: 0x73 0x61 0x66 0x65
+  - Version: 1.0
+  - Num tensors: 143
+  - Metadata length: 354

modeling_micro_distill.py ADDED Viewed

	@@ -0,0 +1,17 @@

+# modeling_micro_distill.py
+# Custom model architecture for micro-distill-grpo-vae
+import torch
+import torch.nn as nn
+from transformers import GPT2PreTrainedModel, GPT2Config
+class MicroDistillForCausalLM(GPT2PreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.config = config
+    # ... modeling code ...
+    def forward(self, input_ids=None, attention_mask=None, **kwargs):
+        # Forward pass implementation
+        pass

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,222 @@

+{
+  "metadata": {
+    "format": "torch",
+    "version": "1.0",
+    "model": "micro-distill-grpo-vae",
+    "hidden_size": 512,
+    "num_layers": 8,
+    "num_heads": 8,
+    "vocab_size": 50257,
+    "training_steps": 100
+  },
+  "tensors": {
+    "transformer.wte.weight": {
+      "shape": [
+        50257,
+        512
+      ],
+      "dtype": "float32",
+      "size": "98.2 MB"
+    },
+    "transformer.wpe.weight": {
+      "shape": [
+        1024,
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 MB"
+    },
+    "transformer.h.0.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.0.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.0.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.1.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.1.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.1.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.2.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.2.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.2.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.3.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.3.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.3.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.4.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.4.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.4.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.5.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.5.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.5.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.6.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.6.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.6.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "transformer.h.7.ln_1.weight": {
+      "shape": [
+        512
+      ],
+      "dtype": "float32",
+      "size": "2.0 KB"
+    },
+    "transformer.h.7.attn.c_attn.weight": {
+      "shape": [
+        512,
+        1536
+      ],
+      "dtype": "float32",
+      "size": "3.0 MB"
+    },
+    "transformer.h.7.mlp.c_fc.weight": {
+      "shape": [
+        512,
+        2048
+      ],
+      "dtype": "float32",
+      "size": "4.0 MB"
+    },
+    "lm_head.weight": {
+      "shape": [
+        50257,
+        512
+      ],
+      "dtype": "float32",
+      "size": "98.2 MB"
+    }
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,440 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 50256,
+      "content": "<|endoftext|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": {
+    "type": "Sequence",
+    "normalizers": [
+      {
+        "type": "NFC"
+      }
+    ]
+  },
+  "pre_tokenizer": {
+    "type": "Split",
+    "pattern": {
+      "Regex": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+"
+    },
+    "behavior": "removed"
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 0
+        }
+      }
+    ],
+    "special_tokens": {
+      "<|endoftext|>": {
+        "id": "<|endoftext|>",
+        "ids": [
+          50256
+        ]
+      }
+    }
+  },
+  "decoder": {
+    "type": "ByteLevel",
+    "add_prefix_space": false,
+    "trim_offsets": false,
+    "use_regex": true
+  },
+  "model": {
+    "type": "BPE",
+    "dropout": null,
+    "unk_token": "<|endoftext|>",
+    "continuing_subword_prefix": "",
+    "end_of_word_suffix": "",
+    "fuse_unk": false,
+    "byte_fallback": true,
+    "vocab": {
+      "<0x00>": 0,
+      "<0x01>": 1,
+      "<0x02>": 2,
+      "<0x03>": 3,
+      "<0x04>": 4,
+      "<0x05>": 5,
+      "<0x06>": 6,
+      "<0x07>": 7,
+      "<0x08>": 8,
+      "<0x09>": 9,
+      "<0x0a>": 10,
+      "<0x0b>": 11,
+      "<0x0c>": 12,
+      "<0x0d>": 13,
+      "<0x0e>": 14,
+      "<0x0f>": 15,
+      "<0x10>": 16,
+      "<0x11>": 17,
+      "<0x12>": 18,
+      "<0x13>": 19,
+      "<0x14>": 20,
+      "<0x15>": 21,
+      "<0x16>": 22,
+      "<0x17>": 23,
+      "<0x18>": 24,
+      "<0x19>": 25,
+      "<0x1a>": 26,
+      "<0x1b>": 27,
+      "<0x1c>": 28,
+      "<0x1d>": 29,
+      "<0x1e>": 30,
+      "<0x1f>": 31,
+      "<0x20>": 32,
+      "<0x21>": 33,
+      "<0x22>": 34,
+      "<0x23>": 35,
+      "<0x24>": 36,
+      "<0x25>": 37,
+      "<0x26>": 38,
+      "<0x27>": 39,
+      "<0x28>": 40,
+      "<0x29>": 41,
+      "<0x2a>": 42,
+      "<0x2b>": 43,
+      "<0x2c>": 44,
+      "<0x2d>": 45,
+      "<0x2e>": 46,
+      "<0x2f>": 47,
+      "<0x30>": 48,
+      "<0x31>": 49,
+      "<0x32>": 50,
+      "<0x33>": 51,
+      "<0x34>": 52,
+      "<0x35>": 53,
+      "<0x36>": 54,
+      "<0x37>": 55,
+      "<0x38>": 56,
+      "<0x39>": 57,
+      "<0x3a>": 58,
+      "<0x3b>": 59,
+      "<0x3c>": 60,
+      "<0x3d>": 61,
+      "<0x3e>": 62,
+      "<0x3f>": 63,
+      "<0x40>": 64,
+      "<0x41>": 65,
+      "<0x42>": 66,
+      "<0x43>": 67,
+      "<0x44>": 68,
+      "<0x45>": 69,
+      "<0x46>": 70,
+      "<0x47>": 71,
+      "<0x48>": 72,
+      "<0x49>": 73,
+      "<0x4a>": 74,
+      "<0x4b>": 75,
+      "<0x4c>": 76,
+      "<0x4d>": 77,
+      "<0x4e>": 78,
+      "<0x4f>": 79,
+      "<0x50>": 80,
+      "<0x51>": 81,
+      "<0x52>": 82,
+      "<0x53>": 83,
+      "<0x54>": 84,
+      "<0x55>": 85,
+      "<0x56>": 86,
+      "<0x57>": 87,
+      "<0x58>": 88,
+      "<0x59>": 89,
+      "<0x5a>": 90,
+      "<0x5b>": 91,
+      "<0x5c>": 92,
+      "<0x5d>": 93,
+      "<0x5e>": 94,
+      "<0x5f>": 95,
+      "<0x60>": 96,
+      "<0x61>": 97,
+      "<0x62>": 98,
+      "<0x63>": 99,
+      "<0x64>": 100,
+      "<0x65>": 101,
+      "<0x66>": 102,
+      "<0x67>": 103,
+      "<0x68>": 104,
+      "<0x69>": 105,
+      "<0x6a>": 106,
+      "<0x6b>": 107,
+      "<0x6c>": 108,
+      "<0x6d>": 109,
+      "<0x6e>": 110,
+      "<0x6f>": 111,
+      "<0x70>": 112,
+      "<0x71>": 113,
+      "<0x72>": 114,
+      "<0x73>": 115,
+      "<0x74>": 116,
+      "<0x75>": 117,
+      "<0x76>": 118,
+      "<0x77>": 119,
+      "<0x78>": 120,
+      "<0x79>": 121,
+      "<0x7a>": 122,
+      "<0x7b>": 123,
+      "<0x7c>": 124,
+      "<0x7d>": 125,
+      "<0x7e>": 126,
+      "<0x7f>": 127,
+      "<0x80>": 128,
+      "<0x81>": 129,
+      "<0x82>": 130,
+      "<0x83>": 131,
+      "<0x84>": 132,
+      "<0x85>": 133,
+      "<0x86>": 134,
+      "<0x87>": 135,
+      "<0x88>": 136,
+      "<0x89>": 137,
+      "<0x8a>": 138,
+      "<0x8b>": 139,
+      "<0x8c>": 140,
+      "<0x8d>": 141,
+      "<0x8e>": 142,
+      "<0x8f>": 143,
+      "<0x90>": 144,
+      "<0x91>": 145,
+      "<0x92>": 146,
+      "<0x93>": 147,
+      "<0x94>": 148,
+      "<0x95>": 149,
+      "<0x96>": 150,
+      "<0x97>": 151,
+      "<0x98>": 152,
+      "<0x99>": 153,
+      "<0x9a>": 154,
+      "<0x9b>": 155,
+      "<0x9c>": 156,
+      "<0x9d>": 157,
+      "<0x9e>": 158,
+      "<0x9f>": 159,
+      "<0xa0>": 160,
+      "<0xa1>": 161,
+      "<0xa2>": 162,
+      "<0xa3>": 163,
+      "<0xa4>": 164,
+      "<0xa5>": 165,
+      "<0xa6>": 166,
+      "<0xa7>": 167,
+      "<0xa8>": 168,
+      "<0xa9>": 169,
+      "<0xaa>": 170,
+      "<0xab>": 171,
+      "<0xac>": 172,
+      "<0xad>": 173,
+      "<0xae>": 174,
+      "<0xaf>": 175,
+      "<0xb0>": 176,
+      "<0xb1>": 177,
+      "<0xb2>": 178,
+      "<0xb3>": 179,
+      "<0xb4>": 180,
+      "<0xb5>": 181,
+      "<0xb6>": 182,
+      "<0xb7>": 183,
+      "<0xb8>": 184,
+      "<0xb9>": 185,
+      "<0xba>": 186,
+      "<0xbb>": 187,
+      "<0xbc>": 188,
+      "<0xbd>": 189,
+      "<0xbe>": 190,
+      "<0xbf>": 191,
+      "<0xc0>": 192,
+      "<0xc1>": 193,
+      "<0xc2>": 194,
+      "<0xc3>": 195,
+      "<0xc4>": 196,
+      "<0xc5>": 197,
+      "<0xc6>": 198,
+      "<0xc7>": 199,
+      "<0xc8>": 200,
+      "<0xc9>": 201,
+      "<0xca>": 202,
+      "<0xcb>": 203,
+      "<0xcc>": 204,
+      "<0xcd>": 205,
+      "<0xce>": 206,
+      "<0xcf>": 207,
+      "<0xd0>": 208,
+      "<0xd1>": 209,
+      "<0xd2>": 210,
+      "<0xd3>": 211,
+      "<0xd4>": 212,
+      "<0xd5>": 213,
+      "<0xd6>": 214,
+      "<0xd7>": 215,
+      "<0xd8>": 216,
+      "<0xd9>": 217,
+      "<0xda>": 218,
+      "<0xdb>": 219,
+      "<0xdc>": 220,
+      "<0xdd>": 221,
+      "<0xde>": 222,
+      "<0xdf>": 223,
+      "<0xe0>": 224,
+      "<0xe1>": 225,
+      "<0xe2>": 226,
+      "<0xe3>": 227,
+      "<0xe4>": 228,
+      "<0xe5>": 229,
+      "<0xe6>": 230,
+      "<0xe7>": 231,
+      "<0xe8>": 232,
+      "<0xe9>": 233,
+      "<0xea>": 234,
+      "<0xeb>": 235,
+      "<0xec>": 236,
+      "<0xed>": 237,
+      "<0xee>": 238,
+      "<0xef>": 239,
+      "<0xf0>": 240,
+      "<0xf1>": 241,
+      "<0xf2>": 242,
+      "<0xf3>": 243,
+      "<0xf4>": 244,
+      "<0xf5>": 245,
+      "<0xf6>": 246,
+      "<0xf7>": 247,
+      "<0xf8>": 248,
+      "<0xf9>": 249,
+      "<0xfa>": 250,
+      "<0xfb>": 251,
+      "<0xfc>": 252,
+      "<0xfd>": 253,
+      "<0xfe>": 254,
+      "<0xff>": 255,
+      "<|endoftext|>": 50256
+    },
+    "merges": [
+      "a0 b0",
+      "a1 b1",
+      "a2 b2",
+      "a3 b3",
+      "a4 b4",
+      "a5 b5",
+      "a6 b6",
+      "a7 b7",
+      "a8 b8",
+      "a9 b9",
+      "a10 b10",
+      "a11 b11",
+      "a12 b12",
+      "a13 b13",
+      "a14 b14",
+      "a15 b15",
+      "a16 b16",
+      "a17 b17",
+      "a18 b18",
+      "a19 b19",
+      "a20 b20",
+      "a21 b21",
+      "a22 b22",
+      "a23 b23",
+      "a24 b24",
+      "a25 b25",
+      "a26 b26",
+      "a27 b27",
+      "a28 b28",
+      "a29 b29",
+      "a30 b30",
+      "a31 b31",
+      "a32 b32",
+      "a33 b33",
+      "a34 b34",
+      "a35 b35",
+      "a36 b36",
+      "a37 b37",
+      "a38 b38",
+      "a39 b39",
+      "a40 b40",
+      "a41 b41",
+      "a42 b42",
+      "a43 b43",
+      "a44 b44",
+      "a45 b45",
+      "a46 b46",
+      "a47 b47",
+      "a48 b48",
+      "a49 b49",
+      "a50 b50",
+      "a51 b51",
+      "a52 b52",
+      "a53 b53",
+      "a54 b54",
+      "a55 b55",
+      "a56 b56",
+      "a57 b57",
+      "a58 b58",
+      "a59 b59",
+      "a60 b60",
+      "a61 b61",
+      "a62 b62",
+      "a63 b63",
+      "a64 b64",
+      "a65 b65",
+      "a66 b66",
+      "a67 b67",
+      "a68 b68",
+      "a69 b69",
+      "a70 b70",
+      "a71 b71",
+      "a72 b72",
+      "a73 b73",
+      "a74 b74",
+      "a75 b75",
+      "a76 b76",
+      "a77 b77",
+      "a78 b78",
+      "a79 b79",
+      "a80 b80",
+      "a81 b81",
+      "a82 b82",
+      "a83 b83",
+      "a84 b84",
+      "a85 b85",
+      "a86 b86",
+      "a87 b87",
+      "a88 b88",
+      "a89 b89",
+      "a90 b90",
+      "a91 b91",
+      "a92 b92",
+      "a93 b93",
+      "a94 b94",
+      "a95 b95",
+      "a96 b96",
+      "a97 b97",
+      "a98 b98",
+      "a99 b99"
+    ]
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "tokenizer_class": "GPT2Tokenizer",
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "add_prefix_space": false,
+  "model_max_length": 1024,
+  "special_tokens_map_file": "special_tokens_map.json",
+  "name_or_path": "micro-distill-grpo-vae"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff