{
"type": "oloop.OLoopModel",
"pretrained_url": "aklein4/Llama-3.2-1B-TPU",
"pretrained_step": 0,
"pretrained_strict": false,
"torch_dtype": "float32",
"vocab_size": 128256,
"bos_token_id": 128000,
"eos_token_id": 128001,
"pad_token_id": -100,
"hidden_size": 2048,
"num_hidden_layers": 16,
"num_attention_heads": 32,
"num_key_value_heads": 8,
"intermediate_size": 8192,
"hidden_act": "silu",
"max_position_embeddings": 131072,
"rope_theta": 500000.0,
"rope_scaling": {
"factor": 32.0,
"high_freq_factor": 4.0,
"low_freq_factor": 1.0,
"original_context_len": 8192
},
"initializer_range": 0.02,
"attention_dropout": false,
"attention_bias": false,
"rms_norm_eps": 1e-05,
"attention_kernel": "flash_attention",
"pure_modules": [],
"fast_weight_size": 2048,
"base_lr": 0.001,
"momentum_beta": 0.9,
"momentum_dtype": "bfloat16",
"state_dtype": "float32",
"sharding": {
"model.layers.*": [
[
"data",
"fsdp"
],
null,
null
],
"lm_head": [
[
"data",
"fsdp"
],
null,
null
]
},
"remat": {
"activation_checkpoint_layers": [
"LlamaDecoderLayer"
],
"optimization_barrier_layers": [
"LlamaDecoderLayer"
],
"scan_layers": "model.layers",
"offload_tensors": [
"decoder_input"
]
}
}