[2025-12-26 10:10:04,765] [DEBUG] [axolotl.utils.config.resolve_dtype:66] [PID:1090] bf16 support detected, enabling for this configuration.
config.json: 0%|          | 0.00/760 [00:00<?, ?B/s]
[...]
  },
  "streaming_multipack_buffer_size": 10000,
  "strict": false,
  "tensor_parallel_size": 1,
  "tf32": true,
  "tiled_mlp_use_original_mlp": true,
  "tokenizer_config": "deepseek-ai/deepseek-coder-6.7b-instruct",
  "tokenizer_save_jinja_files": true,
  "torch_dtype": "torch.bfloat16",
  "train_on_inputs": false,
  "trl": {
    "log_completions": false,
    "mask_truncated_completions": false,
    "ref_model_mixup_alpha": 0.9,
    "ref_model_sync_steps": 64,
    "scale_rewards": true,
    "sync_ref_model": false,
    "use_vllm": false,
    "vllm_server_host": "0.0.0.0",
    "vllm_server_port": 8000
  },
  "trust_remote_code": true,
  "use_otel_metrics": false,
  "use_ray": false,
  "use_wandb": true,
  "val_set_size": 0.05,
  "vllm": {
    "device": "auto",
    "dtype": "auto",
    "gpu_memory_utilization": 0.9,
    "host": "0.0.0.0",
    "port": 8000
  },
  "wandb_name": "deepseek-coder-6.7b-luau",
  "wandb_project": "deepseek-luau-finetune",
  "warmup_ratio": 0.1,
  "weight_decay": 0.01,
  "world_size": 1
}
tokenizer_config.json: 1.87kB [00:00, 9.83MB/s]
tokenizer.json: 1.37MB [00:00, 12.5MB/s]
[2025-12-26 10:10:09,472] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:1090] EOS: 32021 / <|EOT|>
[2025-12-26 10:10:09,472] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:1090] BOS: 32013 / <|begin▁of▁sentence|>
[2025-12-26 10:10:09,472] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:282] [PID:1090] PAD: 32021 / <|EOT|>
[2025-12-26 10:10:09,472] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:283] [PID:1090] UNK: None / None
[2025-12-26 10:10:09,472] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:481] [PID:1090] Unable to find prepared dataset in last_run_prepared/b55957ea74fd9e928a892829ce7838eb
[2025-12-26 10:10:09,472] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:1090] Loading raw datasets...
[2025-12-26 10:10:09,472] [WARNING] [axolotl.utils.data.sft._load_raw_datasets:322] [PID:1090] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset using `axolotl preprocess path/to/config.yml`.
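The `resolve_dtype` line at the top of this log is a hardware capability probe. Below is a minimal sketch of an equivalent check in plain PyTorch; the fallback chain is illustrative, not axolotl's exact logic:

    import torch

    # bf16 needs Ampere (compute capability 8.0) or newer; this mirrors the
    # capability check behind the resolve_dtype DEBUG line above.
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
        dtype = torch.bfloat16  # matches "torch_dtype": "torch.bfloat16" in the config
    elif torch.cuda.is_available():
        dtype = torch.float16   # illustrative fallback for pre-Ampere GPUs
    else:
        dtype = torch.float32
    print(f"selected dtype: {dtype}")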
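The four tokenizer DEBUG lines are worth sanity-checking before a run, since a PAD that aliases EOS (32021 / <|EOT|> here) can matter for loss masking. A sketch using the tokenizer id from the resolved config; note a freshly loaded tokenizer may report a different PAD until axolotl applies its pad-to-EOS default:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(
        "deepseek-ai/deepseek-coder-6.7b-instruct",
        trust_remote_code=True,  # mirrors "trust_remote_code": true in the config
    )
    # Expected to echo the DEBUG lines above: EOS 32021 / <|EOT|>,
    # BOS 32013 / <|begin▁of▁sentence|>, and no UNK token.
    for name in ("eos", "bos", "pad", "unk"):
        print(name.upper(), getattr(tok, f"{name}_token_id"), "/", getattr(tok, f"{name}_token"))

Following the WARNING above, running `axolotl preprocess path/to/config.yml` once before `axolotl train` moves the tokenization pass below out of the training job and sidesteps the VRAM instability it mentions.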
README.md: 1.79kB [00:00, 11.7MB/s]
train.jsonl: 0%|          | 0.00/41.1M [00:00<?, ?B/s]
Dropping Long Sequences (>3072) (num_proc=32): 100%|██████████| 22636/22636 [00:00<00:00, 72336.69 examples/s]
Drop Samples with Zero Trainable Tokens (num_proc=32): 0%|          | 0/22636 [00:00<?, ? examples/s]
[2025-12-26 10:10:20,767] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:1090] EOS: 32021 / <|EOT|>
[2025-12-26 10:10:20,767] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:1090] BOS: 32013 / <|begin▁of▁sentence|>
[2025-12-26 10:10:20,767] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:282] [PID:1090] PAD: 32021 / <|EOT|>
[2025-12-26 10:10:20,767] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:283] [PID:1090] UNK: None / None
[2025-12-26 10:10:20,767] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:1090] Loading model
[2025-12-26 10:10:21,021] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:1090] Patched Trainer.evaluation_loop with nanmean loss calculation
[2025-12-26 10:10:21,022] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:1090] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
[2025-12-26 10:10:21,022] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:301] [PID:1090] Applying multipack dataloader patch for sample packing...
model.safetensors.index.json: 25.1kB [00:00, 109MB/s]
model-00001-of-00002.safetensors: 0%|          | 0.00/9.98G [00:00<?, ?B/s]
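The two dataset-map steps above ("Dropping Long Sequences (>3072)" and "Drop Samples with Zero Trainable Tokens") behave like ordinary `datasets` filters. A minimal sketch on a toy dataset, assuming the standard `input_ids`/`labels` column names; axolotl's own implementation may differ:

    from datasets import Dataset

    MAX_LEN = 3072  # the sequence_len ceiling behind ">3072" above

    def keep(ex):
        if len(ex["input_ids"]) > MAX_LEN:           # long-sequence drop
            return False
        return any(l != -100 for l in ex["labels"])  # zero-trainable-token drop

    # Toy two-row dataset: one short trainable row, one over-length row.
    toy = Dataset.from_dict({
        "input_ids": [[1, 2, 3], list(range(4000))],
        "labels":    [[-100, 2, 3], [-100] * 4000],
    })
    print(len(toy.filter(keep)))  # -> 1; add num_proc=32 for the parallelism logged above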
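The "multipack dataloader patch for sample packing" refers to packing several short tokenized examples into each 3072-token window so little of the batch is padding. A toy first-fit-decreasing sketch of the idea; axolotl's actual multipack algorithm and batching details differ:

    def pack(lengths, budget=3072):
        """Greedily bin-pack sequence lengths into fixed token budgets."""
        bins = []
        for n in sorted(lengths, reverse=True):  # first-fit-decreasing
            for b in bins:
                if sum(b) + n <= budget:
                    b.append(n)
                    break
            else:
                bins.append([n])
        return bins

    print(pack([3000, 100, 2000, 900, 72]))  # -> [[3000, 72], [2000, 900, 100]]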
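The first of the two safetensors shards is 9.98 GB, consistent with bf16 storage (about two bytes per weight for a 6.7B-parameter model). A sketch of an equivalent standalone load in plain `transformers`; `device_map="auto"` (which needs `accelerate` installed) is an assumption here, not something the log shows:

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/deepseek-coder-6.7b-instruct",
        torch_dtype=torch.bfloat16,  # mirrors the resolved config above
        trust_remote_code=True,
        device_map="auto",           # assumption: automatic placement via accelerate
    )
    print(next(model.parameters()).dtype)  # torch.bfloat16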