{
  "quantization_method": "bitsandbytes_nf4",
  "load_in_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "bnb_4bit_compute_dtype": "torch.bfloat16",
  "expected_vram_gb": 45,
  "notes": "Load with BitsAndBytesConfig for NF4 quantization. Attention layers kept in full precision.",
  "attention_layers_quantized": false
}