Uploaded model
- Developed by: metascroy
- License: apache-2.0
- Finetuned from model: unsloth/Ministral-3-3B-Instruct-2512
This mistral3 model was trained 2x faster with Unsloth and Hugging Face's TRL library.
Finetune with unsloth and torchao
Below we show how to finetune Ministral-3-3B with Unsloth using quantization-aware training (QAT) so that the resulting model can be deployed with ExecuTorch. The example is based on the notebook here.
################################################################################
# We first load the model for QAT using the mobile CPU friendly int8-int4 scheme
################################################################################
from unsloth import FastVisionModel
from unsloth.chat_templates import get_chat_template
import torch

MODEL_ID = "unsloth/Ministral-3-3B-Instruct-2512"
QAT_SCHEME = "int8-int4"

model, tokenizer = FastVisionModel.from_pretrained(
    model_name = MODEL_ID,
    max_seq_length = 2048,
    dtype = torch.bfloat16,
    load_in_4bit = False,
    full_finetuning = True,
    # ExecuTorch CPU quantization scheme:
    # quantize embeddings to 8 bits, and quantize linear layers to 4 bits
    # with 8-bit dynamically quantized activations
    qat_scheme = QAT_SCHEME,
)
print(model)
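# Note: with qat_scheme set, training fake-quantizes weights and activations in
# the forward pass so the model adapts to the int8-int4 numerics it will see
# after quantization; nothing else needs to change in the training loop below.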
################################################################################
# Data prep
################################################################################
from datasets import load_dataset
dataset = load_dataset("unsloth/LaTeX_OCR", split = "train")
# Convert the dataset into a conversational format
instruction = "Write the LaTeX representation for this image."
def convert_to_conversation(sample):
    conversation = [
        { "role" : "user",
          "content" : [
            {"type" : "text",  "text"  : instruction},
            {"type" : "image", "image" : sample["image"]} ]
        },
        { "role" : "assistant",
          "content" : [
            {"type" : "text", "text" : sample["text"]} ]
        },
    ]
    return { "messages" : conversation }
converted_dataset = [convert_to_conversation(sample) for sample in dataset]
print(converted_dataset[0])
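# The printed sample should have this structure (image shown as a placeholder):
# {"messages": [
#     {"role": "user", "content": [
#         {"type": "text", "text": "Write the LaTeX representation for this image."},
#         {"type": "image", "image": <PIL image>}]},
#     {"role": "assistant", "content": [
#         {"type": "text", "text": <LaTeX string from the dataset>}]}]}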
################################################################################
# Before finetuning
################################################################################
FastVisionModel.for_inference(model) # Enable for inference!
image = dataset[2]["image"]
instruction = "Write the LaTeX representation for this image."
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens = False,
    return_tensors = "pt",
).to("cuda")
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 64,
                   use_cache = True, temperature = 1.5, min_p = 0.1)
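# Before finetuning, the generated text typically will not match the dataset's
# LaTeX formatting; compare it with the post-finetuning generation further below.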
################################################################################
# Define trainer
################################################################################
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from unsloth import is_bf16_supported
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer), # Must use!
    train_dataset = converted_dataset,
    args = SFTConfig(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        max_steps = 30,
        # num_train_epochs = 1, # Set this instead of max_steps for full training runs
        learning_rate = 3e-5,
        logging_steps = 1,
        optim = "adamw_8bit",
        fp16 = not is_bf16_supported(), # Use fp16 if bf16 is not supported
        bf16 = is_bf16_supported(),     # Use bf16 if supported
        weight_decay = 0.001,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
        # You MUST set the items below for vision finetuning:
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        max_length = 2048,
    ),
)
################################################################################
# Run finetuning
################################################################################
trainer_stats = trainer.train()
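# trainer.train() returns a TrainOutput whose metrics dict includes entries such
# as train_runtime and train_loss, handy for a quick sanity check of the run.
print(trainer_stats.metrics)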
################################################################################
# Inference after finetuning
################################################################################
FastVisionModel.for_inference(model) # Enable for inference!
image = dataset[2]["image"]
instruction = "Write the LaTeX representation for this image."
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens = False,
    return_tensors = "pt",
).to("cuda")
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,
                   use_cache = True, temperature = 1.5, min_p = 0.1)
################################################################################
# Convert model to torchao format and save
################################################################################
from unsloth.models._utils import _convert_torchao_model
_convert_torchao_model(model)
model_name = MODEL_ID.split("/")[-1]
save_to = f"{model_name}-{QAT_SCHEME}-unsloth"
# Save locally
model.save_pretrained(save_to, safe_serialization=False)
tokenizer.save_pretrained(save_to)
# Or save to hub
from huggingface_hub import get_token, whoami
def _get_username():
    token = get_token()
    username = whoami(token=token)["name"]
    return username
username = _get_username()
model.push_to_hub(f"{username}/{save_to}", safe_serialization=False)
tokenizer.push_to_hub(f"{username}/{save_to}")
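Once the quantized checkpoint is saved (locally or on the Hub), it can be lowered to an ExecuTorch .pte program for on-device inference. The exact export flow (for example via optimum-executorch or the ExecuTorch LLM export scripts) depends on the versions you have installed, so follow the ExecuTorch documentation for the command that matches your setup.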