Uploaded model

  • Developed by: metascroy
  • License: apache-2.0
  • Finetuned from model: unsloth/Ministral-3-3B-Instruct-2512

This mistral3 model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Finetune with Unsloth and torchao

Below we show how to finetune Ministral-3-3B with Unsloth so that the resulting model can be deployed with ExecuTorch. The example is based on the notebook here.

################################################################################
# We first load the model for QAT using the mobile-CPU-friendly int8-int4 scheme
################################################################################

from unsloth import FastVisionModel
from unsloth.chat_templates import (
    get_chat_template,
)
import torch

MODEL_ID = "unsloth/Ministral-3-3B-Instruct-2512"
QAT_SCHEME = "int8-int4"

model, tokenizer = FastVisionModel.from_pretrained(
    model_name = MODEL_ID,
    max_seq_length = 2048,
    dtype = torch.bfloat16,
    load_in_4bit = False,
    full_finetuning = True,
    # ExecuTorch CPU quantization scheme
    # Quantize embeddings to 8 bits and linear layers to 4 bits,
    # with 8-bit dynamically quantized activations
    qat_scheme = QAT_SCHEME,
)

print(model)
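
# Optional sanity check (not part of the original notebook): with qat_scheme set,
# some submodules should be wrapped by torchao fake-quantization classes. The
# exact class names depend on the installed torchao/Unsloth versions, so we only
# look for "FakeQuant" in the class name.
qat_layers = [
    (name, type(module).__name__)
    for name, module in model.named_modules()
    if "FakeQuant" in type(module).__name__
]
print(f"Found {len(qat_layers)} fake-quantized modules")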

################################################################################
# Data prep
################################################################################

from datasets import load_dataset
dataset = load_dataset("unsloth/LaTeX_OCR", split = "train")

# Convert the dataset into a conversational format
instruction = "Write the LaTeX representation for this image."

def convert_to_conversation(sample):
    conversation = [
        { "role": "user",
          "content" : [
            {"type" : "text",  "text"  : instruction},
            {"type" : "image", "image" : sample["image"]} ]
        },
        { "role" : "assistant",
          "content" : [
            {"type" : "text",  "text"  : sample["text"]} ]
        },
    ]
    return { "messages" : conversation }

converted_dataset = [convert_to_conversation(sample) for sample in dataset]

print(converted_dataset[0])


################################################################################
# Before finetuning
################################################################################
FastVisionModel.for_inference(model) # Enable for inference!

image = dataset[2]["image"]
instruction = "Write the LaTeX representation for this image."

messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens = False,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 64,
                   use_cache = True, temperature = 1.5, min_p = 0.1)


################################################################################
# Define trainer
################################################################################

from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from unsloth import is_bf16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer), # Must use!
    train_dataset = converted_dataset,
    args = SFTConfig(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        max_steps = 30,
        # num_train_epochs = 1, # Set this instead of max_steps for full training runs
        learning_rate = 3e-5,
        logging_steps = 1,
        optim = "adamw_8bit",
        fp16 = not is_bf16_supported(), # Use fp16 if bf16 is not supported
        bf16 = is_bf16_supported(), # Use bf16 if supported
        weight_decay = 0.001,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",

        # The items below are required for vision finetuning:
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        max_length = 2048,
    ),
)
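
# Optional memory bookkeeping (similar to the stats cells in Unsloth notebooks):
# record how much GPU memory is reserved before training starts.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")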


################################################################################
# Run finetuning
################################################################################
trainer_stats = trainer.train()
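
# trainer.train() returns a TrainOutput; its .metrics dict includes timing info
# (exact key names depend on the transformers/TRL version installed).
print(trainer_stats.metrics.get("train_runtime", "n/a"), "seconds used for training.")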

################################################################################
# Inference after finetuning
################################################################################
FastVisionModel.for_inference(model) # Enable for inference!

image = dataset[2]["image"]
instruction = "Write the LaTeX representation for this image."

messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens = False,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,
                   use_cache = True, temperature = 1.5, min_p = 0.1)


################################################################################
# Convert model to torchao format and save
################################################################################

from unsloth.models._utils import _convert_torchao_model
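# This internal Unsloth helper converts the QAT (fake-quantized) modules into
# their quantized torchao form so the checkpoint below is saved in torchao
# format. It is a private API and may change between Unsloth versions.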
_convert_torchao_model(model)

model_name = MODEL_ID.split("/")[-1]
save_to = f"{model_name}-{QAT_SCHEME}-unsloth"

# Save locally
model.save_pretrained(save_to, safe_serialization=False)
tokenizer.save_pretrained(save_to)

# Or save to hub
from huggingface_hub import get_token, whoami
def _get_username():
    token = get_token()
    username = whoami(token=token)["name"]
    return username
username = _get_username()
model.push_to_hub(f"{username}/{save_to}", safe_serialization=False)
tokenizer.push_to_hub(f"{username}/{save_to}")
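
################################################################################
# Optional: quick check of the saved checkpoint
################################################################################
# A minimal sanity check (not part of the original notebook): reload only the
# config of the saved checkpoint and print its quantization settings. If the
# torchao conversion and save succeeded, the config should carry the int8-int4
# quantization metadata; otherwise the fallback string is printed.
from transformers import AutoConfig

saved_config = AutoConfig.from_pretrained(save_to)
print(getattr(saved_config, "quantization_config", "no quantization_config found"))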