{
"timestamp_utc": "2026-02-09T20:38:09.856840+00:00",
"source_model_dir": "artifacts/medasr-mlx-fp16",
"output_model_dir": "artifacts/medasr-mlx-int8",
"quantization": {
"bits": 8,
"group_size": 64,
"mode": "affine",
"target_modules": "mlx.nn.quantize default predicate (Linear/Embedding layers)"
},
"timing": {
"quantization_time_s": 0.0034
},
"size_mb": {
"source_weights": 200.9,
"output_weights": 121.108,
"compression_ratio_x": 1.659,
"reduction_percent": 39.72
}
}