Update README.md

File changed: README.md

@@ -22,8 +22,17 @@ pip install sdnq
Before:

```py
import torch
import diffusers
from sdnq import SDNQConfig # import sdnq to register it into diffusers and transformers

pipe = diffusers.FluxPipeline.from_pretrained("Disty0/FLUX.1-dev-SDNQ-uint4-svd-r32", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()

prompt = "A cat holding a sign that says hello world"
```
After:

```py
import torch
import diffusers
from sdnq import SDNQConfig # import sdnq to register it into diffusers and transformers
from sdnq.common import use_torch_compile as triton_is_available
from sdnq.loader import apply_sdnq_options_to_model

pipe = diffusers.FluxPipeline.from_pretrained("Disty0/FLUX.1-dev-SDNQ-uint4-svd-r32", torch_dtype=torch.bfloat16)

# Enable INT8 MatMul for AMD, Intel ARC and Nvidia GPUs:
if triton_is_available and (torch.cuda.is_available() or torch.xpu.is_available()):
    pipe.transformer = apply_sdnq_options_to_model(pipe.transformer, use_quantized_matmul=True)
    pipe.text_encoder_2 = apply_sdnq_options_to_model(pipe.text_encoder_2, use_quantized_matmul=True)
    pipe.transformer = torch.compile(pipe.transformer) # optional for faster speeds

pipe.enable_model_cpu_offload()

prompt = "A cat holding a sign that says hello world"
```