import os
import random

import torch
import diffusers
import gradio as gr

# Key step: importing sdnq registers the SDNQ quantization ops. Without it,
# loading fails with errors such as
# "QuantizationMethod.SDNQ is not available yet".
from sdnq import SDNQConfig  # noqa: F401  # imported only for its registration side effect

# -------------------------
# 1. Load the Z-Image-Turbo SDNQ 4-bit pipeline once, at module level
# -------------------------
MODEL_ID = "Disty0/Z-Image-Turbo-SDNQ-uint4-svd-r32"


def load_pipeline():
    """Load the Z-Image-Turbo 4-bit SDNQ pipeline identified by ``MODEL_ID``.

    The first attempt uses ``torch.bfloat16`` (per the original author, this
    matches the model card). If that attempt raises, the pipeline is loaded
    again with ``torch.float32``. Afterwards model CPU offload is enabled on
    a best-effort basis: a failure there is printed and otherwise ignored.

    Returns:
        The object returned by ``diffusers.ZImagePipeline.from_pretrained``.

    Raises:
        Exception: whatever the float32 fallback load raises, since that
            second attempt is not guarded.
    """

    def _from_pretrained(torch_dtype):
        # Both attempts share the same call; only the dtype differs.
        return diffusers.ZImagePipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch_dtype,
        )

    preferred_dtype = torch.bfloat16
    try:
        print(f"Try loading pipeline with dtype={preferred_dtype} ...")
        pipeline = _from_pretrained(preferred_dtype)
    except Exception as load_error:
        # Author's note: some CPU environments may not support bfloat16,
        # hence the float32 retry.
        print(f"bfloat16 加载失败,回退到 float32: {load_error}")
        pipeline = _from_pretrained(torch.float32)

    # Author's note: CPU offload is the officially recommended setting for
    # low-VRAM / CPU environments and is also compatible with CPU-only Spaces.
    try:
        pipeline.enable_model_cpu_offload()
    except Exception as offload_error:
        # Author's note: some environments (e.g. old accelerate releases) lack
        # this method; the failure is safe to ignore.
        print(f"enable_model_cpu_offload 失败,直接使用 CPU: {offload_error}")

    return pipeline


pipe = load_pipeline()

# -------------------------
# 2.
# Inference function (Gradio callback)
# -------------------------
def generate_image(
    prompt: str,
    height: int = 768,
    width: int = 768,
    num_inference_steps: int = 9,
    guidance_scale: float = 0.0,
    seed: int | None = 0,
    randomize_seed: bool = True,
):
    """Generate a single image with the module-level ``pipe`` pipeline.

    Numeric arguments are coerced to the types the downstream calls need
    (``int`` for sizes, step count and seed; ``float`` for the guidance
    scale), because UI widgets or API clients may deliver whole numbers as
    floats and ``torch.Generator.manual_seed`` only accepts an ``int``.

    Args:
        prompt: Text prompt; must contain at least one non-whitespace
            character.
        height: Image height in pixels. The original author suggests
            starting at 512 or 768 on CPU Spaces.
        width: Image width in pixels (same suggestion as ``height``).
        num_inference_steps: Number of sampling steps; the original author
            notes 6-10 is usually enough for Z-Image-Turbo.
        guidance_scale: CFG scale; the original author notes 0.0 (CFG off)
            is the officially recommended value.
        seed: Seed used when ``randomize_seed`` is false. ``None`` or a
            negative value requests a random seed instead.
        randomize_seed: When true, ``seed`` is ignored and a fresh random
            seed is drawn for every call.

    Returns:
        A ``(image, seed)`` tuple: the first image produced by the pipeline
        and the integer seed that was actually used.

    Raises:
        gr.Error: If ``prompt`` is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("提示词不能为空,请输入 prompt。")

    # Resolve the seed: draw a fresh one when asked to, or when the supplied
    # value is missing/negative; otherwise normalise it to a plain int.
    if randomize_seed or seed is None or seed < 0:
        seed = random.randint(0, 2**31 - 1)
    else:
        seed = int(seed)

    # Author's note: Z-Image-Turbo performs best at 1024x1024, but that is
    # slow on CPU Spaces, so starting from 768x768 is suggested.
    generator = torch.Generator(device="cpu").manual_seed(seed)
    image = pipe(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=float(guidance_scale),
        generator=generator,
    ).images[0]

    return image, seed


# -------------------------
# 3.
# Gradio UI definition
# -------------------------
with gr.Blocks(title="Z-Image-Turbo SDNQ 4bit (CPU Friendly)") as demo:
    gr.Markdown(
        """
        # Z-Image-Turbo SDNQ 4bit (CPU / 低显存友好)

        基于 **Disty0/Z-Image-Turbo-SDNQ-uint4-svd-r32** 的 Gradio Demo。
        可部署在 Hugging Face **CPU Space** 中运行(仅 CPU,也能出图,但会较慢)。

        - 模型:Z-Image-Turbo 6B(4bit SDNQ 量化)
        - 推荐:先从 512 或 768 分辨率开始,步数 6~10 步
        - 提示:完全在 CPU 上运行时,生成一张 768×768 可能需要数十秒甚至更久
        """
    )

    # Height and width sliders share the same range, step and default.
    _resolution_opts = dict(minimum=256, maximum=1024, step=64, value=768)

    with gr.Row():
        # Left column: every input control plus the trigger button.
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                lines=4,
                label="提示词 Prompt",
                placeholder="例如:a photo of a beautiful Chinese city at night, neon lights, high detail, 4k",
            )

            with gr.Row():
                height = gr.Slider(label="高度 Height", **_resolution_opts)
                width = gr.Slider(label="宽度 Width", **_resolution_opts)

            with gr.Row():
                steps = gr.Slider(
                    value=9,
                    minimum=4,
                    maximum=20,
                    step=1,
                    label="采样步数 num_inference_steps",
                )
                guidance = gr.Slider(
                    value=0.0,
                    minimum=0.0,
                    maximum=3.0,
                    step=0.1,
                    label="CFG Scale (guidance_scale)",
                )

            with gr.Row():
                seed = gr.Number(
                    value=0,
                    precision=0,
                    label="随机种子 Seed(<0 或留空=自动随机)",
                )
                randomize_seed = gr.Checkbox(
                    value=True,
                    label="每次自动随机 Seed",
                )

            generate_btn = gr.Button("生成图片 Generate", variant="primary")

        # Right column: the generated image and the seed that produced it.
        with gr.Column(scale=2):
            output_image = gr.Image(
                type="pil",
                format="png",
                label="生成结果",
            )
            used_seed = gr.Number(
                value=0,
                precision=0,
                interactive=False,
                label="实际使用的 Seed(方便复现)",
            )

    # Wire the button to the callback; the input order mirrors the positional
    # parameters of generate_image.
    click_inputs = [prompt, height, width, steps, guidance, seed, randomize_seed]
    click_outputs = [output_image, used_seed]
    generate_btn.click(
        fn=generate_image,
        inputs=click_inputs,
        outputs=click_outputs,
    )

# Entry point for Hugging Face Spaces and local runs
if __name__ == "__main__":
    # For local debugging, run directly: python app.py
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)