Spaces: Runtime error

Update app_lora.py

app_lora.py (CHANGED, +33 -9)
@@ -5,25 +5,23 @@ from transformers import CLIPVisionModel
 import gradio as gr
 import tempfile
 import os
+# imageio will be installed via your requirements.txt
+import imageio

 from huggingface_hub import hf_hub_download
 import numpy as np
 from PIL import Image
 import random
-# imageio will be installed via requirements.txt
-import imageio

 import warnings
 warnings.filterwarnings("ignore")

 # --- Model Loading at Startup ---
-# This loads the model into VRAM when the Space boots. This is the correct
-# approach for your powerful hardware to ensure fast user-facing inference.
 MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
 LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
 LORA_FILENAME = "FusionX_LoRa/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"

-# Using mixed precision (float16/bfloat16) is optimal for modern GPUs
+# Using mixed precision (float16/bfloat16) is optimal for modern GPUs
 image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float16)
 vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float16)
 pipe = WanImageToVideoPipeline.from_pretrained(

@@ -52,7 +50,6 @@ MIN_FRAMES, MAX_FRAMES = 8, 81
 default_prompt = "make this image come alive, cinematic motion, smooth animation"
 default_neg_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"

-# Using a fixed, generous duration for the decorator is the correct approach.
 @spaces.GPU(duration=180)
 def generate_video(input_image, prompt, height, width,
                    negative_prompt, duration_seconds,

@@ -65,7 +62,6 @@ def generate_video(input_image, prompt, height, width,
     target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
     target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

-    # This calculation ensures num_frames - 1 is divisible by 4, a requirement for this model
     raw_frames = int(round(duration_seconds * FIXED_FPS))
     num_frames = ((raw_frames - 1) // 4) * 4 + 1 if raw_frames > 4 else MIN_FRAMES
     num_frames = np.clip(num_frames, MIN_FRAMES, MAX_FRAMES)

@@ -83,7 +79,6 @@ def generate_video(input_image, prompt, height, width,

     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
-    # Using imageio for high-quality video export
     writer = imageio.get_writer(video_path, fps=FIXED_FPS, codec='libx264',
                                 pixelformat='yuv420p', quality=8)
     for frame in frames:

@@ -103,4 +98,33 @@ with gr.Blocks() as demo:
             duration_comp = gr.Slider(minimum=round(MIN_FRAMES/FIXED_FPS,1), maximum=round(MAX_FRAMES/FIXED_FPS,1), step=0.1, value=2, label="Duration (s)")
             with gr.Accordion("Advanced Settings", open=False):
                 neg_prompt_comp = gr.Textbox(label="Negative Prompt", value=default_neg_prompt, lines=3)
-
+                # THE FIX IS HERE: The parenthesis was not closed on this line.
+                seed_comp = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
+                rand_seed_comp = gr.Checkbox(label="Randomize seed", value=True)
+                with gr.Row():
+                    height_comp = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H, label="Height")
+                    width_comp = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W, label="Width")
+                steps_comp = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Steps")
+                guidance_comp = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="CFG Scale", visible=False)
+            gen_button = gr.Button("Generate Video", variant="primary")
+        with gr.Column():
+            video_comp = gr.Video(label="Generated Video", autoplay=True, interactive=False)
+
+    def handle_upload(img):
+        if img is None: return gr.update(value=DEFAULT_H), gr.update(value=DEFAULT_W)
+        try:
+            w, h = img.size; a = h / w if w > 0 else 0
+            h_new = int(np.sqrt(MAX_AREA * a)); w_new = int(np.sqrt(MAX_AREA / a))
+            h_final = max(MOD_VALUE, h_new // MOD_VALUE * MOD_VALUE)
+            w_final = max(MOD_VALUE, w_new // MOD_VALUE * MOD_VALUE)
+            return gr.update(value=h_final), gr.update(value=w_final)
+        except Exception: return gr.update(value=DEFAULT_H), gr.update(value=DEFAULT_W)
+
+    input_image_comp.upload(handle_upload, inputs=input_image_comp, outputs=[height_comp, width_comp])
+
+    inputs = [input_image_comp, prompt_comp, height_comp, width_comp, neg_prompt_comp, duration_comp, guidance_comp, steps_comp, seed_comp, rand_seed_comp]
+    outputs = [video_comp, seed_comp]
+    gen_button.click(fn=generate_video, inputs=inputs, outputs=outputs)
+
+if __name__ == "__main__":
+    demo.queue().launch()
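
Of the logic touched here, the frame-count rounding in generate_video is the least obvious part: the model expects num_frames - 1 to be divisible by 4, and the requested duration has to be clamped to the supported range. A minimal standalone sketch of that arithmetic; FIXED_FPS = 24 is an assumption (its definition sits above the changed lines and is not visible in this diff), and frames_for_duration is a helper name introduced only for illustration.

```python
import numpy as np

# MIN_FRAMES / MAX_FRAMES are copied from the file; FIXED_FPS = 24 is an
# assumption, since its definition is outside the changed lines.
FIXED_FPS = 24
MIN_FRAMES, MAX_FRAMES = 8, 81

def frames_for_duration(duration_seconds: float) -> int:
    """Convert a duration slider value into a frame count the model accepts."""
    raw_frames = int(round(duration_seconds * FIXED_FPS))
    # Snap down so that (num_frames - 1) is divisible by 4, then clamp to range.
    num_frames = ((raw_frames - 1) // 4) * 4 + 1 if raw_frames > 4 else MIN_FRAMES
    return int(np.clip(num_frames, MIN_FRAMES, MAX_FRAMES))

for secs in (0.3, 0.5, 1.0, 2.0, 3.4):
    n = frames_for_duration(secs)
    assert MIN_FRAMES <= n <= MAX_FRAMES
    print(f"{secs:>4}s -> {n} frames")
```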
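The export block keeps imageio's ffmpeg writer (libx264, yuv420p, quality 8) and writes to a temporary .mp4 path. A self-contained sketch of that path, assuming frames arrive as PIL images or HxWx3 uint8 arrays; FIXED_FPS = 24 is again an assumption, and export_mp4 is a name used here only for illustration.

```python
import tempfile

import imageio
import numpy as np
from PIL import Image

FIXED_FPS = 24  # assumed; defined earlier in app_lora.py

def export_mp4(frames):
    """Stream frames to an H.264 MP4 the way the writer block in the diff does."""
    # Reserve a temp path; delete=False keeps the file after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    writer = imageio.get_writer(video_path, fps=FIXED_FPS, codec='libx264',
                                pixelformat='yuv420p', quality=8)
    try:
        for frame in frames:
            writer.append_data(np.asarray(frame))
    finally:
        writer.close()
    return video_path

# Smoke test with synthetic frames: a 2-second clip of random noise.
dummy = [Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8))
         for _ in range(FIXED_FPS * 2)]
print(export_mp4(dummy))
```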
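The new handle_upload callback pre-fills the Height/Width sliders from the uploaded image: it keeps the aspect ratio, caps the total pixel count at MAX_AREA, and snaps both sides to multiples of MOD_VALUE. A sketch of that sizing rule with illustrative constants; the real MOD_VALUE and MAX_AREA are defined earlier in app_lora.py and do not appear in this diff, so the values below are assumptions.

```python
import numpy as np

# Illustrative values only; the real constants live above the changed lines.
MOD_VALUE = 16
MAX_AREA = 480 * 832

def suggest_dims(width: int, height: int) -> tuple[int, int]:
    """Aspect-preserving size under MAX_AREA pixels, snapped to MOD_VALUE."""
    if width <= 0 or height <= 0:
        return MOD_VALUE, MOD_VALUE  # the app itself falls back to DEFAULT_H, DEFAULT_W
    aspect = height / width
    h_new = int(np.sqrt(MAX_AREA * aspect))
    w_new = int(np.sqrt(MAX_AREA / aspect))
    h_final = max(MOD_VALUE, h_new // MOD_VALUE * MOD_VALUE)
    w_final = max(MOD_VALUE, w_new // MOD_VALUE * MOD_VALUE)
    return h_final, w_final

print(suggest_dims(1920, 1080))  # a 16:9 upload -> roughly (464, 832)
```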