Spaces: Runtime error

Update app_lora.py

app_lora.py (CHANGED, +33 -9)
@@ -5,25 +5,23 @@ from transformers import CLIPVisionModel
 import gradio as gr
 import tempfile
 import os
+# imageio will be installed via your requirements.txt
+import imageio

 from huggingface_hub import hf_hub_download
 import numpy as np
 from PIL import Image
 import random
-# imageio will be installed via requirements.txt
-import imageio

 import warnings
 warnings.filterwarnings("ignore")

 # --- Model Loading at Startup ---
-# This loads the model into VRAM when the Space boots. This is the correct
-# approach for your powerful hardware to ensure fast user-facing inference.
 MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
 LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
 LORA_FILENAME = "FusionX_LoRa/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"

-# Using mixed precision (float16/bfloat16) is optimal for modern GPUs
+# Using mixed precision (float16/bfloat16) is optimal for modern GPUs
 image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float16)
 vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float16)
 pipe = WanImageToVideoPipeline.from_pretrained(

@@ -52,7 +50,6 @@ MIN_FRAMES, MAX_FRAMES = 8, 81
 default_prompt = "make this image come alive, cinematic motion, smooth animation"
 default_neg_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"

-# Using a fixed, generous duration for the decorator is the correct approach.
 @spaces.GPU(duration=180)
 def generate_video(input_image, prompt, height, width,
                    negative_prompt, duration_seconds,

@@ -65,7 +62,6 @@ def generate_video(input_image, prompt, height, width,
     target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
     target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

-    # This calculation ensures num_frames - 1 is divisible by 4, a requirement for this model
     raw_frames = int(round(duration_seconds * FIXED_FPS))
     num_frames = ((raw_frames - 1) // 4) * 4 + 1 if raw_frames > 4 else MIN_FRAMES
     num_frames = np.clip(num_frames, MIN_FRAMES, MAX_FRAMES)

@@ -83,7 +79,6 @@ def generate_video(input_image, prompt, height, width,

     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
-    # Using imageio for high-quality video export
     writer = imageio.get_writer(video_path, fps=FIXED_FPS, codec='libx264',
                                 pixelformat='yuv420p', quality=8)
     for frame in frames:

@@ -103,4 +98,33 @@ with gr.Blocks() as demo:
             duration_comp = gr.Slider(minimum=round(MIN_FRAMES/FIXED_FPS,1), maximum=round(MAX_FRAMES/FIXED_FPS,1), step=0.1, value=2, label="Duration (s)")
             with gr.Accordion("Advanced Settings", open=False):
                 neg_prompt_comp = gr.Textbox(label="Negative Prompt", value=default_neg_prompt, lines=3)
-
+                # THE FIX IS HERE: The parenthesis was not closed on this line.
+                seed_comp = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
+                rand_seed_comp = gr.Checkbox(label="Randomize seed", value=True)
+                with gr.Row():
+                    height_comp = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H, label="Height")
+                    width_comp = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W, label="Width")
+                steps_comp = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Steps")
+                guidance_comp = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="CFG Scale", visible=False)
+            gen_button = gr.Button("Generate Video", variant="primary")
+        with gr.Column():
+            video_comp = gr.Video(label="Generated Video", autoplay=True, interactive=False)
+
+    def handle_upload(img):
+        if img is None: return gr.update(value=DEFAULT_H), gr.update(value=DEFAULT_W)
+        try:
+            w, h = img.size; a = h / w if w > 0 else 0
+            h_new = int(np.sqrt(MAX_AREA * a)); w_new = int(np.sqrt(MAX_AREA / a))
+            h_final = max(MOD_VALUE, h_new // MOD_VALUE * MOD_VALUE)
+            w_final = max(MOD_VALUE, w_new // MOD_VALUE * MOD_VALUE)
+            return gr.update(value=h_final), gr.update(value=w_final)
+        except Exception: return gr.update(value=DEFAULT_H), gr.update(value=DEFAULT_W)
+
+    input_image_comp.upload(handle_upload, inputs=input_image_comp, outputs=[height_comp, width_comp])
+
+    inputs = [input_image_comp, prompt_comp, height_comp, width_comp, neg_prompt_comp, duration_comp, guidance_comp, steps_comp, seed_comp, rand_seed_comp]
+    outputs = [video_comp, seed_comp]
+    gen_button.click(fn=generate_video, inputs=inputs, outputs=outputs)
+
+if __name__ == "__main__":
+    demo.queue().launch()
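
Of the logic touched here, the frame-count rounding in generate_video is the least obvious part: the model expects num_frames - 1 to be divisible by 4, and the requested duration has to be clamped to the supported range. A minimal standalone sketch of that arithmetic; FIXED_FPS = 24 is an assumption (its definition sits above the changed lines and is not visible in this diff), and frames_for_duration is a helper name introduced only for illustration.

```python
import numpy as np

# MIN_FRAMES / MAX_FRAMES are copied from the file; FIXED_FPS = 24 is an
# assumption, since its definition is outside the changed lines.
FIXED_FPS = 24
MIN_FRAMES, MAX_FRAMES = 8, 81

def frames_for_duration(duration_seconds: float) -> int:
    """Convert a duration slider value into a frame count the model accepts."""
    raw_frames = int(round(duration_seconds * FIXED_FPS))
    # Snap down so that (num_frames - 1) is divisible by 4, then clamp to range.
    num_frames = ((raw_frames - 1) // 4) * 4 + 1 if raw_frames > 4 else MIN_FRAMES
    return int(np.clip(num_frames, MIN_FRAMES, MAX_FRAMES))

for secs in (0.3, 0.5, 1.0, 2.0, 3.4):
    n = frames_for_duration(secs)
    assert MIN_FRAMES <= n <= MAX_FRAMES
    print(f"{secs:>4}s -> {n} frames")
```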
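The export block keeps imageio's ffmpeg writer (libx264, yuv420p, quality 8) and writes to a temporary .mp4 path. A self-contained sketch of that path, assuming frames arrive as PIL images or HxWx3 uint8 arrays; FIXED_FPS = 24 is again an assumption, and export_mp4 is a name used here only for illustration.

```python
import tempfile

import imageio
import numpy as np
from PIL import Image

FIXED_FPS = 24  # assumed; defined earlier in app_lora.py

def export_mp4(frames):
    """Stream frames to an H.264 MP4 the way the writer block in the diff does."""
    # Reserve a temp path; delete=False keeps the file after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    writer = imageio.get_writer(video_path, fps=FIXED_FPS, codec='libx264',
                                pixelformat='yuv420p', quality=8)
    try:
        for frame in frames:
            writer.append_data(np.asarray(frame))
    finally:
        writer.close()
    return video_path

# Smoke test with synthetic frames: a 2-second clip of random noise.
dummy = [Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8))
         for _ in range(FIXED_FPS * 2)]
print(export_mp4(dummy))
```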
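The new handle_upload callback pre-fills the Height/Width sliders from the uploaded image: it keeps the aspect ratio, caps the total pixel count at MAX_AREA, and snaps both sides to multiples of MOD_VALUE. A sketch of that sizing rule with illustrative constants; the real MOD_VALUE and MAX_AREA are defined earlier in app_lora.py and do not appear in this diff, so the values below are assumptions.

```python
import numpy as np

# Illustrative values only; the real constants live above the changed lines.
MOD_VALUE = 16
MAX_AREA = 480 * 832

def suggest_dims(width: int, height: int) -> tuple[int, int]:
    """Aspect-preserving size under MAX_AREA pixels, snapped to MOD_VALUE."""
    if width <= 0 or height <= 0:
        return MOD_VALUE, MOD_VALUE  # the app itself falls back to DEFAULT_H, DEFAULT_W
    aspect = height / width
    h_new = int(np.sqrt(MAX_AREA * aspect))
    w_new = int(np.sqrt(MAX_AREA / aspect))
    h_final = max(MOD_VALUE, h_new // MOD_VALUE * MOD_VALUE)
    w_final = max(MOD_VALUE, w_new // MOD_VALUE * MOD_VALUE)
    return h_final, w_final

print(suggest_dims(1920, 1080))  # a 16:9 upload -> roughly (464, 832)
```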