Spaces:

OneEyeDJ
/

test

Sleeping

App Files Community

OneEyeDJ committed on Jun 16

Commit

40afd1d

verified ·

1 Parent(s): d681921

Update main.py

Browse files

Files changed (1) hide show

main.py +31 -20

main.py CHANGED Viewed

@@ -6,25 +6,40 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoProcessor
 import argparse
 import os
 class SimpleVideoLLaMA3Interface:
     def __init__(self, model_path):
-        print(f"Loading model from {model_path}...")
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            trust_remote_code=True,
-            device_map="auto",
-            torch_dtype=torch.bfloat16,
-            attn_implementation="flash_attention_2",
-        )
-        self.processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
-        print("Model loaded successfully!")
         self.image_formats = ("png", "jpg", "jpeg", "bmp", "gif", "webp")
         self.video_formats = ("mp4", "avi", "mov", "mkv", "webm", "m4v", "3gp", "flv")
     @torch.inference_mode()
     def predict(self, messages, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=4096, fps=10, max_frames=256):
         if not messages or len(messages) == 0:
             return messages
@@ -202,13 +217,9 @@ class SimpleVideoLLaMA3Interface:
         return interface
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-path", type=str, default="DAMO-NLP-SG/VideoLLaMA3-7B")
-    parser.add_argument("--port", type=int, default=7860)
-    parser.add_argument("--share", action="store_true")
-    args = parser.parse_args()
-    app = SimpleVideoLLaMA3Interface(args.model_path)
-    interface = app.create_interface()
-    interface.launch(server_port=args.port, share=args.share, server_name="0.0.0.0")

 from transformers import AutoModelForCausalLM, AutoProcessor
 import argparse
 import os
+import spaces  # Import spaces for ZEROGPU
 class SimpleVideoLLaMA3Interface:
     def __init__(self, model_path):
+        self.model_path = model_path
+        self.model = None
+        self.processor = None
         self.image_formats = ("png", "jpg", "jpeg", "bmp", "gif", "webp")
         self.video_formats = ("mp4", "avi", "mov", "mkv", "webm", "m4v", "3gp", "flv")
+        # Load processor on CPU (doesn't need GPU)
+        print(f"Loading processor from {model_path}...")
+        self.processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+        print("Processor loaded successfully!")
+    def load_model(self):
+        """Load model - this will be called inside GPU-decorated functions"""
+        if self.model is None:
+            print(f"Loading model from {self.model_path}...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_path,
+                trust_remote_code=True,
+                device_map="auto",
+                torch_dtype=torch.bfloat16,
+                attn_implementation="flash_attention_2",
+            )
+            print("Model loaded successfully!")
+    @spaces.GPU(duration=120)  # Allocate GPU for up to 120 seconds
     @torch.inference_mode()
     def predict(self, messages, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=4096, fps=10, max_frames=256):
+        # Load model inside GPU context
+        self.load_model()
         if not messages or len(messages) == 0:
             return messages
         return interface
+# For Hugging Face Spaces
+app = SimpleVideoLLaMA3Interface("DAMO-NLP-SG/VideoLLaMA3-7B")
+interface = app.create_interface()
+if __name__ == "__main__":
+    interface.launch()