MiniMax-Remover

Running on Zero

PengWeixuanSZU committed on Jun 21

Commit

ca68585

verified ·

1 Parent(s): 6dd3fdf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -244,9 +244,8 @@ def track_video(n_frames,video_state):
     video_state["origin_images"] = images
     images = np.array(images)
-    global video_predictor
-    video_predictor=video_predictor.to("cuda")
-    inference_state = video_predictor.init_state(images=images/255, device="cuda")
     video_state["inference_state"] = inference_state
     if len(torch.from_numpy(video_state["masks"][0]).shape) == 3:
@@ -254,7 +253,7 @@ def track_video(n_frames,video_state):
     else:
         mask = torch.from_numpy(video_state["masks"][0])
-    video_predictor.add_new_mask(
         inference_state=inference_state,
         frame_idx=0,
         obj_id=obj_id,
@@ -265,7 +264,7 @@ def track_video(n_frames,video_state):
     mask_frames = []
     color = np.array(COLOR_PALETTE[int(time.time()) % len(COLOR_PALETTE)], dtype=np.float32) / 255.0
     color = color[None, None, :]
-    for out_frame_idx, out_obj_ids, out_mask_logits in video_predictor.propagate_in_video(inference_state):
         frame = images[out_frame_idx].astype(np.float32) / 255.0
         mask = np.zeros((H, W, 3), dtype=np.float32)
         for i, logit in enumerate(out_mask_logits):

     video_state["origin_images"] = images
     images = np.array(images)
+    video_predictor_local=video_predictor.to("cuda")
+    inference_state = video_predictor_local.init_state(images=images/255, device="cuda")
     video_state["inference_state"] = inference_state
     if len(torch.from_numpy(video_state["masks"][0]).shape) == 3:
     else:
         mask = torch.from_numpy(video_state["masks"][0])
+    video_predictor_local.add_new_mask(
         inference_state=inference_state,
         frame_idx=0,
         obj_id=obj_id,
     mask_frames = []
     color = np.array(COLOR_PALETTE[int(time.time()) % len(COLOR_PALETTE)], dtype=np.float32) / 255.0
     color = color[None, None, :]
+    for out_frame_idx, out_obj_ids, out_mask_logits in video_predictor_local.propagate_in_video(inference_state):
         frame = images[out_frame_idx].astype(np.float32) / 255.0
         mask = np.zeros((H, W, 3), dtype=np.float32)
         for i, logit in enumerate(out_mask_logits):