import os
import gradio as gr
import json
import ast
import atexit
import shutil
import sys
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from gradio_image_prompter import ImagePrompter
from omegaconf import OmegaConf
from PIL import Image, ImageDraw
import numpy as np
from copy import deepcopy
import cv2
import spaces
sys.path.append("libs")
sys.path.append("libs/LGM")
sys.path.append("libs/das")
sys.path.append("libs/sam2")
import torchvision
from torchvision import transforms
from einops import rearrange
import tempfile
import gc
from diffusers.utils import export_to_gif
import imageio
from sam2.sam2_image_predictor import SAM2ImagePredictor
from kiui.cam import orbit_camera
from src.utils.image_process import pred_bbox
from src.utils.load_utils import load_sv3d_pipeline, load_LGM, load_diffusion, gen_tracking_video, normalize_points, load_das
from src.utils.ui_utils import mask_image, image_preprocess, plot_point_cloud
from das.infer import load_media
from huggingface_hub import snapshot_download
if not os.path.exists("./checkpoints"):
snapshot_download(
repo_id="chenwang/physctrl",
local_dir="./",
local_dir_use_symlinks=False
)
import tyro
from tqdm import tqdm
from LGM.core.options import AllConfigs
from LGM.core.gs import GaussianRenderer
from LGM.mvdream.pipeline_mvdream import MVDreamPipeline
import h5py
os.environ["OMP_NUM_THREADS"] = "1"
device = torch.device('cuda')
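# SAM2 image predictor used for interactive subject segmentation (Step 2.1 in the UI).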
segmentor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-tiny", cache_dir="ckpt", device='cuda')
height, width = 480, 720
num_frames, sv3d_res = 20, 576
print(f"loading sv3d pipeline...")
sv3d_pipeline = load_sv3d_pipeline(device)
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
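# LGM parses its options from the command line via tyro; fake argv to select the 'big' configuration.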
sys.argv = ['pipeline_track_gen.py', 'big']
opt = tyro.cli(AllConfigs)
lgm_model = load_LGM(opt, device)
print(f'loading diffusion model...')
diffusion_model = load_diffusion(device=device, model_cfg_path='./src/configs/eval_base.yaml', diffusion_ckpt_path='./checkpoints/physctrl_base.safetensors')
temp_dir = tempfile.mkdtemp()
# delete temp_dir after the program exits
atexit.register(lambda: shutil.rmtree(temp_dir))
# temp_dir = './debug'
output_dir = temp_dir
print(f"using temp directory: {output_dir}")
print('loading das...')
das_model = load_das(0, output_dir)
import random
def set_all_seeds(seed):
"""Sets random seeds for Python, NumPy, and PyTorch."""
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # if using multiple GPUs
set_all_seeds(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
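# Step 1: resize the uploaded image to the working resolution and carry the click prompts forward.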
def process_image(raw_input):
image, points = raw_input['image'], raw_input['points']
image = image.resize((width, height))
image.save(f'{output_dir}/image.png')
return image, {'image': image, 'points': points}
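# Step 2.1: segment the subject with SAM2 from the user's prompts, then crop the RGBA cutout
# to the SV3D input resolution via image_preprocess.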
@spaces.GPU
def segment(canvas, image, logits):
if logits is not None:
logits *= 32.0
_, points = canvas['image'], canvas['points']
image = np.array(image)
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
segmentor.set_image(image)
input_points = []
input_boxes = []
for p in points:
[x1, y1, _, x2, y2, _] = p
if x2==0 and y2==0:
input_points.append([x1, y1])
else:
input_boxes.append([x1, y1, x2, y2])
if len(input_points) == 0:
input_points = None
input_labels = None
else:
input_points = np.array(input_points)
input_labels = np.ones(len(input_points))
input_boxes = pred_bbox(Image.fromarray(image))
if len(input_boxes) == 0:
input_boxes = None
else:
input_boxes = np.array(input_boxes)
masks, _, logits = segmentor.predict(
point_coords=input_points,
point_labels=input_labels,
box=input_boxes,
multimask_output=False,
return_logits=True,
mask_input=logits,
)
mask = masks > 0
masked_img = mask_image(image, mask[0], color=[252, 140, 90], alpha=0.9)
masked_img = Image.fromarray(masked_img)
out_image = np.zeros((image.shape[0], image.shape[1], 4), dtype=np.uint8)
out_image[:, :, :3] = image
out_image_bbox = out_image.copy()
out_image_bbox[:, :, 3] = (
mask.astype(np.uint8) * 255
)
out_image_bbox = Image.fromarray(out_image_bbox)
y, x, res, sv3d_image = image_preprocess(out_image_bbox, target_res=sv3d_res, lower_contrast=False, rescale=True)
np.save(f'{output_dir}/crop_info.npy', np.array([y, x, res]))
print(f'crop_info: {y}, {x}, {res}')
return mask[0], {'image': masked_img, 'points': points}, out_image_bbox, {'crop_y_start': y, 'crop_x_start': x, 'crop_res': res}, sv3d_image
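# Render a num_frames orbit of the segmented object with SV3D (fixed elevation, full 360-degree azimuth sweep).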
@spaces.GPU
def run_sv3d(image, seed=0):
num_frames, sv3d_res = 20, 576
elevations_deg = [0] * num_frames
polars_rad = [np.deg2rad(90 - e) for e in elevations_deg]
azimuths_deg = np.linspace(0, 360, num_frames + 1)[1:] % 360
azimuths_rad = [np.deg2rad((a - azimuths_deg[-1]) % 360) for a in azimuths_deg]
azimuths_rad[:-1].sort()
with torch.no_grad():
with torch.autocast("cuda", dtype=torch.float16, enabled=True):
if len(image.split()) == 4: # RGBA
input_image = Image.new("RGB", image.size, (255, 255, 255)) # pure white bg
input_image.paste(image, mask=image.split()[3]) # 3rd is the alpha channel
else:
input_image = image
video_frames = sv3d_pipeline(
input_image.resize((sv3d_res, sv3d_res)),
height=sv3d_res,
width=sv3d_res,
num_frames=num_frames,
decode_chunk_size=8, # smaller to save memory
polars_rad=polars_rad,
azimuths_rad=azimuths_rad,
generator=torch.manual_seed(seed),
).frames[0]
torch.cuda.empty_cache()
gc.collect()
# export_to_gif(video_frames, f"./debug/view_animation.gif", fps=7)
for i, frame in enumerate(video_frames):
# frame = frame.resize((res, res))
frame.save(f"{output_dir}/{i:03d}.png")
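    # Frame i corresponds to azimuth 18 * (i + 1) degrees, so indices 19/4/9/14 give the 0/90/180/270-degree views.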
save_idx = [19, 4, 9, 14]
for i in range(4):
video_frames[save_idx[i]].save(f"{output_dir}/view_{i}.png")
return [video_frames[i] for i in save_idx]
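# Step 2.2: lift four SV3D views to 3D Gaussians with LGM, render a turntable video, and
# return the normalized point cloud used by the simulation stage.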
@spaces.GPU
def run_LGM(image, seed=0):
sv3d_frames = run_sv3d(image, seed)
model = lgm_model
rays_embeddings = model.prepare_default_rays(device)
tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=device)
proj_matrix[0, 0] = 1 / tan_half_fov
proj_matrix[1, 1] = 1 / tan_half_fov
proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
proj_matrix[2, 3] = 1
images = []
for i in range(4):
# image = Image.open(f"{base_dir}/view_{i}.png")
image = sv3d_frames[i]
image = image.resize((256, 256))
image = np.array(image)
image = image.astype(np.float32) / 255.0
if image.shape[-1] == 4:
image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
images.append(image)
mv_image = np.stack(images, axis=0)
# generate gaussians
input_image = torch.from_numpy(mv_image).permute(0, 3, 1, 2).float().to(device) # [4, 3, 256, 256]
input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
input_image = TF.normalize(input_image, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
input_image = torch.cat([input_image, rays_embeddings], dim=1).unsqueeze(0) # [1, 4, 9, H, W]
with torch.no_grad():
with torch.autocast(device_type='cuda', dtype=torch.float16):
# generate gaussians
gaussians = model.forward_gaussians(input_image)
# save gaussians
model.gs.save_ply(gaussians, f'{output_dir}/point_cloud.ply')
# render front view
cam_poses = torch.from_numpy(orbit_camera(0, 0, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
# cam_poses = torch.from_numpy(orbit_camera(45, 225, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
np.save(f'{output_dir}/projection.npy', cam_view_proj[0].cpu().numpy())
cam_pos = - cam_poses[:, :3, 3] # [V, 3]
image = model.gs.render(gaussians, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=1)['image']
image_save = (image[0, 0].permute(1, 2, 0).contiguous().float().cpu().numpy() * 255).astype(np.uint8)
Image.fromarray(image_save).save(f'{output_dir}/front_view.png')
images = []
azimuth = np.arange(0, 360, 2, dtype=np.int32)
elevation = 0
for azi in tqdm(azimuth):
cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
# cameras needed by gaussian rasterizer
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
cam_pos = - cam_poses[:, :3, 3] # [V, 3]
image = model.gs.render(gaussians, cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0), scale_modifier=1)['image']
images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
images = np.concatenate(images, axis=0)
out_video_dir = f'{output_dir}/gs_animation.mp4'
imageio.mimwrite(out_video_dir, images, fps=30)
points, center, scale = normalize_points(output_dir)
points_plot = plot_point_cloud(points, [])
np.save(f'{output_dir}/center.npy', center)
np.save(f'{output_dir}/scale.npy', scale)
print('center: ', center, 'scale: ', scale)
return points_plot, points
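# norm_fac: coordinate offset used when packing points for the diffusion model;
# mat_labels: material-name to class-index mapping passed to the diffusion model as `y`.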
norm_fac = 5
mat_labels = {'elastic': 0, 'plasticine': 1, 'sand': 2, 'rigid': 3}
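# Drag-force-only trajectory generation. Note: the UI below wires run_diffusion_new instead of this function.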
@spaces.GPU
def run_diffusion(points, E_val, nu_val, x, y, z, u, v, w, force_coeff_val, floor_height=-1, fluid=False, seed=0, device='cuda'):
drag_point = np.array([x, y, z])
drag_dir = np.array([u, v, w])
drag_dir /= np.linalg.norm(drag_dir)
force_coeff = np.array(force_coeff_val)
drag_force = drag_dir * force_coeff
batch = {}
batch['floor_height'] = torch.from_numpy(np.array([floor_height])).unsqueeze(-1).float()
batch['points_src'] = (torch.from_numpy(points).float().unsqueeze(0) - norm_fac) / 2
if not fluid:
batch['drag_point'] = (torch.from_numpy(drag_point).float() - norm_fac) / 2
batch['force'] = torch.from_numpy(np.array(drag_force)).float()
batch['force'] = batch['force'] * torch.from_numpy(force_coeff) / torch.norm(batch['force'])
batch['E'] = torch.from_numpy(np.array(E_val)).unsqueeze(-1).float()
batch['nu'] = torch.from_numpy(np.array(nu_val)).unsqueeze(-1).float()
else:
batch['mask'] = torch.ones_like(batch['points_src'])
batch['drag_point'] = torch.zeros(1, 3)
batch['force'] = torch.zeros(1, 3)
batch['E'] = torch.zeros(1, 1)
batch['nu'] = torch.zeros(1, 1)
for k in batch:
batch[k] = batch[k].unsqueeze(0).to(device)
with torch.autocast("cuda", dtype=torch.bfloat16):
output = diffusion_model(batch['points_src'], batch['force'], batch['E'], batch['nu'], torch.ones_like(batch['points_src']).to(device)[..., :1],
batch['drag_point'], batch['floor_height'], gravity=None, y=None, coeff=batch['E'], device=device, batch_size=1,
generator=torch.Generator().manual_seed(seed), n_frames=24, num_inference_steps=25)
output = output.cpu().numpy()
for j in range(output.shape[0]):
# save_pointcloud_video(((output[j:j+1] * 2) + norm_fac).squeeze(), [], f'{output_dir}/gen_animation.gif', grid_lim=10)
np.save(f'{output_dir}/gen_data.npy', output[j:j+1].squeeze())
gen_tracking_video(output_dir)
return os.path.join(output_dir, 'tracks_gen/tracking/tracks_tracking.mp4')
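# Step 4: build the conditioning batch (drag point/force, material, gravity, floor) and run the
# PhysCtrl diffusion model to produce a 24-frame point trajectory, rendered as a tracking video.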
@spaces.GPU
def run_diffusion_new(points, E_val, nu_val, x, y, z, u, v, w, force_coeff_val, material='elastic', drag_mode='point', drag_axis='z', seed=0, device='cuda'):
drag_point = np.array([x, y, z])
drag_dir = np.array([u, v, w])
# User input
has_gravity = (material != 'elastic')
force_coeff = np.array(force_coeff_val)
max_num_forces = 1
if drag_mode is not None and not has_gravity:
if drag_mode == "point":
drag_point = np.array(drag_point)
elif drag_mode == "max":
            # drag_axis is an axis name ('x'/'y'/'z'); map it to a column index before selecting the extreme point
            axis_idx = {'x': 0, 'y': 1, 'z': 2}[drag_axis]
            drag_point_idx = np.argmax(points[:, axis_idx]) if drag_mode == "max" \
                else np.argmin(points[:, axis_idx])
drag_point = points[drag_point_idx]
else:
raise ValueError(f"Invalid drag mode: {drag_mode}")
drag_offset = np.abs(points - drag_point)
drag_mask = (drag_offset < 0.4).all(axis=-1)
drag_dir = np.array(drag_dir, dtype=np.float32)
drag_dir /= np.linalg.norm(drag_dir)
drag_force = drag_dir * force_coeff
else:
        drag_mask = np.ones(points.shape[0], dtype=bool)
drag_point = np.zeros(4)
drag_dir = np.zeros(3)
drag_force = np.zeros(3)
if material == "elastic":
log_E, nu = np.array(E_val), np.array(nu_val)
else:
log_E, nu = np.array(6), np.array(0.4) # Default values for non-elastic materials
print(f'[Diffusion Simulation] Number of drag points: {drag_mask.sum()}/{2048}')
print(f'[Diffusion Simulation] Drag point: {drag_point}')
print(f'[Diffusion Simulation] log_E: {log_E}, ν: {nu}')
print(f'[Diffusion Simulation] Drag force: {drag_force}')
    print(f'[Diffusion Simulation] Material type: {material}')
print(f'[Diffusion Simulation] Has gravity: {has_gravity}')
force_order = torch.arange(max_num_forces)
mask = torch.from_numpy(drag_mask).bool()
mask = mask.unsqueeze(0) if mask.ndim == 1 else mask
batch = {}
batch['gravity'] = torch.from_numpy(np.array(has_gravity)).long().unsqueeze(0)
batch['drag_point'] = torch.from_numpy(drag_point - norm_fac).float() / 2
batch['drag_point'] = batch['drag_point'].unsqueeze(0) # (1, 4)
batch['points_src'] = (torch.from_numpy(points).float().unsqueeze(0) - norm_fac) / 2
if has_gravity:
        floor_normal = np.load(f'{output_dir}/floor_normal.npy')  # expected to already exist in output_dir
        scale = np.load(f'{output_dir}/scale.npy')  # saved by run_LGM
        floor_height = np.load(f'{output_dir}/floor_height.npy') * scale / 2.
batch['floor_height'] = torch.from_numpy(np.array(floor_height)).float().unsqueeze(0)
# Create rotation matrix to align floor normal with [0, 1, 0] (upward direction)
target_normal = np.array([0, 1, 0])
# Use Rodrigues' rotation formula to find rotation matrix
# Rotate from floor_normal to target_normal
v = np.cross(floor_normal, target_normal)
s = np.linalg.norm(v)
c = np.dot(floor_normal, target_normal)
if s < 1e-6: # If vectors are parallel
if c > 0: # Same direction
R_floor = np.eye(3)
else: # Opposite direction
R_floor = -np.eye(3)
else:
v = v / s
K = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
R_floor = np.eye(3) + s * K + (1 - c) * (K @ K)
R_floor_tensor = torch.from_numpy(R_floor).float().to(device)
for i in range(batch['points_src'].shape[0]):
batch['points_src'][i] = (R_floor_tensor @ batch['points_src'][i].T).T
else:
batch['floor_height'] = torch.ones(1).float() * -2.4
print(f'[Diffusion Simulation] Floor height: {batch["floor_height"]}')
if mask.shape[1] == 0:
        mask = torch.zeros(0, points.shape[0]).bool()
batch['force'] = torch.zeros(0, 3)
batch['drag_point'] = torch.zeros(0, 4)
else:
batch['force'] = torch.from_numpy(drag_force).float().unsqueeze(0)
batch['force'] = batch['force'] * torch.from_numpy(force_coeff) / torch.norm(batch['force'])
batch['mat_type'] = torch.from_numpy(np.array(mat_labels[material])).long()
if np.array(batch['mat_type']).item() == 3: # Rigid dataset
batch['is_mpm'] = torch.tensor(0).bool()
else:
batch['is_mpm'] = torch.tensor(1).bool()
if has_gravity: # Currently we only have either drag force or gravity
batch['force'] = torch.tensor([[0, -1.0, 0]]).to(device)
all_forces = torch.zeros(max_num_forces, 3)
all_forces[:batch['force'].shape[0]] = batch['force']
all_forces = all_forces[force_order]
batch['force'] = all_forces
all_drag_points = torch.zeros(max_num_forces, 4)
all_drag_points[:batch['drag_point'].shape[0], :batch['drag_point'].shape[1]] = batch['drag_point'] # The last dim of drag_point is not used now
all_drag_points = all_drag_points[force_order]
batch['drag_point'] = all_drag_points
if batch['gravity'][0] == 1: # add gravity to force
batch['force'] = torch.tensor([[0, -1.0, 0]]).float().to(device)
all_mask = torch.zeros(max_num_forces, 2048).bool()
all_mask[:mask.shape[0]] = mask
all_mask = all_mask[force_order]
batch['mask'] = all_mask[..., None] # (n_forces, N, 1) for compatibility
batch['E'] = torch.from_numpy(log_E).unsqueeze(-1).float() if log_E > 0 else torch.zeros(1).float()
batch['nu'] = torch.from_numpy(nu).unsqueeze(-1).float()
for k in batch:
batch[k] = batch[k].unsqueeze(0).to(device)
with torch.autocast("cuda", dtype=torch.bfloat16):
output = diffusion_model(batch['points_src'], batch['force'], batch['E'], batch['nu'], batch['mask'][..., :1],
batch['drag_point'], batch['floor_height'], batch['gravity'], coeff=batch['E'], generator=torch.Generator().manual_seed(seed),
device=device, batch_size=1, y=batch['mat_type'], n_frames=24, num_inference_steps=25)
output = output.cpu().numpy()
for j in range(output.shape[0]):
if batch['gravity'][0] == 1:
for k in range(output.shape[1]):
output[j, k] = (np.linalg.inv(R_floor) @ output[j, k].T).T
np.save(f'{output_dir}/gen_data.npy', output[j:j+1].squeeze())
gen_tracking_video(output_dir)
return os.path.join(output_dir, 'tracks_gen/tracking/tracks_tracking.mp4')
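# Step 5: generate the final RGB video with the DAS model, conditioned on the input image and
# the tracking video from Step 4.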
@spaces.GPU(duration=500)
def run_das(prompt, tracking_path, checkpoint_path='./checkpoints/cogshader5B'):
print(prompt, tracking_path)
input_path = os.path.join(output_dir, 'image.png')
video_tensor, fps, is_video = load_media(input_path)
tracking_tensor, _, _ = load_media(tracking_path)
das_model.apply_tracking(
video_tensor=video_tensor,
fps=24,
tracking_tensor=tracking_tensor,
img_cond_tensor=None,
prompt=prompt,
checkpoint_path=checkpoint_path
)
return os.path.join(output_dir, 'result.mp4')
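# Step 3: visualize the drag force as an arrow on the point-cloud plot.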
def add_arrow(points, x, y, z, u, v, w, force_coeff):
direction = np.array([u, v, w])
direction /= np.linalg.norm(direction)
arrow = {'origin': [x, y, z], 'dir': direction * force_coeff}
arrows = [arrow]
points_plot = plot_point_cloud(points, arrows)
return points_plot
material_slider_config = {
"Elastic": [
{"label": "E", "minimum": 4, "maximum": 7, "step": 0.5, "value": 5.5},
{"label": "nu", "minimum": 0.2, "maximum": 0.4, "step": 0.05, "value": 0.3},
],
"Plasticine": [
{"label": "E", "minimum": 4, "maximum": 7, "step": 0.5, "value": 5.5},
{"label": "nu", "minimum": 0.2, "maximum": 0.4, "step": 0.05, "value": 0.3},
],
"Plastic": [
{"label": "E", "minimum": 4, "maximum": 7, "step": 0.5, "value": 5.5},
{"label": "nu", "minimum": 0.2, "maximum": 0.4, "step": 0.05, "value": 0.3},
],
"Rigid": [] # No sliders
}
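# Show or hide the E / nu sliders depending on the selected material.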
def update_sliders(material):
sliders = material_slider_config[material]
# Prepare updates for both sliders
if len(sliders) == 2:
return (
gr.update(visible=True, interactive=True, **sliders[0]),
gr.update(visible=True, interactive=True, **sliders[1])
)
elif len(sliders) == 1:
return (
gr.update(visible=True, interactive=True, **sliders[0]),
gr.update(visible=False, interactive=False)
)
else:
return (
gr.update(visible=False, interactive=False),
gr.update(visible=False, interactive=False)
)
update_sliders('Elastic')
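# Gradio UI: Steps 1-5, from image upload and segmentation to trajectory and final video generation.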
with gr.Blocks() as demo:
gr.Markdown("""
## PhysCtrl: Generative Physics for Controllable and Physics-Grounded Video Generation
    ### Upload your own input image, then set the force and material to generate the trajectory and the final video.
    ### The text prompt for video generation should describe the object's action, e.g., "the penguin is fully lifted upwards, as if there is a force applied onto its left wing".
    ### Due to ZeroGPU usage limits on Hugging Face, final video generation is currently unavailable. We are working on a fix.
""")
mask = gr.State(value=None) # store mask
original_image = gr.State(value=None) # store original input image
mask_logits = gr.State(value=None) # store mask logits
masked_image = gr.State(value=None) # store masked image
crop_info = gr.State(value=None) # store crop info
sv3d_input = gr.State(value=None) # store sv3d input
sv3d_frames = gr.State(value=None) # store sv3d frames
points = gr.State(value=None) # store points
with gr.Column():
with gr.Row():
with gr.Column():
step1_dec = """
Step 1: Upload Input Image and Segment Subject
"""
step1 = gr.Markdown(step1_dec)
raw_input = ImagePrompter(type="pil", label="Input Image", show_label=True, interactive=True)
process_button = gr.Button("Process")
with gr.Column():
# Step 2: Get Subject Mask and Point Clouds
step2_dec = """
Step 2.1: Get Subject Mask
"""
step2 = gr.Markdown(step2_dec)
canvas = ImagePrompter(type="pil", label="Input Image", show_label=True, interactive=True) # for mask painting
step2_notes = """
- Click to add points to select the subject.
                - Press `Segment Subject` to get the mask. The mask can be refined iteratively by adding or updating points.
"""
notes = gr.Markdown(step2_notes)
segment_button = gr.Button("Segment Subject")
# with gr.Column():
# output_video = gr.Video(label="Rendered Video", format="mp4", width="auto", autoplay=True, interactive=False)
with gr.Column(scale=1):
step22_dec = """
Step 2.2: Get 3D Points
"""
step22 = gr.Markdown(step22_dec)
points_plot = gr.Plot(label="Point Cloud")
sv3d_button = gr.Button("Get 3D Points")
with gr.Column():
step3_dec = """
Step 3: Add Force
"""
step3 = gr.Markdown(step3_dec)
with gr.Row():
gr.Markdown('Add Drag Point')
with gr.Row():
x = gr.Number(label="X", min_width=50)
y = gr.Number(label="Y", min_width=50)
z = gr.Number(label="Z", min_width=50)
with gr.Row():
gr.Markdown('Add Drag Direction')
with gr.Row():
u = gr.Number(label="U", min_width=50)
v = gr.Number(label="V", min_width=50)
w = gr.Number(label="W", min_width=50)
step3_notes = """
Direction will be normalized to unit length.
"""
notes = gr.Markdown(step3_notes)
with gr.Row():
force_coeff = gr.Slider(label="Force Magnitude", minimum=0.02, maximum=0.2, step=0.02, value=0.045)
add_arrow_button = gr.Button("Add Force")
with gr.Row():
with gr.Column():
step4_dec = """
Step 4: Select Material and Generate Trajectory
"""
step4 = gr.Markdown(step4_dec)
tracking_video = gr.Video(label="Tracking Video", format="mp4", width="auto", autoplay=True, interactive=False)
with gr.Row():
# material_radio = gr.Radio(
# choices=list(material_slider_config.keys()),
# label="Choose Material",
# value="Rigid"
# )
# slider1 = gr.Slider(visible=True)
# slider2 = gr.Slider(visible=True)
slider1 = gr.Slider(label="E", visible=True, interactive=True, minimum=4, maximum=7, step=0.5, value=5.5)
slider2 = gr.Slider(visible=False, minimum=0.2, maximum=0.4, step=0.05, value=0.3)
run_diffusion_button = gr.Button("Generate Trajectory")
with gr.Column():
step5_dec = """
Step 5: Generate Final Video
"""
step5 = gr.Markdown(step5_dec)
final_video = gr.Video(label="Final Video", format="mp4", width="auto", autoplay=True, interactive=False)
text = gr.Textbox(label="Prompt")
gen_video_button = gr.Button("Generate Final Video")
# material_radio.change(
# fn=update_sliders,
# inputs=material_radio,
# outputs=[slider1, slider2]
# )
process_button.click(
fn = process_image,
inputs = [raw_input],
outputs = [original_image, canvas]
)
segment_button.click(
fn = segment,
inputs = [canvas, original_image, mask_logits],
outputs = [mask, canvas, masked_image, crop_info, sv3d_input]
)
sv3d_button.click(
fn = run_LGM,
inputs = [sv3d_input],
outputs = [points_plot, points]
)
add_arrow_button.click(
fn=add_arrow,
inputs=[points, x, y, z, u, v, w, force_coeff],
outputs=points_plot
)
run_diffusion_button.click(
fn=run_diffusion_new,
inputs=[points, slider1, slider2, x, y, z, u, v, w, force_coeff],
outputs=tracking_video
)
gen_video_button.click(
fn=run_das,
inputs=[text, tracking_video],
outputs=final_video
)
demo.queue().launch()