import json
import cv2
import h5py
import numpy as np
import torch
import tqdm
from detrsmpl.models.body_models.builder import build_body_model
from detrsmpl.models.body_models.utils import batch_transform_to_camera_frame
class SMCReader:
def __init__(self, file_path, body_model=None):
"""Read SenseMocapFile endswith ".smc", see: https://github.com/open-
mmlab/detrsmpl/blob/main/docs/smc.md.
Args:
file_path (str):
Path to an SMC file.
body_model (nn.Module or dict):
Only needed for SMPL transformation to device frame
if nn.Module: a body_model instance
if dict: a body_model config
"""
self.smc = h5py.File(file_path, 'r')
self.__calibration_dict__ = None
self.action_id = self.smc.attrs['action_id']
self.actor_id = self.smc.attrs['actor_id']
self.datetime_str = self.smc.attrs['datetime_str'] # .decode()
self.kinect_num_frames = self.smc['Kinect'].attrs['num_frame']
self.num_kinects = self.smc['Kinect'].attrs['num_device']
self.kinect_color_resolution = self.get_kinect_color_resolution(0)
self.kinect_depth_resolution = self.get_kinect_depth_resolution(0)
self.iphone_exists = 'iPhone' in self.smc.keys()
self.num_iphones = 1
if self.iphone_exists:
self.iphone_num_frames = self.smc['iPhone'].attrs['num_frame']
self.iphone_color_resolution = \
self.smc['iPhone'].attrs['color_resolution'] # vertical
self.iphone_depth_resolution = \
self.smc['iPhone'].attrs['depth_resolution'] # vertical
self.keypoint_exists = 'Keypoints3D' in self.smc.keys()
if self.keypoint_exists:
self.keypoints_num_frames = self.smc['Keypoints3D'].attrs[
'num_frame']
self.keypoints_convention = self.smc['Keypoints3D'].attrs[
'convention']
self.keypoints_created_time = self.smc['Keypoints3D'].attrs[
'created_time']
self.smpl_exists = 'SMPL' in self.smc.keys()
if self.smpl_exists:
self.smpl_num_frames = self.smc['SMPL'].attrs['num_frame']
self.smpl_created_time = self.smc['SMPL'].attrs['created_time']
# initialize body model
if isinstance(body_model, torch.nn.Module):
self.body_model = body_model
elif isinstance(body_model, dict):
self.body_model = build_body_model(body_model)
else:
# in most cases, SMCReader is instantiated for image reading
# only. Hence, it is wasteful to initialize a body model until
# really needed in get_smpl()
self.body_model = None
self.default_body_model_config = dict(
type='SMPL',
gender='neutral',
num_betas=10,
keypoint_src='smpl_45',
keypoint_dst='smpl_45',
model_path='data/body_models/smpl',
batch_size=1,
)
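    # A minimal usage sketch (the path below is hypothetical): the reader can
    # be constructed without a body model, which is only built lazily inside
    # get_smpl() when a device frame is requested.
    #     reader = SMCReader('/path/to/recording.smc')
    #     print(reader.get_num_kinect(), reader.get_kinect_num_frames())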
def get_kinect_color_extrinsics(self, kinect_id, homogeneous=True):
"""Get extrinsics(cam2world) of a kinect RGB camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
homogeneous (bool, optional):
If true, returns rotation and translation in
one 4x4 matrix. Defaults to True.
Returns:
homogeneous is True
ndarray: A 4x4 matrix of rotation and translation(cam2world).
homogeneous is False
dict: A dict of rotation and translation,
keys are R and T,
each value is an ndarray.
"""
R = np.asarray(self.calibration_dict[str(kinect_id * 2)]['R']).reshape(
3, 3)
T = np.asarray(self.calibration_dict[str(kinect_id *
2)]['T']).reshape(3)
if homogeneous:
extrinsics = np.identity(4, dtype=float)
extrinsics[:3, :3] = R
extrinsics[:3, 3] = T
return extrinsics
else:
return {'R': R, 'T': T}
@property
def calibration_dict(self):
"""Get the dict of calibration.
Returns:
dict:
A dict of calibrated extrinsics.
"""
        if self.__calibration_dict__ is None:
            self.__calibration_dict__ = \
                json.loads(self.smc['Extrinsics'][()])
        return self.__calibration_dict__
def get_kinect_depth_extrinsics(self, kinect_id, homogeneous=True):
"""Get extrinsics(cam2world) of a kinect depth camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
homogeneous (bool, optional):
If true, returns rotation and translation in
one 4x4 matrix. Defaults to True.
Returns:
homogeneous is True
ndarray: A 4x4 matrix of rotation and translation(cam2world).
homogeneous is False
dict: A dict of rotation and translation,
keys are R and T,
each value is an ndarray.
"""
R = np.asarray(self.calibration_dict[str(kinect_id * 2 +
1)]['R']).reshape(3, 3)
T = np.asarray(self.calibration_dict[str(kinect_id * 2 +
1)]['T']).reshape(3)
if homogeneous:
extrinsics = np.identity(4, dtype=float)
extrinsics[:3, :3] = R
extrinsics[:3, 3] = T
return extrinsics
else:
return {'R': R, 'T': T}
def get_kinect_color_intrinsics(self, kinect_id):
"""Get intrinsics of a kinect RGB camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray: A 3x3 matrix.
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
intrinsics = \
kinect_dict['Calibration']['Color']['Intrinsics'][()]
cx, cy, fx, fy = intrinsics[:4]
intrinsics = \
np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
return intrinsics
def get_kinect_color_resolution(self, kinect_id):
"""Get resolution of a kinect RGB camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray:
An ndarray of (width, height), shape=[2, ].
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
resolution = \
kinect_dict['Calibration']['Color']['Resolution'][()]
return resolution
def get_kinect_depth_resolution(self, kinect_id):
"""Get resolution of a kinect depth camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray:
An ndarray of (width, height), shape=[2, ].
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
resolution = \
kinect_dict['Calibration']['Depth']['Resolution'][()]
return resolution
def get_kinect_depth_intrinsics(self, kinect_id):
"""Get intrinsics of a kinect depth camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray: A 3x3 matrix.
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
intrinsics = \
kinect_dict['Calibration']['Depth']['Intrinsics'][()]
cx, cy, fx, fy = intrinsics[:4]
intrinsics = \
np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
return intrinsics
def get_iphone_intrinsics(self, iphone_id=0, frame_id=0, vertical=True):
"""Get intrinsics of an iPhone RGB camera by iPhone id.
Args:
iphone_id (int, optional):
ID of an iPhone, starts from 0.
Defaults to 0.
frame_id (int, optional):
int: frame id of one selected frame
Defaults to 0.
vertical (bool, optional):
iPhone assumes landscape orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
ndarray: A 3x3 matrix.
"""
camera_info = self.smc['iPhone'][str(iphone_id)]['CameraInfo'][str(
frame_id)]
camera_info = json.loads(camera_info[()])
intrinsics = np.asarray(camera_info['cameraIntrinsics']).transpose()
# Intrinsics have to be adjusted to achieve rotation
# 1. swapping fx, fy
# 2. cx -> image height - cy; cy -> cx
if vertical:
fx, fy = intrinsics[0, 0], intrinsics[1, 1]
cx, cy = intrinsics[0, 2], intrinsics[1, 2]
W, H = self.get_iphone_color_resolution(vertical=False)
intrinsics = np.eye(3)
intrinsics[0, 0], intrinsics[1, 1] = fy, fx
intrinsics[0, 2], intrinsics[1, 2] = H - cy, cx
return intrinsics
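    # Portrait adjustment illustration (made-up numbers): with a landscape
    # resolution (W, H) = (1920, 1440) and principal point (cx, cy) =
    # (960, 720), the portrait intrinsics swap fx and fy and move the
    # principal point to (cx', cy') = (H - cy, cx) = (720, 960).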
def get_iphone_extrinsics(self,
iphone_id=0,
homogeneous=True,
vertical=True):
"""Get extrinsics(cam2world) of an iPhone RGB camera by iPhone id.
Args:
iphone_id (int, optional):
ID of an iPhone, starts from 0.
Defaults to 0.
homogeneous (bool, optional):
If true, returns rotation and translation in
one 4x4 matrix. Defaults to True.
vertical (bool, optional):
iPhone assumes landscape orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
homogeneous is True
ndarray: A 4x4 transformation matrix(cam2world).
homogeneous is False
dict: A dict of rotation and translation,
keys are R and T,
each value is an ndarray.
"""
if iphone_id != 0:
raise KeyError('Currently only one iPhone.')
R = np.asarray(self.calibration_dict['iPhone']['R']).reshape(3, 3)
T = np.asarray(self.calibration_dict['iPhone']['T']).reshape(3)
# cam2world
extrinsics = np.identity(4, dtype=float)
extrinsics[:3, :3] = R
extrinsics[:3, 3] = T
# Extrinsics have to be adjusted to achieve rotation
# A rotation matrix is applied on the extrinsics
if vertical:
# 90-degree clockwise rotation around z-axis
R = np.eye(4)
R[:2, :2] = np.array([[0, -1], [1, 0]])
# Note the extrinsics is cam2world
# world2cam_adjusted = R @ world2cam
# => cam2world_adjusted = cam2world @ inv(R)
extrinsics = extrinsics @ np.linalg.inv(R)
R = extrinsics[:3, :3]
T = extrinsics[:3, 3]
if homogeneous:
return extrinsics
else:
return {'R': R, 'T': T}
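    # The vertical adjustment above right-multiplies the cam2world matrix by
    # the inverse of a 90-degree roll about the camera z-axis, e.g. assuming
    # identity extrinsics:
    #     R = np.eye(4)
    #     R[:2, :2] = np.array([[0, -1], [1, 0]])
    #     extrinsics_vertical = np.eye(4) @ np.linalg.inv(R)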
def get_iphone_color_resolution(self, iphone_id=0, vertical=True):
"""Get color image resolution of an iPhone RGB camera by iPhone id.
Args:
iphone_id (int, optional):
ID of an iPhone, starts from 0.
Defaults to 0.
vertical (bool, optional):
iPhone assumes landscape orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
            ndarray:
An ndarray of (width, height), shape=[2, ].
"""
if iphone_id != 0:
raise KeyError('Currently only one iPhone.')
if vertical:
W_horizontal, H_horizontal = self.iphone_color_resolution
W_vertical, H_vertical = H_horizontal, W_horizontal
return np.array([W_vertical, H_vertical])
else:
return self.iphone_color_resolution
def get_kinect_color(self, kinect_id, frame_id=None, disable_tqdm=True):
"""Get several frames captured by a kinect RGB camera.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
Returns:
ndarray:
An ndarray in shape [frame_number, height, width, channels].
"""
frames = []
if frame_id is None:
frame_list = range(self.get_kinect_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_kinect_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frames.append(
self.__read_color_from_bytes__(
self.smc['Kinect'][str(kinect_id)]['Color'][str(i)][()]))
return np.stack(frames, axis=0)
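    # frame_id accepts an int, a list of ints, or None (all frames), e.g.,
    # assuming `reader` is an SMCReader instance:
    #     one = reader.get_kinect_color(0, frame_id=5)          # (1, H, W, 3)
    #     a_few = reader.get_kinect_color(0, frame_id=[0, 10])  # (2, H, W, 3)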
    def get_kinect_rgbd(self,
                        kinect_id,
                        frame_id,
                        mode='color2depth',
                        threshold=0):
        """Get a kinect color frame mapped onto its depth frame.
        Only mode 'color2depth' is supported; returns a tuple of the
        mapped color image and the raw depth map.
        """
        if mode == 'color2depth':
            mapped_color = \
                self.__map_color_to_depth__(
                    kinect_id, frame_id, threshold=threshold
                )
            depth = self.get_kinect_depth(kinect_id, frame_id)[0]
            return mapped_color, depth
        else:
            raise ValueError('Mode {} is not supported.'.format(mode))
def get_kinect_depth(self, kinect_id, frame_id=None, disable_tqdm=True):
"""Get several frames captured by a kinect depth camera.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
Returns:
ndarray:
                An ndarray in shape [frame_number, height, width].
"""
        frames = []
        if frame_id is None:
            frame_list = range(self.get_kinect_num_frames())
        elif isinstance(frame_id, list):
            frame_list = frame_id
        elif isinstance(frame_id, int):
            assert frame_id < self.get_kinect_num_frames(),\
                'Index out of range...'
            frame_list = [frame_id]
        else:
            raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frames.append(
self.smc['Kinect'][str(kinect_id)]['Depth'][str(i)][()])
return np.stack(frames, axis=0)
def __read_color_from_bytes__(self, color_array):
"""Decode an RGB image from an encoded byte array."""
return cv2.cvtColor(cv2.imdecode(color_array, cv2.IMREAD_COLOR),
cv2.COLOR_BGR2RGB)
def get_num_kinect(self):
"""Get the number of Kinect devices.
Returns:
int:
Number of Kinect devices.
"""
return self.num_kinects
def get_kinect_num_frames(self):
"""Get the number of frames recorded by one Kinect RGB camera.
Returns:
int:
Number of frames.
"""
return self.kinect_num_frames
def get_iphone_num_frames(self):
"""Get the number of frames recorded by one iPhone RGB camera.
Returns:
int:
Number of frames.
"""
return self.iphone_num_frames
def get_depth_mask(self, device_id, frame_id):
return self.smc['Kinect'][str(device_id)]['Mask'][str(frame_id)][()]
def get_kinect_mask(self, device_id, frame_id):
kinect_dict = self.smc['Kinect'][str(device_id)]
return kinect_dict['Mask_k4abt'][str(frame_id)][()]
def get_num_iphone(self):
"""Get the number of iPhone devices.
Returns:
int:
Number of iPhone devices.
"""
return self.num_iphones
def get_iphone_color(self,
iphone_id=0,
frame_id=None,
disable_tqdm=True,
vertical=True):
"""Get several frames captured by an iPhone RGB camera.
Args:
iphone_id (int):
ID of an iPhone, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
vertical (bool, optional):
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
frames:
An ndarray in shape [frame_number, height, width, channels].
"""
frames = []
if frame_id is None:
frame_list = range(self.get_iphone_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_iphone_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frame = self.__read_color_from_bytes__(
self.smc['iPhone'][str(iphone_id)]['Color'][str(i)][()])
if vertical:
frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
frames.append(frame)
return np.stack(frames, axis=0)
def get_iphone_depth(self,
iphone_id=0,
frame_id=None,
disable_tqdm=True,
vertical=True):
"""Get several frames captured by an iPhone RGB camera.
Args:
iphone_id (int):
ID of an iPhone, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
vertical (bool, optional):
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
frames:
                An ndarray in shape [frame_number, height, width].
"""
frames = []
if frame_id is None:
frame_list = range(self.get_iphone_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_iphone_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frame = self.smc['iPhone'][str(iphone_id)]['Depth'][str(i)][()]
if vertical:
frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
frames.append(frame)
return np.stack(frames, axis=0)
def get_kinect_transformation_depth_to_color(self, device_id):
"""Get transformation matrix from depth to color from a single kinect.
Args:
            device_id (int):
                ID of a Kinect, starts from 0.
Returns:
ndarray: A 4x4 transformation matrix.
"""
return np.linalg.inv(self.get_kinect_color_extrinsics(
device_id)) @ self.get_kinect_depth_extrinsics(device_id)
def get_kinect_transformation_color_to_depth(self, device_id):
"""Get transformation matrix from color to depth from a single kinect.
Args:
            device_id (int):
                ID of a Kinect, starts from 0.
Returns:
ndarray: A 4x4 transformation matrix.
"""
return np.linalg.inv(self.get_kinect_depth_extrinsics(
device_id)) @ self.get_kinect_color_extrinsics(device_id)
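    # Sanity-check sketch: the two transformations above are inverses of each
    # other (up to numerical error), assuming `reader` is an SMCReader:
    #     T_d2c = reader.get_kinect_transformation_depth_to_color(0)
    #     T_c2d = reader.get_kinect_transformation_color_to_depth(0)
    #     assert np.allclose(T_d2c @ T_c2d, np.eye(4))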
    def __map_color_to_depth__(self, device_id, frame_id, threshold=100):
        """Project a kinect color frame onto its depth frame.
        Depth pixels whose mask value is below `threshold` are left white.
        """
color_image = self.get_kinect_color(device_id, frame_id)[0]
depth_image = self.get_kinect_depth(device_id, frame_id)[0]
color_intrinsic = self.get_kinect_color_intrinsics(device_id)
depth_intrinsic = self.get_kinect_depth_intrinsics(device_id)
mask = self.get_depth_mask(device_id, frame_id)
Td2c = self.get_kinect_transformation_depth_to_color(device_id)
colidx = np.arange(depth_image.shape[1])
rowidx = np.arange(depth_image.shape[0])
colidx_map, rowidx_map = np.meshgrid(colidx, rowidx)
col_indices = colidx_map[mask >= threshold]
row_indices = rowidx_map[mask >= threshold]
homo_padding = \
np.ones((col_indices.shape[0], 1), dtype=np.float32)
homo_indices = \
np.concatenate(
(col_indices[..., None], row_indices[..., None], homo_padding),
axis=1
)
        # Back-project the masked depth pixels to 3D points in the depth
        # camera frame: p = z * K_depth^{-1} @ [u, v, 1]^T.
        depth_intrinsic_inv = np.linalg.inv(depth_intrinsic)
        normalized_points = \
            depth_intrinsic_inv[None, ...] @ homo_indices[..., None]
        z_values = (depth_image / 1000)[mask >= threshold]
        valid_points = \
            normalized_points.squeeze() * z_values[..., None]
        # Move the points into the color camera frame and project them with
        # the color intrinsics to obtain pixel coordinates.
        R = Td2c[:3, :3]
        T = Td2c[:3, 3]
        valid_points = \
            R[None, ...] @ valid_points[..., None] + T[None, ..., None]
        valid_uvs = \
            color_intrinsic[None, ...] @\
            valid_points / valid_points[:, 2][..., None]
        valid_uvs = np.int32(valid_uvs.squeeze()[..., :2] + 0.5)
valid_uvs[:, 0] = np.clip(valid_uvs[:, 0], 0, color_image.shape[1] - 1)
valid_uvs[:, 1] = np.clip(valid_uvs[:, 1], 0, color_image.shape[0] - 1)
mapped_color = np.ones((depth_image.shape[0], depth_image.shape[1], 3),
dtype=np.uint8) * 255
mapped_color[mask >= threshold] = \
color_image[valid_uvs[:, 1], valid_uvs[:, 0]]
if threshold == 1:
return valid_uvs
return mapped_color
def get_kinect_skeleton_3d(self, device_id, frame_id):
"""Get the 3D skeleton key points from a certain kinect.
Args:
device_id (int):
                ID of a kinect, starts from 0.
            frame_id (int):
                Frame id of one selected frame.
        Returns:
            list:
                A list of 3D keypoints.
"""
kinect_dict = self.smc['Kinect'][str(device_id)]
return json.loads(kinect_dict['Skeleton_k4abt'][str(frame_id)][()])
def get_depth_floor(self, device_id: int) -> dict:
"""Get the floor plane defined by a normal vector and a center point
from a certain kinect.
Args:
device_id (int):
ID of a kinect, starts from 0.
Raises:
            KeyError:
                No 'floor' data exists for the specified kinect.
Returns:
dict:
A dict with 'center', 'normal' and 'pnum'.
"""
device_dict = self.calibration_dict[str(device_id * 2 + 1)]
if 'floor' in device_dict:
return device_dict['floor']
else:
raise KeyError(f'Kinect {device_id} has no floor data.')
def get_keypoints2d(self, device, device_id, frame_id=None, vertical=True):
"""Get keypoints2d projected from keypoints3d.
Args:
device (str):
Device name, should be Kinect or iPhone.
device_id (int):
ID of a device, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints2d (N, J, 3) and its mask (J, )
"""
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
assert device_id >= 0
kps2d_dict = self.smc['Keypoints2D'][device][str(device_id)]
keypoints2d = kps2d_dict['keypoints2d'][...]
keypoints2d_mask = kps2d_dict['keypoints2d_mask'][...]
if frame_id is None:
frame_list = range(self.get_keypoints_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_keypoints_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
keypoints2d = keypoints2d[frame_list, ...]
if device == 'iPhone' and vertical:
# rotate keypoints 2D clockwise by 90 degrees
W, H = self.get_iphone_color_resolution(vertical=False)
xs, ys, conf = \
keypoints2d[..., 0], keypoints2d[..., 1], keypoints2d[..., 2]
xs, ys = H - ys, xs # horizontal -> vertical
keypoints2d[..., 0], keypoints2d[..., 1] = xs.copy(), ys.copy()
keypoints2d[conf == 0.0] = 0.0
return keypoints2d, keypoints2d_mask
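    # Rotation sketch for the vertical branch above: in a landscape frame of
    # height H, a keypoint (x, y) maps to (H - y, x) in the portrait frame,
    # matching the cv2.ROTATE_90_CLOCKWISE rotation applied to color images.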
def get_kinect_keypoints2d(self, device_id, frame_id=None):
"""Get Kinect 2D keypoints.
Args:
device_id (int):
ID of Kinect, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints2d (N, J, 3) and its mask (J, )
"""
assert self.num_kinects > device_id >= 0
return self.get_keypoints2d('Kinect', device_id, frame_id)
def get_iphone_keypoints2d(self,
device_id=0,
frame_id=None,
vertical=True):
"""Get iPhone 2D keypoints.
Args:
device_id (int):
ID of iPhone, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints2d (N, J, 3) and its mask (J, )
"""
assert device_id >= 0
return self.get_keypoints2d('iPhone',
device_id,
frame_id,
vertical=vertical)
def get_color(self,
device,
device_id,
frame_id=None,
disable_tqdm=True,
vertical=True):
"""Get RGB image(s) from Kinect RGB or iPhone RGB camera.
Args:
device (str):
Device name, should be Kinect or iPhone.
device_id (int):
Device ID, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
img (ndarray):
An ndarray in shape [frame_number, height, width, channels].
"""
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
if device == 'Kinect':
img = self.get_kinect_color(device_id, frame_id, disable_tqdm)
else:
img = self.get_iphone_color(device_id,
frame_id,
disable_tqdm,
vertical=vertical)
return img
def get_keypoints_num_frames(self):
return self.keypoints_num_frames
def get_keypoints_convention(self):
return self.keypoints_convention
def get_keypoints_created_time(self):
return self.keypoints_created_time
def get_keypoints3d(self,
device=None,
device_id=None,
frame_id=None,
vertical=True):
"""Get keypoints3d (world coordinate) computed by mocap processing
pipeline.
Args:
device (str):
Device name, should be Kinect or iPhone.
None: world coordinate
Defaults to None.
device_id (int):
ID of a device, starts from 0.
None: world coordinate
Defaults to None
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints3d (N, J, 4) and its mask (J, )
"""
assert (device is None and device_id is None) or \
(device is not None and device_id is not None), \
'device and device_id should be both None or both not None.'
if device is not None:
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
if device_id is not None:
assert device_id >= 0
if frame_id is None:
frame_list = range(self.get_keypoints_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_keypoints_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
kps3d_dict = self.smc['Keypoints3D']
# keypoints3d are in world coordinate system
keypoints3d_world = kps3d_dict['keypoints3d'][...]
keypoints3d_world = keypoints3d_world[frame_list, ...]
keypoints3d_mask = kps3d_dict['keypoints3d_mask'][...]
# return keypoints3d in world coordinate system
if device is None:
return keypoints3d_world, keypoints3d_mask
# return keypoints3d in device coordinate system
else:
if device == 'Kinect':
cam2world = self.get_kinect_color_extrinsics(
kinect_id=device_id, homogeneous=True)
else:
cam2world = self.get_iphone_extrinsics(iphone_id=device_id,
vertical=vertical)
xyz, conf = keypoints3d_world[..., :3], keypoints3d_world[..., [3]]
xyz_homogeneous = np.ones([*xyz.shape[:-1], 4])
xyz_homogeneous[..., :3] = xyz
world2cam = np.linalg.inv(cam2world)
keypoints3d = np.einsum('ij,kmj->kmi', world2cam, xyz_homogeneous)
keypoints3d = np.concatenate([keypoints3d[..., :3], conf], axis=-1)
return keypoints3d, keypoints3d_mask
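    # Usage sketch (hypothetical ids, `reader` is an SMCReader): keypoints can
    # be queried in the world frame or transformed into a device frame:
    #     kps3d_world, mask = reader.get_keypoints3d()
    #     kps3d_cam, _ = reader.get_keypoints3d(device='Kinect', device_id=0)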
def get_smpl_num_frames(self):
return self.smpl_num_frames
def get_smpl_created_time(self):
return self.smpl_created_time
def get_smpl(self,
device=None,
device_id=None,
frame_id=None,
vertical=True):
"""Get SMPL (world coordinate) computed by mocap processing pipeline.
Args:
device (str):
Device name, should be Kinect or iPhone.
None: world coordinate
Defaults to None.
device_id (int):
ID of a device, starts from 0.
None: world coordinate
Defaults to None
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
dict:
'global_orient': np.ndarray of shape (N, 3)
'body_pose': np.ndarray of shape (N, 69)
'transl': np.ndarray of shape (N, 3)
'betas': np.ndarray of shape (N, 10)
"""
smpl_dict = self.smc['SMPL']
global_orient = smpl_dict['global_orient'][...]
body_pose = smpl_dict['body_pose'][...]
transl = smpl_dict['transl'][...]
betas = smpl_dict['betas'][...]
if frame_id is None:
frame_list = range(self.get_smpl_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
            assert frame_id < self.get_smpl_num_frames(),\
                'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
body_pose = body_pose[frame_list, ...]
global_orient = global_orient[frame_list, ...]
transl = transl[frame_list, ...]
# return SMPL parameters in world coordinate system
if device is None:
smpl_dict = dict(global_orient=global_orient,
body_pose=body_pose,
transl=transl,
betas=betas)
return smpl_dict
# return SMPL parameters in device coordinate system
else:
if self.body_model is None:
self.body_model = \
build_body_model(self.default_body_model_config)
torch_device = self.body_model.global_orient.device
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
assert device_id >= 0
if device == 'Kinect':
T_cam2world = self.get_kinect_color_extrinsics(
kinect_id=device_id, homogeneous=True)
else:
T_cam2world = self.get_iphone_extrinsics(iphone_id=device_id,
vertical=vertical)
T_world2cam = np.linalg.inv(T_cam2world)
output = self.body_model(
global_orient=torch.tensor(global_orient, device=torch_device),
body_pose=torch.tensor(body_pose, device=torch_device),
transl=torch.tensor(transl, device=torch_device),
betas=torch.tensor(betas, device=torch_device))
joints = output['joints'].detach().cpu().numpy()
pelvis = joints[:, 0, :]
new_global_orient, new_transl = batch_transform_to_camera_frame(
global_orient=global_orient,
transl=transl,
pelvis=pelvis,
extrinsic=T_world2cam)
smpl_dict = dict(global_orient=new_global_orient,
body_pose=body_pose,
transl=new_transl,
betas=betas)
return smpl_dict
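# A minimal, hedged end-to-end sketch. The .smc path is hypothetical, and
# get_keypoints3d()/get_smpl() only work if the file contains those groups;
# a device-frame get_smpl() call would additionally need the SMPL model files
# under data/body_models/smpl.
if __name__ == '__main__':
    reader = SMCReader('/path/to/recording.smc')  # hypothetical path
    print('kinects:', reader.get_num_kinect())
    print('kinect frames:', reader.get_kinect_num_frames())
    # First color frame of kinect 0, shape (1, H, W, 3).
    color = reader.get_kinect_color(kinect_id=0, frame_id=0)
    print('color shape:', color.shape)
    if reader.keypoint_exists:
        # World-frame keypoints of the first frame, shape (1, J, 4).
        kps3d, kps3d_mask = reader.get_keypoints3d(frame_id=0)
        print('keypoints3d shape:', kps3d.shape)
    if reader.smpl_exists:
        # World-frame SMPL parameters of the first frame.
        smpl = reader.get_smpl(frame_id=0)
        print('smpl keys:', sorted(smpl.keys()))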