import json
import cv2
import h5py
import numpy as np
import torch
import tqdm
from detrsmpl.models.body_models.builder import build_body_model
from detrsmpl.models.body_models.utils import batch_transform_to_camera_frame
class SMCReader:
def __init__(self, file_path, body_model=None):
"""Read SenseMocapFile endswith ".smc", see: https://github.com/open-
mmlab/detrsmpl/blob/main/docs/smc.md.
Args:
file_path (str):
Path to an SMC file.
body_model (nn.Module or dict):
Only needed for SMPL transformation to device frame
if nn.Module: a body_model instance
if dict: a body_model config
"""
self.smc = h5py.File(file_path, 'r')
self.__calibration_dict__ = None
self.action_id = self.smc.attrs['action_id']
self.actor_id = self.smc.attrs['actor_id']
self.datetime_str = self.smc.attrs['datetime_str'] # .decode()
self.kinect_num_frames = self.smc['Kinect'].attrs['num_frame']
self.num_kinects = self.smc['Kinect'].attrs['num_device']
self.kinect_color_resolution = self.get_kinect_color_resolution(0)
self.kinect_depth_resolution = self.get_kinect_depth_resolution(0)
self.iphone_exists = 'iPhone' in self.smc.keys()
self.num_iphones = 1
if self.iphone_exists:
self.iphone_num_frames = self.smc['iPhone'].attrs['num_frame']
self.iphone_color_resolution = \
self.smc['iPhone'].attrs['color_resolution'] # vertical
self.iphone_depth_resolution = \
self.smc['iPhone'].attrs['depth_resolution'] # vertical
self.keypoint_exists = 'Keypoints3D' in self.smc.keys()
if self.keypoint_exists:
self.keypoints_num_frames = self.smc['Keypoints3D'].attrs[
'num_frame']
self.keypoints_convention = self.smc['Keypoints3D'].attrs[
'convention']
self.keypoints_created_time = self.smc['Keypoints3D'].attrs[
'created_time']
self.smpl_exists = 'SMPL' in self.smc.keys()
if self.smpl_exists:
self.smpl_num_frames = self.smc['SMPL'].attrs['num_frame']
self.smpl_created_time = self.smc['SMPL'].attrs['created_time']
# initialize body model
if isinstance(body_model, torch.nn.Module):
self.body_model = body_model
elif isinstance(body_model, dict):
self.body_model = build_body_model(body_model)
else:
# in most cases, SMCReader is instantiated for image reading
# only. Hence, it is wasteful to initialize a body model until
# really needed in get_smpl()
self.body_model = None
self.default_body_model_config = dict(
type='SMPL',
gender='neutral',
num_betas=10,
keypoint_src='smpl_45',
keypoint_dst='smpl_45',
model_path='data/body_models/smpl',
batch_size=1,
)
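    # A minimal usage sketch (the path below is hypothetical): the reader can
    # be constructed without a body model, which is only built lazily inside
    # get_smpl() when a device frame is requested.
    #     reader = SMCReader('/path/to/recording.smc')
    #     print(reader.get_num_kinect(), reader.get_kinect_num_frames())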
def get_kinect_color_extrinsics(self, kinect_id, homogeneous=True):
"""Get extrinsics(cam2world) of a kinect RGB camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
homogeneous (bool, optional):
If true, returns rotation and translation in
one 4x4 matrix. Defaults to True.
Returns:
homogeneous is True
ndarray: A 4x4 matrix of rotation and translation(cam2world).
homogeneous is False
dict: A dict of rotation and translation,
keys are R and T,
each value is an ndarray.
"""
R = np.asarray(self.calibration_dict[str(kinect_id * 2)]['R']).reshape(
3, 3)
T = np.asarray(self.calibration_dict[str(kinect_id *
2)]['T']).reshape(3)
if homogeneous:
extrinsics = np.identity(4, dtype=float)
extrinsics[:3, :3] = R
extrinsics[:3, 3] = T
return extrinsics
else:
return {'R': R, 'T': T}
@property
def calibration_dict(self):
"""Get the dict of calibration.
Returns:
dict:
A dict of calibrated extrinsics.
"""
        if self.__calibration_dict__ is None:
            self.__calibration_dict__ = \
                json.loads(self.smc['Extrinsics'][()])
        return self.__calibration_dict__
def get_kinect_depth_extrinsics(self, kinect_id, homogeneous=True):
"""Get extrinsics(cam2world) of a kinect depth camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
homogeneous (bool, optional):
If true, returns rotation and translation in
one 4x4 matrix. Defaults to True.
Returns:
homogeneous is True
ndarray: A 4x4 matrix of rotation and translation(cam2world).
homogeneous is False
dict: A dict of rotation and translation,
keys are R and T,
each value is an ndarray.
"""
R = np.asarray(self.calibration_dict[str(kinect_id * 2 +
1)]['R']).reshape(3, 3)
T = np.asarray(self.calibration_dict[str(kinect_id * 2 +
1)]['T']).reshape(3)
if homogeneous:
extrinsics = np.identity(4, dtype=float)
extrinsics[:3, :3] = R
extrinsics[:3, 3] = T
return extrinsics
else:
return {'R': R, 'T': T}
def get_kinect_color_intrinsics(self, kinect_id):
"""Get intrinsics of a kinect RGB camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray: A 3x3 matrix.
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
intrinsics = \
kinect_dict['Calibration']['Color']['Intrinsics'][()]
cx, cy, fx, fy = intrinsics[:4]
intrinsics = \
np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
return intrinsics
def get_kinect_color_resolution(self, kinect_id):
"""Get resolution of a kinect RGB camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray:
An ndarray of (width, height), shape=[2, ].
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
resolution = \
kinect_dict['Calibration']['Color']['Resolution'][()]
return resolution
def get_kinect_depth_resolution(self, kinect_id):
"""Get resolution of a kinect depth camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray:
An ndarray of (width, height), shape=[2, ].
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
resolution = \
kinect_dict['Calibration']['Depth']['Resolution'][()]
return resolution
def get_kinect_depth_intrinsics(self, kinect_id):
"""Get intrinsics of a kinect depth camera by kinect id.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
Returns:
ndarray: A 3x3 matrix.
"""
kinect_dict = self.smc['Kinect'][str(kinect_id)]
intrinsics = \
kinect_dict['Calibration']['Depth']['Intrinsics'][()]
cx, cy, fx, fy = intrinsics[:4]
intrinsics = \
np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
return intrinsics
def get_iphone_intrinsics(self, iphone_id=0, frame_id=0, vertical=True):
"""Get intrinsics of an iPhone RGB camera by iPhone id.
Args:
iphone_id (int, optional):
ID of an iPhone, starts from 0.
Defaults to 0.
frame_id (int, optional):
int: frame id of one selected frame
Defaults to 0.
vertical (bool, optional):
iPhone assumes landscape orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
ndarray: A 3x3 matrix.
"""
camera_info = self.smc['iPhone'][str(iphone_id)]['CameraInfo'][str(
frame_id)]
camera_info = json.loads(camera_info[()])
intrinsics = np.asarray(camera_info['cameraIntrinsics']).transpose()
# Intrinsics have to be adjusted to achieve rotation
# 1. swapping fx, fy
# 2. cx -> image height - cy; cy -> cx
if vertical:
fx, fy = intrinsics[0, 0], intrinsics[1, 1]
cx, cy = intrinsics[0, 2], intrinsics[1, 2]
W, H = self.get_iphone_color_resolution(vertical=False)
intrinsics = np.eye(3)
intrinsics[0, 0], intrinsics[1, 1] = fy, fx
intrinsics[0, 2], intrinsics[1, 2] = H - cy, cx
return intrinsics
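    # Portrait adjustment illustration (made-up numbers): with a landscape
    # resolution (W, H) = (1920, 1440) and principal point (cx, cy) =
    # (960, 720), the portrait intrinsics swap fx and fy and move the
    # principal point to (cx', cy') = (H - cy, cx) = (720, 960).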
def get_iphone_extrinsics(self,
iphone_id=0,
homogeneous=True,
vertical=True):
"""Get extrinsics(cam2world) of an iPhone RGB camera by iPhone id.
Args:
iphone_id (int, optional):
ID of an iPhone, starts from 0.
Defaults to 0.
homogeneous (bool, optional):
If true, returns rotation and translation in
one 4x4 matrix. Defaults to True.
vertical (bool, optional):
iPhone assumes landscape orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
homogeneous is True
ndarray: A 4x4 transformation matrix(cam2world).
homogeneous is False
dict: A dict of rotation and translation,
keys are R and T,
each value is an ndarray.
"""
if iphone_id != 0:
raise KeyError('Currently only one iPhone.')
R = np.asarray(self.calibration_dict['iPhone']['R']).reshape(3, 3)
T = np.asarray(self.calibration_dict['iPhone']['T']).reshape(3)
# cam2world
extrinsics = np.identity(4, dtype=float)
extrinsics[:3, :3] = R
extrinsics[:3, 3] = T
# Extrinsics have to be adjusted to achieve rotation
# A rotation matrix is applied on the extrinsics
if vertical:
# 90-degree clockwise rotation around z-axis
R = np.eye(4)
R[:2, :2] = np.array([[0, -1], [1, 0]])
# Note the extrinsics is cam2world
# world2cam_adjusted = R @ world2cam
# => cam2world_adjusted = cam2world @ inv(R)
extrinsics = extrinsics @ np.linalg.inv(R)
R = extrinsics[:3, :3]
T = extrinsics[:3, 3]
if homogeneous:
return extrinsics
else:
return {'R': R, 'T': T}
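    # The vertical adjustment above right-multiplies the cam2world matrix by
    # the inverse of a 90-degree roll about the camera z-axis, e.g. assuming
    # identity extrinsics:
    #     R = np.eye(4)
    #     R[:2, :2] = np.array([[0, -1], [1, 0]])
    #     extrinsics_vertical = np.eye(4) @ np.linalg.inv(R)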
def get_iphone_color_resolution(self, iphone_id=0, vertical=True):
"""Get color image resolution of an iPhone RGB camera by iPhone id.
Args:
iphone_id (int, optional):
ID of an iPhone, starts from 0.
Defaults to 0.
vertical (bool, optional):
iPhone assumes landscape orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
            ndarray:
An ndarray of (width, height), shape=[2, ].
"""
if iphone_id != 0:
raise KeyError('Currently only one iPhone.')
if vertical:
W_horizontal, H_horizontal = self.iphone_color_resolution
W_vertical, H_vertical = H_horizontal, W_horizontal
return np.array([W_vertical, H_vertical])
else:
return self.iphone_color_resolution
def get_kinect_color(self, kinect_id, frame_id=None, disable_tqdm=True):
"""Get several frames captured by a kinect RGB camera.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
Returns:
ndarray:
An ndarray in shape [frame_number, height, width, channels].
"""
frames = []
if frame_id is None:
frame_list = range(self.get_kinect_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_kinect_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frames.append(
self.__read_color_from_bytes__(
self.smc['Kinect'][str(kinect_id)]['Color'][str(i)][()]))
return np.stack(frames, axis=0)
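    # frame_id accepts an int, a list of ints, or None (all frames), e.g.,
    # assuming `reader` is an SMCReader instance:
    #     one = reader.get_kinect_color(0, frame_id=5)          # (1, H, W, 3)
    #     a_few = reader.get_kinect_color(0, frame_id=[0, 10])  # (2, H, W, 3)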
    def get_kinect_rgbd(self,
                        kinect_id,
                        frame_id,
                        mode='color2depth',
                        threshold=0):
        """Get a kinect color frame mapped onto its depth frame.
        Only mode 'color2depth' is supported; returns a tuple of the
        mapped color image and the raw depth map.
        """
        if mode == 'color2depth':
            mapped_color = \
                self.__map_color_to_depth__(
                    kinect_id, frame_id, threshold=threshold
                )
            depth = self.get_kinect_depth(kinect_id, frame_id)[0]
            return mapped_color, depth
        else:
            raise ValueError('Mode {} is not supported.'.format(mode))
def get_kinect_depth(self, kinect_id, frame_id=None, disable_tqdm=True):
"""Get several frames captured by a kinect depth camera.
Args:
kinect_id (int):
ID of a kinect, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
Returns:
ndarray:
                An ndarray in shape [frame_number, height, width].
"""
        frames = []
        if frame_id is None:
            frame_list = range(self.get_kinect_num_frames())
        elif isinstance(frame_id, list):
            frame_list = frame_id
        elif isinstance(frame_id, int):
            assert frame_id < self.get_kinect_num_frames(),\
                'Index out of range...'
            frame_list = [frame_id]
        else:
            raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frames.append(
self.smc['Kinect'][str(kinect_id)]['Depth'][str(i)][()])
return np.stack(frames, axis=0)
def __read_color_from_bytes__(self, color_array):
"""Decode an RGB image from an encoded byte array."""
return cv2.cvtColor(cv2.imdecode(color_array, cv2.IMREAD_COLOR),
cv2.COLOR_BGR2RGB)
def get_num_kinect(self):
"""Get the number of Kinect devices.
Returns:
int:
Number of Kinect devices.
"""
return self.num_kinects
def get_kinect_num_frames(self):
"""Get the number of frames recorded by one Kinect RGB camera.
Returns:
int:
Number of frames.
"""
return self.kinect_num_frames
def get_iphone_num_frames(self):
"""Get the number of frames recorded by one iPhone RGB camera.
Returns:
int:
Number of frames.
"""
return self.iphone_num_frames
def get_depth_mask(self, device_id, frame_id):
return self.smc['Kinect'][str(device_id)]['Mask'][str(frame_id)][()]
def get_kinect_mask(self, device_id, frame_id):
kinect_dict = self.smc['Kinect'][str(device_id)]
return kinect_dict['Mask_k4abt'][str(frame_id)][()]
def get_num_iphone(self):
"""Get the number of iPhone devices.
Returns:
int:
Number of iPhone devices.
"""
return self.num_iphones
def get_iphone_color(self,
iphone_id=0,
frame_id=None,
disable_tqdm=True,
vertical=True):
"""Get several frames captured by an iPhone RGB camera.
Args:
iphone_id (int):
ID of an iPhone, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
vertical (bool, optional):
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
frames:
An ndarray in shape [frame_number, height, width, channels].
"""
frames = []
if frame_id is None:
frame_list = range(self.get_iphone_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_iphone_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frame = self.__read_color_from_bytes__(
self.smc['iPhone'][str(iphone_id)]['Color'][str(i)][()])
if vertical:
frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
frames.append(frame)
return np.stack(frames, axis=0)
def get_iphone_depth(self,
iphone_id=0,
frame_id=None,
disable_tqdm=True,
vertical=True):
"""Get several frames captured by an iPhone RGB camera.
Args:
iphone_id (int):
ID of an iPhone, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
vertical (bool, optional):
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
frames:
                An ndarray in shape [frame_number, height, width].
"""
frames = []
if frame_id is None:
frame_list = range(self.get_iphone_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_iphone_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
for i in tqdm.tqdm(frame_list, disable=disable_tqdm):
frame = self.smc['iPhone'][str(iphone_id)]['Depth'][str(i)][()]
if vertical:
frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
frames.append(frame)
return np.stack(frames, axis=0)
def get_kinect_transformation_depth_to_color(self, device_id):
"""Get transformation matrix from depth to color from a single kinect.
Args:
            device_id (int):
                ID of a Kinect, starts from 0.
Returns:
ndarray: A 4x4 transformation matrix.
"""
return np.linalg.inv(self.get_kinect_color_extrinsics(
device_id)) @ self.get_kinect_depth_extrinsics(device_id)
def get_kinect_transformation_color_to_depth(self, device_id):
"""Get transformation matrix from color to depth from a single kinect.
Args:
            device_id (int):
                ID of a Kinect, starts from 0.
Returns:
ndarray: A 4x4 transformation matrix.
"""
return np.linalg.inv(self.get_kinect_depth_extrinsics(
device_id)) @ self.get_kinect_color_extrinsics(device_id)
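    # Sanity-check sketch: the two transformations above are inverses of each
    # other (up to numerical error), assuming `reader` is an SMCReader:
    #     T_d2c = reader.get_kinect_transformation_depth_to_color(0)
    #     T_c2d = reader.get_kinect_transformation_color_to_depth(0)
    #     assert np.allclose(T_d2c @ T_c2d, np.eye(4))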
    def __map_color_to_depth__(self, device_id, frame_id, threshold=100):
        """Project a kinect color frame onto its depth frame.
        Depth pixels whose mask value is below `threshold` are left white.
        """
color_image = self.get_kinect_color(device_id, frame_id)[0]
depth_image = self.get_kinect_depth(device_id, frame_id)[0]
color_intrinsic = self.get_kinect_color_intrinsics(device_id)
depth_intrinsic = self.get_kinect_depth_intrinsics(device_id)
mask = self.get_depth_mask(device_id, frame_id)
Td2c = self.get_kinect_transformation_depth_to_color(device_id)
colidx = np.arange(depth_image.shape[1])
rowidx = np.arange(depth_image.shape[0])
colidx_map, rowidx_map = np.meshgrid(colidx, rowidx)
col_indices = colidx_map[mask >= threshold]
row_indices = rowidx_map[mask >= threshold]
homo_padding = \
np.ones((col_indices.shape[0], 1), dtype=np.float32)
homo_indices = \
np.concatenate(
(col_indices[..., None], row_indices[..., None], homo_padding),
axis=1
)
        # Back-project the masked depth pixels to 3D points in the depth
        # camera frame: p = z * K_depth^{-1} @ [u, v, 1]^T.
        depth_intrinsic_inv = np.linalg.inv(depth_intrinsic)
        normalized_points = \
            depth_intrinsic_inv[None, ...] @ homo_indices[..., None]
        z_values = (depth_image / 1000)[mask >= threshold]
        valid_points = \
            normalized_points.squeeze() * z_values[..., None]
        # Move the points into the color camera frame and project them with
        # the color intrinsics to obtain pixel coordinates.
        R = Td2c[:3, :3]
        T = Td2c[:3, 3]
        valid_points = \
            R[None, ...] @ valid_points[..., None] + T[None, ..., None]
        valid_uvs = \
            color_intrinsic[None, ...] @\
            valid_points / valid_points[:, 2][..., None]
        valid_uvs = np.int32(valid_uvs.squeeze()[..., :2] + 0.5)
valid_uvs[:, 0] = np.clip(valid_uvs[:, 0], 0, color_image.shape[1] - 1)
valid_uvs[:, 1] = np.clip(valid_uvs[:, 1], 0, color_image.shape[0] - 1)
mapped_color = np.ones((depth_image.shape[0], depth_image.shape[1], 3),
dtype=np.uint8) * 255
mapped_color[mask >= threshold] = \
color_image[valid_uvs[:, 1], valid_uvs[:, 0]]
if threshold == 1:
return valid_uvs
return mapped_color
def get_kinect_skeleton_3d(self, device_id, frame_id):
"""Get the 3D skeleton key points from a certain kinect.
Args:
device_id (int):
                ID of a kinect, starts from 0.
            frame_id (int):
                Frame id of one selected frame.
        Returns:
            list:
                A list of 3D keypoints.
"""
kinect_dict = self.smc['Kinect'][str(device_id)]
return json.loads(kinect_dict['Skeleton_k4abt'][str(frame_id)][()])
def get_depth_floor(self, device_id: int) -> dict:
"""Get the floor plane defined by a normal vector and a center point
from a certain kinect.
Args:
device_id (int):
ID of a kinect, starts from 0.
Raises:
            KeyError:
                No 'floor' data exists for the specified kinect.
Returns:
dict:
A dict with 'center', 'normal' and 'pnum'.
"""
device_dict = self.calibration_dict[str(device_id * 2 + 1)]
if 'floor' in device_dict:
return device_dict['floor']
else:
raise KeyError(f'Kinect {device_id} has no floor data.')
def get_keypoints2d(self, device, device_id, frame_id=None, vertical=True):
"""Get keypoints2d projected from keypoints3d.
Args:
device (str):
Device name, should be Kinect or iPhone.
device_id (int):
ID of a device, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints2d (N, J, 3) and its mask (J, )
"""
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
assert device_id >= 0
kps2d_dict = self.smc['Keypoints2D'][device][str(device_id)]
keypoints2d = kps2d_dict['keypoints2d'][...]
keypoints2d_mask = kps2d_dict['keypoints2d_mask'][...]
if frame_id is None:
frame_list = range(self.get_keypoints_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_keypoints_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
keypoints2d = keypoints2d[frame_list, ...]
if device == 'iPhone' and vertical:
# rotate keypoints 2D clockwise by 90 degrees
W, H = self.get_iphone_color_resolution(vertical=False)
xs, ys, conf = \
keypoints2d[..., 0], keypoints2d[..., 1], keypoints2d[..., 2]
xs, ys = H - ys, xs # horizontal -> vertical
keypoints2d[..., 0], keypoints2d[..., 1] = xs.copy(), ys.copy()
keypoints2d[conf == 0.0] = 0.0
return keypoints2d, keypoints2d_mask
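    # Rotation sketch for the vertical branch above: in a landscape frame of
    # height H, a keypoint (x, y) maps to (H - y, x) in the portrait frame,
    # matching the cv2.ROTATE_90_CLOCKWISE rotation applied to color images.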
def get_kinect_keypoints2d(self, device_id, frame_id=None):
"""Get Kinect 2D keypoints.
Args:
device_id (int):
ID of Kinect, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints2d (N, J, 3) and its mask (J, )
"""
assert self.num_kinects > device_id >= 0
return self.get_keypoints2d('Kinect', device_id, frame_id)
def get_iphone_keypoints2d(self,
device_id=0,
frame_id=None,
vertical=True):
"""Get iPhone 2D keypoints.
Args:
device_id (int):
ID of iPhone, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints2d (N, J, 3) and its mask (J, )
"""
assert device_id >= 0
return self.get_keypoints2d('iPhone',
device_id,
frame_id,
vertical=vertical)
def get_color(self,
device,
device_id,
frame_id=None,
disable_tqdm=True,
vertical=True):
"""Get RGB image(s) from Kinect RGB or iPhone RGB camera.
Args:
device (str):
Device name, should be Kinect or iPhone.
device_id (int):
Device ID, starts from 0.
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
disable_tqdm (bool, optional):
Whether to disable the entire progressbar wrapper.
Defaults to True.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
img (ndarray):
An ndarray in shape [frame_number, height, width, channels].
"""
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
if device == 'Kinect':
img = self.get_kinect_color(device_id, frame_id, disable_tqdm)
else:
img = self.get_iphone_color(device_id,
frame_id,
disable_tqdm,
vertical=vertical)
return img
def get_keypoints_num_frames(self):
return self.keypoints_num_frames
def get_keypoints_convention(self):
return self.keypoints_convention
def get_keypoints_created_time(self):
return self.keypoints_created_time
def get_keypoints3d(self,
device=None,
device_id=None,
frame_id=None,
vertical=True):
"""Get keypoints3d (world coordinate) computed by mocap processing
pipeline.
Args:
device (str):
Device name, should be Kinect or iPhone.
None: world coordinate
Defaults to None.
device_id (int):
ID of a device, starts from 0.
None: world coordinate
Defaults to None
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
Tuple[np.ndarray, np.ndarray]:
keypoints3d (N, J, 4) and its mask (J, )
"""
assert (device is None and device_id is None) or \
(device is not None and device_id is not None), \
'device and device_id should be both None or both not None.'
if device is not None:
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
if device_id is not None:
assert device_id >= 0
if frame_id is None:
frame_list = range(self.get_keypoints_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
assert frame_id < self.get_keypoints_num_frames(),\
'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
kps3d_dict = self.smc['Keypoints3D']
# keypoints3d are in world coordinate system
keypoints3d_world = kps3d_dict['keypoints3d'][...]
keypoints3d_world = keypoints3d_world[frame_list, ...]
keypoints3d_mask = kps3d_dict['keypoints3d_mask'][...]
# return keypoints3d in world coordinate system
if device is None:
return keypoints3d_world, keypoints3d_mask
# return keypoints3d in device coordinate system
else:
if device == 'Kinect':
cam2world = self.get_kinect_color_extrinsics(
kinect_id=device_id, homogeneous=True)
else:
cam2world = self.get_iphone_extrinsics(iphone_id=device_id,
vertical=vertical)
xyz, conf = keypoints3d_world[..., :3], keypoints3d_world[..., [3]]
xyz_homogeneous = np.ones([*xyz.shape[:-1], 4])
xyz_homogeneous[..., :3] = xyz
world2cam = np.linalg.inv(cam2world)
keypoints3d = np.einsum('ij,kmj->kmi', world2cam, xyz_homogeneous)
keypoints3d = np.concatenate([keypoints3d[..., :3], conf], axis=-1)
return keypoints3d, keypoints3d_mask
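    # Usage sketch (hypothetical ids, `reader` is an SMCReader): keypoints can
    # be queried in the world frame or transformed into a device frame:
    #     kps3d_world, mask = reader.get_keypoints3d()
    #     kps3d_cam, _ = reader.get_keypoints3d(device='Kinect', device_id=0)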
def get_smpl_num_frames(self):
return self.smpl_num_frames
def get_smpl_created_time(self):
return self.smpl_created_time
def get_smpl(self,
device=None,
device_id=None,
frame_id=None,
vertical=True):
"""Get SMPL (world coordinate) computed by mocap processing pipeline.
Args:
device (str):
Device name, should be Kinect or iPhone.
None: world coordinate
Defaults to None.
device_id (int):
ID of a device, starts from 0.
None: world coordinate
Defaults to None
frame_id (int, list or None, optional):
int: frame id of one selected frame
list: a list of frame id
None: all frames will be returned
Defaults to None.
vertical (bool, optional):
Only applicable to iPhone as device
iPhone assumes horizontal orientation
if True, convert data to vertical orientation
Defaults to True.
Returns:
dict:
'global_orient': np.ndarray of shape (N, 3)
'body_pose': np.ndarray of shape (N, 69)
'transl': np.ndarray of shape (N, 3)
'betas': np.ndarray of shape (N, 10)
"""
smpl_dict = self.smc['SMPL']
global_orient = smpl_dict['global_orient'][...]
body_pose = smpl_dict['body_pose'][...]
transl = smpl_dict['transl'][...]
betas = smpl_dict['betas'][...]
if frame_id is None:
frame_list = range(self.get_smpl_num_frames())
elif isinstance(frame_id, list):
frame_list = frame_id
elif isinstance(frame_id, int):
            assert frame_id < self.get_smpl_num_frames(),\
                'Index out of range...'
frame_list = [frame_id]
else:
raise TypeError('frame_id should be int, list or None.')
body_pose = body_pose[frame_list, ...]
global_orient = global_orient[frame_list, ...]
transl = transl[frame_list, ...]
# return SMPL parameters in world coordinate system
if device is None:
smpl_dict = dict(global_orient=global_orient,
body_pose=body_pose,
transl=transl,
betas=betas)
return smpl_dict
# return SMPL parameters in device coordinate system
else:
if self.body_model is None:
self.body_model = \
build_body_model(self.default_body_model_config)
torch_device = self.body_model.global_orient.device
assert device in {
'Kinect', 'iPhone'
}, f'Undefined device: {device}, should be "Kinect" or "iPhone"'
assert device_id >= 0
if device == 'Kinect':
T_cam2world = self.get_kinect_color_extrinsics(
kinect_id=device_id, homogeneous=True)
else:
T_cam2world = self.get_iphone_extrinsics(iphone_id=device_id,
vertical=vertical)
T_world2cam = np.linalg.inv(T_cam2world)
output = self.body_model(
global_orient=torch.tensor(global_orient, device=torch_device),
body_pose=torch.tensor(body_pose, device=torch_device),
transl=torch.tensor(transl, device=torch_device),
betas=torch.tensor(betas, device=torch_device))
joints = output['joints'].detach().cpu().numpy()
pelvis = joints[:, 0, :]
new_global_orient, new_transl = batch_transform_to_camera_frame(
global_orient=global_orient,
transl=transl,
pelvis=pelvis,
extrinsic=T_world2cam)
smpl_dict = dict(global_orient=new_global_orient,
body_pose=body_pose,
transl=new_transl,
betas=betas)
return smpl_dict
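# A minimal, hedged end-to-end sketch. The .smc path is hypothetical, and
# get_keypoints3d()/get_smpl() only work if the file contains those groups;
# a device-frame get_smpl() call would additionally need the SMPL model files
# under data/body_models/smpl.
if __name__ == '__main__':
    reader = SMCReader('/path/to/recording.smc')  # hypothetical path
    print('kinects:', reader.get_num_kinect())
    print('kinect frames:', reader.get_kinect_num_frames())
    # First color frame of kinect 0, shape (1, H, W, 3).
    color = reader.get_kinect_color(kinect_id=0, frame_id=0)
    print('color shape:', color.shape)
    if reader.keypoint_exists:
        # World-frame keypoints of the first frame, shape (1, J, 4).
        kps3d, kps3d_mask = reader.get_keypoints3d(frame_id=0)
        print('keypoints3d shape:', kps3d.shape)
    if reader.smpl_exists:
        # World-frame SMPL parameters of the first frame.
        smpl = reader.get_smpl(frame_id=0)
        print('smpl keys:', sorted(smpl.keys()))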