{
    "type": "sarm",
    "n_obs_steps": 1,
    "input_features": {
        "observation.images.base": {
            "type": "VISUAL",
            "shape": [
                480,
                640,
                3
            ]
        },
        "observation.state": {
            "type": "STATE",
            "shape": [
                32
            ]
        }
    },
    "output_features": {
        "stage": {
            "type": "REWARD",
            "shape": [
                9,
                5
            ]
        },
        "progress": {
            "type": "REWARD",
            "shape": [
                9,
                1
            ]
        },
        "sparse_stage": {
            "type": "REWARD",
            "shape": [
                9,
                5
            ]
        },
        "sparse_progress": {
            "type": "REWARD",
            "shape": [
                9,
                1
            ]
        },
        "dense_stage": {
            "type": "REWARD",
            "shape": [
                9,
                5
            ]
        },
        "dense_progress": {
            "type": "REWARD",
            "shape": [
                9,
                1
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "push_to_hub": true,
    "repo_id": "pepijn223/sarm_dual_5k",
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": null,
    "image_dim": 512,
    "text_dim": 512,
    "num_frames": 9,
    "frame_gap": 30,
    "hidden_dim": 768,
    "num_heads": 12,
    "num_layers": 8,
    "max_state_dim": 32,
    "max_length": 9,
    "use_temporal_sampler": true,
    "dual_sparse_dense": true,
    "num_sparse_stages": 2,
    "sparse_subtask_names": [
        "Bring arms up from starting position",
        "fold the towel (3 folds in total)"
    ],
    "sparse_temporal_proportions": [
        0.09024656477258666,
        0.9097534352274134
    ],
    "num_dense_stages": 4,
    "dense_subtask_names": [
        "Bring robot arms up from starting position",
        "Grab near side and do 1st fold",
        "Grab side and do 2nd fold",
        "Grab side and do 3rd fold to finish folding"
    ],
    "dense_temporal_proportions": [
        0.07669543912038806,
        0.19693673653769384,
        0.2531207457587962,
        0.4732470785831218
    ],
    "batch_size": 64,
    "clip_batch_size": 64,
    "dropout": 0.1,
    "stage_loss_weight": 1.0,
    "pretrained_model_path": null,
    "image_key": "observation.images.base",
    "state_key": "observation.state",
    "dual_inference_mode": "sparse",
    "normalization_mapping": {
        "VISUAL": "IDENTITY",
        "STATE": "MEAN_STD",
        "LANGUAGE": "IDENTITY",
        "REWARD": "IDENTITY"
    }
}