|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import List |
|
|
|
|
|
import attrs |
|
|
|
|
|
from .lazy_config_init import LazyDict |
|
|
|
|
|
|
|
|
@attrs.define(slots=False)
class DefaultModelConfig:
    """Base set of model configuration fields.

    Every field has a default, so the class can be instantiated with no
    arguments and individual fields overridden by keyword.
    """

    # Component configs; each stays None until a caller supplies one.
    # NOTE(review): presumably descriptions of lazily-built components --
    # confirm against ``lazy_config_init.LazyDict``.
    tokenizer: LazyDict = None
    conditioner: LazyDict = None
    net: LazyDict = None

    sigma_data: float = 0.5
    # Precision is stored as a string name, not a dtype object.
    precision: str = "bfloat16"
    # NOTE(review): presumably the key used to fetch the model input from a
    # data batch -- verify against the consumer of this config.
    input_data_key: str = "video"
    # Built through a factory so each instance owns its own list. attrs does
    # not copy a plain mutable default, so writing the list literal directly
    # would share one list object across every instance, and an in-place
    # edit on one config would silently change all the others.
    latent_shape: List[int] = attrs.field(factory=lambda: [16, 24, 44, 80])
|
|
|
|
|
|
|
|
@attrs.define(slots=False)
class LatentDiffusionDecoderModelConfig(DefaultModelConfig):
    """Model configuration that adds corruptor and decoder fields.

    Inherits every field of ``DefaultModelConfig`` and appends the ones
    below. All of them are optional: each defaults to ``None`` except the
    boolean flag, which defaults to ``False``.
    """

    # Corruptor component configs, unset (None) by default.
    tokenizer_corruptor: LazyDict = attrs.field(default=None)
    latent_corruptor: LazyDict = attrs.field(default=None)
    pixel_corruptor: LazyDict = attrs.field(default=None)

    # Numeric decoder settings; None means "not configured".
    # NOTE(review): names suggest a sigma range and a corruption probability
    # -- confirm units and valid ranges against the code that reads them.
    diffusion_decoder_cond_sigma_low: float = attrs.field(default=None)
    diffusion_decoder_cond_sigma_high: float = attrs.field(default=None)
    diffusion_decoder_corrupt_prob: float = attrs.field(default=None)

    # Off unless explicitly enabled.
    condition_on_tokenizer_corruptor_token: bool = attrs.field(default=False)
|
|
|