modify log
Browse files- aegis.py +1 -1
- ar_model.py +1 -1
- ar_networks.py +1 -1
- ar_tokenizer_image_text_tokenizer.py +1 -1
- ar_tokenizer_modules.py +1 -1
- ar_tokenizer_text_tokenizer.py +1 -1
- ar_transformer.py +1 -1
- blocklist.py +1 -1
- blocks.py +1 -1
- conditioner.py +1 -1
- config_helper.py +1 -1
- cosmos1/models/autoregressive/diffusion_decoder/inference.py +1 -1
- cosmos1/models/autoregressive/inference/base.py +1 -1
- cosmos1/models/autoregressive/inference/video2world.py +1 -1
- cosmos1/models/autoregressive/inference/world_generation_pipeline.py +1 -1
- cosmos1/models/autoregressive/nemo/cosmos.py +1 -1
- cosmos1/models/autoregressive/nemo/inference/general.py +1 -1
- cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py +1 -1
- cosmos1/models/autoregressive/nemo/utils.py +1 -1
- cosmos1/models/autoregressive/utils/inference.py +1 -1
- cosmos1/models/diffusion/nemo/inference/general.py +1 -1
- cosmos1/models/diffusion/nemo/inference/inference_utils.py +1 -1
- cosmos1/models/diffusion/nemo/post_training/general.py +2 -2
- cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py +1 -1
- cosmos1/models/diffusion/networks/general_dit.py +4 -4
- cosmos1/models/diffusion/networks/general_dit_video_conditioned.py +4 -4
- distributed.py +1 -1
- face_blur_filter.py +1 -1
- guardrail_blocklist_utils.py +1 -1
- guardrail_core.py +1 -1
- guardrail_io_utils.py +1 -1
- inference_utils.py +1 -1
- log.py +25 -22
- misc.py +1 -1
- model_config.py +1 -1
- model_t2w.py +1 -1
- model_v2w.py +1 -1
- presets.py +1 -1
- retinaface_utils.py +1 -1
- t5_text_encoder.py +1 -1
- text2world.py +1 -1
- text2world_hf.py +1 -1
- text2world_prompt_upsampler_inference.py +1 -1
- video2world.py +1 -1
- video2world_prompt_upsampler_inference.py +1 -1
- video_content_safety_filter.py +1 -1
- vit.py +1 -1
- world_generation_pipeline.py +1 -1
aegis.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
| 15 |
|
| 16 |
import argparse
|
| 17 |
|
| 18 |
-
from . import log
|
| 19 |
import torch
|
| 20 |
from peft import PeftModel
|
| 21 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
| 15 |
|
| 16 |
import argparse
|
| 17 |
|
| 18 |
+
from .log import log
|
| 19 |
import torch
|
| 20 |
from peft import PeftModel
|
| 21 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
ar_model.py
CHANGED
|
@@ -19,7 +19,7 @@ import time
|
|
| 19 |
from pathlib import Path
|
| 20 |
from typing import Any, Dict, List, Optional, Set
|
| 21 |
|
| 22 |
-
from . import log
|
| 23 |
import torch
|
| 24 |
from safetensors.torch import load_file
|
| 25 |
from torch.nn.modules.module import _IncompatibleKeys
|
|
|
|
| 19 |
from pathlib import Path
|
| 20 |
from typing import Any, Dict, List, Optional, Set
|
| 21 |
|
| 22 |
+
from .log import log
|
| 23 |
import torch
|
| 24 |
from safetensors.torch import load_file
|
| 25 |
from torch.nn.modules.module import _IncompatibleKeys
|
ar_networks.py
CHANGED
|
@@ -20,7 +20,7 @@ from torch import nn
|
|
| 20 |
|
| 21 |
from .ar_tokenizer_modules import CausalConv3d, DecoderFactorized, EncoderFactorized
|
| 22 |
from .ar_tokenizer_quantizers import FSQuantizer
|
| 23 |
-
from . import log
|
| 24 |
|
| 25 |
NetworkEval = namedtuple("NetworkEval", ["reconstructions", "quant_loss", "quant_info"])
|
| 26 |
|
|
|
|
| 20 |
|
| 21 |
from .ar_tokenizer_modules import CausalConv3d, DecoderFactorized, EncoderFactorized
|
| 22 |
from .ar_tokenizer_quantizers import FSQuantizer
|
| 23 |
+
from .log import log
|
| 24 |
|
| 25 |
NetworkEval = namedtuple("NetworkEval", ["reconstructions", "quant_loss", "quant_info"])
|
| 26 |
|
ar_tokenizer_image_text_tokenizer.py
CHANGED
|
@@ -22,7 +22,7 @@ from transformers import AutoImageProcessor
|
|
| 22 |
from transformers.image_utils import ImageInput, is_valid_image, load_image
|
| 23 |
|
| 24 |
from .ar_tokenizer_text_tokenizer import TextTokenizer
|
| 25 |
-
from . import log
|
| 26 |
|
| 27 |
# Configuration for different vision-language models
|
| 28 |
IMAGE_CONFIGS = {
|
|
|
|
| 22 |
from transformers.image_utils import ImageInput, is_valid_image, load_image
|
| 23 |
|
| 24 |
from .ar_tokenizer_text_tokenizer import TextTokenizer
|
| 25 |
+
from .log import log
|
| 26 |
|
| 27 |
# Configuration for different vision-language models
|
| 28 |
IMAGE_CONFIGS = {
|
ar_tokenizer_modules.py
CHANGED
|
@@ -41,7 +41,7 @@ from .ar_tokenizer_utils import (
|
|
| 41 |
space2batch,
|
| 42 |
time2batch,
|
| 43 |
)
|
| 44 |
-
from . import log
|
| 45 |
|
| 46 |
|
| 47 |
class CausalConv3d(nn.Module):
|
|
|
|
| 41 |
space2batch,
|
| 42 |
time2batch,
|
| 43 |
)
|
| 44 |
+
from .log import log
|
| 45 |
|
| 46 |
|
| 47 |
class CausalConv3d(nn.Module):
|
ar_tokenizer_text_tokenizer.py
CHANGED
|
@@ -19,7 +19,7 @@ import numpy as np
|
|
| 19 |
import torch
|
| 20 |
from transformers import AutoTokenizer
|
| 21 |
|
| 22 |
-
from . import log
|
| 23 |
|
| 24 |
|
| 25 |
def get_tokenizer_path(model_family: str, is_instruct_model: bool = False):
|
|
|
|
| 19 |
import torch
|
| 20 |
from transformers import AutoTokenizer
|
| 21 |
|
| 22 |
+
from .log import log
|
| 23 |
|
| 24 |
|
| 25 |
def get_tokenizer_path(model_family: str, is_instruct_model: bool = False):
|
ar_transformer.py
CHANGED
|
@@ -29,7 +29,7 @@ from .ar_modules_mlp import MLP
|
|
| 29 |
from .ar_modules_normalization import create_norm
|
| 30 |
from .checkpoint import process_state_dict, substrings_to_ignore
|
| 31 |
from .ar_utils_misc import maybe_convert_to_namespace
|
| 32 |
-
from . import log
|
| 33 |
|
| 34 |
|
| 35 |
class TransformerBlock(nn.Module):
|
|
|
|
| 29 |
from .ar_modules_normalization import create_norm
|
| 30 |
from .checkpoint import process_state_dict, substrings_to_ignore
|
| 31 |
from .ar_utils_misc import maybe_convert_to_namespace
|
| 32 |
+
from .log import log
|
| 33 |
|
| 34 |
|
| 35 |
class TransformerBlock(nn.Module):
|
blocklist.py
CHANGED
|
@@ -19,7 +19,7 @@ import re
|
|
| 19 |
import string
|
| 20 |
from difflib import SequenceMatcher
|
| 21 |
|
| 22 |
-
from . import log
|
| 23 |
import nltk
|
| 24 |
from better_profanity import profanity
|
| 25 |
|
|
|
|
| 19 |
import string
|
| 20 |
from difflib import SequenceMatcher
|
| 21 |
|
| 22 |
+
from .log import log
|
| 23 |
import nltk
|
| 24 |
from better_profanity import profanity
|
| 25 |
|
blocks.py
CHANGED
|
@@ -23,7 +23,7 @@ from einops.layers.torch import Rearrange
|
|
| 23 |
from torch import nn
|
| 24 |
|
| 25 |
from .attention import Attention, GPT2FeedForward
|
| 26 |
-
from . import log
|
| 27 |
|
| 28 |
|
| 29 |
def modulate(x, shift, scale):
|
|
|
|
| 23 |
from torch import nn
|
| 24 |
|
| 25 |
from .attention import Attention, GPT2FeedForward
|
| 26 |
+
from .log import log
|
| 27 |
|
| 28 |
|
| 29 |
def modulate(x, shift, scale):
|
conditioner.py
CHANGED
|
@@ -24,7 +24,7 @@ import torch
|
|
| 24 |
import torch.nn as nn
|
| 25 |
|
| 26 |
from .batch_ops import batch_mul
|
| 27 |
-
from . import log
|
| 28 |
from .lazy_config_init import instantiate
|
| 29 |
|
| 30 |
|
|
|
|
| 24 |
import torch.nn as nn
|
| 25 |
|
| 26 |
from .batch_ops import batch_mul
|
| 27 |
+
from .log import log
|
| 28 |
from .lazy_config_init import instantiate
|
| 29 |
|
| 30 |
|
config_helper.py
CHANGED
|
@@ -27,7 +27,7 @@ from hydra import compose, initialize
|
|
| 27 |
from hydra.core.config_store import ConfigStore
|
| 28 |
from omegaconf import DictConfig, OmegaConf
|
| 29 |
|
| 30 |
-
from . import log
|
| 31 |
from .config import Config
|
| 32 |
|
| 33 |
|
|
|
|
| 27 |
from hydra.core.config_store import ConfigStore
|
| 28 |
from omegaconf import DictConfig, OmegaConf
|
| 29 |
|
| 30 |
+
from .log import log
|
| 31 |
from .config import Config
|
| 32 |
|
| 33 |
|
cosmos1/models/autoregressive/diffusion_decoder/inference.py
CHANGED
|
@@ -22,7 +22,7 @@ import torch
|
|
| 22 |
from inference_config import DiffusionDecoderSamplingConfig
|
| 23 |
from cosmos1.models.autoregressive.diffusion_decoder.model import LatentDiffusionDecoderModel
|
| 24 |
from cosmos1.models.autoregressive.diffusion_decoder.utils import linear_blend_video_list, split_with_overlap
|
| 25 |
-
from . import log
|
| 26 |
|
| 27 |
|
| 28 |
def diffusion_decoder_process_tokens(
|
|
|
|
| 22 |
from inference_config import DiffusionDecoderSamplingConfig
|
| 23 |
from cosmos1.models.autoregressive.diffusion_decoder.model import LatentDiffusionDecoderModel
|
| 24 |
from cosmos1.models.autoregressive.diffusion_decoder.utils import linear_blend_video_list, split_with_overlap
|
| 25 |
+
from .log import log
|
| 26 |
|
| 27 |
|
| 28 |
def diffusion_decoder_process_tokens(
|
cosmos1/models/autoregressive/inference/base.py
CHANGED
|
@@ -21,7 +21,7 @@ import torch
|
|
| 21 |
|
| 22 |
from cosmos1.models.autoregressive.inference.world_generation_pipeline import ARBaseGenerationPipeline
|
| 23 |
from cosmos1.models.autoregressive.utils.inference import add_common_arguments, load_vision_input, validate_args
|
| 24 |
-
from . import log
|
| 25 |
|
| 26 |
|
| 27 |
def parse_args():
|
|
|
|
| 21 |
|
| 22 |
from cosmos1.models.autoregressive.inference.world_generation_pipeline import ARBaseGenerationPipeline
|
| 23 |
from cosmos1.models.autoregressive.utils.inference import add_common_arguments, load_vision_input, validate_args
|
| 24 |
+
from .log import log
|
| 25 |
|
| 26 |
|
| 27 |
def parse_args():
|
cosmos1/models/autoregressive/inference/video2world.py
CHANGED
|
@@ -21,7 +21,7 @@ import torch
|
|
| 21 |
|
| 22 |
from cosmos1.models.autoregressive.inference.world_generation_pipeline import ARVideo2WorldGenerationPipeline
|
| 23 |
from cosmos1.models.autoregressive.utils.inference import add_common_arguments, load_vision_input, validate_args
|
| 24 |
-
from . import log
|
| 25 |
from io import read_prompts_from_file
|
| 26 |
|
| 27 |
|
|
|
|
| 21 |
|
| 22 |
from cosmos1.models.autoregressive.inference.world_generation_pipeline import ARVideo2WorldGenerationPipeline
|
| 23 |
from cosmos1.models.autoregressive.utils.inference import add_common_arguments, load_vision_input, validate_args
|
| 24 |
+
from .log import log
|
| 25 |
from io import read_prompts_from_file
|
| 26 |
|
| 27 |
|
cosmos1/models/autoregressive/inference/world_generation_pipeline.py
CHANGED
|
@@ -17,7 +17,7 @@ import gc
|
|
| 17 |
import os
|
| 18 |
from typing import List, Optional, Tuple
|
| 19 |
|
| 20 |
-
from . import log
|
| 21 |
import numpy as np
|
| 22 |
import torch
|
| 23 |
from einops import rearrange
|
|
|
|
| 17 |
import os
|
| 18 |
from typing import List, Optional, Tuple
|
| 19 |
|
| 20 |
+
from .log import log
|
| 21 |
import numpy as np
|
| 22 |
import torch
|
| 23 |
from einops import rearrange
|
cosmos1/models/autoregressive/nemo/cosmos.py
CHANGED
|
@@ -29,7 +29,7 @@ from nemo.lightning import OptimizerModule, io
|
|
| 29 |
from nemo.lightning.base import teardown
|
| 30 |
from torch import Tensor, nn
|
| 31 |
|
| 32 |
-
from . import log
|
| 33 |
|
| 34 |
|
| 35 |
class RotaryEmbedding3D(RotaryEmbedding):
|
|
|
|
| 29 |
from nemo.lightning.base import teardown
|
| 30 |
from torch import Tensor, nn
|
| 31 |
|
| 32 |
+
from .log import log
|
| 33 |
|
| 34 |
|
| 35 |
class RotaryEmbedding3D(RotaryEmbedding):
|
cosmos1/models/autoregressive/nemo/inference/general.py
CHANGED
|
@@ -37,7 +37,7 @@ from cosmos1.models.autoregressive.nemo.utils import run_diffusion_decoder_model
|
|
| 37 |
from discrete_video import DiscreteVideoFSQJITTokenizer
|
| 38 |
from cosmos1.models.autoregressive.utils.inference import load_vision_input
|
| 39 |
from . import presets as guardrail_presets
|
| 40 |
-
from . import log
|
| 41 |
|
| 42 |
torch._C._jit_set_texpr_fuser_enabled(False)
|
| 43 |
|
|
|
|
| 37 |
from discrete_video import DiscreteVideoFSQJITTokenizer
|
| 38 |
from cosmos1.models.autoregressive.utils.inference import load_vision_input
|
| 39 |
from . import presets as guardrail_presets
|
| 40 |
+
from .log import log
|
| 41 |
|
| 42 |
torch._C._jit_set_texpr_fuser_enabled(False)
|
| 43 |
|
cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py
CHANGED
|
@@ -24,7 +24,7 @@ from nemo.collections.nlp.data.language_modeling.megatron import indexed_dataset
|
|
| 24 |
|
| 25 |
from cosmos1.models.autoregressive.nemo.utils import read_input_videos
|
| 26 |
from discrete_video import DiscreteVideoFSQJITTokenizer
|
| 27 |
-
from . import log
|
| 28 |
|
| 29 |
TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16]
|
| 30 |
DATA_RESOLUTION_SUPPORTED = [640, 1024]
|
|
|
|
| 24 |
|
| 25 |
from cosmos1.models.autoregressive.nemo.utils import read_input_videos
|
| 26 |
from discrete_video import DiscreteVideoFSQJITTokenizer
|
| 27 |
+
from .log import log
|
| 28 |
|
| 29 |
TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16]
|
| 30 |
DATA_RESOLUTION_SUPPORTED = [640, 1024]
|
cosmos1/models/autoregressive/nemo/utils.py
CHANGED
|
@@ -31,7 +31,7 @@ from inference_utils import (
|
|
| 31 |
load_tokenizer_model,
|
| 32 |
skip_init_linear,
|
| 33 |
)
|
| 34 |
-
from . import log
|
| 35 |
from config_helper import get_config_module, override
|
| 36 |
|
| 37 |
TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16]
|
|
|
|
| 31 |
load_tokenizer_model,
|
| 32 |
skip_init_linear,
|
| 33 |
)
|
| 34 |
+
from .log import log
|
| 35 |
from config_helper import get_config_module, override
|
| 36 |
|
| 37 |
TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16]
|
cosmos1/models/autoregressive/utils/inference.py
CHANGED
|
@@ -26,7 +26,7 @@ import torchvision
|
|
| 26 |
from PIL import Image
|
| 27 |
|
| 28 |
from inference_config import SamplingConfig
|
| 29 |
-
from . import log
|
| 30 |
|
| 31 |
_IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", "webp"]
|
| 32 |
_VIDEO_EXTENSIONS = [".mp4"]
|
|
|
|
| 26 |
from PIL import Image
|
| 27 |
|
| 28 |
from inference_config import SamplingConfig
|
| 29 |
+
from .log import log
|
| 30 |
|
| 31 |
_IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", "webp"]
|
| 32 |
_VIDEO_EXTENSIONS = [".mp4"]
|
cosmos1/models/diffusion/nemo/inference/general.py
CHANGED
|
@@ -37,7 +37,7 @@ from nemo.collections.diffusion.sampler.cosmos.cosmos_diffusion_pipeline import
|
|
| 37 |
from transformers import T5EncoderModel, T5TokenizerFast
|
| 38 |
|
| 39 |
from cosmos1.models.diffusion.nemo.inference.inference_utils import process_prompt, save_video
|
| 40 |
-
from . import log
|
| 41 |
|
| 42 |
EXAMPLE_PROMPT = (
|
| 43 |
"The teal robot is cooking food in a kitchen. Steam rises from a simmering pot "
|
|
|
|
| 37 |
from transformers import T5EncoderModel, T5TokenizerFast
|
| 38 |
|
| 39 |
from cosmos1.models.diffusion.nemo.inference.inference_utils import process_prompt, save_video
|
| 40 |
+
from .log import log
|
| 41 |
|
| 42 |
EXAMPLE_PROMPT = (
|
| 43 |
"The teal robot is cooking food in a kitchen. Steam rises from a simmering pot "
|
cosmos1/models/diffusion/nemo/inference/inference_utils.py
CHANGED
|
@@ -30,7 +30,7 @@ from presets import (
|
|
| 30 |
run_text_guardrail,
|
| 31 |
run_video_guardrail,
|
| 32 |
)
|
| 33 |
-
from . import log
|
| 34 |
|
| 35 |
|
| 36 |
def get_upsampled_prompt(
|
|
|
|
| 30 |
run_text_guardrail,
|
| 31 |
run_video_guardrail,
|
| 32 |
)
|
| 33 |
+
from .log import log
|
| 34 |
|
| 35 |
|
| 36 |
def get_upsampled_prompt(
|
cosmos1/models/diffusion/nemo/post_training/general.py
CHANGED
|
@@ -57,7 +57,7 @@ def cosmos_diffusion_7b_text2world_finetune() -> run.Partial:
|
|
| 57 |
recipe.resume.resume_if_exists = False
|
| 58 |
|
| 59 |
# Directory to save checkpoints / logs
|
| 60 |
-
recipe.
|
| 61 |
|
| 62 |
return recipe
|
| 63 |
|
|
@@ -102,7 +102,7 @@ def cosmos_diffusion_14b_text2world_finetune() -> run.Partial:
|
|
| 102 |
recipe.resume.resume_if_exists = False
|
| 103 |
|
| 104 |
# Directory to save checkpoints / logs
|
| 105 |
-
recipe.
|
| 106 |
|
| 107 |
return recipe
|
| 108 |
|
|
|
|
| 57 |
recipe.resume.resume_if_exists = False
|
| 58 |
|
| 59 |
# Directory to save checkpoints / logs
|
| 60 |
+
recipe.log_log_dir = "nemo_experiments/cosmos_diffusion_7b_text2world_finetune"
|
| 61 |
|
| 62 |
return recipe
|
| 63 |
|
|
|
|
| 102 |
recipe.resume.resume_if_exists = False
|
| 103 |
|
| 104 |
# Directory to save checkpoints / logs
|
| 105 |
+
recipe.log_log_dir = "nemo_experiments/cosmos_diffusion_14b_text2world_finetune"
|
| 106 |
|
| 107 |
return recipe
|
| 108 |
|
cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py
CHANGED
|
@@ -27,7 +27,7 @@ from nemo.collections.diffusion.models.model import DiT7BConfig
|
|
| 27 |
from tqdm import tqdm
|
| 28 |
from transformers import T5EncoderModel, T5TokenizerFast
|
| 29 |
|
| 30 |
-
from . import log
|
| 31 |
|
| 32 |
|
| 33 |
def get_parser():
|
|
|
|
| 27 |
from tqdm import tqdm
|
| 28 |
from transformers import T5EncoderModel, T5TokenizerFast
|
| 29 |
|
| 30 |
+
from .log import log
|
| 31 |
|
| 32 |
|
| 33 |
def get_parser():
|
cosmos1/models/diffusion/networks/general_dit.py
CHANGED
|
@@ -34,7 +34,7 @@ from blocks import (
|
|
| 34 |
Timesteps,
|
| 35 |
)
|
| 36 |
from position_embedding import LearnablePosEmbAxis, VideoRopePosition3DEmb
|
| 37 |
-
from . import log
|
| 38 |
|
| 39 |
|
| 40 |
class GeneralDIT(nn.Module):
|
|
@@ -390,16 +390,16 @@ class GeneralDIT(nn.Module):
|
|
| 390 |
latent_condition_sigma=latent_condition_sigma,
|
| 391 |
)
|
| 392 |
# logging affline scale information
|
| 393 |
-
|
| 394 |
|
| 395 |
timesteps_B_D, adaln_lora_B_3D = self.t_embedder(timesteps.flatten())
|
| 396 |
affline_emb_B_D = timesteps_B_D
|
| 397 |
-
|
| 398 |
|
| 399 |
if scalar_feature is not None:
|
| 400 |
raise NotImplementedError("Scalar feature is not implemented yet.")
|
| 401 |
|
| 402 |
-
|
| 403 |
affline_emb_B_D = self.affline_norm(affline_emb_B_D)
|
| 404 |
|
| 405 |
if self.use_cross_attn_mask:
|
|
|
|
| 34 |
Timesteps,
|
| 35 |
)
|
| 36 |
from position_embedding import LearnablePosEmbAxis, VideoRopePosition3DEmb
|
| 37 |
+
from .log import log
|
| 38 |
|
| 39 |
|
| 40 |
class GeneralDIT(nn.Module):
|
|
|
|
| 390 |
latent_condition_sigma=latent_condition_sigma,
|
| 391 |
)
|
| 392 |
# logging affline scale information
|
| 393 |
+
affline_scale_log.info = {}
|
| 394 |
|
| 395 |
timesteps_B_D, adaln_lora_B_3D = self.t_embedder(timesteps.flatten())
|
| 396 |
affline_emb_B_D = timesteps_B_D
|
| 397 |
+
affline_scale_log.info["timesteps_B_D"] = timesteps_B_D.detach()
|
| 398 |
|
| 399 |
if scalar_feature is not None:
|
| 400 |
raise NotImplementedError("Scalar feature is not implemented yet.")
|
| 401 |
|
| 402 |
+
affline_scale_log.info["affline_emb_B_D"] = affline_emb_B_D.detach()
|
| 403 |
affline_emb_B_D = self.affline_norm(affline_emb_B_D)
|
| 404 |
|
| 405 |
if self.use_cross_attn_mask:
|
cosmos1/models/diffusion/networks/general_dit_video_conditioned.py
CHANGED
|
@@ -22,7 +22,7 @@ from torch import nn
|
|
| 22 |
from conditioner import DataType
|
| 23 |
from blocks import TimestepEmbedding, Timesteps
|
| 24 |
from cosmos1.models.diffusion.networks.general_dit import GeneralDIT
|
| 25 |
-
from . import log
|
| 26 |
|
| 27 |
|
| 28 |
class VideoExtendGeneralDIT(GeneralDIT):
|
|
@@ -155,11 +155,11 @@ class VideoExtendGeneralDIT(GeneralDIT):
|
|
| 155 |
latent_condition_sigma=latent_condition_sigma,
|
| 156 |
)
|
| 157 |
# logging affline scale information
|
| 158 |
-
|
| 159 |
|
| 160 |
timesteps_B_D, adaln_lora_B_3D = self.t_embedder(timesteps.flatten())
|
| 161 |
affline_emb_B_D = timesteps_B_D
|
| 162 |
-
|
| 163 |
|
| 164 |
if scalar_feature is not None:
|
| 165 |
raise NotImplementedError("Scalar feature is not implemented yet.")
|
|
@@ -173,7 +173,7 @@ class VideoExtendGeneralDIT(GeneralDIT):
|
|
| 173 |
|
| 174 |
affline_augment_sigma_emb_B_D, _ = self.augment_sigma_embedder(condition_video_augment_sigma.flatten())
|
| 175 |
affline_emb_B_D = affline_emb_B_D + affline_augment_sigma_emb_B_D
|
| 176 |
-
|
| 177 |
affline_emb_B_D = self.affline_norm(affline_emb_B_D)
|
| 178 |
|
| 179 |
if self.use_cross_attn_mask:
|
|
|
|
| 22 |
from conditioner import DataType
|
| 23 |
from blocks import TimestepEmbedding, Timesteps
|
| 24 |
from cosmos1.models.diffusion.networks.general_dit import GeneralDIT
|
| 25 |
+
from .log import log
|
| 26 |
|
| 27 |
|
| 28 |
class VideoExtendGeneralDIT(GeneralDIT):
|
|
|
|
| 155 |
latent_condition_sigma=latent_condition_sigma,
|
| 156 |
)
|
| 157 |
# logging affline scale information
|
| 158 |
+
affline_scale_log.info = {}
|
| 159 |
|
| 160 |
timesteps_B_D, adaln_lora_B_3D = self.t_embedder(timesteps.flatten())
|
| 161 |
affline_emb_B_D = timesteps_B_D
|
| 162 |
+
affline_scale_log.info["timesteps_B_D"] = timesteps_B_D.detach()
|
| 163 |
|
| 164 |
if scalar_feature is not None:
|
| 165 |
raise NotImplementedError("Scalar feature is not implemented yet.")
|
|
|
|
| 173 |
|
| 174 |
affline_augment_sigma_emb_B_D, _ = self.augment_sigma_embedder(condition_video_augment_sigma.flatten())
|
| 175 |
affline_emb_B_D = affline_emb_B_D + affline_augment_sigma_emb_B_D
|
| 176 |
+
affline_scale_log.info["affline_emb_B_D"] = affline_emb_B_D.detach()
|
| 177 |
affline_emb_B_D = self.affline_norm(affline_emb_B_D)
|
| 178 |
|
| 179 |
if self.use_cross_attn_mask:
|
distributed.py
CHANGED
|
@@ -27,7 +27,7 @@ import pynvml
|
|
| 27 |
import torch
|
| 28 |
import torch.distributed as dist
|
| 29 |
|
| 30 |
-
from . import log
|
| 31 |
from .device import Device
|
| 32 |
|
| 33 |
|
|
|
|
| 27 |
import torch
|
| 28 |
import torch.distributed as dist
|
| 29 |
|
| 30 |
+
from .log import log
|
| 31 |
from .device import Device
|
| 32 |
|
| 33 |
|
face_blur_filter.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
import argparse
|
| 17 |
import os
|
| 18 |
|
| 19 |
-
from . import log
|
| 20 |
import numpy as np
|
| 21 |
import torch
|
| 22 |
from pytorch_retinaface.data import cfg_re50
|
|
|
|
| 16 |
import argparse
|
| 17 |
import os
|
| 18 |
|
| 19 |
+
from .log import log
|
| 20 |
import numpy as np
|
| 21 |
import torch
|
| 22 |
from pytorch_retinaface.data import cfg_re50
|
guardrail_blocklist_utils.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
import os
|
| 17 |
import re
|
| 18 |
|
| 19 |
-
from . import log
|
| 20 |
|
| 21 |
|
| 22 |
def read_keyword_list_from_dir(folder_path: str) -> list[str]:
|
|
|
|
| 16 |
import os
|
| 17 |
import re
|
| 18 |
|
| 19 |
+
from .log import log
|
| 20 |
|
| 21 |
|
| 22 |
def read_keyword_list_from_dir(folder_path: str) -> list[str]:
|
guardrail_core.py
CHANGED
|
@@ -17,7 +17,7 @@ from typing import Any, Tuple
|
|
| 17 |
|
| 18 |
import numpy as np
|
| 19 |
|
| 20 |
-
from . import log
|
| 21 |
|
| 22 |
|
| 23 |
class ContentSafetyGuardrail:
|
|
|
|
| 17 |
|
| 18 |
import numpy as np
|
| 19 |
|
| 20 |
+
from .log import log
|
| 21 |
|
| 22 |
|
| 23 |
class ContentSafetyGuardrail:
|
guardrail_io_utils.py
CHANGED
|
@@ -19,7 +19,7 @@ from dataclasses import dataclass
|
|
| 19 |
import imageio
|
| 20 |
import numpy as np
|
| 21 |
|
| 22 |
-
from . import log
|
| 23 |
|
| 24 |
|
| 25 |
@dataclass
|
|
|
|
| 19 |
import imageio
|
| 20 |
import numpy as np
|
| 21 |
|
| 22 |
+
from .log import log
|
| 23 |
|
| 24 |
|
| 25 |
@dataclass
|
inference_utils.py
CHANGED
|
@@ -24,7 +24,7 @@ import numpy as np
|
|
| 24 |
import torch
|
| 25 |
import torchvision.transforms.functional as transforms_F
|
| 26 |
|
| 27 |
-
from.
|
| 28 |
from .model_v2w import DiffusionV2WModel
|
| 29 |
from .config_helper import get_config_module, override
|
| 30 |
from .utils_io import load_from_fileobj
|
|
|
|
| 24 |
import torch
|
| 25 |
import torchvision.transforms.functional as transforms_F
|
| 26 |
|
| 27 |
+
from .model_t2w import DiffusionT2WModel
|
| 28 |
from .model_v2w import DiffusionV2WModel
|
| 29 |
from .config_helper import get_config_module, override
|
| 30 |
from .utils_io import load_from_fileobj
|
log.py
CHANGED
|
@@ -90,36 +90,39 @@ def _rank0_only_filter(record: Any) -> bool:
|
|
| 90 |
return not is_rank0
|
| 91 |
|
| 92 |
|
| 93 |
-
|
| 94 |
-
logger.opt(depth=1).bind(rank0_only=rank0_only).trace(message)
|
| 95 |
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
|
|
|
| 99 |
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
|
|
|
| 107 |
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
def error(message: str, rank0_only: bool = True) -> None:
|
| 114 |
-
logger.opt(depth=1).bind(rank0_only=rank0_only).error(message)
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
def critical(message: str, rank0_only: bool = True) -> None:
|
| 118 |
-
logger.opt(depth=1).bind(rank0_only=rank0_only).critical(message)
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
def exception(message: str, rank0_only: bool = True) -> None:
|
| 122 |
-
logger.opt(depth=1).bind(rank0_only=rank0_only).exception(message)
|
| 123 |
|
| 124 |
|
| 125 |
def _get_rank(group: Optional[dist.ProcessGroup] = None) -> int:
|
|
|
|
| 90 |
return not is_rank0
|
| 91 |
|
| 92 |
|
| 93 |
+
class log():
|
|
|
|
| 94 |
|
| 95 |
+
@staticmethod
|
| 96 |
+
def trace(message: str, rank0_only: bool = True) -> None:
|
| 97 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).trace(message)
|
| 98 |
|
| 99 |
+
@staticmethod
|
| 100 |
+
def debug(message: str, rank0_only: bool = True) -> None:
|
| 101 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).debug(message)
|
| 102 |
|
| 103 |
+
@staticmethod
|
| 104 |
+
def info(message: str, rank0_only: bool = True) -> None:
|
| 105 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).info(message)
|
| 106 |
|
| 107 |
+
@staticmethod
|
| 108 |
+
def success(message: str, rank0_only: bool = True) -> None:
|
| 109 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).success(message)
|
| 110 |
|
| 111 |
+
@staticmethod
|
| 112 |
+
def warning(message: str, rank0_only: bool = True) -> None:
|
| 113 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).warning(message)
|
| 114 |
|
| 115 |
+
@staticmethod
|
| 116 |
+
def error(message: str, rank0_only: bool = True) -> None:
|
| 117 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).error(message)
|
| 118 |
|
| 119 |
+
@staticmethod
|
| 120 |
+
def critical(message: str, rank0_only: bool = True) -> None:
|
| 121 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).critical(message)
|
| 122 |
|
| 123 |
+
@staticmethod
|
| 124 |
+
def exception(message: str, rank0_only: bool = True) -> None:
|
| 125 |
+
logger.opt(depth=1).bind(rank0_only=rank0_only).exception(message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
def _get_rank(group: Optional[dist.ProcessGroup] = None) -> int:
|
misc.py
CHANGED
|
@@ -24,7 +24,7 @@ import time
|
|
| 24 |
from contextlib import ContextDecorator
|
| 25 |
from typing import Any, Callable, TypeVar
|
| 26 |
|
| 27 |
-
from . import log
|
| 28 |
import numpy as np
|
| 29 |
import termcolor
|
| 30 |
import torch
|
|
|
|
| 24 |
from contextlib import ContextDecorator
|
| 25 |
from typing import Any, Callable, TypeVar
|
| 26 |
|
| 27 |
+
from .log import log
|
| 28 |
import numpy as np
|
| 29 |
import termcolor
|
| 30 |
import torch
|
model_config.py
CHANGED
|
@@ -25,7 +25,7 @@ from .ar_config_tokenizer import (
|
|
| 25 |
)
|
| 26 |
from .ar_tokenizer_image_text_tokenizer import ImageTextTokenizer
|
| 27 |
from .ar_tokenizer_text_tokenizer import TextTokenizer
|
| 28 |
-
from . import log
|
| 29 |
from .lazy_config_init import LazyCall as L
|
| 30 |
|
| 31 |
# Common architecture specifications
|
|
|
|
| 25 |
)
|
| 26 |
from .ar_tokenizer_image_text_tokenizer import ImageTextTokenizer
|
| 27 |
from .ar_tokenizer_text_tokenizer import TextTokenizer
|
| 28 |
+
from .log import log
|
| 29 |
from .lazy_config_init import LazyCall as L
|
| 30 |
|
| 31 |
# Common architecture specifications
|
model_t2w.py
CHANGED
|
@@ -27,7 +27,7 @@ from .blocks import FourierFeatures
|
|
| 27 |
from .pretrained_vae import BaseVAE
|
| 28 |
from . import misc
|
| 29 |
from . import instantiate as lazy_instantiate
|
| 30 |
-
from . import log
|
| 31 |
|
| 32 |
|
| 33 |
class EDMSDE:
|
|
|
|
| 27 |
from .pretrained_vae import BaseVAE
|
| 28 |
from . import misc
|
| 29 |
from . import instantiate as lazy_instantiate
|
| 30 |
+
from .log import log
|
| 31 |
|
| 32 |
|
| 33 |
class EDMSDE:
|
model_v2w.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
from dataclasses import dataclass
|
| 17 |
from typing import Callable, Dict, Optional, Tuple, Union
|
| 18 |
|
| 19 |
-
from . import log
|
| 20 |
import torch
|
| 21 |
from torch import Tensor
|
| 22 |
|
|
|
|
| 16 |
from dataclasses import dataclass
|
| 17 |
from typing import Callable, Dict, Optional, Tuple, Union
|
| 18 |
|
| 19 |
+
from .log import log
|
| 20 |
import torch
|
| 21 |
from torch import Tensor
|
| 22 |
|
presets.py
CHANGED
|
@@ -22,7 +22,7 @@ from .blocklist import Blocklist
|
|
| 22 |
from .guardrail_core import GuardrailRunner
|
| 23 |
from .face_blur_filter import RetinaFaceFilter
|
| 24 |
from .video_content_safety_filter import VideoContentSafetyFilter
|
| 25 |
-
from . import log
|
| 26 |
|
| 27 |
|
| 28 |
def create_text_guardrail_runner(checkpoint_dir: str) -> GuardrailRunner:
|
|
|
|
| 22 |
from .guardrail_core import GuardrailRunner
|
| 23 |
from .face_blur_filter import RetinaFaceFilter
|
| 24 |
from .video_content_safety_filter import VideoContentSafetyFilter
|
| 25 |
+
from .log import log
|
| 26 |
|
| 27 |
|
| 28 |
def create_text_guardrail_runner(checkpoint_dir: str) -> GuardrailRunner:
|
retinaface_utils.py
CHANGED
|
@@ -17,7 +17,7 @@ import numpy as np
|
|
| 17 |
import torch
|
| 18 |
from pytorch_retinaface.utils.nms.py_cpu_nms import py_cpu_nms
|
| 19 |
|
| 20 |
-
from . import log
|
| 21 |
|
| 22 |
|
| 23 |
# Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
|
|
|
|
| 17 |
import torch
|
| 18 |
from pytorch_retinaface.utils.nms.py_cpu_nms import py_cpu_nms
|
| 19 |
|
| 20 |
+
from .log import log
|
| 21 |
|
| 22 |
|
| 23 |
# Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
|
t5_text_encoder.py
CHANGED
|
@@ -19,7 +19,7 @@ import torch
|
|
| 19 |
import transformers
|
| 20 |
from transformers import T5EncoderModel, T5TokenizerFast
|
| 21 |
|
| 22 |
-
from . import log
|
| 23 |
|
| 24 |
transformers.logging.set_verbosity_error()
|
| 25 |
|
|
|
|
| 19 |
import transformers
|
| 20 |
from transformers import T5EncoderModel, T5TokenizerFast
|
| 21 |
|
| 22 |
+
from .log import log
|
| 23 |
|
| 24 |
transformers.logging.set_verbosity_error()
|
| 25 |
|
text2world.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
import argparse
|
| 17 |
import os
|
| 18 |
|
| 19 |
-
from . import log
|
| 20 |
import torch
|
| 21 |
|
| 22 |
from .inference_utils import add_common_arguments, validate_args
|
|
|
|
| 16 |
import argparse
|
| 17 |
import os
|
| 18 |
|
| 19 |
+
from .log import log
|
| 20 |
import torch
|
| 21 |
|
| 22 |
from .inference_utils import add_common_arguments, validate_args
|
text2world_hf.py
CHANGED
|
@@ -5,7 +5,7 @@ from transformers import PreTrainedModel, PretrainedConfig
|
|
| 5 |
|
| 6 |
from .inference_utils import add_common_arguments, validate_args
|
| 7 |
from .world_generation_pipeline import DiffusionText2WorldGenerationPipeline
|
| 8 |
-
from . import log
|
| 9 |
from . import misc
|
| 10 |
from .utils_io import read_prompts_from_file, save_video
|
| 11 |
|
|
|
|
| 5 |
|
| 6 |
from .inference_utils import add_common_arguments, validate_args
|
| 7 |
from .world_generation_pipeline import DiffusionText2WorldGenerationPipeline
|
| 8 |
+
from .log import log
|
| 9 |
from . import misc
|
| 10 |
from .utils_io import read_prompts_from_file, save_video
|
| 11 |
|
text2world_prompt_upsampler_inference.py
CHANGED
|
@@ -27,7 +27,7 @@ from .model_config import create_text_model_config
|
|
| 27 |
from .ar_model import AutoRegressiveModel
|
| 28 |
from .inference import chat_completion
|
| 29 |
from . import presets as guardrail_presets
|
| 30 |
-
from . import log
|
| 31 |
|
| 32 |
|
| 33 |
def create_prompt_upsampler(checkpoint_dir: str) -> AutoRegressiveModel:
|
|
|
|
| 27 |
from .ar_model import AutoRegressiveModel
|
| 28 |
from .inference import chat_completion
|
| 29 |
from . import presets as guardrail_presets
|
| 30 |
+
from .log import log
|
| 31 |
|
| 32 |
|
| 33 |
def create_prompt_upsampler(checkpoint_dir: str) -> AutoRegressiveModel:
|
video2world.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
import argparse
|
| 17 |
import os
|
| 18 |
|
| 19 |
-
from . import log
|
| 20 |
import torch
|
| 21 |
|
| 22 |
from .inference_utils import add_common_arguments, check_input_frames, validate_args
|
|
|
|
| 16 |
import argparse
|
| 17 |
import os
|
| 18 |
|
| 19 |
+
from .log import log
|
| 20 |
import torch
|
| 21 |
|
| 22 |
from .inference_utils import add_common_arguments, check_input_frames, validate_args
|
video2world_prompt_upsampler_inference.py
CHANGED
|
@@ -30,7 +30,7 @@ from .model_config import create_vision_language_model_config
|
|
| 30 |
from .ar_model import AutoRegressiveModel
|
| 31 |
from .inference import chat_completion
|
| 32 |
from . import presets as guardrail_presets
|
| 33 |
-
from . import log
|
| 34 |
from .utils_io import load_from_fileobj
|
| 35 |
|
| 36 |
|
|
|
|
| 30 |
from .ar_model import AutoRegressiveModel
|
| 31 |
from .inference import chat_completion
|
| 32 |
from . import presets as guardrail_presets
|
| 33 |
+
from .log import log
|
| 34 |
from .utils_io import load_from_fileobj
|
| 35 |
|
| 36 |
|
video_content_safety_filter.py
CHANGED
|
@@ -18,7 +18,7 @@ import json
|
|
| 18 |
import os
|
| 19 |
from typing import Iterable, Tuple, Union
|
| 20 |
|
| 21 |
-
from . import log
|
| 22 |
import torch
|
| 23 |
from PIL import Image
|
| 24 |
|
|
|
|
| 18 |
import os
|
| 19 |
from typing import Iterable, Tuple, Union
|
| 20 |
|
| 21 |
+
from .log import log
|
| 22 |
import torch
|
| 23 |
from PIL import Image
|
| 24 |
|
vit.py
CHANGED
|
@@ -28,7 +28,7 @@ import torch.nn as nn
|
|
| 28 |
|
| 29 |
from .ar_modules_normalization import create_norm
|
| 30 |
from .ar_transformer import TransformerBlock
|
| 31 |
-
from . import log
|
| 32 |
|
| 33 |
|
| 34 |
def get_vit_config(model_name: str) -> Mapping[str, Any]:
|
|
|
|
| 28 |
|
| 29 |
from .ar_modules_normalization import create_norm
|
| 30 |
from .ar_transformer import TransformerBlock
|
| 31 |
+
from .log import log
|
| 32 |
|
| 33 |
|
| 34 |
def get_vit_config(model_name: str) -> Mapping[str, Any]:
|
world_generation_pipeline.py
CHANGED
|
@@ -43,7 +43,7 @@ from .video2world_prompt_upsampler_inference import (
|
|
| 43 |
from .video2world_prompt_upsampler_inference import (
|
| 44 |
run_chat_completion as run_chat_completion_vlm,
|
| 45 |
)
|
| 46 |
-
from . import log
|
| 47 |
|
| 48 |
MODEL_NAME_DICT = {
|
| 49 |
"Cosmos-1.0-Diffusion-7B-Text2World": "Cosmos_1_0_Diffusion_Text2World_7B",
|
|
|
|
| 43 |
from .video2world_prompt_upsampler_inference import (
|
| 44 |
run_chat_completion as run_chat_completion_vlm,
|
| 45 |
)
|
| 46 |
+
from .log import log
|
| 47 |
|
| 48 |
MODEL_NAME_DICT = {
|
| 49 |
"Cosmos-1.0-Diffusion-7B-Text2World": "Cosmos_1_0_Diffusion_Text2World_7B",
|