# Source code for neural_de.transformations.diffusion.diffpure_config

import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

# Location where the pre-trained diffusion weights are downloaded and stored.
ENHANCER = "diffpure"
MODEL_FILENAME = "256x256_diffusion_uncond.pt"
NEURALDE_MODEL_DOWNLOAD_PATH = Path.home() / ".neuralde" / ENHANCER / MODEL_FILENAME

# Automatic channel_mult values depending on the input image size (pixels).
# Each tuple multiplies the base channel count per residual level of the U-net.
CHANNEL_MULT = {
    512: (0.5, 1, 1, 2, 2, 4, 4),
    256: (1, 1, 2, 2, 4, 4),
    128: (1, 1, 2, 3, 4),
    64: (1, 2, 3, 4),
}


@dataclass
class DiffPureConfig:
    """
    A dataclass to configure and provide parameters for the internal diffusion model of
    diffusion_enhancer.

    Most of the parameters are available to allow a custom usage of a different pre-trained
    diffusion model, based on the U-net architecture and code. The ones which can be
    modified with the provided model are ``t``, ``t_delta`` and ``sample_step``.

    Attributes:
        weights_path: Path of the pre-trained weights, to provide custom weights files.
        img_shape: shape of each input image of the diffusion model (by default
            (3, 256, 256)). Dimensions are channel-first.
        attention_resolutions: resolutions, in pixels, of the attention layers of the model.
        num_classes: (by default None). Number of classes the diffusion model is trained on.
        dims: images are 1D, 2D or 3D (by default 2).
        learn_sigma: (by default True). If True, the output channel number will be 6
            instead of 3.
        num_channels: (by default 256). Base channel number for the layers of the
            diffusion model architecture.
        num_head_channels: (by default 64). Number of channels per head of the attention
            blocks.
        num_res_blocks: (by default 2). Number of residual blocks of the architecture.
        resblock_updown: (by default True). Whether to apply a downsampling after each
            residual block of the underlying U-net architecture.
        use_fp16: (by default True). Use 16-bit floating-point precision. If cuda is not
            available, will be set as False (fp32).
        use_scale_shift_norm: (by default True). Normalisation of the output of each block
            of layers in the U-net architecture.
        num_heads: (by default 4). Number of attention heads.
        num_heads_upsample: (by default -1). Number of heads for upsampling attention
            layers.
        channel_mult: (by default None). Will be computed if not provided. Depending on
            the resolution, multiplies the base channel number to get the final one for
            each residual layer of the U-net model.
        dropout: (by default 0.0). Dropout rate.
        use_new_attention_order: (by default False). If True, the U-net will use
            QKVAttention layers; if False, will use QKVAttentionLegacy.
        t: (by default 150). Number of diffusion steps applied for each image.
        t_delta: (by default 15). Strength of the noise added before the diffusion
            process.
        use_bm: (by default False). NOTE(review): originally annotated ``float`` but
            defaulted to ``False`` and documented with a doubt marker — annotated as
            ``bool`` here.
        use_checkpoint: (by default False). Gradient checkpointing for training.
        conv_resample: (by default True). Use learned convolutions for upsampling and
            downsampling. If False, interpolation (nearest) will be used.
        sample_step: (by default 1). Number of times the diffusion process (noise
            addition + denoising) is repeated for each image.
        rand_t: (by default False). If True, add random noise before denoising. The noise
            is sampled uniformly between -t_delta and +t_delta.
    """
    weights_path: Path = NEURALDE_MODEL_DOWNLOAD_PATH
    img_shape: tuple = (3, 256, 256)
    attention_resolutions: list[int] = field(default_factory=lambda: [32, 16, 8])
    # Fields defaulting to None are annotated Optional[...] to match their defaults.
    num_classes: Optional[int] = None
    dims: int = 2  # 1D, 2D or 3D images
    learn_sigma: bool = True
    num_channels: int = 256
    num_head_channels: int = 64
    num_res_blocks: int = 2
    resblock_updown: bool = True
    use_fp16: bool = True
    use_scale_shift_norm: bool = True
    num_heads: int = 4
    num_heads_upsample: int = -1
    channel_mult: Optional[tuple] = None
    dropout: float = 0.0
    use_new_attention_order: bool = False
    t: int = 150
    t_delta: int = 15
    use_bm: bool = False  # was annotated `float`; the False default shows a boolean flag
    use_checkpoint: bool = False
    conv_resample: bool = True
    sample_step: int = 1
    rand_t: bool = False

    def __post_init__(self):
        """Post-init parameter inference to avoid redundant parameters."""
        # The (square) model image size is the last dimension of the
        # channel-first input shape, e.g. 256 for (3, 256, 256).
        self.image_size: int = self.img_shape[-1]