stable-diffusion-webui/modules/sd_models_xl.py

from __future__ import annotations

import torch

import sgm.models.diffusion
import sgm.modules.diffusionmodules.denoiser_scaling
import sgm.modules.diffusionmodules.discretizer
from modules import devices, shared, prompt_parser
from modules import torch_utils


def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: prompt_parser.SdConditioning | list[str]):
    """Build the SDXL conditioning for a batch of prompts.

    ``batch`` is passed to the conditioner as the ``txt`` entry. Its optional
    ``width``, ``height`` and ``is_negative_prompt`` attributes are read when
    present and default to 1024, 1024 and False otherwise (e.g. for a plain
    list of strings).

    Returns the result of calling ``self.conditioner`` on the assembled dict.
    """
    # set ucg_rate to 0.0 on every embedder before conditioning
    for embedder in self.conditioner.embedders:
        embedder.ucg_rate = 0.0

    prompt_count = len(batch)
    negative = getattr(batch, 'is_negative_prompt', False)
    height = getattr(batch, 'height', 1024)
    width = getattr(batch, 'width', 1024)

    if negative:
        aesthetic_score = shared.opts.sdxl_refiner_low_aesthetic_score
    else:
        aesthetic_score = shared.opts.sdxl_refiner_high_aesthetic_score

    def per_prompt(values):
        # one identical row of `values` for each prompt in the batch
        row = torch.tensor(values, device=devices.device, dtype=devices.dtype)
        return row.repeat(prompt_count, 1)

    sdxl_conds = {
        "txt": batch,
        "original_size_as_tuple": per_prompt([height, width]),
        "crop_coords_top_left": per_prompt([shared.opts.sdxl_crop_top, shared.opts.sdxl_crop_left]),
        "target_size_as_tuple": per_prompt([height, width]),
        "aesthetic_score": per_prompt([aesthetic_score]),
    }

    # a negative prompt batch made only of empty strings gets its 'txt' embeddings forced to zero
    if negative and all(text == '' for text in batch):
        zeroed = ['txt']
    else:
        zeroed = []

    return self.conditioner(sdxl_conds, force_zero_embeddings=zeroed)


def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond):
    """Call the wrapped model, first appending ``cond['c_concat']`` to ``x`` when the model expects it.

    If the weight stored under ``diffusion_model.input_blocks.0.0.weight`` has
    size 9 along dim 1 (the sdxl-inpaint case), the tensors in
    ``cond['c_concat']`` are concatenated to ``x`` along dim 1 before the
    call. Otherwise ``x`` is passed through unchanged.

    Returns whatever ``self.model(x, t, cond)`` returns.
    """
    first_weight_key = 'diffusion_model.input_blocks.0.0.weight'

    try:
        # direct lookup: avoids rebuilding the state dict of the whole model on every call
        diffusion_model_input = self.model.get_parameter(first_weight_key)
    except AttributeError:
        # not reachable as a parameter under that path: fall back to the state dict lookup
        diffusion_model_input = self.model.state_dict().get(first_weight_key, None)

    if diffusion_model_input is not None and diffusion_model_input.shape[1] == 9:
        x = torch.cat([x] + cond['c_concat'], dim=1)

    return self.model(x, t, cond)


def get_first_stage_encoding(self, x):
    """Return ``x`` unchanged.

    SDXL's encode_first_stage does everything, so get_first_stage_encoding is just there for compatibility.
    """
    return x


# attach the functions defined above to DiffusionEngine as methods
sgm.models.diffusion.DiffusionEngine.get_learned_conditioning = get_learned_conditioning
sgm.models.diffusion.DiffusionEngine.apply_model = apply_model
sgm.models.diffusion.DiffusionEngine.get_first_stage_encoding = get_first_stage_encoding


def encode_embedding_init_text(self: sgm.modules.GeneralConditioner, init_text, nvpt):
    """Encode ``init_text`` with every embedder that supports it and join the results.

    Each embedder exposing ``encode_embedding_init_text`` is called with
    ``(init_text, nvpt)``; embedders without that method are skipped. The
    results are concatenated along dim 1, in embedder order.
    """
    pieces = [
        embedder.encode_embedding_init_text(init_text, nvpt)
        for embedder in self.embedders
        if hasattr(embedder, 'encode_embedding_init_text')
    ]

    return torch.cat(pieces, dim=1)


def tokenize(self: sgm.modules.GeneralConditioner, texts):
    """Tokenize ``texts`` with the first embedder that has a ``tokenize`` method.

    Raises AssertionError when no embedder provides one.
    """
    tokenizer = next((embedder for embedder in self.embedders if hasattr(embedder, 'tokenize')), None)

    if tokenizer is None:
        raise AssertionError('no tokenizer available')

    return tokenizer.tokenize(texts)


def process_texts(self, texts):
    """Delegate to the first embedder that has a ``process_texts`` method.

    Returns that embedder's result, or None when no embedder provides the method.
    """
    handler = next((embedder for embedder in self.embedders if hasattr(embedder, 'process_texts')), None)

    if handler is None:
        return None

    return handler.process_texts(texts)


def get_target_prompt_token_count(self, token_count):
    """Delegate to the first embedder that has a ``get_target_prompt_token_count`` method.

    Returns that embedder's result, or None when no embedder provides the method.
    """
    handler = next(
        (embedder for embedder in self.embedders if hasattr(embedder, 'get_target_prompt_token_count')),
        None,
    )

    if handler is None:
        return None

    return handler.get_target_prompt_token_count(token_count)


# these additions to GeneralConditioner make it possible to use it as model.cond_stage_model from SD1.5 in existing code
sgm.modules.GeneralConditioner.encode_embedding_init_text = encode_embedding_init_text
sgm.modules.GeneralConditioner.tokenize = tokenize
sgm.modules.GeneralConditioner.process_texts = process_texts
sgm.modules.GeneralConditioner.get_target_prompt_token_count = get_target_prompt_token_count


def extend_sdxl(model):
    """Add the attributes the rest of the codebase expects from an SD1.5 model to an SDXL model.

    Sets, on ``model`` and its sub-objects: the diffusion model's ``dtype``,
    ``conditioning_key``, ``cond_stage_key``, ``parameterization``,
    ``alphas_cumprod`` and an empty ``conditioner.wrapped`` module.
    """
    unet = model.model.diffusion_model
    unet.dtype = torch_utils.get_param(unet).dtype

    model.model.conditioning_key = 'crossattn'
    model.cond_stage_key = 'txt'
    # model.cond_stage_model will be set in sd_hijack

    # "v" when the denoiser uses VScaling, "eps" for everything else
    if isinstance(model.denoiser.scaling, sgm.modules.diffusionmodules.denoiser_scaling.VScaling):
        model.parameterization = "v"
    else:
        model.parameterization = "eps"

    ddpm = sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization()
    model.alphas_cumprod = torch.asarray(ddpm.alphas_cumprod, device=devices.device, dtype=torch.float32)

    model.conditioner.wrapped = torch.nn.Module()


# give these sgm modules a module-level `print` bound to shared.ldm_print,
# so print() calls made inside them go through it instead of the builtin
sgm.modules.attention.print = shared.ldm_print
sgm.modules.diffusionmodules.model.print = shared.ldm_print
sgm.modules.diffusionmodules.openaimodel.print = shared.ldm_print
sgm.modules.encoders.modules.print = shared.ldm_print

# this gets the code to load the vanilla attention that we override
sgm.modules.attention.SDP_IS_AVAILABLE = True
sgm.modules.attention.XFORMERS_IS_AVAILABLE = False
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00			`from __future__ import annotations`

			`import torch`

			`import sgm.models.diffusion`
			`import sgm.modules.diffusionmodules.denoiser_scaling`
			`import sgm.modules.diffusionmodules.discretizer`
SDXL support 2023-07-13 04:52:43 +08:00			`from modules import devices, shared, prompt_parser`
change import statements for #14478 2024-01-01 03:38:30 +08:00			`from modules import torch_utils`
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00

SDXL support 2023-07-13 04:52:43 +08:00			`def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: prompt_parser.SdConditioning \| list[str]):`
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00			`for embedder in self.conditioner.embedders:`
			`embedder.ucg_rate = 0.0`

Update sd_models_xl.py Fix width/height not getting fed into the conditioning 2023-07-20 23:22:52 +08:00			`width = getattr(batch, 'width', 1024)`
			`height = getattr(batch, 'height', 1024)`
initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`is_negative_prompt = getattr(batch, 'is_negative_prompt', False)`
			`aesthetic_score = shared.opts.sdxl_refiner_low_aesthetic_score if is_negative_prompt else shared.opts.sdxl_refiner_high_aesthetic_score`

			`devices_args = dict(device=devices.device, dtype=devices.dtype)`
SDXL support 2023-07-13 04:52:43 +08:00
			`sdxl_conds = {`
			`"txt": batch,`
initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`"original_size_as_tuple": torch.tensor([height, width], **devices_args).repeat(len(batch), 1),`
			`"crop_coords_top_left": torch.tensor([shared.opts.sdxl_crop_top, shared.opts.sdxl_crop_left], **devices_args).repeat(len(batch), 1),`
			`"target_size_as_tuple": torch.tensor([height, width], **devices_args).repeat(len(batch), 1),`
			`"aesthetic_score": torch.tensor([aesthetic_score], **devices_args).repeat(len(batch), 1),`
SDXL support 2023-07-13 04:52:43 +08:00			`}`

initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`force_zero_negative_prompt = is_negative_prompt and all(x == '' for x in batch)`
fix CLIP doing the unneeded normalization revert SD2.1 back to use the original repo add SDXL's force_zero_embeddings to negative prompt 2023-07-13 16:35:52 +08:00			`c = self.conditioner(sdxl_conds, force_zero_embeddings=['txt'] if force_zero_negative_prompt else [])`
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00
			`return c`


			`def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond):`
support for sdxl-inpaint model 2023-12-21 20:15:51 +08:00			`sd = self.model.state_dict()`
			`diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)`
add some codes for robust 2023-12-27 10:20:56 +08:00			`if diffusion_model_input is not None:`
			`if diffusion_model_input.shape[1] == 9:`
			`x = torch.cat([x] + cond['c_concat'], dim=1)`
support for sdxl-inpaint model 2023-12-21 20:15:51 +08:00
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00			`return self.model(x, t, cond)`


fix broken img2img 2023-07-13 21:18:39 +08:00			`def get_first_stage_encoding(self, x): # SDXL's encode_first_stage does everything so get_first_stage_encoding is just there for compatibility`
			`return x`

initial SDXL refiner support 2023-07-14 14:16:01 +08:00
			`sgm.models.diffusion.DiffusionEngine.get_learned_conditioning = get_learned_conditioning`
			`sgm.models.diffusion.DiffusionEngine.apply_model = apply_model`
			`sgm.models.diffusion.DiffusionEngine.get_first_stage_encoding = get_first_stage_encoding`


			`def encode_embedding_init_text(self: sgm.modules.GeneralConditioner, init_text, nvpt):`
			`res = []`

			`for embedder in [embedder for embedder in self.embedders if hasattr(embedder, 'encode_embedding_init_text')]:`
			`encoded = embedder.encode_embedding_init_text(init_text, nvpt)`
			`res.append(encoded)`

			`return torch.cat(res, dim=1)`


textual inversion support for SDXL 2023-07-29 20:15:06 +08:00			`def tokenize(self: sgm.modules.GeneralConditioner, texts):`
			`for embedder in [embedder for embedder in self.embedders if hasattr(embedder, 'tokenize')]:`
			`return embedder.tokenize(texts)`

			`raise AssertionError('no tokenizer available')`



initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`def process_texts(self, texts):`
			`for embedder in [embedder for embedder in self.embedders if hasattr(embedder, 'process_texts')]:`
			`return embedder.process_texts(texts)`


			`def get_target_prompt_token_count(self, token_count):`
			`for embedder in [embedder for embedder in self.embedders if hasattr(embedder, 'get_target_prompt_token_count')]:`
			`return embedder.get_target_prompt_token_count(token_count)`


			`# those additions to GeneralConditioner make it possible to use it as model.cond_stage_model from SD1.5 in exist`
			`sgm.modules.GeneralConditioner.encode_embedding_init_text = encode_embedding_init_text`
textual inversion support for SDXL 2023-07-29 20:15:06 +08:00			`sgm.modules.GeneralConditioner.tokenize = tokenize`
initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`sgm.modules.GeneralConditioner.process_texts = process_texts`
			`sgm.modules.GeneralConditioner.get_target_prompt_token_count = get_target_prompt_token_count`


getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00			`def extend_sdxl(model):`
initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`"""this adds a bunch of parameters to make SDXL model look a bit more like SD1.5 to the rest of the codebase."""`

change import statements for #14478 2024-01-01 03:38:30 +08:00			`dtype = torch_utils.get_param(model.model.diffusion_model).dtype`
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00			`model.model.diffusion_model.dtype = dtype`
			`model.model.conditioning_key = 'crossattn'`
initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`model.cond_stage_key = 'txt'`
			`# model.cond_stage_model will be set in sd_hijack`
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00
			`model.parameterization = "v" if isinstance(model.denoiser.scaling, sgm.modules.diffusionmodules.denoiser_scaling.VScaling) else "eps"`

			`discretization = sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization()`
Fix alphas cumprod 2023-10-25 12:54:28 +08:00			`model.alphas_cumprod = torch.asarray(discretization.alphas_cumprod, device=devices.device, dtype=torch.float32)`
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00
initial SDXL refiner support 2023-07-14 14:16:01 +08:00			`model.conditioner.wrapped = torch.nn.Module()`
SDXL support 2023-07-13 04:52:43 +08:00
getting SD2.1 to run on SDXL repo 2023-07-12 02:16:43 +08:00
option to keep multiple models in memory 2023-08-01 05:24:48 +08:00			`sgm.modules.attention.print = shared.ldm_print`
			`sgm.modules.diffusionmodules.model.print = shared.ldm_print`
			`sgm.modules.diffusionmodules.openaimodel.print = shared.ldm_print`
			`sgm.modules.encoders.modules.print = shared.ldm_print`
SDXL support 2023-07-13 04:52:43 +08:00
get attention optimizations to work 2023-07-13 14:30:33 +08:00			`# this gets the code to load the vanilla attention that we override`
			`sgm.modules.attention.SDP_IS_AVAILABLE = True`
lint 2023-07-13 14:38:54 +08:00			`sgm.modules.attention.XFORMERS_IS_AVAILABLE = False`