# stable-diffusion-webui/modules/sd_hijack.py

import math
import os
import sys
import traceback
import torch
import numpy as np
from torch import einsum

from modules.shared import opts, device, cmd_opts

from ldm.util import default
from einops import rearrange
import ldm.modules.attention
import ldm.modules.diffusionmodules.model


# see https://github.com/basujindal/stable-diffusion/pull/117 for discussion
def split_cross_attention_forward_v1(self, x, context=None, mask=None):
    """Memory-saving replacement for ``CrossAttention.forward`` (first variant).

    Attention is evaluated two ``(batch * head)`` rows at a time, so only a
    small part of the score matrix exists at any moment.

    Args:
        self: the attention module; ``heads``, ``scale``, ``to_q``, ``to_k``,
            ``to_v`` and ``to_out`` are read from it.
        x: query input, treated as ``(batch, tokens, channels)`` by the
            reshapes below.
        context: key/value input; ``x`` is used when it is ``None``.
        mask: accepted for signature compatibility; never read.

    Returns:
        ``self.to_out`` applied to the attended values.
    """
    h = self.heads

    q = self.to_q(x)
    # Self-attention when the caller supplies no context.
    context = x if context is None else context
    # The softmax scale is applied exactly once, to the scores inside the
    # loop. Scaling k here as well would scale the logits by scale ** 2.
    k = self.to_k(context)
    v = self.to_v(context)
    del context, x

    def split_heads(t):
        # Layout change 'b n (h d) -> (b h) n d'.
        b, n, inner = t.shape
        d = inner // h
        return t.reshape(b, n, h, d).permute(0, 2, 1, 3).reshape(b * h, n, d)

    q, k, v = split_heads(q), split_heads(k), split_heads(v)

    # Allocate the result in the working dtype so self.to_out receives the
    # same dtype the projections produced (matters for half precision).
    r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)
    for i in range(0, q.shape[0], 2):
        end = i + 2
        s1 = einsum('b i d, b j d -> b i j', q[i:end], k[i:end])
        s1 *= self.scale

        s2 = s1.softmax(dim=-1)
        del s1

        r1[i:end] = einsum('b i j, b j d -> b i d', s2, v[i:end])
        del s2

    # Layout change '(b h) n d -> b n (h d)'.
    bh, n, d = r1.shape
    r2 = r1.reshape(bh // h, h, n, d).permute(0, 2, 1, 3).reshape(bh // h, n, h * d)
    del r1

    return self.to_out(r2)


# taken from https://github.com/Doggettx/stable-diffusion
def split_cross_attention_forward(self, x, context=None, mask=None):
    """Memory-aware replacement for ``CrossAttention.forward``.

    The query axis is processed in slices. On CUDA tensors the number of
    slices is derived from the currently free memory; on any other device
    the attention is computed in a single slice.

    Args:
        self: the attention module; ``heads``, ``scale``, ``to_q``, ``to_k``,
            ``to_v`` and ``to_out`` are read from it.
        x: query input, treated as ``(batch, tokens, channels)`` by the
            reshapes below.
        context: key/value input; ``x`` is used when it is ``None``.
        mask: accepted for signature compatibility; never read.

    Returns:
        ``self.to_out`` applied to the attended values.

    Raises:
        RuntimeError: if more than 64 slices would be needed to fit the score
            matrix into the free CUDA memory.
    """
    h = self.heads

    q_in = self.to_q(x)
    # Self-attention when the caller supplies no context.
    context = x if context is None else context
    # Fold the softmax scale into the keys once, so the per-slice scores
    # computed below are already scaled.
    k_in = self.to_k(context) * self.scale
    v_in = self.to_v(context)
    del context, x

    def split_heads(t):
        # Layout change 'b n (h d) -> (b h) n d'.
        b, n, inner = t.shape
        d = inner // h
        return t.reshape(b, n, h, d).permute(0, 2, 1, 3).reshape(b * h, n, d)

    q, k, v = split_heads(q_in), split_heads(k_in), split_heads(v_in)
    del q_in, k_in, v_in

    r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)

    steps = 1
    # The memory probes below are CUDA-only APIs; skip them elsewhere.
    if q.device.type == 'cuda':
        stats = torch.cuda.memory_stats(q.device)
        mem_active = stats['active_bytes.all.current']
        mem_reserved = stats['reserved_bytes.all.current']
        mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())
        # Memory the caching allocator holds but is not using counts as free.
        mem_free_torch = mem_reserved - mem_active
        mem_free_total = mem_free_cuda + mem_free_torch

        gb = 1024 ** 3
        # Size of the full (b*h, n_q, n_k) score matrix in bytes.
        tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size()
        modifier = 3 if q.element_size() == 2 else 2.5
        mem_required = tensor_size * modifier

        if mem_required > mem_free_total:
            # Smallest power of two that brings one slice under the budget.
            steps = 2 ** (math.ceil(math.log(mem_required / mem_free_total, 2)))

        if steps > 64:
            max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64
            raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). '
                               f'Need: {mem_required / 64 / gb:0.1f}GB free, Have:{mem_free_total / gb:0.1f}GB free')

    # When steps does not divide the query length evenly, everything is
    # processed as one slice.
    slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
    for i in range(0, q.shape[1], slice_size):
        end = i + slice_size
        s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k)

        s2 = s1.softmax(dim=-1, dtype=q.dtype)
        del s1

        r1[:, i:end] = einsum('b i j, b j d -> b i d', s2, v)
        del s2

    del q, k, v

    # Layout change '(b h) n d -> b n (h d)'.
    bh, n, d = r1.shape
    r2 = r1.reshape(bh // h, h, n, d).permute(0, 2, 1, 3).reshape(bh // h, n, h * d)
    del r1

    return self.to_out(r2)

def nonlinearity_hijack(x):
    """Apply swish, ``x * sigmoid(x)``, writing the result back into ``x``.

    The multiplication is done in place, so the tensor passed in is
    overwritten and is also the tensor that is returned.
    """
    return x.mul_(torch.sigmoid(x))

def cross_attention_attnblock_forward(self, x):
    """Memory-aware replacement for ``AttnBlock.forward``.

    Computes spatial self-attention over the ``h * w`` positions of ``x`` in
    slices along the query axis. On CUDA tensors the number of slices is
    derived from the currently free memory; on any other device a single
    slice is used.

    Args:
        self: the attention block; ``norm``, ``q``, ``k``, ``v`` and
            ``proj_out`` are read from it.
        x: input unpacked as ``(b, c, h, w)`` after the ``q`` projection.

    Returns:
        ``self.proj_out`` of the attended values, with ``x`` added to it.
    """
    h_ = self.norm(x)
    q1 = self.q(h_)
    k1 = self.k(h_)
    v = self.v(h_)
    del h_

    # compute attention
    b, c, h, w = q1.shape

    q = q1.reshape(b, c, h * w).permute(0, 2, 1)  # b,hw,c
    del q1

    k = k1.reshape(b, c, h * w)  # b,c,hw
    del k1

    # Loop-invariant: flatten the values and compute the scale only once.
    v1 = v.reshape(b, c, h * w)
    del v
    scale = int(c) ** (-0.5)

    h_ = torch.zeros_like(k, device=q.device)

    steps = 1
    # The memory probes below are CUDA-only APIs; skip them elsewhere.
    if q.device.type == 'cuda':
        stats = torch.cuda.memory_stats(q.device)
        mem_active = stats['active_bytes.all.current']
        mem_reserved = stats['reserved_bytes.all.current']
        mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())
        # Memory the caching allocator holds but is not using counts as free.
        mem_free_torch = mem_reserved - mem_active
        mem_free_total = mem_free_cuda + mem_free_torch

        # Size of the full (b, hw, hw) weight matrix in bytes.
        tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size()
        mem_required = tensor_size * 2.5

        if mem_required > mem_free_total:
            # Smallest power of two that brings one slice under the budget.
            steps = 2 ** (math.ceil(math.log(mem_required / mem_free_total, 2)))

    # When steps does not divide hw evenly, everything is one slice.
    slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
    for i in range(0, q.shape[1], slice_size):
        end = i + slice_size

        w1 = torch.bmm(q[:, i:end], k)  # b,slice,hw   w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
        w2 = w1 * scale
        del w1
        w3 = torch.nn.functional.softmax(w2, dim=2, dtype=q.dtype)
        del w2

        # attend to values
        w4 = w3.permute(0, 2, 1)  # b,hw,slice (first hw of k, second of q)
        del w3

        h_[:, :, i:end] = torch.bmm(v1, w4)  # b,c,slice   h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
        del w4

    del v1

    h2 = h_.reshape(b, c, h, w)
    del h_

    h3 = self.proj_out(h2)
    del h2

    h3 += x

    return h3

class StableDiffusionModelHijack:
    """Holds textual-inversion embeddings and patches a loaded model.

    NOTE: the attributes below are defined on the class, so the mutable ones
    (the dicts and ``comments``) are shared by every instance until an
    instance rebinds them.
    """

    # first token id of an embedding name -> list of (token ids, name)
    ids_lookup = {}
    # embedding name -> embedding tensor
    word_embeddings = {}
    # embedding name -> 4-hex-digit checksum string
    word_embeddings_checksums = {}
    # per-prompt embedding substitutions; set by the text encoder wrapper and
    # consumed (reset to None) by EmbeddingsWithFixes
    fixes = None
    # human-readable notes produced while encoding prompts
    comments = []
    # mtime of the embeddings directory at the last successful scan
    dir_mtime = None
    # flat list of the model's modules, filled in by hijack()
    layers = None
    # padding mode currently applied by apply_circular()
    circular_enabled = False

    def load_textual_inversion_embeddings(self, dirname, model):
        """(Re)load every embedding file found in ``dirname``.

        Nothing happens when the directory's mtime is not newer than at the
        previous scan. Files that fail to load are reported on stderr and
        skipped.

        Args:
            dirname: directory containing the embedding files.
            model: object whose ``cond_stage_model.tokenizer`` is used to
                tokenize each embedding's name (the file name without
                extension).
        """
        mt = os.path.getmtime(dirname)
        if self.dir_mtime is not None and mt <= self.dir_mtime:
            return

        self.dir_mtime = mt
        self.ids_lookup.clear()
        self.word_embeddings.clear()
        # Also drop checksums, otherwise entries for deleted files linger.
        self.word_embeddings_checksums.clear()

        tokenizer = model.cond_stage_model.tokenizer

        def const_hash(a):
            # Simple deterministic 32-bit rolling hash over the int-truncated values.
            r = 0
            for v in a:
                r = (r * 281 ^ int(v) * 997) & 0xFFFFFFFF
            return r

        def process_file(path, filename):
            name = os.path.splitext(filename)[0]

            # NOTE(security): torch.load unpickles the file; only place
            # embedding files from trusted sources in this directory.
            data = torch.load(path)

            # Explicit raises rather than assert, so the checks survive `python -O`.
            if isinstance(data, dict) and 'string_to_param' in data:
                # textual inversion embeddings
                param_dict = data['string_to_param']
                if hasattr(param_dict, '_parameters'):
                    param_dict = getattr(param_dict, '_parameters')  # fix for torch 1.12.1 loading saved file from torch 1.11
                if len(param_dict) != 1:
                    raise ValueError('embedding file has multiple terms in it')
                emb = next(iter(param_dict.values()))
            elif isinstance(data, dict) and data and isinstance(next(iter(data.values())), torch.Tensor):
                # plain {token: tensor} files
                if len(data) != 1:
                    raise ValueError('embedding file has multiple terms in it')

                emb = next(iter(data.values()))
                if len(emb.shape) == 1:
                    emb = emb.unsqueeze(0)
            else:
                # Previously this fell through and failed on an unbound `emb`.
                raise ValueError(f"couldn't identify {filename} as a textual inversion embedding")

            self.word_embeddings[name] = emb.detach()
            self.word_embeddings_checksums[name] = f'{const_hash(emb.reshape(-1))&0xffff:04x}'

            ids = tokenizer([name], add_special_tokens=False)['input_ids'][0]

            # Index by the first token so prompt scanning needs a single dict lookup.
            first_id = ids[0]
            if first_id not in self.ids_lookup:
                self.ids_lookup[first_id] = []
            self.ids_lookup[first_id].append((ids, name))

        for fn in os.listdir(dirname):
            try:
                process_file(os.path.join(dirname, fn), fn)
            except Exception:
                # Best effort: one broken file must not prevent loading the rest.
                print(f"Error loading emedding {fn}:", file=sys.stderr)
                print(traceback.format_exc(), file=sys.stderr)
                continue

        print(f"Loaded a total of {len(self.word_embeddings)} text inversion embeddings.")

    def hijack(self, m):
        """Install the custom embedding/text-encoder wrappers on model ``m``.

        Depending on the command line options, the attention forward
        functions of the ``ldm`` classes are replaced as well. Finally every
        module reachable from ``m`` is recorded in ``self.layers``.
        """
        model_embeddings = m.cond_stage_model.transformer.text_model.embeddings

        model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
        m.cond_stage_model = FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)

        if cmd_opts.opt_split_attention:
            ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward
            ldm.modules.diffusionmodules.model.nonlinearity = nonlinearity_hijack
            ldm.modules.diffusionmodules.model.AttnBlock.forward = cross_attention_attnblock_forward
        elif cmd_opts.opt_split_attention_v1:
            ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_v1

        def flatten(el):
            # Depth-first list of `el` followed by all of its descendants.
            res = [el]
            for child in el.children():
                res += flatten(child)
            return res

        self.layers = flatten(m)

    def apply_circular(self, enable):
        """Switch every ``torch.nn.Conv2d`` in ``self.layers`` between
        ``'circular'`` and ``'zeros'`` padding; a no-op when the requested
        state is already active."""
        if self.circular_enabled == enable:
            return

        self.circular_enabled = enable

        padding_mode = 'circular' if enable else 'zeros'
        for layer in self.layers:
            # Exact type match, as before: Conv2d subclasses are left alone.
            if type(layer) == torch.nn.Conv2d:
                layer.padding_mode = padding_mode


class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
    """Text-encoder wrapper adding emphasis brackets and custom embeddings.

    Wraps an encoder exposing ``tokenizer``, ``max_length`` and
    ``transformer``. Tokens whose vocabulary text contains ``(``/``)`` or
    ``[``/``]`` are removed from the prompt and instead change a running
    multiplier applied to the tokens after them. Names registered in
    ``hijack.ids_lookup`` are replaced by placeholder tokens and recorded in
    ``hijack.fixes``.
    """

    def __init__(self, wrapped, hijack):
        super().__init__()
        self.wrapped = wrapped
        self.hijack = hijack
        self.tokenizer = wrapped.tokenizer
        self.max_length = wrapped.max_length
        # token id -> factor applied to the running multiplier
        self.token_mults = {}

        bracket_chars = '()[]'
        for text, ident in self.tokenizer.get_vocab().items():
            if not any(ch in text for ch in bracket_chars):
                continue

            mult = 1.0
            for c in text:
                # '(' and ']' strengthen, ')' and '[' weaken, by 1.1 each.
                if c == '[':
                    mult /= 1.1
                if c == ']':
                    mult *= 1.1
                if c == '(':
                    mult *= 1.1
                if c == ')':
                    mult /= 1.1

            if mult != 1.0:
                self.token_mults[ident] = mult

    def forward(self, text):
        """Encode a batch of prompts.

        Args:
            text: the prompts, passed as-is to the wrapped tokenizer.

        Returns:
            ``last_hidden_state`` of the wrapped transformer, multiplied
            per token by the emphasis multipliers and rescaled so the
            overall mean is unchanged.

        Side effects:
            Resets and fills ``hijack.fixes`` (one list per prompt of
            ``(offset, name)`` pairs, where ``offset`` indexes the prompt
            tokens *before* the start token is prepended) and
            ``hijack.comments``.
        """
        self.hijack.fixes = []
        self.hijack.comments = []
        remade_batch_tokens = []
        id_start = self.wrapped.tokenizer.bos_token_id
        id_end = self.wrapped.tokenizer.eos_token_id
        # Slots left for prompt tokens once the start and end tokens are
        # accounted for. This must be subtracted only once: subtracting it
        # again at each use shortened the encoded sequence by two tokens.
        maxlen = self.wrapped.max_length - 2
        used_custom_terms = []

        # Identical prompts in one batch are processed only once.
        cache = {}
        batch_tokens = self.wrapped.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]
        batch_multipliers = []
        for tokens in batch_tokens:
            tuple_tokens = tuple(tokens)

            if tuple_tokens in cache:
                remade_tokens, fixes, multipliers = cache[tuple_tokens]
            else:
                fixes = []
                remade_tokens = []
                multipliers = []
                mult = 1.0

                i = 0
                while i < len(tokens):
                    token = tokens[i]

                    possible_matches = self.hijack.ids_lookup.get(token, None)

                    mult_change = self.token_mults.get(token) if opts.enable_emphasis else None
                    if mult_change is not None:
                        # Bracket token: adjust the multiplier, emit nothing.
                        mult *= mult_change
                    elif possible_matches is None:
                        remade_tokens.append(token)
                        multipliers.append(mult)
                    else:
                        found = False
                        for ids, word in possible_matches:
                            if tokens[i:i+len(ids)] == ids:
                                # Reserve one placeholder token per embedding vector.
                                emb_len = int(self.hijack.word_embeddings[word].shape[0])
                                fixes.append((len(remade_tokens), word))
                                remade_tokens += [0] * emb_len
                                multipliers += [mult] * emb_len
                                i += len(ids) - 1
                                found = True
                                used_custom_terms.append((word, self.hijack.word_embeddings_checksums[word]))
                                break

                        if not found:
                            remade_tokens.append(token)
                            multipliers.append(mult)

                    i += 1

                if len(remade_tokens) > maxlen:
                    vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
                    ovf = remade_tokens[maxlen:]
                    overflowing_words = [vocab.get(int(x), "") for x in ovf]
                    overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))

                    self.hijack.comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")

                # Pad with the end token, then wrap in start/end markers so
                # every row is exactly max_length long.
                remade_tokens = remade_tokens + [id_end] * (maxlen - len(remade_tokens))
                remade_tokens = [id_start] + remade_tokens[0:maxlen] + [id_end]
                cache[tuple_tokens] = (remade_tokens, fixes, multipliers)

            # Start, end and padding positions keep a neutral multiplier.
            multipliers = multipliers + [1.0] * (maxlen - len(multipliers))
            multipliers = [1.0] + multipliers[0:maxlen] + [1.0]

            remade_batch_tokens.append(remade_tokens)
            self.hijack.fixes.append(fixes)
            batch_multipliers.append(multipliers)

        if len(used_custom_terms) > 0:
            self.hijack.comments.append("Used custom terms: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))

        tokens = torch.asarray(remade_batch_tokens).to(device)
        outputs = self.wrapped.transformer(input_ids=tokens)
        z = outputs.last_hidden_state

        # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
        batch_multipliers = torch.asarray(batch_multipliers).to(device)
        original_mean = z.mean()
        z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
        new_mean = z.mean()
        z *= original_mean / new_mean

        return z


class EmbeddingsWithFixes(torch.nn.Module):
    """Token-embedding wrapper that splices custom embedding vectors in.

    ``embeddings`` is the object holding ``fixes`` (one list of
    ``(offset, name)`` pairs per batch row, or ``None``) and
    ``word_embeddings`` (name -> tensor of embedding vectors).
    """

    def __init__(self, wrapped, embeddings):
        super().__init__()
        self.wrapped = wrapped
        self.embeddings = embeddings

    def forward(self, input_ids):
        """Embed ``input_ids`` with ``wrapped`` and apply any pending fixes.

        The pending fixes are consumed: ``embeddings.fixes`` is reset to
        ``None`` before the wrapped layer is called.
        """
        batch_fixes = self.embeddings.fixes
        self.embeddings.fixes = None

        inputs_embeds = self.wrapped(input_ids)

        if batch_fixes is not None:
            for fixes, tensor in zip(batch_fixes, inputs_embeds):
                for offset, word in fixes:
                    emb = self.embeddings.word_embeddings[word]
                    # Offsets are recorded relative to the prompt tokens, before
                    # the start token is prepended, so the placeholders sit one
                    # position further along in the embedded sequence.
                    start = offset + 1
                    emb_len = min(tensor.shape[0] - start, emb.shape[0])
                    if emb_len <= 0:
                        # The placeholder was truncated away; a negative length
                        # would otherwise produce mismatched slice shapes.
                        continue
                    tensor[start:start + emb_len] = emb[0:emb_len]

        return inputs_embeds


def add_circular_option_to_conv_2d():
    """Patch ``torch.nn.Conv2d.__init__`` so new layers use circular padding.

    The replacement constructor forwards all arguments to the original one
    and forces ``padding_mode='circular'``. The patch is global and stays in
    place for every ``Conv2d`` created afterwards.
    """
    conv2d_constructor = torch.nn.Conv2d.__init__

    def conv2d_constructor_circular(self, *args, **kwargs):
        # Overwrite rather than add the keyword: passing it next to a
        # caller-supplied padding_mode raised "multiple values for keyword".
        kwargs['padding_mode'] = 'circular'
        return conv2d_constructor(self, *args, **kwargs)

    torch.nn.Conv2d.__init__ = conv2d_constructor_circular


# Module-level StableDiffusionModelHijack instance, created at import time.
model_hijack = StableDiffusionModelHijack()
Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`import math`
split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00			`import os`
			`import sys`
			`import traceback`
			`import torch`
			`import numpy as np`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`from torch import einsum`
split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`from modules.shared import opts, device, cmd_opts`

			`from ldm.util import default`
			`from einops import rearrange`
			`import ldm.modules.attention`
Complete cross attention update 2022-09-13 19:29:56 +08:00			`import ldm.modules.diffusionmodules.model`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00
added --opt-split-attention-v1 2022-09-11 05:29:10 +08:00
			`# see https://github.com/basujindal/stable-diffusion/pull/117 for discussion`
			`def split_cross_attention_forward_v1(self, x, context=None, mask=None):`
			`h = self.heads`

			`q = self.to_q(x)`
			`context = default(context, x)`
Move scale multiplication to the front 2022-09-18 06:05:31 +08:00			`k = self.to_k(context) * self.scale`
added --opt-split-attention-v1 2022-09-11 05:29:10 +08:00			`v = self.to_v(context)`
			`del context, x`

			`q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))`

			`r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device)`
			`for i in range(0, q.shape[0], 2):`
			`end = i + 2`
			`s1 = einsum('b i d, b j d -> b i j', q[i:end], k[i:end])`
			`s1 *= self.scale`

			`s2 = s1.softmax(dim=-1)`
			`del s1`

			`r1[i:end] = einsum('b i j, b j d -> b i d', s2, v[i:end])`
			`del s2`

			`r2 = rearrange(r1, '(b h) n d -> b n (h d)', h=h)`
			`del r1`

			`return self.to_out(r2)`


Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`# taken from https://github.com/Doggettx/stable-diffusion`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`def split_cross_attention_forward(self, x, context=None, mask=None):`
			`h = self.heads`

Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`q_in = self.to_q(x)`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`context = default(context, x)`
Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`k_in = self.to_k(context)`
			`v_in = self.to_v(context)`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`del context, x`

Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in))`
			`del q_in, k_in, v_in`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00
fix typo 2022-09-15 18:48:13 +08:00			`r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)`
Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00
			`stats = torch.cuda.memory_stats(q.device)`
			`mem_active = stats['active_bytes.all.current']`
			`mem_reserved = stats['reserved_bytes.all.current']`
			`mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())`
			`mem_free_torch = mem_reserved - mem_active`
			`mem_free_total = mem_free_cuda + mem_free_torch`

			`gb = 1024 ** 3`
Update cross attention to the newest version 2022-09-12 21:48:21 +08:00			`tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size()`
			`modifier = 3 if q.element_size() == 2 else 2.5`
			`mem_required = tensor_size * modifier`
Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`steps = 1`

			`if mem_required > mem_free_total:`
			`steps = 2 ** (math.ceil(math.log(mem_required / mem_free_total, 2)))`
			`# print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB "`
			`# f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}")`

			`if steps > 64:`
			`max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64`
			`raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). '`
			`f'Need: {mem_required / 64 / gb:0.1f}GB free, Have:{mem_free_total / gb:0.1f}GB free')`

			`slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]`
			`for i in range(0, q.shape[1], slice_size):`
			`end = i + slice_size`
Move scale multiplication to the front 2022-09-18 06:05:31 +08:00			`s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k)`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00
Update cross attention to the newest version 2022-09-12 21:48:21 +08:00			`s2 = s1.softmax(dim=-1, dtype=q.dtype)`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`del s1`

Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`r1[:, i:end] = einsum('b i j, b j d -> b i d', s2, v)`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`del s2`

Update to cross attention from https://github.com/Doggettx/stable-diffusion #219 2022-09-10 17:06:19 +08:00			`del q, k, v`

add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`r2 = rearrange(r1, '(b h) n d -> b n (h d)', h=h)`
			`del r1`

			`return self.to_out(r2)`
split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00
Complete cross attention update 2022-09-13 19:29:56 +08:00			`def nonlinearity_hijack(x):`
			`# swish`
			`t = torch.sigmoid(x)`
			`x *= t`
			`del t`

			`return x`

			`def cross_attention_attnblock_forward(self, x):`
			`h_ = x`
			`h_ = self.norm(h_)`
			`q1 = self.q(h_)`
			`k1 = self.k(h_)`
			`v = self.v(h_)`

			`# compute attention`
			`b, c, h, w = q1.shape`

			`q2 = q1.reshape(b, c, h*w)`
			`del q1`

			`q = q2.permute(0, 2, 1) # b,hw,c`
			`del q2`

			`k = k1.reshape(b, c, h*w) # b,c,hw`
			`del k1`

			`h_ = torch.zeros_like(k, device=q.device)`

			`stats = torch.cuda.memory_stats(q.device)`
			`mem_active = stats['active_bytes.all.current']`
			`mem_reserved = stats['reserved_bytes.all.current']`
			`mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())`
			`mem_free_torch = mem_reserved - mem_active`
			`mem_free_total = mem_free_cuda + mem_free_torch`

			`tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size()`
			`mem_required = tensor_size * 2.5`
			`steps = 1`

			`if mem_required > mem_free_total:`
			`steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))`

			`slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]`
			`for i in range(0, q.shape[1], slice_size):`
			`end = i + slice_size`

			`w1 = torch.bmm(q[:, i:end], k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]`
			`w2 = w1 * (int(c)**(-0.5))`
			`del w1`
			`w3 = torch.nn.functional.softmax(w2, dim=2, dtype=q.dtype)`
			`del w2`

			`# attend to values`
			`v1 = v.reshape(b, c, h*w)`
			`w4 = w3.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q)`
			`del w3`

			`h_[:, :, i:end] = torch.bmm(v1, w4) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]`
			`del v1, w4`

			`h2 = h_.reshape(b, c, h, w)`
			`del h_`

			`h3 = self.proj_out(h2)`
			`del h2`

			`h3 += x`

			`return h3`
split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00
			`class StableDiffusionModelHijack:`
			`ids_lookup = {}`
			`word_embeddings = {}`
			`word_embeddings_checksums = {}`
			`fixes = None`
			`comments = []`
			`dir_mtime = None`
re-integrated tiling option as a UI element 2022-09-05 08:25:37 +08:00			`layers = None`
			`circular_enabled = False`
split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00
			`def load_textual_inversion_embeddings(self, dirname, model):`
			`mt = os.path.getmtime(dirname)`
			`if self.dir_mtime is not None and mt <= self.dir_mtime:`
			`return`

			`self.dir_mtime = mt`
			`self.ids_lookup.clear()`
			`self.word_embeddings.clear()`

			`tokenizer = model.cond_stage_model.tokenizer`

			`def const_hash(a):`
			`r = 0`
			`for v in a:`
			`r = (r * 281 ^ int(v) * 997) & 0xFFFFFFFF`
			`return r`

			`def process_file(path, filename):`
			`name = os.path.splitext(filename)[0]`

			`data = torch.load(path)`
support for sd-concepts as alternatives for textual inversion #151 2022-09-08 20:36:50 +08:00
			`# textual inversion embeddings`
			`if 'string_to_param' in data:`
			`param_dict = data['string_to_param']`
			`if hasattr(param_dict, '_parameters'):`
			`param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11`
			`assert len(param_dict) == 1, 'embedding file has multiple terms in it'`
			`emb = next(iter(param_dict.items()))[1]`
			`elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:`
			`assert len(data.keys()) == 1, 'embedding file has multiple terms in it'`

			`emb = next(iter(data.values()))`
			`if len(emb.shape) == 1:`
			`emb = emb.unsqueeze(0)`

split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00			`self.word_embeddings[name] = emb.detach()`
			`self.word_embeddings_checksums[name] = f'{const_hash(emb.reshape(-1))&0xffff:04x}'`

			`ids = tokenizer([name], add_special_tokens=False)['input_ids'][0]`

			`first_id = ids[0]`
			`if first_id not in self.ids_lookup:`
			`self.ids_lookup[first_id] = []`
			`self.ids_lookup[first_id].append((ids, name))`

			`for fn in os.listdir(dirname):`
			`try:`
			`process_file(os.path.join(dirname, fn), fn)`
			`except Exception:`
			`print(f"Error loading emedding {fn}:", file=sys.stderr)`
			`print(traceback.format_exc(), file=sys.stderr)`
			`continue`

			`print(f"Loaded a total of {len(self.word_embeddings)} text inversion embeddings.")`

			`def hijack(self, m):`
			`model_embeddings = m.cond_stage_model.transformer.text_model.embeddings`

			`model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)`
			`m.cond_stage_model = FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)`

add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00			`if cmd_opts.opt_split_attention:`
			`ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward`
Complete cross attention update 2022-09-13 19:29:56 +08:00			`ldm.modules.diffusionmodules.model.nonlinearity = nonlinearity_hijack`
			`ldm.modules.diffusionmodules.model.AttnBlock.forward = cross_attention_attnblock_forward`
added --opt-split-attention-v1 2022-09-11 05:29:10 +08:00			`elif cmd_opts.opt_split_attention_v1:`
			`ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_v1`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00
re-integrated tiling option as a UI element 2022-09-05 08:25:37 +08:00			`def flatten(el):`
			`flattened = [flatten(children) for children in el.children()]`
			`res = [el]`
			`for c in flattened:`
			`res += c`
			`return res`

			`self.layers = flatten(m)`

			`def apply_circular(self, enable):`
			`if self.circular_enabled == enable:`
			`return`

			`self.circular_enabled = enable`

			`for layer in [layer for layer in self.layers if type(layer) == torch.nn.Conv2d]:`
			`layer.padding_mode = 'circular' if enable else 'zeros'`

split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00
			`class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):`
			`def __init__(self, wrapped, hijack):`
			`super().__init__()`
			`self.wrapped = wrapped`
			`self.hijack = hijack`
			`self.tokenizer = wrapped.tokenizer`
			`self.max_length = wrapped.max_length`
			`self.token_mults = {}`

			`tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]`
			`for text, ident in tokens_with_parens:`
			`mult = 1.0`
			`for c in text:`
			`if c == '[':`
			`mult /= 1.1`
			`if c == ']':`
			`mult *= 1.1`
			`if c == '(':`
			`mult *= 1.1`
			`if c == ')':`
			`mult /= 1.1`

			`if mult != 1.0:`
			`self.token_mults[ident] = mult`

			`def forward(self, text):`
			`self.hijack.fixes = []`
			`self.hijack.comments = []`
			`remade_batch_tokens = []`
			`id_start = self.wrapped.tokenizer.bos_token_id`
			`id_end = self.wrapped.tokenizer.eos_token_id`
			`maxlen = self.wrapped.max_length - 2`
			`used_custom_terms = []`

			`cache = {}`
			`batch_tokens = self.wrapped.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]`
			`batch_multipliers = []`
			`for tokens in batch_tokens:`
			`tuple_tokens = tuple(tokens)`

			`if tuple_tokens in cache:`
			`remade_tokens, fixes, multipliers = cache[tuple_tokens]`
			`else:`
			`fixes = []`
			`remade_tokens = []`
			`multipliers = []`
			`mult = 1.0`

			`i = 0`
			`while i < len(tokens):`
			`token = tokens[i]`

			`possible_matches = self.hijack.ids_lookup.get(token, None)`

			`mult_change = self.token_mults.get(token) if opts.enable_emphasis else None`
			`if mult_change is not None:`
			`mult *= mult_change`
			`elif possible_matches is None:`
			`remade_tokens.append(token)`
			`multipliers.append(mult)`
			`else:`
			`found = False`
			`for ids, word in possible_matches:`
			`if tokens[i:i+len(ids)] == ids:`
			`emb_len = int(self.hijack.word_embeddings[word].shape[0])`
			`fixes.append((len(remade_tokens), word))`
			`remade_tokens += [0] * emb_len`
			`multipliers += [mult] * emb_len`
			`i += len(ids) - 1`
			`found = True`
			`used_custom_terms.append((word, self.hijack.word_embeddings_checksums[word]))`
			`break`

			`if not found:`
			`remade_tokens.append(token)`
			`multipliers.append(mult)`

			`i += 1`

			`if len(remade_tokens) > maxlen - 2:`
			`vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}`
			`ovf = remade_tokens[maxlen - 2:]`
			`overflowing_words = [vocab.get(int(x), "") for x in ovf]`
			`overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))`

			`self.hijack.comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")`

			`remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))`
			`remade_tokens = [id_start] + remade_tokens[0:maxlen-2] + [id_end]`
			`cache[tuple_tokens] = (remade_tokens, fixes, multipliers)`

			`multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))`
			`multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]`

			`remade_batch_tokens.append(remade_tokens)`
			`self.hijack.fixes.append(fixes)`
			`batch_multipliers.append(multipliers)`

			`if len(used_custom_terms) > 0:`
			`self.hijack.comments.append("Used custom terms: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))`

			`tokens = torch.asarray(remade_batch_tokens).to(device)`
			`outputs = self.wrapped.transformer(input_ids=tokens)`
			`z = outputs.last_hidden_state`

			`# restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise`
directly convert list to tensor 2022-09-08 02:40:32 +08:00			`batch_multipliers = torch.asarray(batch_multipliers).to(device)`
split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00			`original_mean = z.mean()`
			`z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)`
			`new_mean = z.mean()`
			`z *= original_mean / new_mean`

			`return z`


			`class EmbeddingsWithFixes(torch.nn.Module):`
			`def __init__(self, wrapped, embeddings):`
			`super().__init__()`
			`self.wrapped = wrapped`
			`self.embeddings = embeddings`

			`def forward(self, input_ids):`
			`batch_fixes = self.embeddings.fixes`
			`self.embeddings.fixes = None`

			`inputs_embeds = self.wrapped(input_ids)`

			`if batch_fixes is not None:`
			`for fixes, tensor in zip(batch_fixes, inputs_embeds):`
			`for offset, word in fixes:`
			`emb = self.embeddings.word_embeddings[word]`
			`emb_len = min(tensor.shape[0]-offset, emb.shape[0])`
			`tensor[offset:offset+emb_len] = self.embeddings.word_embeddings[word][0:emb_len]`

			`return inputs_embeds`


add an option to enable tiling image generation 2022-09-05 07:16:36 +08:00			`def add_circular_option_to_conv_2d():`
			`conv2d_constructor = torch.nn.Conv2d.__init__`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00
add an option to enable tiling image generation 2022-09-05 07:16:36 +08:00			`def conv2d_constructor_circular(self, *args, **kwargs):`
			`return conv2d_constructor(self, *args, padding_mode='circular', **kwargs)`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00
add an option to enable tiling image generation 2022-09-05 07:16:36 +08:00			`torch.nn.Conv2d.__init__ = conv2d_constructor_circular`
add split attention layer optimization from https://github.com/basujindal/stable-diffusion/pull/117 2022-09-05 06:41:20 +08:00

split codebase into multiple files; to anyone this affects negatively: sorry 2022-09-03 17:08:45 +08:00			`model_hijack = StableDiffusionModelHijack()`