stable-diffusion-webui/modules/textual_inversion/dataset.py

import os
import numpy as np
import PIL
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

import random
import tqdm
from modules import devices, shared
import re

# Matches a leading run of digits and/or hyphens together with any whitespace that follows it.
# Used to strip numeric prefixes from an image file name before it is turned into caption text.
re_numbers_at_start = re.compile(r"^[-\d]+\s*")


class DatasetEntry:
    """Plain record describing one training sample.

    ``filename``, ``latent`` and ``filename_text`` are supplied by the caller;
    ``cond`` and ``cond_text`` always start out as ``None`` and are meant to be
    filled in after construction.
    """

    def __init__(self, filename=None, latent=None, filename_text=None):
        # Caller-provided fields, stored exactly as given.
        self.filename, self.latent, self.filename_text = filename, latent, filename_text

        # Conditioning data is attached later by whoever owns the entry.
        self.cond = self.cond_text = None


class PersonalizedBase(Dataset):
    """Dataset of pre-encoded images and caption text used for training.

    On construction every file in ``data_root`` that PIL can open is resized to
    ``width`` x ``height``, encoded with ``model``'s first stage and stored
    (on the CPU) together with its caption text in a ``DatasetEntry``.

    Indexing returns a whole batch: ``dataset[i]`` is a list of ``batch_size``
    entries, and ``len(dataset)`` is ``len(images) * repeats // batch_size``.
    """

    def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False, batch_size=1):
        """Load the prompt templates and encode every image found in ``data_root``.

        Args:
            data_root: Directory whose files are tried as training images.
            width, height: Size every image is resized to before encoding.
            repeats: Multiplier applied to the number of images when computing
                the dataset length.
            flip_p: Probability handed to ``RandomHorizontalFlip``.
            placeholder_token: Text substituted for ``[name]`` in templates.
            model: Object providing ``encode_first_stage`` and
                ``get_first_stage_encoding``.
            device: Device the image tensor is moved to before encoding.
            template_file: Text file with one prompt template per line.
            include_cond: When true, a prompt is picked and run through
                ``shared.sd_model.cond_stage_model`` once per image up front.
            batch_size: Number of entries returned by each ``__getitem__``.

        Raises:
            AssertionError: If ``data_root`` is empty/None or no file in it
                could be loaded as an image.
        """
        word_regex = shared.opts.dataset_filename_word_regex
        re_word = re.compile(word_regex) if word_regex else None

        self.placeholder_token = placeholder_token

        self.batch_size = batch_size
        self.width = width
        self.height = height
        # NOTE(review): the flip transform is created here but not applied anywhere in this class.
        self.flip = transforms.RandomHorizontalFlip(p=flip_p)

        self.dataset = []

        # Read with an explicit encoding, matching how caption files are read,
        # instead of depending on the platform default.
        with open(template_file, "r", encoding="utf8") as file:
            self.lines = [line.strip() for line in file]

        assert data_root, 'dataset directory not specified'

        cond_model = shared.sd_model.cond_stage_model

        self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]
        print("Preparing dataset...")
        for path in tqdm.tqdm(self.image_paths):
            try:
                image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC)
            except Exception:
                # Deliberate best effort: the directory listing is unfiltered, so
                # anything PIL cannot open (caption .txt files included) is skipped.
                continue

            filename_text = self._read_caption(path, re_word)

            # Rescale 0..255 pixel values to -1..1 and move the channel axis to the front.
            npimage = np.array(image).astype(np.uint8)
            npimage = (npimage / 127.5 - 1.0).astype(np.float32)

            torchdata = torch.from_numpy(npimage).to(device=device, dtype=torch.float32)
            torchdata = torch.moveaxis(torchdata, 2, 0)

            init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze()
            # Latents are parked on the CPU after encoding.
            init_latent = init_latent.to(devices.cpu)

            entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent)

            if include_cond:
                entry.cond_text = self.create_text(filename_text)
                entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)

            self.dataset.append(entry)

        # A single image is a valid dataset; only a completely empty one is an error.
        assert len(self.dataset) > 0, "No images have been found in the dataset."
        self.length = len(self.dataset) * repeats // batch_size

        self.initial_indexes = np.arange(len(self.dataset))
        self.indexes = None
        self.shuffle()

    @staticmethod
    def _read_caption(path, re_word):
        """Return the caption text for the image at ``path``.

        A ``.txt`` file with the same base name wins and is returned verbatim.
        Otherwise the text is derived from the file name: the extension and any
        leading digits/hyphens are removed and, if ``re_word`` is given, its
        matches are joined with ``shared.opts.dataset_filename_join_string``.
        """
        text_filename = os.path.splitext(path)[0] + ".txt"
        if os.path.exists(text_filename):
            with open(text_filename, "r", encoding="utf8") as file:
                return file.read()

        filename_text = os.path.splitext(os.path.basename(path))[0]
        filename_text = re_numbers_at_start.sub('', filename_text)
        if re_word:
            tokens = re_word.findall(filename_text)
            filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens)

        return filename_text

    def shuffle(self):
        """Replace ``self.indexes`` with a fresh random permutation of the entry indexes."""
        # Convert to a NumPy array before indexing: a one-element tensor would
        # otherwise be treated as a plain integer index and yield a scalar
        # instead of a length-1 array.
        permutation = torch.randperm(self.initial_indexes.shape[0]).numpy()
        self.indexes = self.initial_indexes[permutation]

    def create_text(self, filename_text):
        """Pick a random template line and fill in ``[name]`` and ``[filewords]``."""
        text = random.choice(self.lines)
        text = text.replace("[name]", self.placeholder_token)
        text = text.replace("[filewords]", filename_text)
        return text

    def __len__(self):
        """Return the number of batches, as computed in ``__init__``."""
        return self.length

    def __getitem__(self, i):
        """Return batch number ``i`` as a list of ``batch_size`` entries.

        The flat sample position wraps around the shuffled index array, and the
        array is reshuffled each time a position lands on the start of a pass.
        Entries without a precomputed ``cond`` get a newly drawn ``cond_text``
        on every access.
        """
        res = []

        for j in range(self.batch_size):
            position = i * self.batch_size + j
            if position % len(self.indexes) == 0:
                self.shuffle()

            index = self.indexes[position % len(self.indexes)]
            entry = self.dataset[index]

            if entry.cond is None:
                entry.cond_text = self.create_text(entry.filename_text)

            res.append(entry)

        return res
initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`import os`
			`import numpy as np`
			`import PIL`
			`import torch`
			`from PIL import Image`
			`from torch.utils.data import Dataset`
			`from torchvision import transforms`

			`import random`
			`import tqdm`
add an option to unload models during hypernetwork training to save VRAM 2022-10-12 00:03:08 +08:00			`from modules import devices, shared`
add support for gelbooru tags in filenames for textual inversion 2022-10-04 13:52:11 +08:00			`import re`

train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`re_numbers_at_start = re.compile(r"^[-\d]+\s*")`


			`class DatasetEntry:`
			`def __init__(self, filename=None, latent=None, filename_text=None):`
			`self.filename = filename`
			`self.latent = latent`
			`self.filename_text = filename_text`
			`self.cond = None`
			`self.cond_text = None`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00

			`class PersonalizedBase(Dataset):`
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False, batch_size=1):`
			`re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex) > 0 else None`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
			`self.placeholder_token = placeholder_token`

add option to use batch size for training 2022-10-15 14:24:59 +08:00			`self.batch_size = batch_size`
Custom Width and Height 2022-10-10 21:35:35 +08:00			`self.width = width`
			`self.height = height`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`self.flip = transforms.RandomHorizontalFlip(p=flip_p)`

			`self.dataset = []`

			`with open(template_file, "r") as file:`
			`lines = [x.strip() for x in file.readlines()]`

			`self.lines = lines`

			`assert data_root, 'dataset directory not specified'`

add an option to unload models during hypernetwork training to save VRAM 2022-10-12 00:03:08 +08:00			`cond_model = shared.sd_model.cond_stage_model`

Switched to exception handling 2022-10-11 16:32:46 +08:00			`self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`print("Preparing dataset...")`
			`for path in tqdm.tqdm(self.image_paths):`
Switched to exception handling 2022-10-11 16:32:46 +08:00			`try:`
			`image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC)`
			`except Exception:`
			`continue`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`text_filename = os.path.splitext(path)[0] + ".txt"`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`filename = os.path.basename(path)`
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00
			`if os.path.exists(text_filename):`
			`with open(text_filename, "r", encoding="utf8") as file:`
			`filename_text = file.read()`
			`else:`
			`filename_text = os.path.splitext(filename)[0]`
			`filename_text = re.sub(re_numbers_at_start, '', filename_text)`
			`if re_word:`
			`tokens = re_word.findall(filename_text)`
			`filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens)`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
			`npimage = np.array(image).astype(np.uint8)`
			`npimage = (npimage / 127.5 - 1.0).astype(np.float32)`

			`torchdata = torch.from_numpy(npimage).to(device=device, dtype=torch.float32)`
			`torchdata = torch.moveaxis(torchdata, 2, 0)`

			`init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze()`
keep textual inversion dataset latents in CPU memory to save a bit of VRAM 2022-10-03 03:59:01 +08:00			`init_latent = init_latent.to(devices.cpu)`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent)`

add an option to unload models during hypernetwork training to save VRAM 2022-10-12 00:03:08 +08:00			`if include_cond:`
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`entry.cond_text = self.create_text(filename_text)`
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)`
add an option to unload models during hypernetwork training to save VRAM 2022-10-12 00:03:08 +08:00
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`self.dataset.append(entry)`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
Raise an assertion error if no training images have been found. 2022-10-15 04:45:26 +08:00			`assert len(self.dataset) > 1, "No images have been found in the dataset."`
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`self.length = len(self.dataset) * repeats // batch_size`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`self.initial_indexes = np.arange(len(self.dataset))`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`self.indexes = None`
			`self.shuffle()`

			`def shuffle(self):`
			`self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])]`

train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`def create_text(self, filename_text):`
add an option to unload models during hypernetwork training to save VRAM 2022-10-12 00:03:08 +08:00			`text = random.choice(self.lines)`
			`text = text.replace("[name]", self.placeholder_token)`
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00			`text = text.replace("[filewords]", filename_text)`
add an option to unload models during hypernetwork training to save VRAM 2022-10-12 00:03:08 +08:00			`return text`

initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`def __len__(self):`
			`return self.length`

			`def __getitem__(self, i):`
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`res = []`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`for j in range(self.batch_size):`
			`position = i * self.batch_size + j`
			`if position % len(self.indexes) == 0:`
			`self.shuffle()`
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`index = self.indexes[position % len(self.indexes)]`
			`entry = self.dataset[index]`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`if entry.cond is None:`
			`entry.cond_text = self.create_text(entry.filename_text)`
train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options 2022-10-13 01:49:47 +08:00
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`res.append(entry)`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00
add option to use batch size for training 2022-10-15 14:24:59 +08:00			`return res`