stable-diffusion-webui/modules/devices.py

import sys, os, shlex
import contextlib
import torch
from modules import errors


# has_mps is only available in nightly PyTorch (for now) and on macOS 12.3+.
# Check for the attribute with `getattr`, then try a real transfer, for compatibility.
def has_mps() -> bool:
    """Report whether a tensor can actually be moved to the "mps" device.

    Returns False when torch has no truthy `has_mps` attribute, or when moving
    a one-element tensor to the "mps" device raises any exception.
    """
    if getattr(torch, 'has_mps', False):
        try:
            # The attribute alone is not trusted; probe with a real transfer.
            torch.zeros(1).to(torch.device("mps"))
        except Exception:
            return False
        return True

    return False


def extract_device_id(args, name):
    """Return the element that follows the first entry of *args* containing *name*.

    Matching uses the `in` operator on each entry, so for string arguments a
    substring match is enough (e.g. name "--device-id" matches "--device-id").

    Args:
        args: sequence of command-line style arguments.
        name: the flag to look for.

    Returns:
        The element right after the first matching entry, or None when no
        entry matches or the matching entry is the last one (no value follows).
    """
    for position, argument in enumerate(args):
        if name in argument:
            value_position = position + 1
            # A flag given as the very last argument has no value; report that
            # as "not found" instead of raising IndexError.
            if value_position < len(args):
                return args[value_position]
            return None

    return None


def get_optimal_device():
    """Pick the torch device to use: CUDA first, then MPS, otherwise the CPU.

    When CUDA is available, `shared.cmd_opts.device_id` selects a specific
    card ("cuda:<id>"); if it is None the plain "cuda" device is returned.
    """
    if not torch.cuda.is_available():
        # No CUDA: fall back to MPS when it is usable, else to the shared cpu device.
        return torch.device("mps") if has_mps() else cpu

    # Imported here rather than at module level, as in the rest of this file.
    from modules import shared

    selected_id = shared.cmd_opts.device_id
    device_name = "cuda" if selected_id is None else f"cuda:{selected_id}"
    return torch.device(device_name)


def torch_gc():
    """Empty the CUDA cache and run IPC collection on the configured CUDA device.

    Does nothing when CUDA is unavailable. The target device is
    "cuda:<id>" when `shared.cmd_opts.device_id` is set, otherwise "cuda".
    """
    if not torch.cuda.is_available():
        return

    from modules import shared

    selected_id = shared.cmd_opts.device_id
    target = "cuda" if selected_id is None else f"cuda:{selected_id}"

    # Make the chosen device current for the duration of both calls.
    with torch.cuda.device(target):
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


def enable_tf32():
    """Turn on the `allow_tf32` flag for CUDA matmul and cuDNN when CUDA is available."""
    if not torch.cuda.is_available():
        return

    # Same order as before: the matmul backend first, then cuDNN.
    for backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
        backend.allow_tf32 = True


# Try to switch TF32 on at import time. The call goes through errors.run with a
# label; presumably that helper reports a failure under the label instead of
# aborting the import -- TODO confirm in modules.errors.
errors.run(enable_tf32, "Enabling TF32")

# Shared CPU device object: returned by get_optimal_device() as the last
# fallback, and used as the place where noise is generated when the active
# device is MPS.
cpu = torch.device("cpu")
# Device slots, all starting as None. Nothing in this file assigns them, so
# presumably start-up code elsewhere fills them in -- verify against callers.
# randn()/randn_without_seed() read `device.type`, so `device` must be set
# before they are called.
device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
# Default dtypes (half precision). autocast() compares `dtype` with torch.float32.
dtype = torch.float16
# NOTE(review): presumably the dtype used for the VAE -- not read in this file.
dtype_vae = torch.float16


def randn(seed, shape):
    """Return standard-normal noise of *shape* on the active device, seeded with *seed*.

    The global torch RNG is seeded on every path, so that later calls to
    randn_without_seed() continue the stream started here.

    Args:
        seed: value passed to torch.manual_seed.
        shape: shape of the tensor to generate.

    Returns:
        A tensor of random normal values located on the module-level `device`.
    """
    torch.manual_seed(seed)

    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used,
    # so on MPS the noise is drawn from the (just seeded) CPU RNG and then moved to the device.
    if device.type == 'mps':
        return torch.randn(shape, device=cpu).to(device)

    return torch.randn(shape, device=device)


def randn_without_seed(shape):
    """Return standard-normal noise of *shape* on the active device without reseeding.

    Values come from the current state of the global RNG, so each call yields
    new noise that continues whatever stream the last seeding started.

    Args:
        shape: shape of the tensor to generate.

    Returns:
        A tensor of random normal values located on the module-level `device`.
    """
    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
    if device.type == 'mps':
        # Draw from the global CPU RNG. Building a fresh torch.Generator here
        # would restart from the same default seed on every call and hand back
        # identical "noise" each time.
        return torch.randn(shape, device=cpu).to(device)

    return torch.randn(shape, device=device)


def autocast(disable=False):
    """Return the context manager to wrap inference in.

    A no-op context is returned when *disable* is true, when the module-level
    `dtype` is torch.float32, or when `shared.cmd_opts.precision` is "full".
    In every other case the result is `torch.autocast("cuda")`.
    """
    from modules import shared

    # Checks are evaluated left to right and stop at the first one that holds.
    skip_autocast = (
        disable
        or dtype == torch.float32
        or shared.cmd_opts.precision == "full"
    )
    if not skip_autocast:
        return torch.autocast("cuda")

    return contextlib.nullcontext()


# MPS workaround for https://github.com/pytorch/pytorch/issues/79383
def mps_contiguous(input_tensor, device):
    """Return *input_tensor* made contiguous when *device* is MPS, else unchanged."""
    if device.type != 'mps':
        return input_tensor

    return input_tensor.contiguous()


def mps_contiguous_to(input_tensor, device):
    """Move *input_tensor* to *device*, applying mps_contiguous() to it first."""
    prepared = mps_contiguous(input_tensor, device)
    return prepared.to(device)
implement CUDA device selection by ID 2022-10-22 06:11:07 +08:00			`import sys, os, shlex`
send all three of GFPGAN's and codeformer's models to CPU memory instead of just one for #1283 2022-10-04 17:32:22 +08:00			`import contextlib`
Modular device management 2022-09-11 13:11:27 +08:00			`import torch`
Allow TF32 in CUDA for increased performance #279 2022-09-12 21:34:13 +08:00			`from modules import errors`

change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00
Fix wrong mps selection below MasOS 12.3 2022-11-12 11:02:40 +08:00			`# has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.`
			# check `getattr` and try it for compatibility
			`def has_mps() -> bool:`
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00			`if not getattr(torch, 'has_mps', False):`
			`return False`
Fix wrong mps selection below MasOS 12.3 2022-11-12 11:02:40 +08:00			`try:`
			`torch.zeros(1).to(torch.device("mps"))`
			`return True`
			`except Exception:`
			`return False`
Modular device management 2022-09-11 13:11:27 +08:00
CLIP interrogator 2022-09-11 23:48:36 +08:00
implement CUDA device selection by ID 2022-10-22 06:11:07 +08:00			`def extract_device_id(args, name):`
			`for x in range(len(args)):`
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00			`if name in args[x]:`
			`return args[x + 1]`

implement CUDA device selection by ID 2022-10-22 06:11:07 +08:00			`return None`
CLIP interrogator 2022-09-11 23:48:36 +08:00
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00
Modular device management 2022-09-11 13:11:27 +08:00			`def get_optimal_device():`
CLIP interrogator 2022-09-11 23:48:36 +08:00			`if torch.cuda.is_available():`
remove parsing command line from devices.py 2022-10-22 19:04:14 +08:00			`from modules import shared`

			`device_id = shared.cmd_opts.device_id`

implement CUDA device selection by ID 2022-10-22 06:11:07 +08:00			`if device_id is not None:`
			`cuda_device = f"cuda:{device_id}"`
			`return torch.device(cuda_device)`
			`else:`
			`return torch.device("cuda")`
CLIP interrogator 2022-09-11 23:48:36 +08:00
Fix wrong mps selection below MasOS 12.3 2022-11-12 11:02:40 +08:00			`if has_mps():`
CLIP interrogator 2022-09-11 23:48:36 +08:00			`return torch.device("mps")`

			`return cpu`
add half() supporrt for CLIP interrogation 2022-09-12 04:24:24 +08:00

			`def torch_gc():`
			`if torch.cuda.is_available():`
torch.cuda.empty_cache() defaults to cuda:0 device unless explicitly set otherwise first. Updating torch_gc() to use the device set by --device-id if specified to avoid OOM edge cases on multi-GPU systems. 2022-11-27 07:25:16 +08:00			`from modules import shared`

			`device_id = shared.cmd_opts.device_id`

			`if device_id is not None:`
			`cuda_device = f"cuda:{device_id}"`
			`else:`
			`cuda_device = "cuda"`

			`with torch.cuda.device(cuda_device):`
			`torch.cuda.empty_cache()`
			`torch.cuda.ipc_collect()`
Allow TF32 in CUDA for increased performance #279 2022-09-12 21:34:13 +08:00

			`def enable_tf32():`
			`if torch.cuda.is_available():`
			`torch.backends.cuda.matmul.allow_tf32 = True`
			`torch.backends.cudnn.allow_tf32 = True`


			`errors.run(enable_tf32, "Enabling TF32")`
changes for #294 2022-09-13 01:09:32 +08:00
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00			`cpu = torch.device("cpu")`
Remove BSRGAN from --use-cpu, add SwinIR 2022-10-25 11:04:50 +08:00			`device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None`
initial support for training textual inversion 2022-10-02 20:03:39 +08:00			`dtype = torch.float16`
--no-half-vae 2022-10-10 21:11:14 +08:00			`dtype_vae = torch.float16`
changes for #294 2022-09-13 01:09:32 +08:00
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00
changes for #294 2022-09-13 01:09:32 +08:00			`def randn(seed, shape):`
			`# Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.`
			`if device.type == 'mps':`
			`generator = torch.Generator(device=cpu)`
			`generator.manual_seed(seed)`
			`noise = torch.randn(shape, generator=generator, device=cpu).to(device)`
			`return noise`

			`torch.manual_seed(seed)`
			`return torch.randn(shape, device=device)`

first attempt to produce crrect seeds in batch 2022-09-14 02:49:58 +08:00
			`def randn_without_seed(shape):`
			`# Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.`
			`if device.type == 'mps':`
			`generator = torch.Generator(device=cpu)`
			`noise = torch.randn(shape, generator=generator, device=cpu).to(device)`
			`return noise`

			`return torch.randn(shape, device=device)`

send all three of GFPGAN's and codeformer's models to CPU memory instead of just one for #1283 2022-10-04 17:32:22 +08:00
--no-half-vae 2022-10-10 21:11:14 +08:00			`def autocast(disable=False):`
send all three of GFPGAN's and codeformer's models to CPU memory instead of just one for #1283 2022-10-04 17:32:22 +08:00			`from modules import shared`

--no-half-vae 2022-10-10 21:11:14 +08:00			`if disable:`
			`return contextlib.nullcontext()`

send all three of GFPGAN's and codeformer's models to CPU memory instead of just one for #1283 2022-10-04 17:32:22 +08:00			`if dtype == torch.float32 or shared.cmd_opts.precision == "full":`
			`return contextlib.nullcontext()`

			`return torch.autocast("cuda")`
MPS Upscalers Fix Get ESRGAN, SCUNet, and SwinIR working correctly on MPS by ensuring memory is contiguous for tensor views before sending to MPS device. 2022-10-25 14:01:57 +08:00
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00
MPS Upscalers Fix Get ESRGAN, SCUNet, and SwinIR working correctly on MPS by ensuring memory is contiguous for tensor views before sending to MPS device. 2022-10-25 14:01:57 +08:00			`# MPS workaround for https://github.com/pytorch/pytorch/issues/79383`
change formatting to match the main program in devices.py 2022-11-12 15:00:49 +08:00			`def mps_contiguous(input_tensor, device):`
			`return input_tensor.contiguous() if device.type == 'mps' else input_tensor`


			`def mps_contiguous_to(input_tensor, device):`
			`return mps_contiguous(input_tensor, device).to(device)`