From 703e6d9e4e161d36b9328eefb5200e1c44fb4afd Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sat, 15 Oct 2022 21:47:08 +0900 Subject: [PATCH] check NaN for hypernetwork tuning --- modules/hypernetworks/hypernetwork.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index a2b3bc0af..4905710e5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -272,15 +272,17 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer.zero_grad() loss.backward() optimizer.step() - - pbar.set_description(f"loss: {losses.mean():.7f}") + mean_loss = losses.mean() + if torch.isnan(mean_loss): + raise RuntimeError("Loss diverged.") + pbar.set_description(f"loss: {mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt') hypernetwork.save(last_saved_file) textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { - "loss": f"{losses.mean():.7f}", + "loss": f"{mean_loss:.7f}", "learn_rate": scheduler.learn_rate }) @@ -328,7 +330,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.state.textinfo = f"""
-Loss: {losses.mean():.7f}
+Loss: {mean_loss:.7f}
Step: {hypernetwork.step}
Last prompt: {html.escape(entries[0].cond_text)}
Last saved embedding: {html.escape(last_saved_file)}