add order model
This commit is contained in:
parent b9e56d04a7
commit 75d5087f24
279
aihub/deep-learning/DeOldify/ImageColorizer.ipynb
Normal file
File diff suppressed because one or more lines are too long
316
aihub/deep-learning/DeOldify/VideoColorizer.ipynb
Normal file
@ -0,0 +1,316 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install environment dependencies\n",
"! apt install -y ffmpeg\n",
"! pip install wandb fastai==1.0.60 tensorboardX>=1.6 ffmpeg-python yt-dlp opencv-python>=3.3.0.10 Pillow==9.1.0 --extra-index-url https://download.pytorch.org/whl/cu113 torch==1.11.0 torchvision==0.12.0 ipywidgets\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"^C\n",
"Traceback (most recent call last):\n",
" File \"/usr/local/bin/pip\", line 5, in <module>\n",
" from pip._internal.cli.main import main\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/main.py\", line 9, in <module>\n",
" from pip._internal.cli.autocompletion import autocomplete\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/autocompletion.py\", line 10, in <module>\n",
" from pip._internal.cli.main_parser import create_main_parser\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/main_parser.py\", line 8, in <module>\n",
" from pip._internal.cli import cmdoptions\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/cmdoptions.py\", line 23, in <module>\n",
" from pip._internal.cli.parser import ConfigOptionParser\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/cli/parser.py\", line 12, in <module>\n",
" from pip._internal.configuration import Configuration, ConfigurationError\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/configuration.py\", line 20, in <module>\n",
" from pip._internal.exceptions import (\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_internal/exceptions.py\", line 7, in <module>\n",
" from pip._vendor.pkg_resources import Distribution\n",
" File \"/usr/local/lib/python3.8/dist-packages/pip/_vendor/pkg_resources/__init__.py\", line 36, in <module>\n",
" import email.parser\n",
" File \"/usr/lib/python3.8/email/parser.py\", line 12, in <module>\n",
" from email.feedparser import FeedParser, BytesFeedParser\n",
" File \"/usr/lib/python3.8/email/feedparser.py\", line 27, in <module>\n",
" from email._policybase import compat32\n",
" File \"/usr/lib/python3.8/email/_policybase.py\", line 9, in <module>\n",
" from email.utils import _has_surrogates\n",
" File \"/usr/lib/python3.8/email/utils.py\", line 33, in <module>\n",
" from email._parseaddr import quote\n",
" File \"/usr/lib/python3.8/email/_parseaddr.py\", line 16, in <module>\n",
" import time, calendar\n",
" File \"<frozen importlib._bootstrap>\", line 991, in _find_and_load\n",
" File \"<frozen importlib._bootstrap>\", line 971, in _find_and_load_unlocked\n",
" File \"<frozen importlib._bootstrap>\", line 914, in _find_spec\n",
" File \"/usr/local/lib/python3.8/dist-packages/_distutils_hack/__init__.py\", line 87, in find_spec\n",
" method_name = 'spec_for_{fullname}'.format(**locals())\n",
"KeyboardInterrupt\n",
"\n",
"Usage: \n",
" pip install [options] <requirement specifier> [package-index-options] ...\n",
" pip install [options] -r <requirements file> [package-index-options] ...\n",
" pip install [options] [-e] <vcs project url> ...\n",
" pip install [options] [-e] <local project path> ...\n",
" pip install [options] <archive url/path> ...\n",
"\n",
"no such option: -y\n"
]
}
],
"source": [
"! pip uninstall -y ffmpeg ffmpeg-python\n",
"! pip install ffmpeg"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download the model\n",
"! wget https://data.deepai.org/deoldify/ColorizeVideo_gen.pth -O models/ColorizeVideo_gen.pth"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<DeviceId.GPU0: 0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#NOTE: This must be the first call in order to work properly!\n",
"from deoldify import device\n",
"from deoldify.device_id import DeviceId\n",
"#choices: CPU, GPU0...GPU7\n",
"device.set(device=DeviceId.GPU0)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from deoldify.visualize import *\n",
"plt.style.use('dark_background')\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\", category=UserWarning, message=\".*?Your .*? set is empty.*?\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"colorizer = get_video_colorizer()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Instructions\n",
"\n",
"### source_url\n",
"Type in a URL hosting a video from YouTube, Imgur, Twitter, Reddit, Vimeo, etc. Many sources work! GIFs also work. Full list here: https://ytdl-org.github.io/youtube-dl/supportedsites.html NOTE: If you want to use your own video, you can set source_url to None and just upload the file to video/source/ in Jupyter. Just make sure that the file_name parameter matches the file you uploaded.\n",
"\n",
"\n",
"### file_name\n",
"Name this whatever sensible file name you want (minus the extension)! It should actually exist in video/source if source_url=None.\n",
"\n",
"\n",
"### render_factor\n",
"The default value of 21 has been carefully chosen and should work -ok- for most scenarios (but probably won't be the -best-). This determines the resolution at which the color portion of the video is rendered. Lower resolution will render faster, and colors also tend to look more vibrant. Older and lower-quality film in particular will generally benefit from lowering the render factor. Higher render factors are often better for higher-quality videos, and inconsistencies (flashy render) will generally be reduced, but the colors may get slightly washed out.\n",
"\n",
"\n",
"### file_name_ext\n",
"There's no reason to change this.\n",
"\n",
"\n",
"### result_path\n",
"Likewise, don't change this.\n",
"\n",
"\n",
"### How to Download a Copy\n",
"Simply shift+right click on the displayed video and click \"Save video as...\"!\n",
"\n",
"\n",
"## Pro Tips\n",
"1. If a video takes a long time to render and you're wondering how well the frames will actually be colorized, you can preview how well the frames will be rendered at each render_factor by using the code at the bottom. Just stop the video rendering by hitting the stop button on the cell, then run that bottom cell under \"See how well render_factor values perform on a frame here\". It's not perfect, and you may still need to experiment a bit, especially when it comes to figuring out how to reduce frame inconsistency. But it'll go a long way in narrowing down what actually works.\n",
"\n",
"\n",
"## Troubleshooting\n",
"The video player may wind up not showing up. In that case, make sure to wait for the Jupyter cell to complete processing first (the play button will stop spinning), then follow these alternative download instructions:\n",
"\n",
"1. In the menu to the left, click the Home icon.\n",
"2. By default, the rendered video will be in /video/result/.\n",
"\n",
"If a video you downloaded doesn't play, it's probably because the cell didn't complete processing and the video is in a half-finished state.\n",
"If you get a 'CUDA out of memory' error, you probably have the render_factor set too high. The max is 44 on 11GB video cards."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Colorize!!"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[twitter] 1116751583386034176: Downloading guest token\n",
"[twitter] 1116751583386034176: Downloading JSON metadata\n",
"[info] 1116751583386034176: Downloading 1 format(s): http\n",
"[download] Destination: video/source/DogShy1926.mp4\n",
"[download] 100% of 227.58KiB in 00:00 at 825.81KiB/s \n"
]
},
{
"ename": "AttributeError",
"evalue": "module 'ffmpeg' has no attribute 'input'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [17]\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m result_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m source_url \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 10\u001b[0m result_path \u001b[38;5;241m=\u001b[39m \u001b[43mcolorizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolorize_from_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource_url\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_name_ext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrender_factor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrender_factor\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 12\u001b[0m result_path \u001b[38;5;241m=\u001b[39m colorizer\u001b[38;5;241m.\u001b[39mcolorize_from_file_name(file_name_ext, render_factor\u001b[38;5;241m=\u001b[39mrender_factor)\n",
"File \u001b[0;32m/mnt/admin/DeOldify/deoldify/visualize.py:384\u001b[0m, in \u001b[0;36mVideoColorizer.colorize_from_url\u001b[0;34m(self, source_url, file_name, render_factor, post_process, watermarked)\u001b[0m\n\u001b[1;32m 382\u001b[0m source_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msource_folder \u001b[38;5;241m/\u001b[39m file_name\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_download_video_from_url(source_url, source_path)\n\u001b[0;32m--> 384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_colorize_from_path\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrender_factor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrender_factor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpost_process\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_process\u001b[49m\u001b[43m,\u001b[49m\u001b[43mwatermarked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwatermarked\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/mnt/admin/DeOldify/deoldify/visualize.py:403\u001b[0m, in \u001b[0;36mVideoColorizer._colorize_from_path\u001b[0;34m(self, source_path, render_factor, watermarked, post_process)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m source_path\u001b[38;5;241m.\u001b[39mexists():\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\n\u001b[1;32m 401\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVideo at path specfied, \u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(source_path) \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m could not be found.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 402\u001b[0m )\n\u001b[0;32m--> 403\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_extract_raw_frames\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_colorize_raw_frames(\n\u001b[1;32m 405\u001b[0m source_path, render_factor\u001b[38;5;241m=\u001b[39mrender_factor,post_process\u001b[38;5;241m=\u001b[39mpost_process,watermarked\u001b[38;5;241m=\u001b[39mwatermarked\n\u001b[1;32m 406\u001b[0m )\n\u001b[1;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_video(source_path)\n",
"File \u001b[0;32m/mnt/admin/DeOldify/deoldify/visualize.py:267\u001b[0m, in \u001b[0;36mVideoColorizer._extract_raw_frames\u001b[0;34m(self, source_path)\u001b[0m\n\u001b[1;32m 263\u001b[0m bwframes_folder\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 264\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_purge_images(bwframes_folder)\n\u001b[1;32m 266\u001b[0m process \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m--> 267\u001b[0m \u001b[43mffmpeg\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput\u001b[49m(\u001b[38;5;28mstr\u001b[39m(source_path))\n\u001b[1;32m 269\u001b[0m \u001b[38;5;241m.\u001b[39moutput(\u001b[38;5;28mstr\u001b[39m(bwframe_path_template), \u001b[38;5;28mformat\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mimage2\u001b[39m\u001b[38;5;124m'\u001b[39m, vcodec\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmjpeg\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mq:v\u001b[39m\u001b[38;5;124m'\u001b[39m:\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m0\u001b[39m\u001b[38;5;124m'\u001b[39m})\n\u001b[1;32m 270\u001b[0m \u001b[38;5;241m.\u001b[39mglobal_args(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-hide_banner\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 271\u001b[0m \u001b[38;5;241m.\u001b[39mglobal_args(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-nostats\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 272\u001b[0m \u001b[38;5;241m.\u001b[39mglobal_args(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-loglevel\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124merror\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 273\u001b[0m )\n\u001b[1;32m 275\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 276\u001b[0m process\u001b[38;5;241m.\u001b[39mrun()\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'ffmpeg' has no attribute 'input'"
]
}
],
"source": [
"#NOTE: Max is 44 with 11GB video cards. 21 is a good default\n",
"render_factor=21\n",
"#NOTE: Make source_url None to just read from file at ./video/source/[file_name] directly without modification\n",
"source_url='https://twitter.com/silentmoviegifs/status/1116751583386034176'\n",
"file_name = 'DogShy1926'\n",
"file_name_ext = file_name + '.mp4'\n",
"result_path = None\n",
"\n",
"if source_url is not None:\n",
"    result_path = colorizer.colorize_from_url(source_url, file_name_ext, render_factor=render_factor)\n",
"else:\n",
"    result_path = colorizer.colorize_from_file_name(file_name_ext, render_factor=render_factor)\n",
"\n",
"show_video_in_notebook(result_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## See how well render_factor values perform on a frame here"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: '/mnt/admin/DeOldify/video/bwframes/DogShy1926/00001.jpg'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [18]\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m10\u001b[39m,\u001b[38;5;241m45\u001b[39m,\u001b[38;5;241m2\u001b[39m):\n\u001b[0;32m----> 2\u001b[0m \u001b[43mcolorizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvis\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot_transformed_image\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvideo/bwframes/\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m/00001.jpg\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrender_factor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdisplay_render_factor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfigsize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m8\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m8\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/mnt/admin/DeOldify/deoldify/visualize.py:105\u001b[0m, in \u001b[0;36mModelImageVisualizer.plot_transformed_image\u001b[0;34m(self, path, results_dir, figsize, render_factor, display_render_factor, compare, post_process, watermarked)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results_dir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 104\u001b[0m results_dir \u001b[38;5;241m=\u001b[39m Path(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresults_dir)\n\u001b[0;32m--> 105\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_transformed_image\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrender_factor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpost_process\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_process\u001b[49m\u001b[43m,\u001b[49m\u001b[43mwatermarked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwatermarked\u001b[49m\n\u001b[1;32m 107\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m orig \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_open_pil_image(path)\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compare:\n",
"File \u001b[0;32m/mnt/admin/DeOldify/deoldify/visualize.py:173\u001b[0m, in \u001b[0;36mModelImageVisualizer.get_transformed_image\u001b[0;34m(self, path, render_factor, post_process, watermarked)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_transformed_image\u001b[39m(\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28mself\u001b[39m, path: Path, render_factor: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, post_process: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 170\u001b[0m watermarked: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 171\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Image:\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_clean_mem()\n\u001b[0;32m--> 173\u001b[0m orig_image \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_open_pil_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m filtered_image \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfilter\u001b[38;5;241m.\u001b[39mfilter(\n\u001b[1;32m 175\u001b[0m orig_image, orig_image, render_factor\u001b[38;5;241m=\u001b[39mrender_factor,post_process\u001b[38;5;241m=\u001b[39mpost_process\n\u001b[1;32m 176\u001b[0m )\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m watermarked:\n",
"File \u001b[0;32m/mnt/admin/DeOldify/deoldify/visualize.py:58\u001b[0m, in \u001b[0;36mModelImageVisualizer._open_pil_image\u001b[0;34m(self, path)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_open_pil_image\u001b[39m(\u001b[38;5;28mself\u001b[39m, path: Path) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Image:\n\u001b[0;32m---> 58\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPIL\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mImage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/PIL/Image.py:3068\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(fp, mode, formats)\u001b[0m\n\u001b[1;32m 3065\u001b[0m filename \u001b[38;5;241m=\u001b[39m fp\n\u001b[1;32m 3067\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m filename:\n\u001b[0;32m-> 3068\u001b[0m fp \u001b[38;5;241m=\u001b[39m \u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3069\u001b[0m exclusive_fp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 3071\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/mnt/admin/DeOldify/video/bwframes/DogShy1926/00001.jpg'"
]
}
],
"source": [
"for i in range(10,45,2):\n",
"    colorizer.vis.plot_transformed_image('video/bwframes/' + file_name + '/00001.jpg', render_factor=i, display_render_factor=True, figsize=(8,8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "67px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
175
aihub/deep-learning/first-order-model/README.md
Normal file
@ -0,0 +1,175 @@
<b>!!! Check out our new [paper](https://arxiv.org/pdf/2104.11280.pdf) and [framework](https://github.com/snap-research/articulated-animation) improved for articulated objects</b>

# First Order Motion Model for Image Animation

This repository contains the source code for the paper [First Order Motion Model for Image Animation](https://papers.nips.cc/paper/8935-first-order-motion-model-for-image-animation) by Aliaksandr Siarohin, [Stéphane Lathuilière](http://stelat.eu), [Sergey Tulyakov](http://stulyakov.com), [Elisa Ricci](http://elisaricci.eu/) and [Nicu Sebe](http://disi.unitn.it/~sebe/).

## Example animations

The videos on the left show the driving videos. The first row on the right for each dataset shows the source videos. The bottom row contains the animated sequences, with motion transferred from the driving video and the object taken from the source image. We trained a separate network for each task.

### VoxCeleb Dataset
![Screenshot](https://github.com/AliaksandrSiarohin/first-order-model/blob/master/sup-mat/vox-teaser.gif)
### Fashion Dataset
![Screenshot](https://github.com/AliaksandrSiarohin/first-order-model/blob/master/sup-mat/fashion-teaser.gif)
### MGIF Dataset
![Screenshot](https://github.com/AliaksandrSiarohin/first-order-model/blob/master/sup-mat/mgif-teaser.gif)


### Installation

We support ```python3```. To install the dependencies run:
```
pip install -r requirements.txt
```

### YAML configs

There are several configuration files (```config/dataset_name.yaml```), one for each dataset. See ```config/taichi-256.yaml``` for a description of each parameter.
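For a quick look at what a given config exposes, you can load it and list its top-level sections. This is only a convenience sketch, not part of the repository: it assumes PyYAML is installed and that the file follows the layout of the shipped configs (for example dataset_params and train_params, as referenced elsewhere in this README).

```python
# Sketch: inspect a config file's top-level sections and their keys.
# Assumes PyYAML (`pip install pyyaml`) and an existing config such as config/taichi-256.yaml.
import yaml

with open('config/taichi-256.yaml') as f:
    config = yaml.safe_load(f)

for section, params in config.items():
    # e.g. dataset_params, train_params, ...
    keys = list(params) if isinstance(params, dict) else params
    print(f"{section}: {keys}")
```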

### Pre-trained checkpoint
Checkpoints can be found under the following links: [google-drive](https://drive.google.com/open?id=1PyQJmkdCsAkOYwUyaj_l-l0as-iLDgeH) or [yandex-disk](https://yadi.sk/d/lEw8uRm140L_eQ).

### Animation Demo
To run a demo, download a checkpoint and run the following command:
```
python demo.py --config config/dataset_name.yaml --driving_video path/to/driving --source_image path/to/source --checkpoint path/to/checkpoint --relative --adapt_scale
```
The result will be stored in ```result.mp4```.

The driving videos and source images should be cropped before they can be used in our method. To obtain some semi-automatic crop suggestions you can use ```python crop-video.py --inp some_youtube_video.mp4```. It will generate commands for crops using ffmpeg. In order to use the script, the face-alignment library is needed:
```
git clone https://github.com/1adrianb/face-alignment
cd face-alignment
pip install -r requirements.txt
python setup.py install
```

### Animation demo with Docker

If you are having trouble getting the demo to work because of library compatibility issues,
and you're running Linux, you might try running it inside a Docker container, which would
give you better control over the execution environment.

Requirements: Docker 19.03+ and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
installed and able to successfully run the `nvidia-docker` usage tests.

We'll first build the container.

```
docker build -t first-order-model .
```

And now that we have the container available locally, we can use it to run the demo.

```
docker run -it --rm --gpus all \
    -v $HOME/first-order-model:/app first-order-model \
    python3 demo.py --config config/vox-256.yaml \
        --driving_video driving.mp4 \
        --source_image source.png \
        --checkpoint vox-cpk.pth.tar \
        --result_video result.mp4 \
        --relative --adapt_scale
```

### Colab Demo
@graphemecluster prepared a GUI demo for Google Colab; see ```demo.ipynb```. To run it, press the ```Open In Colab``` button.

For the old demo, see ```old-demo.ipynb```.

### Face-swap
It is possible to modify the method to perform face-swap using supervised segmentation masks.
![Screenshot](https://github.com/AliaksandrSiarohin/first-order-model/blob/master/sup-mat/face-swap.gif)
For both unsupervised and supervised video editing, such as face-swap, please refer to [Motion Co-Segmentation](https://github.com/AliaksandrSiarohin/motion-cosegmentation).


### Training

To train a model on a specific dataset run:
```
CUDA_VISIBLE_DEVICES=0,1,2,3 python run.py --config config/dataset_name.yaml --device_ids 0,1,2,3
```
The code will create a folder in the log directory (each run creates a new time-stamped directory).
Checkpoints will be saved to this folder.
To check the loss values during training see ```log.txt```.
You can also check training data reconstructions in the ```train-vis``` subfolder.
By default the batch size is tuned to run on 2 or 4 Titan-X GPUs (apart from speed it does not make much difference). You can change the batch size in train_params in the corresponding ```.yaml``` file.

### Evaluation on video reconstruction

To evaluate the reconstruction performance run:
```
CUDA_VISIBLE_DEVICES=0 python run.py --config config/dataset_name.yaml --mode reconstruction --checkpoint path/to/checkpoint
```
You will need to specify the path to the checkpoint;
the ```reconstruction``` subfolder will be created in the checkpoint folder.
The generated videos will be stored in this folder; they are also stored in the ```png``` subfolder in lossless '.png' format for evaluation.
Instructions for computing metrics from the paper can be found here: https://github.com/AliaksandrSiarohin/pose-evaluation.
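The lossless PNGs make it easy to sanity-check reconstructions with a quick script before running the full metric suite. The sketch below computes a simple per-frame L1 error; the directory names are hypothetical placeholders, and this is not the paper's evaluation protocol (use pose-evaluation for that).

```python
# Sketch: rough per-frame L1 error between reconstructed frames and ground truth.
# Paths are placeholders; the paper's metrics come from the pose-evaluation repo.
from pathlib import Path
import numpy as np
from PIL import Image

def mean_l1(generated_dir: str, ground_truth_dir: str) -> float:
    errors = []
    for gen_path in sorted(Path(generated_dir).glob('*.png')):
        gt_path = Path(ground_truth_dir) / gen_path.name
        gen = np.asarray(Image.open(gen_path), dtype=np.float32) / 255.0
        gt = np.asarray(Image.open(gt_path), dtype=np.float32) / 255.0
        errors.append(np.abs(gen - gt).mean())
    return float(np.mean(errors))

# print(mean_l1('path/to/checkpoint_folder/reconstruction/png', 'path/to/ground_truth/png'))
```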

### Image animation

In order to animate videos run:
```
CUDA_VISIBLE_DEVICES=0 python run.py --config config/dataset_name.yaml --mode animate --checkpoint path/to/checkpoint
```
You will need to specify the path to the checkpoint;
the ```animation``` subfolder will be created in the same folder as the checkpoint.
You can find the generated video there and its lossless version in the ```png``` subfolder.
By default, videos from the test set will be randomly paired, but you can specify the "source,driving" pairs in the corresponding ```.csv``` files. The path to this file should be specified in the corresponding ```.yaml``` file in the pairs_list setting; a short sketch of writing such a file follows below.
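As a minimal illustration, a pairs file with the "source,driving" header quoted above could be written like this. The exact columns expected by the data loader are an assumption here; compare with any pairs_list ```.csv``` already shipped with the repository before using it.

```python
# Sketch: write a "source,driving" pairs file for the pairs_list setting.
# Column names are taken from this README; verify against the shipped .csv files.
import csv

pairs = [
    ('source_video_1.mp4', 'driving_video_7.mp4'),
    ('source_video_2.mp4', 'driving_video_3.mp4'),
]

with open('data/dataset_name_pairs.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['source', 'driving'])
    writer.writerows(pairs)
```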

There are 2 different ways of performing animation:
by using **absolute** keypoint locations or by using **relative** keypoint locations.

1) <i>Animation using absolute coordinates:</i> the animation is performed using the absolute positions of the driving video and the appearance of the source image.
In this way there are no specific requirements for the driving video and source appearance that is used.
However, this usually leads to poor performance, since irrelevant details such as shape are transferred.
Check the animate parameters in ```taichi-256.yaml``` to enable this mode.

<img src="sup-mat/absolute-demo.gif" width="512">

2) <i>Animation using relative coordinates:</i> from the driving video we first estimate the relative movement of each keypoint,
then we add this movement to the absolute position of the keypoints in the source image.
These keypoints, along with the source image, are used for animation. This usually leads to better performance; however, it requires
that the object in the first frame of the video and in the source image have the same pose (see the sketch after this list).

<img src="https://github.com/AliaksandrSiarohin/first-order-model/blob/master/sup-mat/relative-demo.gif" width="512">
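To make the difference concrete, here is a minimal numpy sketch of the two keypoint-transfer rules described above. It is an illustration of the idea only, not the repository's implementation (which also handles local affine transformations and scale adaptation).

```python
# Sketch: absolute vs. relative keypoint transfer (illustration only).
# kp_source: keypoints detected in the source image, shape (num_kp, 2)
# kp_driving: keypoints in the current driving frame, shape (num_kp, 2)
# kp_driving_initial: keypoints in the first driving frame, shape (num_kp, 2)
import numpy as np

def transfer_absolute(kp_source, kp_driving, kp_driving_initial):
    # Absolute mode: reuse the driving frame's keypoint positions directly,
    # so the driving object's shape leaks into the result.
    return kp_driving

def transfer_relative(kp_source, kp_driving, kp_driving_initial):
    # Relative mode: apply the driving motion (offset from the first frame)
    # on top of the source keypoints; the poses in frame 0 and the source should match.
    return kp_source + (kp_driving - kp_driving_initial)

# Tiny usage example with made-up keypoints:
kp_src = np.array([[0.2, 0.3], [0.5, 0.5]])
kp_drv0 = np.array([[0.25, 0.3], [0.55, 0.5]])  # first driving frame
kp_drv = np.array([[0.35, 0.4], [0.65, 0.6]])   # current driving frame
print(transfer_relative(kp_src, kp_drv, kp_drv0))
```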


### Datasets

1) **Bair**. This dataset can be directly [downloaded](https://yadi.sk/d/Rr-fjn-PdmmqeA).

2) **Mgif**. This dataset can be directly [downloaded](https://yadi.sk/d/5VdqLARizmnj3Q).

3) **Fashion**. Follow the dataset download instructions [here](https://vision.cs.ubc.ca/datasets/fashion/).

4) **Taichi**. Follow the instructions in [data/taichi-loading](data/taichi-loading/README.md) or the instructions from https://github.com/AliaksandrSiarohin/video-preprocessing.

5) **Nemo**. Please follow the [instructions](https://www.uva-nemo.org/) on how to download the dataset. The dataset should then be preprocessed using scripts from https://github.com/AliaksandrSiarohin/video-preprocessing.

6) **VoxCeleb**. Please follow the instructions from https://github.com/AliaksandrSiarohin/video-preprocessing.


### Training on your own dataset
1) Resize all the videos to the same size, e.g. 256x256; the videos can be in '.gif', '.mp4', or a folder with images.
We recommend the latter: for each video make a separate folder with all the frames in '.png' format. This format is lossless and has better i/o performance (see the sketch after this list).

2) Create a folder ```data/dataset_name``` with 2 subfolders ```train``` and ```test```, and put the training videos in ```train``` and the testing videos in ```test```.

3) Create a config ```config/dataset_name.yaml```; in dataset_params specify the root dir, ```root_dir: data/dataset_name```. Also adjust the number of epochs in train_params.
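One possible way to produce the per-video frame folders described in step 1 is sketched below. It is only an illustration: the paths are hypothetical, it assumes opencv-python (cv2) is installed, and it uses a plain resize rather than the crop suggestions from crop-video.py.

```python
# Sketch: turn one video into a folder of 256x256 '.png' frames (step 1 above).
# Paths are placeholders; assumes opencv-python (cv2) is installed.
import os
import cv2

def video_to_frames(video_path: str, out_dir: str, size: int = 256) -> None:
    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    idx = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frame = cv2.resize(frame, (size, size))
        cv2.imwrite(os.path.join(out_dir, f'{idx:07d}.png'), frame)
        idx += 1
    cap.release()

# video_to_frames('my_video.mp4', 'data/dataset_name/train/my_video')
```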

#### Additional notes

Citation:

```
@InProceedings{Siarohin_2019_NeurIPS,
  author={Siarohin, Aliaksandr and Lathuilière, Stéphane and Tulyakov, Sergey and Ricci, Elisa and Sebe, Nicu},
  title={First Order Motion Model for Image Animation},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  month = {December},
  year = {2019}
}
```