Improve make_waveform (#4918)

* make waveform animate

* fixes

* changelog

* fix

* Update CHANGELOG.md

* format

* fix

* changes

* add animate flag

* format

* fixes

* demo

* fixes

* lint

---------

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
Dawood Khan 2023-07-20 23:47:48 +03:00 committed by GitHub
parent c57a4e2729
commit ad9fb84f05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 142 additions and 47 deletions

View File

@ -4,13 +4,14 @@ No changes to highlight.
## New Features:
- Provide a parameter `animate` (`False` by default) in `gr.make_waveform()` which animates the overlaid waveform by [@dawoodkhan82](https://github.com/dawoodkhan82) in [PR 4918](https://github.com/gradio-app/gradio/pull/4918)
- Add `show_download_button` param to allow the download button in static Image components to be hidden by [@hannahblair](https://github.com/hannahblair) in [PR 4959](https://github.com/gradio-app/gradio/pull/4959)
- Added autofocus argument to Textbox by [@aliabid94](https://github.com/aliabid94) in [PR 4978](https://github.com/gradio-app/gradio/pull/4978)
- The `gr.ChatInterface` UI now converts the "Submit" button to a "Stop" button in ChatInterface while streaming, which can be used to pause generation. By [@abidlabs](https://github.com/abidlabs) in [PR 4971](https://github.com/gradio-app/gradio/pull/4971).
## Bug Fixes:
- Fixes `cancels` for generators so that if a generator is canceled before it is complete, subsequent runs of the event do not continue from the previous iteration, but rather start from the beginning. By [@abidlabs](https://github.com/abidlabs) in [PR 4969](https://github.com/gradio-app/gradio/pull/4969).
- Add `show_download_button` param to allow the download button in static Image components to be hidden by [@hannahblair](https://github.com/hannahblair) in [PR 4959](https://github.com/gradio-app/gradio/pull/4959)
- Added autofocus argument to Textbox by [@aliabid94](https://github.com/aliabid94) in [PR 4978](https://github.com/gradio-app/gradio/pull/4978)
- Use `gr.State` in `gr.ChatInterface` to reduce latency by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4976](https://github.com/gradio-app/gradio/pull/4976)
- Add a `chatbot_user_message_border_color_accent` theme variable to control the border color of user messages in a chatbot by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4989](https://github.com/gradio-app/gradio/pull/4989). Set the value of this variable in `Default` theme to `*primary_200`.

View File

@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: waveform"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "\n", "\n", "COLORS = [\n", " [\"#ff0000\", \"#00ff00\"],\n", " [\"#00ff00\", \"#0000ff\"],\n", " [\"#0000ff\", \"#ff0000\"],\n", "] \n", "\n", "def audio_waveform(audio, image):\n", " return (\n", " audio,\n", " gr.make_waveform(audio),\n", " gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),\n", " )\n", "\n", "\n", "gr.Interface(\n", " audio_waveform,\n", " inputs=[gr.Audio(), gr.Image(type=\"filepath\")],\n", " outputs=[\n", " gr.Audio(),\n", " gr.Video(),\n", " gr.Video(),\n", " ],\n", ").launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: waveform"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "\n", "\n", "COLORS = [\n", " [\"#ff0000\", \"#00ff00\"],\n", " [\"#00ff00\", \"#0000ff\"],\n", " [\"#0000ff\", \"#ff0000\"],\n", "] \n", "\n", "def audio_waveform(audio, image):\n", " return (\n", " audio,\n", " gr.make_waveform(audio),\n", " gr.make_waveform(audio, animate=True),\n", " gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),\n", " )\n", "\n", "\n", "gr.Interface(\n", " audio_waveform,\n", " inputs=[gr.Audio(), gr.Image(type=\"filepath\")],\n", " outputs=[\n", " gr.Audio(),\n", " gr.Video(),\n", " gr.Video(),\n", " gr.Video(),\n", " ],\n", ").launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

View File

@ -12,6 +12,7 @@ def audio_waveform(audio, image):
return (
audio,
gr.make_waveform(audio),
gr.make_waveform(audio, animate=True),
gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),
)
@ -23,5 +24,6 @@ gr.Interface(
gr.Audio(),
gr.Video(),
gr.Video(),
gr.Video(),
],
).launch()

View File

@ -21,6 +21,7 @@ import PIL
import PIL.Image
from gradio_client import utils as client_utils
from gradio_client.documentation import document, set_documentation_group
from matplotlib import animation
from gradio import components, processing_utils, routes, utils
from gradio.context import Context
@ -756,6 +757,7 @@ def make_waveform(
bars_color: str | tuple[str, str] = ("#fbbf24", "#ea580c"),
bar_count: int = 50,
bar_width: float = 0.6,
animate: bool = False,
) -> str:
"""
Generates a waveform video from an audio file. Useful for creating an easy to share audio visualization. The output should be passed into a `gr.Video` component.
@ -767,6 +769,7 @@ def make_waveform(
bars_color: Color of waveform bars. Can be a single color or a tuple of (start_color, end_color) of gradient
bar_count: Number of bars in waveform
bar_width: Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.
animate: If true, the audio waveform overlay will be animated, if false, it will be static.
Returns:
A filepath to the output video in mp4 format.
"""
@ -820,71 +823,160 @@ def make_waveform(
if isinstance(bars_color, str)
else get_color_gradient(bars_color[0], bars_color[1], bar_count)
)
plt.bar(
if animate:
fig = plt.figure(figsize=(5, 1), dpi=200, frameon=False)
fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
plt.axis("off")
plt.margins(x=0)
bar_alpha = fg_alpha if animate else 1.0
barcollection = plt.bar(
np.arange(0, bar_count),
samples * 2,
bottom=(-1 * samples),
width=bar_width,
color=color,
alpha=bar_alpha,
)
plt.axis("off")
plt.margins(x=0)
tmp_img = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
savefig_kwargs: dict[str, Any] = {"bbox_inches": "tight"}
if bg_image is not None:
savefig_kwargs["transparent"] = True
if animate:
savefig_kwargs["facecolor"] = "none"
else:
savefig_kwargs["facecolor"] = bg_color
plt.savefig(tmp_img.name, **savefig_kwargs)
waveform_img = PIL.Image.open(tmp_img.name)
waveform_img = waveform_img.resize((1000, 200))
# Composite waveform with background image
if bg_image is not None:
waveform_array = np.array(waveform_img)
waveform_array[:, :, 3] = waveform_array[:, :, 3] * fg_alpha
waveform_img = PIL.Image.fromarray(waveform_array)
if not animate:
waveform_img = PIL.Image.open(tmp_img.name)
waveform_img = waveform_img.resize((1000, 200))
bg_img = PIL.Image.open(bg_image)
waveform_width, waveform_height = waveform_img.size
bg_width, bg_height = bg_img.size
if waveform_width != bg_width:
bg_img = bg_img.resize(
(waveform_width, 2 * int(bg_height * waveform_width / bg_width / 2))
)
# Composite waveform with background image
if bg_image is not None:
waveform_array = np.array(waveform_img)
waveform_array[:, :, 3] = waveform_array[:, :, 3] * fg_alpha
waveform_img = PIL.Image.fromarray(waveform_array)
bg_img = PIL.Image.open(bg_image)
waveform_width, waveform_height = waveform_img.size
bg_width, bg_height = bg_img.size
composite_height = max(bg_height, waveform_height)
composite = PIL.Image.new(
"RGBA", (waveform_width, composite_height), "#FFFFFF"
)
composite.paste(bg_img, (0, composite_height - bg_height))
composite.paste(
waveform_img, (0, composite_height - waveform_height), waveform_img
)
composite.save(tmp_img.name)
img_width, img_height = composite.size
if waveform_width != bg_width:
bg_img = bg_img.resize(
(
waveform_width,
2 * int(bg_height * waveform_width / bg_width / 2),
)
)
bg_width, bg_height = bg_img.size
composite_height = max(bg_height, waveform_height)
composite = PIL.Image.new(
"RGBA", (waveform_width, composite_height), "#FFFFFF"
)
composite.paste(bg_img, (0, composite_height - bg_height))
composite.paste(
waveform_img, (0, composite_height - waveform_height), waveform_img
)
composite.save(tmp_img.name)
img_width, img_height = composite.size
else:
img_width, img_height = waveform_img.size
waveform_img.save(tmp_img.name)
else:
img_width, img_height = waveform_img.size
waveform_img.save(tmp_img.name)
def _animate(_):
for idx, b in enumerate(barcollection):
rand_height = np.random.uniform(0.8, 1.2)
b.set_height(samples[idx] * rand_height * 2)
b.set_y((-rand_height * samples)[idx])
frames = int(duration * 10)
anim = animation.FuncAnimation(
fig, # type: ignore
_animate,
repeat=False,
blit=False,
frames=frames,
interval=100,
)
anim.save(
tmp_img.name,
writer="pillow",
fps=10,
codec="png",
savefig_kwargs=savefig_kwargs,
)
# Convert waveform to video with ffmpeg
output_mp4 = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
ffmpeg_cmd = [
ffmpeg,
"-loop",
"1",
"-i",
tmp_img.name,
"-i",
audio_file,
"-vf",
f"color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1",
"-t",
str(duration),
"-y",
output_mp4.name,
]
if animate and bg_image is not None:
ffmpeg_cmd = [
ffmpeg,
"-loop",
"1",
"-i",
bg_image,
"-i",
tmp_img.name,
"-i",
audio_file,
"-filter_complex",
"[0:v]scale=w=trunc(iw/2)*2:h=trunc(ih/2)*2[bg];[1:v]format=rgba,colorchannelmixer=aa=1.0[ov];[bg][ov]overlay=(main_w-overlay_w*0.9)/2:main_h-overlay_h*0.9/2[output]",
"-t",
str(duration),
"-map",
"[output]",
"-map",
"2:a",
"-c:v",
"libx264",
"-c:a",
"aac",
"-shortest",
"-y",
output_mp4.name,
]
elif animate and bg_image is None:
ffmpeg_cmd = [
ffmpeg,
"-i",
tmp_img.name,
"-i",
audio_file,
"-filter_complex",
"[0:v][1:a]concat=n=1:v=1:a=1[v][a]",
"-map",
"[v]",
"-map",
"[a]",
"-c:v",
"libx264",
"-c:a",
"aac",
"-shortest",
"-y",
output_mp4.name,
]
else:
ffmpeg_cmd = [
ffmpeg,
"-loop",
"1",
"-i",
tmp_img.name,
"-i",
audio_file,
"-vf",
f"color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1", # type: ignore
"-t",
str(duration),
"-y",
output_mp4.name,
]
subprocess.check_call(ffmpeg_cmd)
return output_mp4.name