diff --git a/CHANGELOG.md b/CHANGELOG.md
index 297ef74460..b04cf3c7c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,13 +4,14 @@
 No changes to highlight.
 
 ## New Features:
 
+- Provide a parameter `animate` (`False` by default) in `gr.make_waveform()` which animates the overlaid waveform by [@dawoodkhan82](https://github.com/dawoodkhan82) in [PR 4918](https://github.com/gradio-app/gradio/pull/4918)
+- Add `show_download_button` param to allow the download button in static Image components to be hidden by [@hannahblair](https://github.com/hannahblair) in [PR 4959](https://github.com/gradio-app/gradio/pull/4959)
+- Added autofocus argument to Textbox by [@aliabid94](https://github.com/aliabid94) in [PR 4978](https://github.com/gradio-app/gradio/pull/4978)
 - The `gr.ChatInterface` UI now converts the "Submit" button to a "Stop" button in ChatInterface while streaming, which can be used to pause generation. By [@abidlabs](https://github.com/abidlabs) in [PR 4971](https://github.com/gradio-app/gradio/pull/4971).
 
 ## Bug Fixes:
 
 - Fixes `cancels` for generators so that if a generator is canceled before it is complete, subsequent runs of the event do not continue from the previous iteration, but rather start from the beginning. By [@abidlabs](https://github.com/abidlabs) in [PR 4969](https://github.com/gradio-app/gradio/pull/4969).
-- Add `show_download_button` param to allow the download button in static Image components to be hidden by [@hannahblair](https://github.com/hannahblair) in [PR 4959](https://github.com/gradio-app/gradio/pull/4959)
-- Added autofocus argument to Textbox by [@aliabid94](https://github.com/aliabid94) in [PR 4978](https://github.com/gradio-app/gradio/pull/4978)
 - Use `gr.State` in `gr.ChatInterface` to reduce latency by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4976](https://github.com/gradio-app/gradio/pull/4976)
 - Add a `chatbot_user_message_border_color_accent` theme variable to control the border color of user messages in a chatbot by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4989](https://github.com/gradio-app/gradio/pull/4989). Set the value of this variable in `Default` theme to `*primary_200`.
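Taken together, the new `animate`, `show_download_button`, and `autofocus` options compose naturally. A minimal sketch of how they might be wired up (illustrative only, not part of this patch; `cover.png` is a placeholder asset):

```python
import gradio as gr

def visualize(audio_path):
    # animate=False (the default) keeps the previous static-overlay behavior
    return gr.make_waveform(audio_path, animate=True)

with gr.Blocks() as demo:
    title = gr.Textbox(label="Title", autofocus=True)  # cursor lands here on page load
    audio = gr.Audio(type="filepath")
    video = gr.Video()
    # show_download_button=False hides the download icon on the static image
    cover = gr.Image("cover.png", show_download_button=False)
    audio.change(visualize, inputs=audio, outputs=video)

demo.launch()
```

As in the demo below, `gr.make_waveform()` returns a filepath to an mp4, so its result is rendered by a `gr.Video` output.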
diff --git a/demo/waveform/run.ipynb b/demo/waveform/run.ipynb
index 974ce72a31..4da46e83bc 100644
--- a/demo/waveform/run.ipynb
+++ b/demo/waveform/run.ipynb
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: waveform"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "\n", "\n", "COLORS = [\n", "    [\"#ff0000\", \"#00ff00\"],\n", "    [\"#00ff00\", \"#0000ff\"],\n", "    [\"#0000ff\", \"#ff0000\"],\n", "] \n", "\n", "def audio_waveform(audio, image):\n", "    return (\n", "        audio,\n", "        gr.make_waveform(audio),\n", "        gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),\n", "    )\n", "\n", "\n", "gr.Interface(\n", "    audio_waveform,\n", "    inputs=[gr.Audio(), gr.Image(type=\"filepath\")],\n", "    outputs=[\n", "        gr.Audio(),\n", "        gr.Video(),\n", "        gr.Video(),\n", "    ],\n", ").launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: waveform"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "\n", "\n", "COLORS = [\n", "    [\"#ff0000\", \"#00ff00\"],\n", "    [\"#00ff00\", \"#0000ff\"],\n", "    [\"#0000ff\", \"#ff0000\"],\n", "] \n", "\n", "def audio_waveform(audio, image):\n", "    return (\n", "        audio,\n", "        gr.make_waveform(audio),\n", "        gr.make_waveform(audio, animate=True),\n", "        gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),\n", "    )\n", "\n", "\n", "gr.Interface(\n", "    audio_waveform,\n", "    inputs=[gr.Audio(), gr.Image(type=\"filepath\")],\n", "    outputs=[\n", "        gr.Audio(),\n", "        gr.Video(),\n", "        gr.Video(),\n", "        gr.Video(),\n", "    ],\n", ").launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/demo/waveform/run.py b/demo/waveform/run.py
index 9d61a19902..f86ed52d8e 100644
--- a/demo/waveform/run.py
+++ b/demo/waveform/run.py
@@ -12,6 +12,7 @@ def audio_waveform(audio, image):
     return (
         audio,
         gr.make_waveform(audio),
+        gr.make_waveform(audio, animate=True),
         gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),
     )
 
@@ -23,5 +24,6 @@ gr.Interface(
         gr.Audio(),
         gr.Video(),
         gr.Video(),
+        gr.Video(),
     ],
 ).launch()
diff --git a/gradio/helpers.py b/gradio/helpers.py
index 734c721e6a..660c89de70 100644
--- a/gradio/helpers.py
+++ b/gradio/helpers.py
@@ -21,6 +21,7 @@ import PIL
 import PIL.Image
 from gradio_client import utils as client_utils
 from gradio_client.documentation import document, set_documentation_group
+from matplotlib import animation
 
 from gradio import components, processing_utils, routes, utils
 from gradio.context import Context
@@ -756,6 +757,7 @@ def make_waveform(
     bars_color: str | tuple[str, str] = ("#fbbf24", "#ea580c"),
     bar_count: int = 50,
     bar_width: float = 0.6,
+    animate: bool = False,
 ) -> str:
     """
     Generates a waveform video from an audio file. Useful for creating an easy to share audio visualization. The output should be passed into a `gr.Video` component.
@@ -767,6 +769,7 @@
         bars_color: Color of waveform bars. Can be a single color or a tuple of (start_color, end_color) of gradient
         bar_count: Number of bars in waveform
         bar_width: Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.
+        animate: If true, the audio waveform overlay will be animated; if false, it will be static.
     Returns:
         A filepath to the output video in mp4 format.
     """
@@ -820,71 +823,160 @@
         if isinstance(bars_color, str)
         else get_color_gradient(bars_color[0], bars_color[1], bar_count)
     )
-    plt.bar(
+
+    if animate:
+        fig = plt.figure(figsize=(5, 1), dpi=200, frameon=False)
+        fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
+        plt.axis("off")
+        plt.margins(x=0)
+
+    bar_alpha = fg_alpha if animate else 1.0
+    barcollection = plt.bar(
         np.arange(0, bar_count),
         samples * 2,
         bottom=(-1 * samples),
         width=bar_width,
         color=color,
+        alpha=bar_alpha,
     )
-    plt.axis("off")
-    plt.margins(x=0)
+
     tmp_img = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+
     savefig_kwargs: dict[str, Any] = {"bbox_inches": "tight"}
     if bg_image is not None:
         savefig_kwargs["transparent"] = True
+        if animate:
+            savefig_kwargs["facecolor"] = "none"
     else:
         savefig_kwargs["facecolor"] = bg_color
     plt.savefig(tmp_img.name, **savefig_kwargs)
-    waveform_img = PIL.Image.open(tmp_img.name)
-    waveform_img = waveform_img.resize((1000, 200))
+
+    if not animate:
+        waveform_img = PIL.Image.open(tmp_img.name)
+        waveform_img = waveform_img.resize((1000, 200))
 
-    # Composite waveform with background image
-    if bg_image is not None:
-        waveform_array = np.array(waveform_img)
-        waveform_array[:, :, 3] = waveform_array[:, :, 3] * fg_alpha
-        waveform_img = PIL.Image.fromarray(waveform_array)
+        # Composite waveform with background image
+        if bg_image is not None:
+            waveform_array = np.array(waveform_img)
+            waveform_array[:, :, 3] = waveform_array[:, :, 3] * fg_alpha
+            waveform_img = PIL.Image.fromarray(waveform_array)
 
-        bg_img = PIL.Image.open(bg_image)
-        waveform_width, waveform_height = waveform_img.size
-        bg_width, bg_height = bg_img.size
-        if waveform_width != bg_width:
-            bg_img = bg_img.resize(
-                (waveform_width, 2 * int(bg_height * waveform_width / bg_width / 2))
-            )
+            bg_img = PIL.Image.open(bg_image)
+            waveform_width, waveform_height = waveform_img.size
             bg_width, bg_height = bg_img.size
-        composite_height = max(bg_height, waveform_height)
-        composite = PIL.Image.new(
-            "RGBA", (waveform_width, composite_height), "#FFFFFF"
-        )
-        composite.paste(bg_img, (0, composite_height - bg_height))
-        composite.paste(
-            waveform_img, (0, composite_height - waveform_height), waveform_img
-        )
-        composite.save(tmp_img.name)
-        img_width, img_height = composite.size
+            if waveform_width != bg_width:
+                bg_img = bg_img.resize(
+                    (
+                        waveform_width,
+                        2 * int(bg_height * waveform_width / bg_width / 2),
+                    )
+                )
+                bg_width, bg_height = bg_img.size
+            composite_height = max(bg_height, waveform_height)
+            composite = PIL.Image.new(
+                "RGBA", (waveform_width, composite_height), "#FFFFFF"
+            )
+            composite.paste(bg_img, (0, composite_height - bg_height))
+            composite.paste(
+                waveform_img, (0, composite_height - waveform_height), waveform_img
+            )
+            composite.save(tmp_img.name)
+            img_width, img_height = composite.size
+        else:
+            img_width, img_height = waveform_img.size
+            waveform_img.save(tmp_img.name)
     else:
-        img_width, img_height = waveform_img.size
-        waveform_img.save(tmp_img.name)
+
+        def _animate(_):
+            for idx, b in enumerate(barcollection):
+                rand_height = np.random.uniform(0.8, 1.2)
+                b.set_height(samples[idx] * rand_height * 2)
+                b.set_y((-rand_height * samples)[idx])
+
+        frames = int(duration * 10)
+        anim = animation.FuncAnimation(
+            fig,  # type: ignore
+            _animate,
+            repeat=False,
+            blit=False,
+            frames=frames,
+            interval=100,
+        )
+        anim.save(
+            tmp_img.name,
+            writer="pillow",
+            fps=10,
+            codec="png",
+            savefig_kwargs=savefig_kwargs,
+        )
 
     # Convert waveform to video with ffmpeg
     output_mp4 = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
 
-    ffmpeg_cmd = [
-        ffmpeg,
-        "-loop",
-        "1",
-        "-i",
-        tmp_img.name,
-        "-i",
-        audio_file,
-        "-vf",
-        f"color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1",
-        "-t",
-        str(duration),
-        "-y",
-        output_mp4.name,
-    ]
+    if animate and bg_image is not None:
+        ffmpeg_cmd = [
+            ffmpeg,
+            "-loop",
+            "1",
+            "-i",
+            bg_image,
+            "-i",
+            tmp_img.name,
+            "-i",
+            audio_file,
+            "-filter_complex",
+            "[0:v]scale=w=trunc(iw/2)*2:h=trunc(ih/2)*2[bg];[1:v]format=rgba,colorchannelmixer=aa=1.0[ov];[bg][ov]overlay=(main_w-overlay_w*0.9)/2:main_h-overlay_h*0.9/2[output]",
+            "-t",
+            str(duration),
+            "-map",
+            "[output]",
+            "-map",
+            "2:a",
+            "-c:v",
+            "libx264",
+            "-c:a",
+            "aac",
+            "-shortest",
+            "-y",
+            output_mp4.name,
+        ]
+    elif animate and bg_image is None:
+        ffmpeg_cmd = [
+            ffmpeg,
+            "-i",
+            tmp_img.name,
+            "-i",
+            audio_file,
+            "-filter_complex",
+            "[0:v][1:a]concat=n=1:v=1:a=1[v][a]",
+            "-map",
+            "[v]",
+            "-map",
+            "[a]",
+            "-c:v",
+            "libx264",
+            "-c:a",
+            "aac",
+            "-shortest",
+            "-y",
+            output_mp4.name,
+        ]
+    else:
+        ffmpeg_cmd = [
+            ffmpeg,
+            "-loop",
+            "1",
+            "-i",
+            tmp_img.name,
+            "-i",
+            audio_file,
+            "-vf",
+            f"color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1",  # type: ignore
+            "-t",
+            str(duration),
+            "-y",
+            output_mp4.name,
+        ]
 
     subprocess.check_call(ffmpeg_cmd)
     return output_mp4.name
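For reviewers: the animated branch above works in two stages. `matplotlib.animation.FuncAnimation` perturbs each bar's height with per-frame random jitter and the `pillow` writer serializes the frames to an animated PNG at `tmp_img.name`; ffmpeg then muxes that animation with the audio track (scaling the background to even dimensions and overlaying the animation on it when `bg_image` is given, or concatenating video and audio streams otherwise). A standalone toy sketch of the matplotlib stage, assuming an output path `waveform.png` (not part of the patch):

```python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation

# Fake bar heights standing in for the audio samples
samples = np.random.rand(50)

fig = plt.figure(figsize=(5, 1), dpi=200, frameon=False)
fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
plt.axis("off")
bars = plt.bar(np.arange(50), samples * 2, bottom=-samples)

def jitter(_):
    # Rescale each bar around the baseline every frame, as the patch does
    for idx, bar in enumerate(bars):
        scale = np.random.uniform(0.8, 1.2)
        bar.set_height(samples[idx] * scale * 2)
        bar.set_y(-samples[idx] * scale)

anim = animation.FuncAnimation(fig, jitter, frames=30, interval=100)
# The pillow writer infers animated-PNG output from the .png suffix
anim.save("waveform.png", writer="pillow", fps=10)
```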