Audio waveform (#2706)
* utils func
* changes
* changes
* changes
* notebook
* fix
* changes
* change
* changes
* changes
* notebook fix
* changes
* changes
* changes
* change
* fix
* revert backend changes, gr.make_waveform
* notebook
* Update CHANGELOG.md
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
* format fix
* changes
* changes
* revert pnpm
* change
* change
* changes
* changes
* changes

Co-authored-by: Ali Abid <aabid94@gmail.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
parent 074bf909ee
commit 714ab2cc09
CHANGELOG.md
@@ -1,7 +1,11 @@
 # Upcoming Release
 
 ## New Features:
 
-No changes to highlight.
+### Add Waveform Visual Support to Audio
+
+Adds a `gr.make_waveform()` function that creates a waveform video by combining an audio file and an optional background image by [@dawoodkhan82](http://github.com/dawoodkhan82) and [@aliabid94](http://github.com/aliabid94) in [PR 2706](https://github.com/gradio-app/gradio/pull/2706). Helpful for making audio outputs much more shareable.
+
+
 
 ## Bug Fixes:
 
 No changes to highlight.
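A minimal sketch of the new helper in use, assuming ffmpeg is available on PATH; the synthetic sine wave below is a stand-in for real audio, and the tuple input form follows the `make_waveform` signature added to `gradio/utils.py` in this commit:

```python
import numpy as np
import gradio as gr

# One second of a 440 Hz sine as int16 PCM; stands in for a real recording.
sr = 22050
t = np.linspace(0, 1, sr, endpoint=False)
wave = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

# Tuple input form per the signature below; requires ffmpeg on PATH.
video_path = gr.make_waveform((sr, wave), bars_color="#fbbf24")
print(video_path)  # path to an .mp4, ready to return from a gr.Video output
```

Per the docstring added below, passing `bars_color` a single hex string gives uniform bars, while a `(start_color, end_color)` tuple interpolates a gradient across the bars.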
demo/waveform/run.ipynb (new file, +1)
@@ -0,0 +1 @@
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: waveform"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "\n", "\n", "COLORS = [\n", " [\"#ff0000\", \"#00ff00\"],\n", " [\"#00ff00\", \"#0000ff\"],\n", " [\"#0000ff\", \"#ff0000\"],\n", "] \n", "\n", "def audio_waveform(audio, image):\n", " return (\n", " audio,\n", " gr.make_waveform(audio),\n", " gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),\n", " )\n", "\n", "\n", "gr.Interface(\n", " audio_waveform,\n", " inputs=[gr.Audio(), gr.Image(type=\"filepath\")],\n", " outputs=[\n", " gr.Audio(),\n", " gr.Video(),\n", " gr.Video(),\n", " ],\n", ").launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
demo/waveform/run.py (new file, +27)
@@ -0,0 +1,27 @@
+import gradio as gr
+import random
+
+
+COLORS = [
+    ["#ff0000", "#00ff00"],
+    ["#00ff00", "#0000ff"],
+    ["#0000ff", "#ff0000"],
+]
+
+def audio_waveform(audio, image):
+    return (
+        audio,
+        gr.make_waveform(audio),
+        gr.make_waveform(audio, bg_image=image, bars_color=random.choice(COLORS)),
+    )
+
+
+gr.Interface(
+    audio_waveform,
+    inputs=[gr.Audio(), gr.Image(type="filepath")],
+    outputs=[
+        gr.Audio(),
+        gr.Video(),
+        gr.Video(),
+    ],
+).launch()
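To sanity-check the demo logic without starting the server, the handler can be invoked directly; the file paths here are hypothetical stand-ins:

```python
# Hypothetical quick check, run in the scope of demo/waveform/run.py;
# assumes sample.wav and background.png exist and ffmpeg is installed.
audio_out, plain_video, styled_video = audio_waveform("sample.wav", "background.png")
print(plain_video, styled_video)  # two .mp4 paths produced by gr.make_waveform
```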
gradio/__init__.py
@@ -78,6 +78,7 @@ from gradio.templates import (
     TextArea,
     Webcam,
 )
+from gradio.utils import make_waveform
 
 current_pkg_version = pkgutil.get_data(__name__, "version.txt").decode("ascii").strip()
 __version__ = current_pkg_version
gradio/utils.py (130)
@@ -11,7 +11,9 @@ import os
 import pkgutil
 import random
 import re
+import subprocess
 import sys
+import tempfile
 import time
 import typing
 import warnings
@@ -36,12 +38,17 @@ from typing import (
 import aiohttp
 import fsspec.asyn
 import httpx
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import PIL
 import requests
 from pydantic import BaseModel, Json, parse_obj_as
 
 import gradio
+from gradio import processing_utils
 from gradio.context import Context
 from gradio.documentation import document, set_documentation_group
 
 if TYPE_CHECKING:  # Only import for type checking (is False at runtime).
     from gradio.blocks import BlockContext
@@ -808,6 +815,129 @@ class TupleNoPrint(tuple):
         return ""
 
 
+set_documentation_group("component-helpers")
+
+
+@document()
+def make_waveform(
+    audio: str | Tuple[int, np.ndarray],
+    *,
+    bg_color: str = "#f3f4f6",
+    bg_image: str = None,
+    fg_alpha: float = 0.75,
+    bars_color: str | Tuple[str, str] = ("#fbbf24", "#ea580c"),
+    bar_count: int = 50,
+    bar_width: float = 0.6,
+):
+    """
+    Generates a waveform video from an audio file. Useful for creating an easy to share audio visualization. The output should be passed into a `gr.Video` component.
+    Parameters:
+        audio: Audio file path or tuple of (sample_rate, audio_data)
+        bg_color: Background color of waveform (ignored if bg_image is provided)
+        bg_image: Background image of waveform
+        fg_alpha: Opacity of foreground waveform
+        bars_color: Color of waveform bars. Can be a single color or a tuple of (start_color, end_color) of gradient
+        bar_count: Number of bars in waveform
+        bar_width: Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.
+    Returns:
+        A filepath to the output video.
+    """
+    if isinstance(audio, str):
+        audio_file = audio
+        audio = processing_utils.audio_from_file(audio)
+    else:
+        tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+        processing_utils.audio_to_file(audio[0], audio[1], tmp_wav.name)
+        audio_file = tmp_wav.name
+    duration = round(len(audio[1]) / audio[0], 4)
+
+    # Helper methods to create waveform
+    def hex_to_RGB(hex_str):
+        return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]
+
+    def get_color_gradient(c1, c2, n):
+        assert n > 1
+        c1_rgb = np.array(hex_to_RGB(c1)) / 255
+        c2_rgb = np.array(hex_to_RGB(c2)) / 255
+        mix_pcts = [x / (n - 1) for x in range(n)]
+        rgb_colors = [((1 - mix) * c1_rgb + (mix * c2_rgb)) for mix in mix_pcts]
+        return [
+            "#" + "".join([format(int(round(val * 255)), "02x") for val in item])
+            for item in rgb_colors
+        ]
+
+    # Reshape audio to have a fixed number of bars
+    samples = audio[1]
+    if len(samples.shape) > 1:
+        samples = np.mean(samples, 1)
+    bins_to_pad = bar_count - (len(samples) % bar_count)
+    samples = np.pad(samples, [(0, bins_to_pad)])
+    samples = np.reshape(samples, (bar_count, -1))
+    samples = np.abs(samples)
+    samples = np.max(samples, 1)
+
+    matplotlib.use("Agg")
+    plt.clf()
+    # Plot waveform
+    color = (
+        bars_color
+        if isinstance(bars_color, str)
+        else get_color_gradient(bars_color[0], bars_color[1], bar_count)
+    )
+    plt.bar(
+        np.arange(0, bar_count),
+        samples * 2,
+        bottom=(-1 * samples),
+        width=bar_width,
+        color=color,
+    )
+    plt.axis("off")
+    plt.margins(x=0)
+    tmp_img = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    savefig_kwargs = {"bbox_inches": "tight"}
+    if bg_image is not None:
+        savefig_kwargs["transparent"] = True
+    else:
+        savefig_kwargs["facecolor"] = bg_color
+    plt.savefig(tmp_img.name, **savefig_kwargs)
+    waveform_img = PIL.Image.open(tmp_img.name)
+    waveform_img = waveform_img.resize((1000, 200))
+
+    # Composite waveform with background image
+    if bg_image is not None:
+        waveform_array = np.array(waveform_img)
+        waveform_array[:, :, 3] = waveform_array[:, :, 3] * fg_alpha
+        waveform_img = PIL.Image.fromarray(waveform_array)
+
+        bg_img = PIL.Image.open(bg_image)
+        waveform_width, waveform_height = waveform_img.size
+        bg_width, bg_height = bg_img.size
+        if waveform_width != bg_width:
+            bg_img = bg_img.resize(
+                (waveform_width, 2 * int(bg_height * waveform_width / bg_width / 2))
+            )
+            bg_width, bg_height = bg_img.size
+        composite_height = max(bg_height, waveform_height)
+        composite = PIL.Image.new("RGBA", (waveform_width, composite_height), "#FFFFFF")
+        composite.paste(bg_img, (0, composite_height - bg_height))
+        composite.paste(
+            waveform_img, (0, composite_height - waveform_height), waveform_img
+        )
+        composite.save(tmp_img.name)
+        img_width, img_height = composite.size
+    else:
+        img_width, img_height = waveform_img.size
+        waveform_img.save(tmp_img.name)
+
+    # Convert waveform to video with ffmpeg
+    output_mp4 = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+
+    ffmpeg_cmd = f"""ffmpeg -loop 1 -i {tmp_img.name} -i {audio_file} -vf "color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1" -t {duration} -y {output_mp4.name}"""
+
+    subprocess.call(ffmpeg_cmd, shell=True)
+    return output_mp4.name
+
+
 def tex2svg(formula, *args):
     FONTSIZE = 20
     DPI = 300
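The helper renders the bars with matplotlib into a PNG (optionally composited over the background image), then shells out to ffmpeg: the `overlay=-w+(w/{duration})*t` filter slides a translucent white rectangle (`color=c=#FFFFFF77`) across the still image for the clip's duration, producing the playback-progress effect. The two pure-math pieces, the hex-color gradient and the per-bar amplitude envelope, are easy to check in isolation; below is a standalone sketch with local names but the same logic as the diff above:

```python
import numpy as np

def hex_to_rgb(hex_str):
    # "#rrggbb" -> [r, g, b]; indices 1, 3, 5 skip the leading "#"
    return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]

def color_gradient(c1, c2, n):
    # n colors linearly interpolated between two hex colors (endpoints included)
    c1_rgb = np.array(hex_to_rgb(c1)) / 255
    c2_rgb = np.array(hex_to_rgb(c2)) / 255
    return [
        "#" + "".join(
            format(int(round(v * 255)), "02x")
            for v in (1 - m) * c1_rgb + m * c2_rgb
        )
        for m in (x / (n - 1) for x in range(n))
    ]

def bar_heights(samples, bar_count=50):
    # Collapse stereo to mono, zero-pad to a multiple of bar_count,
    # then keep the peak |amplitude| of each of the bar_count chunks.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    pad = bar_count - (len(samples) % bar_count)
    samples = np.pad(samples, [(0, pad)])
    return np.abs(samples.reshape(bar_count, -1)).max(axis=1)

print(color_gradient("#fbbf24", "#ea580c", 3))  # endpoints match the inputs
print(bar_heights(np.sin(np.linspace(0, 20, 1000)), bar_count=4))
```

One quirk carried over from the diff: when the sample count is already a multiple of `bar_count`, a full extra block of zeros is padded on. This is harmless because each bar takes the max over its chunk.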
test/test_blocks.py
@@ -323,7 +323,7 @@ class TestComponentsInBlocks:
         )
 
         output = demo.postprocess_data(
-            0, [gr.update(value=None) for _ in io_components], state=None
+            0, [gr.update(value=None) for _ in io_components], state={}
         )
         assert all(
             [o["value"] == c.postprocess(None) for o, c in zip(output, io_components)]
@@ -339,7 +339,7 @@ class TestComponentsInBlocks:
             outputs=text,
         )
 
-        output = demo.postprocess_data(0, gr.update(value="NO_VALUE"), state=None)
+        output = demo.postprocess_data(0, gr.update(value="NO_VALUE"), state={})
         assert output[0]["value"] == "NO_VALUE"
 
     def test_blocks_returns_correct_output_dict_single_key(self):
@@ -353,12 +353,10 @@ class TestComponentsInBlocks:
 
         update.click(update_values, inputs=[num], outputs=[num2])
 
-        output = demo.postprocess_data(
-            0, {num2: gr.Number.update(value=42)}, state=None
-        )
+        output = demo.postprocess_data(0, {num2: gr.Number.update(value=42)}, state={})
         assert output[0]["value"] == 42
 
-        output = demo.postprocess_data(0, {num2: 23}, state=None)
+        output = demo.postprocess_data(0, {num2: 23}, state={})
         assert output[0] == 23
 
     @pytest.mark.asyncio
test/test_components.py
@@ -1170,6 +1170,14 @@ class TestVideo:
         iface = gr.Interface(lambda x: x, "video", "playable_video")
         assert iface(x_video).endswith(".mp4")
 
+    def test_with_waveform(self):
+        """
+        Interface, process
+        """
+        x_audio = media_data.BASE64_AUDIO["name"]
+        iface = gr.Interface(lambda x: gr.make_waveform(x), "audio", "video")
+        assert iface(x_audio).endswith(".mp4")
+
     def test_video_postprocess_converts_to_playable_format(self):
         test_file_dir = pathlib.Path(pathlib.Path(__file__).parent, "test_files")
         # This file has a playable container but not playable codec