Improve make_waveform (#4570)

* make_waveform: ensure input file exists

* make_waveform: avoid shell injection vulnerability (and as a side effect, support names with spaces)

* add test

* add test

---------

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
Aarni Koskela 2023-06-20 20:19:15 +03:00 committed by GitHub
parent 249f1865e2
commit c6c545cfe0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 4 deletions

View File

@ -10,6 +10,7 @@
- Restored missing imports in `gr.components` by [@abidlabs](https://github.com/abidlabs) in [PR 4566](https://github.com/gradio-app/gradio/pull/4566)
- Fix bug where `select` event was not triggered in `gr.Gallery` if `height` was set to be large with `allow_preview=False` by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4551](https://github.com/gradio-app/gradio/pull/4551)
- Fix bug where setting `visible=False` in `gr.Group` event did not work by [@abidlabs](https://github.com/abidlabs) in [PR 4567](https://github.com/gradio-app/gradio/pull/4567)
- Fix `make_waveform` to work with paths that contain spaces by [@akx](https://github.com/akx) in [PR 4570](https://github.com/gradio-app/gradio/pull/4570)
- Send captured data in `stop_recording` event for `gr.Audio` and `gr.Video` components by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4554](https://github.com/gradio-app/gradio/pull/4554)
## Other Changes:

View File

@ -7,6 +7,7 @@ import ast
import csv
import inspect
import os
import shutil
import subprocess
import tempfile
import threading
@ -722,7 +723,7 @@ def make_waveform(
bars_color: str | tuple[str, str] = ("#fbbf24", "#ea580c"),
bar_count: int = 50,
bar_width: float = 0.6,
):
) -> str:
"""
Generates a waveform video from an audio file. Useful for creating an easy to share audio visualization. The output should be passed into a `gr.Video` component.
Parameters:
@ -734,7 +735,7 @@ def make_waveform(
bar_count: Number of bars in waveform
bar_width: Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.
Returns:
A filepath to the output video.
A filepath to the output video in mp4 format.
"""
if isinstance(audio, str):
audio_file = audio
@ -743,6 +744,14 @@ def make_waveform(
tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
processing_utils.audio_to_file(audio[0], audio[1], tmp_wav.name, format="wav")
audio_file = tmp_wav.name
if not os.path.isfile(audio_file):
raise ValueError("Audio file not found.")
ffmpeg = shutil.which("ffmpeg")
if not ffmpeg:
raise RuntimeError("ffmpeg not found.")
duration = round(len(audio[1]) / audio[0], 4)
# Helper methods to create waveform
@ -828,9 +837,23 @@ def make_waveform(
# Convert waveform to video with ffmpeg
output_mp4 = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
ffmpeg_cmd = f"""ffmpeg -loop 1 -i {tmp_img.name} -i {audio_file} -vf "color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1" -t {duration} -y {output_mp4.name}"""
ffmpeg_cmd = [
ffmpeg,
"-loop",
"1",
"-i",
tmp_img.name,
"-i",
audio_file,
"-vf",
f"color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1",
"-t",
str(duration),
"-y",
output_mp4.name,
]
subprocess.call(ffmpeg_cmd, shell=True)
subprocess.call(ffmpeg_cmd)
return output_mp4.name

View File

@ -1,4 +1,5 @@
import os
import shutil
import tempfile
from pathlib import Path
from unittest.mock import patch
@ -397,3 +398,11 @@ async def test_examples_keep_all_suffixes(tmp_path):
prediction = await io.examples_handler.load_from_cache(1)
assert Path(prediction[0]["name"]).read_text() == "file 2"
assert prediction[0]["orig_name"] == "foo.bar.txt"
def test_make_waveform_with_spaces_in_filename():
    """Check that `gr.make_waveform` accepts an audio path containing a space.

    The sample WAV is copied into a scratch directory under the name
    "test audio.wav", and the returned video path is expected to carry
    the `.mp4` suffix.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        # The space in the file name is the whole point of this test.
        spaced_wav = os.path.join(scratch_dir, "test audio.wav")
        shutil.copy("test/test_files/audio_sample.wav", spaced_wav)

        video_path = gr.make_waveform(spaced_wav)

        assert video_path.endswith(".mp4")