Mirror of https://github.com/gradio-app/gradio.git (synced 2025-02-23 11:39:17 +08:00)
Improve audio streaming (#5238)
* changes
* changes
* add changeset
* add changeset
* changes
* Update silver-clowns-brush.md
* changes
* changes
* changes
* Update silver-clowns-brush.md
* change
* change

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: pngwn <hello@pngwn.io>
This commit is contained in:
parent a74605572d
commit de23e9f7d6

.changeset/silver-clowns-brush.md (new file, 10 lines added)
@@ -0,0 +1,10 @@
+---
+"gradio": patch
+---
+
+highlight:Improve audio streaming
+
+This PR improves audio streaming in two ways:
+
+1. Proper audio streaming with WAV files. We now do the proper processing to stream out WAV files as a single stream of audio, without any cracks at the seams.
+2. Audio streaming with bytes. Stream any audio type by yielding out bytes, and the bytes are streamed to the client as they are yielded.
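Point 2 above amounts to yielding raw bytes from an event handler into a streaming gr.Audio output. A minimal sketch of that pattern, using only options that appear in this PR's demo (gr.Audio with streaming=True and format="bytes"); the function name and chunk size here are illustrative, not part of the PR:

import gradio as gr

def stream_bytes_from_file(audio_file):
    # Yield the file in raw byte chunks; each chunk is appended to the
    # client-side audio stream as soon as it is yielded.
    chunk_size = 20_000  # illustrative chunk size in bytes
    with open(audio_file, "rb") as f:
        while chunk := f.read(chunk_size):
            yield chunk

demo = gr.Interface(
    fn=stream_bytes_from_file,
    inputs=gr.Audio(type="filepath", label="Audio file to stream"),
    outputs=gr.Audio(format="bytes", streaming=True, autoplay=True),
)

if __name__ == "__main__":
    demo.queue().launch()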
@@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: stream_audio_out"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from pydub import AudioSegment\n", "\n", "def stream_audio(audio_file):\n", " audio = AudioSegment.from_mp3(audio_file)\n", " i = 0\n", " chunk_size = 3000\n", " \n", " while chunk_size*i < len(audio):\n", " chunk = audio[chunk_size*i:chunk_size*(i+1)]\n", " i += 1\n", " if chunk:\n", " file = f\"/tmp/{i}.mp3\"\n", " chunk.export(file, format=\"mp3\") \n", " yield file\n", " \n", "demo = gr.Interface(\n", " fn=stream_audio,\n", " inputs=gr.Audio(type=\"filepath\", label=\"Audio file to stream\"),\n", " outputs=gr.Audio(autoplay=True, streaming=True),\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.queue().launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: stream_audio_out"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from pydub import AudioSegment\n", "from time import sleep\n", "\n", "with gr.Blocks() as demo:\n", " input_audio = gr.Audio(label=\"Input Audio\", type=\"filepath\", format=\"mp3\")\n", " with gr.Row():\n", " with gr.Column():\n", " stream_as_file_btn = gr.Button(\"Stream as File\")\n", " format = gr.Radio([\"wav\", \"mp3\"], value=\"wav\", label=\"Format\")\n", " stream_as_file_output = gr.Audio(streaming=True)\n", "\n", " def stream_file(audio_file, format):\n", " audio = AudioSegment.from_file(audio_file)\n", " i = 0\n", " chunk_size = 3000 \n", " while chunk_size*i < len(audio):\n", " chunk = audio[chunk_size*i:chunk_size*(i+1)]\n", " i += 1\n", " if chunk:\n", " file = f\"/tmp/{i}.{format}\"\n", " chunk.export(file, format=format)\n", " yield file\n", " sleep(1)\n", " \n", " stream_as_file_btn.click(stream_file, [input_audio, format], stream_as_file_output)\n", "\n", " with gr.Column():\n", " stream_as_bytes_btn = gr.Button(\"Stream as Bytes\")\n", " stream_as_bytes_output = gr.Audio(format=\"bytes\", streaming=True)\n", "\n", " def stream_bytes(audio_file):\n", " chunk_size = 20_000\n", " with open(audio_file, \"rb\") as f:\n", " while True:\n", " chunk = f.read(chunk_size)\n", " if chunk:\n", " yield chunk\n", " sleep(1)\n", " else:\n", " break\n", " \n", " stream_as_bytes_btn.click(stream_bytes, input_audio, stream_as_bytes_output)\n", "\n", "if __name__ == \"__main__\":\n", " demo.queue().launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -1,24 +1,46 @@
 import gradio as gr
 from pydub import AudioSegment
+from time import sleep
 
-def stream_audio(audio_file):
-    audio = AudioSegment.from_mp3(audio_file)
-    i = 0
-    chunk_size = 3000
-
-    while chunk_size*i < len(audio):
-        chunk = audio[chunk_size*i:chunk_size*(i+1)]
-        i += 1
-        if chunk:
-            file = f"/tmp/{i}.mp3"
-            chunk.export(file, format="mp3")
-            yield file
-
-demo = gr.Interface(
-    fn=stream_audio,
-    inputs=gr.Audio(type="filepath", label="Audio file to stream"),
-    outputs=gr.Audio(autoplay=True, streaming=True),
-)
+with gr.Blocks() as demo:
+    input_audio = gr.Audio(label="Input Audio", type="filepath", format="mp3")
+    with gr.Row():
+        with gr.Column():
+            stream_as_file_btn = gr.Button("Stream as File")
+            format = gr.Radio(["wav", "mp3"], value="wav", label="Format")
+            stream_as_file_output = gr.Audio(streaming=True)
+
+            def stream_file(audio_file, format):
+                audio = AudioSegment.from_file(audio_file)
+                i = 0
+                chunk_size = 3000
+                while chunk_size*i < len(audio):
+                    chunk = audio[chunk_size*i:chunk_size*(i+1)]
+                    i += 1
+                    if chunk:
+                        file = f"/tmp/{i}.{format}"
+                        chunk.export(file, format=format)
+                        yield file
+                        sleep(1)
+
+            stream_as_file_btn.click(stream_file, [input_audio, format], stream_as_file_output)
+
+        with gr.Column():
+            stream_as_bytes_btn = gr.Button("Stream as Bytes")
+            stream_as_bytes_output = gr.Audio(format="bytes", streaming=True)
+
+            def stream_bytes(audio_file):
+                chunk_size = 20_000
+                with open(audio_file, "rb") as f:
+                    while True:
+                        chunk = f.read(chunk_size)
+                        if chunk:
+                            yield chunk
+                            sleep(1)
+                        else:
+                            break
+
+            stream_as_bytes_btn.click(stream_bytes, input_audio, stream_as_bytes_output)
 
 if __name__ == "__main__":
     demo.queue().launch()
@@ -1350,20 +1350,23 @@ Received outputs:
     ) -> list:
         if session_hash is None or run is None:
             return data
+        if run not in self.pending_streams[session_hash]:
+            self.pending_streams[session_hash][run] = {}
+        stream_run = self.pending_streams[session_hash][run]
 
         from gradio.events import StreamableOutput
 
         for i, output_id in enumerate(self.dependencies[fn_index]["outputs"]):
             block = self.blocks[output_id]
             if isinstance(block, StreamableOutput) and block.streaming:
-                stream = block.stream_output(data[i])
-                if run not in self.pending_streams[session_hash]:
-                    self.pending_streams[session_hash][run] = defaultdict(list)
-                self.pending_streams[session_hash][run][output_id].append(stream)
-                if data[i]:
-                    data[i]["is_file"] = False
-                    data[i]["name"] = f"{session_hash}/{run}/{output_id}"
-                    data[i]["is_stream"] = True
+                first_chunk = output_id not in stream_run
+                binary_data, output_data = block.stream_output(
+                    data[i], f"{session_hash}/{run}/{output_id}", first_chunk
+                )
+                if first_chunk:
+                    stream_run[output_id] = []
+                self.pending_streams[session_hash][run][output_id].append(binary_data)
+                data[i] = output_data
         return data
 
     async def process_api(
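For context on the hunk above: pending_streams is keyed by session, then by run, then by output component, and each output accumulates an ordered list of binary chunks, while the value placed back into data[i] becomes a small stub that points the client at the stream instead of carrying audio. A rough sketch of the resulting shapes, with made-up keys for illustration (real keys are the session hash, run id, and block id):

# Illustrative shape of self.pending_streams after two chunks have been yielded.
pending_streams = {
    "sess_abc": {          # session_hash
        7: {               # run
            12: [b"<chunk 1 bytes>", b"<chunk 2 bytes>"],  # output_id -> chunks
        },
    },
}

# Illustrative stub returned by Audio.stream_output and placed into data[i];
# "name" is the f"{session_hash}/{run}/{output_id}" path built in the loop above,
# which the client presumably uses to fetch the queued chunks.
output_data = {
    "name": "sess_abc/7/12",
    "is_stream": True,
    "is_file": False,
}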
@@ -45,7 +45,7 @@ class Audio(
     """
     Creates an audio component that can be used to upload/record audio (as an input) or display audio (as an output).
     Preprocessing: passes the uploaded audio as a {Tuple(int, numpy.array)} corresponding to (sample rate in Hz, audio data as a 16-bit int array whose values range from -32768 to 32767), or as a {str} filepath, depending on `type`.
-    Postprocessing: expects a {Tuple(int, numpy.array)} corresponding to (sample rate in Hz, audio data as a float or int numpy array) or as a {str} or {pathlib.Path} filepath or URL to an audio file, which gets displayed
+    Postprocessing: expects a {Tuple(int, numpy.array)} corresponding to (sample rate in Hz, audio data as a float or int numpy array) or as a {str} or {pathlib.Path} filepath or URL to an audio file, or bytes for binary content (recommended for streaming)
     Examples-format: a {str} filepath to a local file that contains audio.
     Demos: main_note, generate_tone, reverse_audio
     Guides: real-time-speech-recognition
@@ -328,8 +328,8 @@ class Audio(
         return masked_inputs
 
     def postprocess(
-        self, y: tuple[int, np.ndarray] | str | Path | None
-    ) -> str | dict | None:
+        self, y: tuple[int, np.ndarray] | str | Path | bytes | None
+    ) -> str | dict | bytes | None:
         """
         Parameters:
             y: audio data in either of the following formats: a tuple of (sample_rate, data), or a string filepath or URL to an audio file, or None.
@@ -338,25 +338,22 @@
         """
         if y is None:
             return None
-        if isinstance(y, str) and client_utils.is_http_url_like(y):
+        if isinstance(y, bytes):
+            if self.streaming:
+                return y
+            file_path = self.file_bytes_to_file(y, "audio")
+        elif isinstance(y, str) and client_utils.is_http_url_like(y):
             return {"name": y, "data": None, "is_file": True}
-        if isinstance(y, tuple):
+        elif isinstance(y, tuple):
             sample_rate, data = y
             file_path = self.audio_to_temp_file(
                 data,
                 sample_rate,
-                format="mp3" if self.streaming else self.format,
+                format=self.format,
             )
             self.temp_files.add(file_path)
         else:
             if isinstance(y, Path):
                 y = str(y)
-            if self.streaming and not y.endswith(".mp3"):
-                sample_rate, data = processing_utils.audio_from_file(y)
-                file_path = self.audio_to_temp_file(data, sample_rate, format="mp3")
-                self.temp_files.add(file_path)
-            else:
-                file_path = self.make_temp_copy_if_needed(y)
+            file_path = self.make_temp_copy_if_needed(y)
         return {
             "name": file_path,
             "data": None,
@@ -364,17 +361,37 @@
             "orig_name": Path(file_path).name,
         }
 
-    def stream_output(self, y):
+    def stream_output(self, y, output_id: str, first_chunk: bool):
+        output_file = {
+            "name": output_id,
+            "is_stream": True,
+            "is_file": False,
+        }
         if y is None:
-            return None
+            return None, output_file
+        if isinstance(y, bytes):
+            return y, output_file
         if client_utils.is_http_url_like(y["name"]):
             response = requests.get(y["name"])
-            bytes = response.content
+            binary_data = response.content
         else:
+            output_file["orig_name"] = y["orig_name"]
             file_path = y["name"]
+            is_wav = file_path.endswith(".wav")
             with open(file_path, "rb") as f:
-                bytes = f.read()
-        return bytes
+                binary_data = f.read()
+            if is_wav:
+                # strip length information from first chunk header, remove headers entirely from subsequent chunks
+                if first_chunk:
+                    binary_data = (
+                        binary_data[:4] + b"\xFF\xFF\xFF\xFF" + binary_data[8:]
+                    )
+                    binary_data = (
+                        binary_data[:40] + b"\xFF\xFF\xFF\xFF" + binary_data[44:]
+                    )
+                else:
+                    binary_data = binary_data[44:]
+        return binary_data, output_file
 
     def check_streamable(self):
         if self.source != "microphone":
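The WAV-specific branch above leans on the canonical 44-byte RIFF/PCM header: the little-endian uint32 at byte offset 4 stores the RIFF chunk size, the one at offset 40 stores the data chunk size, and raw samples start at offset 44. Overwriting both fields with 0xFFFFFFFF in the first chunk marks the length as unknown so playback does not stop at the first chunk's declared end, and later chunks drop the header entirely so the raw samples concatenate onto the already-playing stream. A small sketch of that manipulation, assuming plain PCM WAV files with the canonical header and no extra chunks (the helper names are illustrative, not part of the PR):

import struct

def describe_wav_header(path: str) -> None:
    # Print the two size fields that the streaming code patches.
    with open(path, "rb") as f:
        header = f.read(44)
    riff_size = struct.unpack("<I", header[4:8])[0]    # offset 4: RIFF chunk size
    data_size = struct.unpack("<I", header[40:44])[0]  # offset 40: data chunk size
    print(header[0:4], riff_size)    # b'RIFF', file size minus 8
    print(header[36:40], data_size)  # b'data', byte count of the samples

def patch_first_chunk(chunk: bytes) -> bytes:
    # Same transformation as the first_chunk branch above: blank out both
    # size fields so players treat the stream as having unknown length.
    chunk = chunk[:4] + b"\xFF\xFF\xFF\xFF" + chunk[8:]
    return chunk[:40] + b"\xFF\xFF\xFF\xFF" + chunk[44:]

def strip_header(chunk: bytes) -> bytes:
    # Same as the non-first-chunk branch: drop the 44-byte header so the raw
    # samples append cleanly onto the stream started by the first chunk.
    return chunk[44:]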
@@ -328,8 +328,8 @@ class IOComponent(Component):
         processing_utils.audio_to_file(sample_rate, data, filename, format=format)
         return filename
 
-    def file_bytes_to_file(self, data: bytes, dir: str, file_name: str):
-        path = Path(dir) / self.hash_bytes(data)
+    def file_bytes_to_file(self, data: bytes, file_name: str):
+        path = Path(self.DEFAULT_TEMP_DIR) / self.hash_bytes(data)
         path.mkdir(exist_ok=True, parents=True)
         path = path / Path(file_name).name
         path.write_bytes(data)
@@ -190,9 +190,7 @@ class File(
             path = self.make_temp_copy_if_needed(file_name)
         else:
             data, _ = client_utils.decode_base64_to_binary(data)
-            path = self.file_bytes_to_file(
-                data, dir=self.DEFAULT_TEMP_DIR, file_name=file_name
-            )
+            path = self.file_bytes_to_file(data, file_name=file_name)
         path = str(utils.abspath(path))
         self.temp_files.add(path)
 
@@ -157,9 +157,7 @@ class UploadButton(Clickable, Uploadable, IOComponent, FileSerializable):
             path = self.make_temp_copy_if_needed(file_name)
         else:
             data, _ = client_utils.decode_base64_to_binary(data)
-            path = self.file_bytes_to_file(
-                data, dir=self.DEFAULT_TEMP_DIR, file_name=file_name
-            )
+            path = self.file_bytes_to_file(data, file_name=file_name)
         path = str(utils.abspath(path))
         self.temp_files.add(path)
         file = tempfile.NamedTemporaryFile(
@@ -274,7 +274,7 @@ class StreamableOutput(EventListener):
     def __init__(self):
         self.streaming: bool
 
-    def stream_output(self, y) -> bytes:
+    def stream_output(self, y, output_id: str, first_chunk: bool) -> tuple[bytes, Any]:
        raise NotImplementedError
 
 