Add sample rate config option to gr.Audio() (#6826)

* Fix two trimming bugs: the audio sample rate dropped to 8000 Hz after trimming, and the volume was amplified (causing clipping) each time trimming was performed

* Fix format

* add changeset

* add sample_rate param to waveform_options

* add changeset

* set WaveformOptions defaults

* formatting

* formatting

* add changeset

* audio

* changes

* add changeset

* tweak sample rate logic + docstring

* Tweak docstring

* formatting

* linting

* type tweak

* remove redundant None check

* tweak waveform lifecycle

* fix test

---------

Co-authored-by: tsukumi <tsukumijima@users.noreply.github.com>
Co-authored-by: Hannah <hannahblair@users.noreply.github.com>
Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
tsukumi 2024-01-23 03:56:48 +09:00 committed by GitHub
parent 44c53d9bde
commit e8b2d8b2f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 53 additions and 21 deletions

View File

@ -0,0 +1,6 @@
---
"@gradio/audio": minor
"gradio": minor
---
fix:Add sample rate config option to `gr.Audio()`

View File

@ -30,6 +30,7 @@ class WaveformOptions:
show_recording_waveform: Whether to show the waveform when recording audio. Defaults to True. show_recording_waveform: Whether to show the waveform when recording audio. Defaults to True.
show_controls: Whether to show the standard HTML audio player below the waveform when recording audio or playing recorded audio. Defaults to False. show_controls: Whether to show the standard HTML audio player below the waveform when recording audio or playing recorded audio. Defaults to False.
skip_length: The percentage (between 0 and 100) of the audio to skip when clicking on the skip forward / skip backward buttons. Defaults to 5. skip_length: The percentage (between 0 and 100) of the audio to skip when clicking on the skip forward / skip backward buttons. Defaults to 5.
sample_rate: The output sample rate (in Hz) of the audio after editing. Defaults to 44100.
""" """
waveform_color: str = "#9ca3af" waveform_color: str = "#9ca3af"
@ -37,6 +38,7 @@ class WaveformOptions:
show_recording_waveform: bool = True show_recording_waveform: bool = True
show_controls: bool = False show_controls: bool = False
skip_length: int | float = 5 skip_length: int | float = 5
sample_rate: int = 44100
@document() @document()
@ -161,11 +163,10 @@ class Audio(
self.editable = editable self.editable = editable
if waveform_options is None: if waveform_options is None:
self.waveform_options = WaveformOptions() self.waveform_options = WaveformOptions()
self.waveform_options = ( elif isinstance(waveform_options, dict):
WaveformOptions(**waveform_options) self.waveform_options = WaveformOptions(**waveform_options)
if isinstance(waveform_options, dict) else:
else waveform_options self.waveform_options = waveform_options
)
self.min_length = min_length self.min_length = min_length
self.max_length = max_length self.max_length = max_length
super().__init__( super().__init__(

View File

@ -104,7 +104,8 @@
dragToSeek: true, dragToSeek: true,
normalize: true, normalize: true,
minPxPerSec: 20, minPxPerSec: 20,
mediaControls: waveform_options.show_controls mediaControls: waveform_options.show_controls,
sampleRate: waveform_options.sample_rate || 44100
}; };
const trim_region_settings = { const trim_region_settings = {

View File

@ -112,13 +112,16 @@
mode = ""; mode = "";
const decodedData = waveform?.getDecodedData(); const decodedData = waveform?.getDecodedData();
if (decodedData) if (decodedData)
await process_audio(decodedData, start, end).then( await process_audio(
async (trimmedBlob: Uint8Array) => { decodedData,
await dispatch_blob([trimmedBlob], "change"); start,
waveform?.destroy(); end,
create_waveform(); waveform_settings.sampleRate
} ).then(async (trimmedBlob: Uint8Array) => {
); await dispatch_blob([trimmedBlob], "change");
waveform?.destroy();
container.innerHTML = "";
});
dispatch("edit"); dispatch("edit");
}; };

View File

@ -82,7 +82,9 @@
timing = false; timing = false;
clearInterval(interval); clearInterval(interval);
const array_buffer = await blob.arrayBuffer(); const array_buffer = await blob.arrayBuffer();
const context = new AudioContext(); const context = new AudioContext({
sampleRate: waveform_settings.sampleRate
});
const audio_buffer = await context.decodeAudioData(array_buffer); const audio_buffer = await context.decodeAudioData(array_buffer);
if (audio_buffer) if (audio_buffer)

View File

@ -47,7 +47,9 @@ export function audioBufferToWav(audioBuffer: AudioBuffer): Uint8Array {
for (let i = 0; i < audioBuffer.numberOfChannels; i++) { for (let i = 0; i < audioBuffer.numberOfChannels; i++) {
const channel = audioBuffer.getChannelData(i); const channel = audioBuffer.getChannelData(i);
for (let j = 0; j < channel.length; j++) { for (let j = 0; j < channel.length; j++) {
view.setInt16(offset, channel[j] * 0xffff, true); // Scaling Float32 to Int16
const sample = Math.max(-1, Math.min(1, channel[j]));
view.setInt16(offset, sample * 0x7fff, true);
offset += 2; offset += 2;
} }
} }

View File

@ -5,4 +5,5 @@ export type WaveformOptions = {
skip_length?: number; skip_length?: number;
trim_region_color?: string; trim_region_color?: string;
show_recording_waveform?: boolean; show_recording_waveform?: boolean;
sample_rate?: number;
}; };

View File

@ -1,5 +1,4 @@
import type WaveSurfer from "wavesurfer.js"; import type WaveSurfer from "wavesurfer.js";
import Regions from "wavesurfer.js/dist/plugins/regions.js";
import { audioBufferToWav } from "./audioBufferToWav"; import { audioBufferToWav } from "./audioBufferToWav";
export interface LoadedParams { export interface LoadedParams {
@ -18,11 +17,14 @@ export function blob_to_data_url(blob: Blob): Promise<string> {
export const process_audio = async ( export const process_audio = async (
audioBuffer: AudioBuffer, audioBuffer: AudioBuffer,
start?: number, start?: number,
end?: number end?: number,
waveform_sample_rate?: number
): Promise<Uint8Array> => { ): Promise<Uint8Array> => {
const audioContext = new AudioContext(); const audioContext = new AudioContext({
sampleRate: waveform_sample_rate || audioBuffer.sampleRate
});
const numberOfChannels = audioBuffer.numberOfChannels; const numberOfChannels = audioBuffer.numberOfChannels;
const sampleRate = audioBuffer.sampleRate; const sampleRate = waveform_sample_rate || audioBuffer.sampleRate;
let trimmedLength = audioBuffer.length; let trimmedLength = audioBuffer.length;
let startOffset = 0; let startOffset = 0;

View File

@ -836,7 +836,14 @@ class TestAudio:
"streamable": False, "streamable": False,
"max_length": None, "max_length": None,
"min_length": None, "min_length": None,
"waveform_options": None, "waveform_options": {
"sample_rate": 44100,
"show_controls": False,
"show_recording_waveform": True,
"skip_length": 5,
"waveform_color": "#9ca3af",
"waveform_progress_color": "#f97316",
},
"_selectable": False, "_selectable": False,
} }
assert audio_input.preprocess(None) is None assert audio_input.preprocess(None) is None
@ -881,7 +888,14 @@ class TestAudio:
"format": "wav", "format": "wav",
"streamable": False, "streamable": False,
"sources": ["upload", "microphone"], "sources": ["upload", "microphone"],
"waveform_options": None, "waveform_options": {
"sample_rate": 44100,
"show_controls": False,
"show_recording_waveform": True,
"skip_length": 5,
"waveform_color": "#9ca3af",
"waveform_progress_color": "#f97316",
},
"_selectable": False, "_selectable": False,
} }