mirror of
https://github.com/gradio-app/gradio.git
synced 2025-01-18 10:44:33 +08:00
Add sample rate config option to gr.Audio()
(#6826)
* Fix a bug that caused the sample rate of audio to be 8000 Hz after trimming and a bug that caused volume amplification and clipping each time trimming was performed
* Fix format
* add changeset
* add sample_rate param to waveform_options
* add changeset
* set WaveformOptions defaults
* formatting
* formatting
* add changeset
* audio
* changes
* add changeset
* tweak sample rate logic + docstring
* Tweak docstring
* formatting
* linting
* type tweak
* remove redundant None check
* tweak waveform lifecycle
* fix test

---------

Co-authored-by: tsukumi <tsukumijima@users.noreply.github.com>
Co-authored-by: Hannah <hannahblair@users.noreply.github.com>
Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
parent
44c53d9bde
commit
e8b2d8b2f8
6
.changeset/weak-streets-check.md
Normal file
6
.changeset/weak-streets-check.md
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
"@gradio/audio": minor
|
||||
"gradio": minor
|
||||
---
|
||||
|
||||
fix:Add sample rate config option to `gr.Audio()`
|
@ -30,6 +30,7 @@ class WaveformOptions:
|
||||
show_recording_waveform: Whether to show the waveform when recording audio. Defaults to True.
|
||||
show_controls: Whether to show the standard HTML audio player below the waveform when recording audio or playing recorded audio. Defaults to False.
|
||||
skip_length: The percentage (between 0 and 100) of the audio to skip when clicking on the skip forward / skip backward buttons. Defaults to 5.
|
||||
sample_rate: The output sample rate (in Hz) of the audio after editing. Defaults to 44100.
|
||||
"""
|
||||
|
||||
waveform_color: str = "#9ca3af"
|
||||
@ -37,6 +38,7 @@ class WaveformOptions:
|
||||
show_recording_waveform: bool = True
|
||||
show_controls: bool = False
|
||||
skip_length: int | float = 5
|
||||
sample_rate: int = 44100
|
||||
|
||||
|
||||
@document()
|
||||
@ -161,11 +163,10 @@ class Audio(
|
||||
self.editable = editable
|
||||
if waveform_options is None:
|
||||
self.waveform_options = WaveformOptions()
|
||||
self.waveform_options = (
|
||||
WaveformOptions(**waveform_options)
|
||||
if isinstance(waveform_options, dict)
|
||||
else waveform_options
|
||||
)
|
||||
elif isinstance(waveform_options, dict):
|
||||
self.waveform_options = WaveformOptions(**waveform_options)
|
||||
else:
|
||||
self.waveform_options = waveform_options
|
||||
self.min_length = min_length
|
||||
self.max_length = max_length
|
||||
super().__init__(
|
||||
|
@ -104,7 +104,8 @@
|
||||
dragToSeek: true,
|
||||
normalize: true,
|
||||
minPxPerSec: 20,
|
||||
mediaControls: waveform_options.show_controls
|
||||
mediaControls: waveform_options.show_controls,
|
||||
sampleRate: waveform_options.sample_rate || 44100
|
||||
};
|
||||
|
||||
const trim_region_settings = {
|
||||
|
@ -112,13 +112,16 @@
|
||||
mode = "";
|
||||
const decodedData = waveform?.getDecodedData();
|
||||
if (decodedData)
|
||||
await process_audio(decodedData, start, end).then(
|
||||
async (trimmedBlob: Uint8Array) => {
|
||||
await dispatch_blob([trimmedBlob], "change");
|
||||
waveform?.destroy();
|
||||
create_waveform();
|
||||
}
|
||||
);
|
||||
await process_audio(
|
||||
decodedData,
|
||||
start,
|
||||
end,
|
||||
waveform_settings.sampleRate
|
||||
).then(async (trimmedBlob: Uint8Array) => {
|
||||
await dispatch_blob([trimmedBlob], "change");
|
||||
waveform?.destroy();
|
||||
container.innerHTML = "";
|
||||
});
|
||||
dispatch("edit");
|
||||
};
|
||||
|
||||
|
@ -82,7 +82,9 @@
|
||||
timing = false;
|
||||
clearInterval(interval);
|
||||
const array_buffer = await blob.arrayBuffer();
|
||||
const context = new AudioContext();
|
||||
const context = new AudioContext({
|
||||
sampleRate: waveform_settings.sampleRate
|
||||
});
|
||||
const audio_buffer = await context.decodeAudioData(array_buffer);
|
||||
|
||||
if (audio_buffer)
|
||||
|
@ -47,7 +47,9 @@ export function audioBufferToWav(audioBuffer: AudioBuffer): Uint8Array {
|
||||
for (let i = 0; i < audioBuffer.numberOfChannels; i++) {
|
||||
const channel = audioBuffer.getChannelData(i);
|
||||
for (let j = 0; j < channel.length; j++) {
|
||||
view.setInt16(offset, channel[j] * 0xffff, true);
|
||||
// Scaling Float32 to Int16
|
||||
const sample = Math.max(-1, Math.min(1, channel[j]));
|
||||
view.setInt16(offset, sample * 0x7fff, true);
|
||||
offset += 2;
|
||||
}
|
||||
}
|
||||
|
@ -5,4 +5,5 @@ export type WaveformOptions = {
|
||||
skip_length?: number;
|
||||
trim_region_color?: string;
|
||||
show_recording_waveform?: boolean;
|
||||
sample_rate?: number;
|
||||
};
|
||||
|
@ -1,5 +1,4 @@
|
||||
import type WaveSurfer from "wavesurfer.js";
|
||||
import Regions from "wavesurfer.js/dist/plugins/regions.js";
|
||||
import { audioBufferToWav } from "./audioBufferToWav";
|
||||
|
||||
export interface LoadedParams {
|
||||
@ -18,11 +17,14 @@ export function blob_to_data_url(blob: Blob): Promise<string> {
|
||||
export const process_audio = async (
|
||||
audioBuffer: AudioBuffer,
|
||||
start?: number,
|
||||
end?: number
|
||||
end?: number,
|
||||
waveform_sample_rate?: number
|
||||
): Promise<Uint8Array> => {
|
||||
const audioContext = new AudioContext();
|
||||
const audioContext = new AudioContext({
|
||||
sampleRate: waveform_sample_rate || audioBuffer.sampleRate
|
||||
});
|
||||
const numberOfChannels = audioBuffer.numberOfChannels;
|
||||
const sampleRate = audioBuffer.sampleRate;
|
||||
const sampleRate = waveform_sample_rate || audioBuffer.sampleRate;
|
||||
|
||||
let trimmedLength = audioBuffer.length;
|
||||
let startOffset = 0;
|
||||
|
@ -836,7 +836,14 @@ class TestAudio:
|
||||
"streamable": False,
|
||||
"max_length": None,
|
||||
"min_length": None,
|
||||
"waveform_options": None,
|
||||
"waveform_options": {
|
||||
"sample_rate": 44100,
|
||||
"show_controls": False,
|
||||
"show_recording_waveform": True,
|
||||
"skip_length": 5,
|
||||
"waveform_color": "#9ca3af",
|
||||
"waveform_progress_color": "#f97316",
|
||||
},
|
||||
"_selectable": False,
|
||||
}
|
||||
assert audio_input.preprocess(None) is None
|
||||
@ -881,7 +888,14 @@ class TestAudio:
|
||||
"format": "wav",
|
||||
"streamable": False,
|
||||
"sources": ["upload", "microphone"],
|
||||
"waveform_options": None,
|
||||
"waveform_options": {
|
||||
"sample_rate": 44100,
|
||||
"show_controls": False,
|
||||
"show_recording_waveform": True,
|
||||
"skip_length": 5,
|
||||
"waveform_color": "#9ca3af",
|
||||
"waveform_progress_color": "#f97316",
|
||||
},
|
||||
"_selectable": False,
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user