mirror of
https://github.com/gradio-app/gradio.git
synced 2025-01-18 10:44:33 +08:00
Add sample rate config option to gr.Audio() (#6826)
* Fix a bug that caused the sample rate of audio to be 8000 Hz after trimming, and a bug that caused volume amplification and clipping each time trimming was performed
* Fix format
* add changeset
* add sample_rate param to waveform_options
* add changeset
* set WaveformOptions defaults
* formatting
* formatting
* add changeset
* audio
* changes
* add changeset
* tweak sample rate logic + docstring
* Tweak docstring
* formatting
* linting
* type tweak
* remove redundant None check
* tweak waveform lifecycle
* fix test

---------

Co-authored-by: tsukumi <tsukumijima@users.noreply.github.com>
Co-authored-by: Hannah <hannahblair@users.noreply.github.com>
Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
parent
44c53d9bde
commit
e8b2d8b2f8
6
.changeset/weak-streets-check.md
Normal file
6
.changeset/weak-streets-check.md
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
---
|
||||||
|
"@gradio/audio": minor
|
||||||
|
"gradio": minor
|
||||||
|
---
|
||||||
|
|
||||||
|
fix:Add sample rate config option to `gr.Audio()`
|
@ -30,6 +30,7 @@ class WaveformOptions:
|
|||||||
show_recording_waveform: Whether to show the waveform when recording audio. Defaults to True.
|
show_recording_waveform: Whether to show the waveform when recording audio. Defaults to True.
|
||||||
show_controls: Whether to show the standard HTML audio player below the waveform when recording audio or playing recorded audio. Defaults to False.
|
show_controls: Whether to show the standard HTML audio player below the waveform when recording audio or playing recorded audio. Defaults to False.
|
||||||
skip_length: The percentage (between 0 and 100) of the audio to skip when clicking on the skip forward / skip backward buttons. Defaults to 5.
|
skip_length: The percentage (between 0 and 100) of the audio to skip when clicking on the skip forward / skip backward buttons. Defaults to 5.
|
||||||
|
sample_rate: The output sample rate (in Hz) of the audio after editing. Defaults to 44100.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
waveform_color: str = "#9ca3af"
|
waveform_color: str = "#9ca3af"
|
||||||
@ -37,6 +38,7 @@ class WaveformOptions:
|
|||||||
show_recording_waveform: bool = True
|
show_recording_waveform: bool = True
|
||||||
show_controls: bool = False
|
show_controls: bool = False
|
||||||
skip_length: int | float = 5
|
skip_length: int | float = 5
|
||||||
|
sample_rate: int = 44100
|
||||||
|
|
||||||
|
|
||||||
@document()
|
@document()
|
||||||
@ -161,11 +163,10 @@ class Audio(
|
|||||||
self.editable = editable
|
self.editable = editable
|
||||||
if waveform_options is None:
|
if waveform_options is None:
|
||||||
self.waveform_options = WaveformOptions()
|
self.waveform_options = WaveformOptions()
|
||||||
self.waveform_options = (
|
elif isinstance(waveform_options, dict):
|
||||||
WaveformOptions(**waveform_options)
|
self.waveform_options = WaveformOptions(**waveform_options)
|
||||||
if isinstance(waveform_options, dict)
|
else:
|
||||||
else waveform_options
|
self.waveform_options = waveform_options
|
||||||
)
|
|
||||||
self.min_length = min_length
|
self.min_length = min_length
|
||||||
self.max_length = max_length
|
self.max_length = max_length
|
||||||
super().__init__(
|
super().__init__(
|
||||||
|
@ -104,7 +104,8 @@
|
|||||||
dragToSeek: true,
|
dragToSeek: true,
|
||||||
normalize: true,
|
normalize: true,
|
||||||
minPxPerSec: 20,
|
minPxPerSec: 20,
|
||||||
mediaControls: waveform_options.show_controls
|
mediaControls: waveform_options.show_controls,
|
||||||
|
sampleRate: waveform_options.sample_rate || 44100
|
||||||
};
|
};
|
||||||
|
|
||||||
const trim_region_settings = {
|
const trim_region_settings = {
|
||||||
|
@ -112,13 +112,16 @@
|
|||||||
mode = "";
|
mode = "";
|
||||||
const decodedData = waveform?.getDecodedData();
|
const decodedData = waveform?.getDecodedData();
|
||||||
if (decodedData)
|
if (decodedData)
|
||||||
await process_audio(decodedData, start, end).then(
|
await process_audio(
|
||||||
async (trimmedBlob: Uint8Array) => {
|
decodedData,
|
||||||
await dispatch_blob([trimmedBlob], "change");
|
start,
|
||||||
waveform?.destroy();
|
end,
|
||||||
create_waveform();
|
waveform_settings.sampleRate
|
||||||
}
|
).then(async (trimmedBlob: Uint8Array) => {
|
||||||
);
|
await dispatch_blob([trimmedBlob], "change");
|
||||||
|
waveform?.destroy();
|
||||||
|
container.innerHTML = "";
|
||||||
|
});
|
||||||
dispatch("edit");
|
dispatch("edit");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -82,7 +82,9 @@
|
|||||||
timing = false;
|
timing = false;
|
||||||
clearInterval(interval);
|
clearInterval(interval);
|
||||||
const array_buffer = await blob.arrayBuffer();
|
const array_buffer = await blob.arrayBuffer();
|
||||||
const context = new AudioContext();
|
const context = new AudioContext({
|
||||||
|
sampleRate: waveform_settings.sampleRate
|
||||||
|
});
|
||||||
const audio_buffer = await context.decodeAudioData(array_buffer);
|
const audio_buffer = await context.decodeAudioData(array_buffer);
|
||||||
|
|
||||||
if (audio_buffer)
|
if (audio_buffer)
|
||||||
|
@ -47,7 +47,9 @@ export function audioBufferToWav(audioBuffer: AudioBuffer): Uint8Array {
|
|||||||
for (let i = 0; i < audioBuffer.numberOfChannels; i++) {
|
for (let i = 0; i < audioBuffer.numberOfChannels; i++) {
|
||||||
const channel = audioBuffer.getChannelData(i);
|
const channel = audioBuffer.getChannelData(i);
|
||||||
for (let j = 0; j < channel.length; j++) {
|
for (let j = 0; j < channel.length; j++) {
|
||||||
view.setInt16(offset, channel[j] * 0xffff, true);
|
// Scaling Float32 to Int16
|
||||||
|
const sample = Math.max(-1, Math.min(1, channel[j]));
|
||||||
|
view.setInt16(offset, sample * 0x7fff, true);
|
||||||
offset += 2;
|
offset += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,4 +5,5 @@ export type WaveformOptions = {
|
|||||||
skip_length?: number;
|
skip_length?: number;
|
||||||
trim_region_color?: string;
|
trim_region_color?: string;
|
||||||
show_recording_waveform?: boolean;
|
show_recording_waveform?: boolean;
|
||||||
|
sample_rate?: number;
|
||||||
};
|
};
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import type WaveSurfer from "wavesurfer.js";
|
import type WaveSurfer from "wavesurfer.js";
|
||||||
import Regions from "wavesurfer.js/dist/plugins/regions.js";
|
|
||||||
import { audioBufferToWav } from "./audioBufferToWav";
|
import { audioBufferToWav } from "./audioBufferToWav";
|
||||||
|
|
||||||
export interface LoadedParams {
|
export interface LoadedParams {
|
||||||
@ -18,11 +17,14 @@ export function blob_to_data_url(blob: Blob): Promise<string> {
|
|||||||
export const process_audio = async (
|
export const process_audio = async (
|
||||||
audioBuffer: AudioBuffer,
|
audioBuffer: AudioBuffer,
|
||||||
start?: number,
|
start?: number,
|
||||||
end?: number
|
end?: number,
|
||||||
|
waveform_sample_rate?: number
|
||||||
): Promise<Uint8Array> => {
|
): Promise<Uint8Array> => {
|
||||||
const audioContext = new AudioContext();
|
const audioContext = new AudioContext({
|
||||||
|
sampleRate: waveform_sample_rate || audioBuffer.sampleRate
|
||||||
|
});
|
||||||
const numberOfChannels = audioBuffer.numberOfChannels;
|
const numberOfChannels = audioBuffer.numberOfChannels;
|
||||||
const sampleRate = audioBuffer.sampleRate;
|
const sampleRate = waveform_sample_rate || audioBuffer.sampleRate;
|
||||||
|
|
||||||
let trimmedLength = audioBuffer.length;
|
let trimmedLength = audioBuffer.length;
|
||||||
let startOffset = 0;
|
let startOffset = 0;
|
||||||
|
@ -836,7 +836,14 @@ class TestAudio:
|
|||||||
"streamable": False,
|
"streamable": False,
|
||||||
"max_length": None,
|
"max_length": None,
|
||||||
"min_length": None,
|
"min_length": None,
|
||||||
"waveform_options": None,
|
"waveform_options": {
|
||||||
|
"sample_rate": 44100,
|
||||||
|
"show_controls": False,
|
||||||
|
"show_recording_waveform": True,
|
||||||
|
"skip_length": 5,
|
||||||
|
"waveform_color": "#9ca3af",
|
||||||
|
"waveform_progress_color": "#f97316",
|
||||||
|
},
|
||||||
"_selectable": False,
|
"_selectable": False,
|
||||||
}
|
}
|
||||||
assert audio_input.preprocess(None) is None
|
assert audio_input.preprocess(None) is None
|
||||||
@ -881,7 +888,14 @@ class TestAudio:
|
|||||||
"format": "wav",
|
"format": "wav",
|
||||||
"streamable": False,
|
"streamable": False,
|
||||||
"sources": ["upload", "microphone"],
|
"sources": ["upload", "microphone"],
|
||||||
"waveform_options": None,
|
"waveform_options": {
|
||||||
|
"sample_rate": 44100,
|
||||||
|
"show_controls": False,
|
||||||
|
"show_recording_waveform": True,
|
||||||
|
"skip_length": 5,
|
||||||
|
"waveform_color": "#9ca3af",
|
||||||
|
"waveform_progress_color": "#f97316",
|
||||||
|
},
|
||||||
"_selectable": False,
|
"_selectable": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user