Add sample rate config option to gr.Audio() (#6826)

* Fix a bug that caused the sample rate of audio to be 8000 Hz after trimming and a bug that caused volume amplification and clipping each time trimming was performed * Fix format * add changeset * add sample_rate param to waveform_options * add changeset * set WaveformOptions defaults * formatting * formatting * add changeset * audio * changes * add changeset * tweak sample rate logic + docstring * Tweak docstring * formatting * linting * type tweak * remove redundant None check * tweak waveform lifecycle * fix test --------- Co-authored-by: tsukumi <tsukumijima@users.noreply.github.com> Co-authored-by: Hannah <hannahblair@users.noreply.github.com> Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com> Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
2025-04-12 12:40:29 +08:00 · 2024-01-23 03:56:48 +09:00 · 2024-01-23 03:56:48 +09:00 · e8b2d8b2f8
commit e8b2d8b2f8
parent 44c53d9bde
9 changed files with 53 additions and 21 deletions
--- a/.changeset/weak-streets-check.md
+++ b/.changeset/weak-streets-check.md
@ -0,0 +1,6 @@
+---
+"@gradio/audio": minor
+"gradio": minor
+---
+
+fix:Add sample rate config option to `gr.Audio()`
--- a/gradio/components/audio.py
+++ b/gradio/components/audio.py
@ -30,6 +30,7 @@ class WaveformOptions:
        show_recording_waveform: Whether to show the waveform when recording audio. Defaults to True.
        show_controls: Whether to show the standard HTML audio player below the waveform when recording audio or playing recorded audio. Defaults to False.
        skip_length: The percentage (between 0 and 100) of the audio to skip when clicking on the skip forward / skip backward buttons. Defaults to 5.
+        sample_rate: The output sample rate (in Hz) of the audio after editing. Defaults to 44100.
    """

    waveform_color: str = "#9ca3af"
@ -37,6 +38,7 @@ class WaveformOptions:
    show_recording_waveform: bool = True
    show_controls: bool = False
    skip_length: int | float = 5
+    sample_rate: int = 44100


@document()
@ -161,11 +163,10 @@ class Audio(
        self.editable = editable
        if waveform_options is None:
            self.waveform_options = WaveformOptions()
-        self.waveform_options = (
-            WaveformOptions(**waveform_options)
-            if isinstance(waveform_options, dict)
-            else waveform_options
-        )
+        elif isinstance(waveform_options, dict):
+            self.waveform_options = WaveformOptions(**waveform_options)
+        else:
+            self.waveform_options = waveform_options
        self.min_length = min_length
        self.max_length = max_length
        super().__init__(
--- a/js/audio/Index.svelte
+++ b/js/audio/Index.svelte
@ -104,7 +104,8 @@
 		dragToSeek: true,
 		normalize: true,
 		minPxPerSec: 20,
-		mediaControls: waveform_options.show_controls
+		mediaControls: waveform_options.show_controls,
+		sampleRate: waveform_options.sample_rate || 44100
 	};

 	const trim_region_settings = {
--- a/js/audio/player/AudioPlayer.svelte
+++ b/js/audio/player/AudioPlayer.svelte
@ -112,13 +112,16 @@
 		mode = "";
 		const decodedData = waveform?.getDecodedData();
 		if (decodedData)
-			await process_audio(decodedData, start, end).then(
-				async (trimmedBlob: Uint8Array) => {
-					await dispatch_blob([trimmedBlob], "change");
-					waveform?.destroy();
-					create_waveform();
-				}
-			);
+			await process_audio(
+				decodedData,
+				start,
+				end,
+				waveform_settings.sampleRate
+			).then(async (trimmedBlob: Uint8Array) => {
+				await dispatch_blob([trimmedBlob], "change");
+				waveform?.destroy();
+				container.innerHTML = "";
+			});
 		dispatch("edit");
 	};

--- a/js/audio/recorder/AudioRecorder.svelte
+++ b/js/audio/recorder/AudioRecorder.svelte
@ -82,7 +82,9 @@
 		timing = false;
 		clearInterval(interval);
 		const array_buffer = await blob.arrayBuffer();
-		const context = new AudioContext();
+		const context = new AudioContext({
+			sampleRate: waveform_settings.sampleRate
+		});
 		const audio_buffer = await context.decodeAudioData(array_buffer);

 		if (audio_buffer)
--- a/js/audio/shared/audioBufferToWav.ts
+++ b/js/audio/shared/audioBufferToWav.ts
@ -47,7 +47,9 @@ export function audioBufferToWav(audioBuffer: AudioBuffer): Uint8Array {
 	for (let i = 0; i < audioBuffer.numberOfChannels; i++) {
 		const channel = audioBuffer.getChannelData(i);
 		for (let j = 0; j < channel.length; j++) {
-			view.setInt16(offset, channel[j] * 0xffff, true);
+			// Scaling Float32 to Int16
+			const sample = Math.max(-1, Math.min(1, channel[j]));
+			view.setInt16(offset, sample * 0x7fff, true);
 			offset += 2;
 		}
 	}
--- a/js/audio/shared/types.ts
+++ b/js/audio/shared/types.ts
@ -5,4 +5,5 @@ export type WaveformOptions = {
 	skip_length?: number;
 	trim_region_color?: string;
 	show_recording_waveform?: boolean;
+	sample_rate?: number;
 };
--- a/js/audio/shared/utils.ts
+++ b/js/audio/shared/utils.ts
@ -1,5 +1,4 @@
 import type WaveSurfer from "wavesurfer.js";
-import Regions from "wavesurfer.js/dist/plugins/regions.js";
 import { audioBufferToWav } from "./audioBufferToWav";

 export interface LoadedParams {
@ -18,11 +17,14 @@ export function blob_to_data_url(blob: Blob): Promise<string> {
 export const process_audio = async (
 	audioBuffer: AudioBuffer,
 	start?: number,
-	end?: number
+	end?: number,
+	waveform_sample_rate?: number
 ): Promise<Uint8Array> => {
-	const audioContext = new AudioContext();
+	const audioContext = new AudioContext({
+		sampleRate: waveform_sample_rate || audioBuffer.sampleRate
+	});
 	const numberOfChannels = audioBuffer.numberOfChannels;
-	const sampleRate = audioBuffer.sampleRate;
+	const sampleRate = waveform_sample_rate || audioBuffer.sampleRate;

 	let trimmedLength = audioBuffer.length;
 	let startOffset = 0;
--- a/test/test_components.py
+++ b/test/test_components.py
@ -836,7 +836,14 @@ class TestAudio:
            "streamable": False,
            "max_length": None,
            "min_length": None,
-            "waveform_options": None,
+            "waveform_options": {
+                "sample_rate": 44100,
+                "show_controls": False,
+                "show_recording_waveform": True,
+                "skip_length": 5,
+                "waveform_color": "#9ca3af",
+                "waveform_progress_color": "#f97316",
+            },
            "_selectable": False,
        }
        assert audio_input.preprocess(None) is None
@ -881,7 +888,14 @@ class TestAudio:
            "format": "wav",
            "streamable": False,
            "sources": ["upload", "microphone"],
-            "waveform_options": None,
+            "waveform_options": {
+                "sample_rate": 44100,
+                "show_controls": False,
+                "show_recording_waveform": True,
+                "skip_length": 5,
+                "waveform_color": "#9ca3af",
+                "waveform_progress_color": "#f97316",
+            },
            "_selectable": False,
        }