Adds support for audio samples in float64, float16, or uint16 formats (#2545)

* fixed audio
* added some unit tests
* formatting
* formatting
2024-11-27 01:40:20 +08:00 · 2022-10-26 16:24:46 -07:00 · 2022-10-26 16:24:46 -07:00 · 76512378a8
commit 76512378a8
parent 5c80b544ab
3 changed files with 50 additions and 16 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -48,9 +48,23 @@ In the example above, 16 requests could be processed in parallel (for a total in
 time of 5 seconds), instead of each request being processed separately (for a total
 inference time of 80 seconds).

-### Load Event
+### Upload Event

-`Video`, `Audio`, `Image`, and `File` components now support a `upload` event that is triggered when a user uploads a file into any of these components.
+`Video`, `Audio`, `Image`, and `File` components now support an `upload()` event that is triggered when a user uploads a file into any of these components.
+
+Example usage:
+
+```py
+import gradio as gr
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        input_video = gr.Video()
+        output_video = gr.Video()
+
+    # Clears the output video when an input video is uploaded
+    input_video.upload(lambda : None, None, output_video)
+```


 ## Bug Fixes:
@@ -79,6 +93,7 @@ No changes to highlight.
 * Changes websocket path for Spaces as it is no longer necessary to have a different URL for websocket connections on Spaces by [@abidlabs](https://github.com/abidlabs) in [PR 2528](https://github.com/gradio-app/gradio/pull/2528)
 * Clearer error message when events are defined outside of a Blocks scope, and a warning if you
 try to use `Series` or `Parallel` with `Blocks` by [@abidlabs](https://github.com/abidlabs) in [PR 2543](https://github.com/gradio-app/gradio/pull/2543)
+* Adds support for audio samples that are in `float64`, `float16`, or `uint16` formats by [@abidlabs](https://github.com/abidlabs) in [PR 2545](https://github.com/gradio-app/gradio/pull/2545)


 ## Contributors Shoutout:
--- a/gradio/processing_utils.py
+++ b/gradio/processing_utils.py
@@ -220,32 +220,31 @@ def audio_to_file(sample_rate, data, filename):

 def convert_to_16_bit_wav(data):
    # Based on: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.write.html
-    if data.dtype == np.float32:
-        warnings.warn(
-            "Audio data is not in 16-bit integer format."
-            "Trying to convert to 16-bit int format."
-        )
+    warning = "Trying to convert audio automatically from {} to 16-bit int format."
+    if data.dtype in [np.float64, np.float32, np.float16]:
+        warnings.warn(warning.format(data.dtype))
        data = data / np.abs(data).max()
        data = data * 32767
        data = data.astype(np.int16)
    elif data.dtype == np.int32:
-        warnings.warn(
-            "Audio data is not in 16-bit integer format."
-            "Trying to convert to 16-bit int format."
-        )
+        warnings.warn(warning.format(data.dtype))
        data = data / 65538
        data = data.astype(np.int16)
    elif data.dtype == np.int16:
        pass
+    elif data.dtype == np.uint16:
+        warnings.warn(warning.format(data.dtype))
+        data = data - 32768
+        data = data.astype(np.int16)
    elif data.dtype == np.uint8:
-        warnings.warn(
-            "Audio data is not in 16-bit integer format."
-            "Trying to convert to 16-bit int format."
-        )
+        warnings.warn(warning.format(data.dtype))
        data = data * 257 - 32768
        data = data.astype(np.int16)
    else:
-        raise ValueError("Audio data cannot be converted to " "16-bit int format.")
+        raise ValueError(
+            "Audio data cannot be converted automatically from "
+            f"{data.dtype} to 16-bit int format."
+        )
    return data


--- a/test/test_processing_utils.py
+++ b/test/test_processing_utils.py
@@ -97,6 +97,26 @@ class TestAudioPreprocessing(unittest.TestCase):
        self.assertTrue(os.path.exists("test_audio_to_file"))
        os.remove("test_audio_to_file")

+    def test_convert_to_16_bit_wav(self):
+        # Generate a random audio sample and set the amplitude
+        audio = np.random.randint(-100, 100, size=(100), dtype="int16")
+        audio[0] = -32767
+        audio[1] = 32766
+
+        audio_ = audio.astype("float64")
+        audio_ = gr.processing_utils.convert_to_16_bit_wav(audio_)
+        assert np.allclose(audio, audio_)
+        assert audio_.dtype == "int16"
+
+        audio_ = audio.astype("float32")
+        audio_ = gr.processing_utils.convert_to_16_bit_wav(audio_)
+        assert np.allclose(audio, audio_)
+        assert audio_.dtype == "int16"
+
+        audio_ = gr.processing_utils.convert_to_16_bit_wav(audio)
+        assert np.allclose(audio, audio_)
+        assert audio_.dtype == "int16"
+

 class TestOutputPreprocessing(unittest.TestCase):
    def test_decode_base64_to_binary(self):