Adds support for audio samples in float64, float16, or uint16 formats (#2545)

* fixed audio

* added some unit tests

* formatting

* formatting
This commit is contained in:
Abubakar Abid 2022-10-26 16:24:46 -07:00 committed by GitHub
parent 5c80b544ab
commit 76512378a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 16 deletions

View File

@ -48,9 +48,23 @@ In the example above, 16 requests could be processed in parallel (for a total in
time of 5 seconds), instead of each request being processed separately (for a total
inference time of 80 seconds).
### Load Event
### Upload Event
`Video`, `Audio`, `Image`, and `File` components now support a `upload` event that is triggered when a user uploads a file into any of these components.
`Video`, `Audio`, `Image`, and `File` components now support an `upload()` event that is triggered when a user uploads a file into any of these components.
Example usage:
```py
import gradio as gr
with gr.Blocks() as demo:
    with gr.Row():
        input_video = gr.Video()
        output_video = gr.Video()
    # Clears the output video when an input video is uploaded
    input_video.upload(lambda : None, None, output_video)
```
## Bug Fixes:
@ -79,6 +93,7 @@ No changes to highlight.
* Changes websocket path for Spaces as it is no longer necessary to have a different URL for websocket connections on Spaces by [@abidlabs](https://github.com/abidlabs) in [PR 2528](https://github.com/gradio-app/gradio/pull/2528)
* Clearer error message when events are defined outside of a Blocks scope, and a warning if you
try to use `Series` or `Parallel` with `Blocks` by [@abidlabs](https://github.com/abidlabs) in [PR 2543](https://github.com/gradio-app/gradio/pull/2543)
* Adds support for audio samples that are in `float64`, `float16`, or `uint16` formats by [@abidlabs](https://github.com/abidlabs) in [PR 2545](https://github.com/gradio-app/gradio/pull/2545)
## Contributors Shoutout:

View File

@ -220,32 +220,31 @@ def audio_to_file(sample_rate, data, filename):
def convert_to_16_bit_wav(data):
    """Convert an array of audio samples to 16-bit signed integer samples.

    Supported input dtypes are float16/float32/float64, int32, int16,
    uint16 and uint8. A warning naming the source dtype is emitted for
    every dtype that actually needs converting.

    Parameters:
        data: NumPy array of audio samples.
    Returns:
        A NumPy array of dtype int16. int16 input is returned unchanged.
    Raises:
        ValueError: if the dtype of ``data`` is not one of the supported ones.
    """
    # Based on: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.write.html
    warning = "Trying to convert audio automatically from {} to 16-bit int format."
    if data.dtype in [np.float64, np.float32, np.float16]:
        warnings.warn(warning.format(data.dtype))
        if data.dtype == np.float16:
            # 32767 is not representable in half precision (it rounds to
            # 32768), so scaling in float16 overflows int16 at the peak.
            data = data.astype(np.float32)
        # Peak-normalize, but leave silent or empty input alone instead of
        # dividing by zero / reducing over an empty array.
        peak = np.abs(data).max() if data.size else 0
        if peak > 0:
            data = data / peak
        data = data * 32767
        data = data.astype(np.int16)
    elif data.dtype == np.int32:
        warnings.warn(warning.format(data.dtype))
        # Drop the 16 low-order bits: 2**16 maps the int32 range onto int16.
        data = data / 65536
        data = data.astype(np.int16)
    elif data.dtype == np.int16:
        pass
    elif data.dtype == np.uint16:
        warnings.warn(warning.format(data.dtype))
        # Widen first so re-centering around zero does not depend on
        # unsigned wraparound.
        data = data.astype(np.int32) - 32768
        data = data.astype(np.int16)
    elif data.dtype == np.uint8:
        warnings.warn(warning.format(data.dtype))
        # Widen first: 257 does not fit in uint8, and 255 * 257 does not fit
        # in a signed 16-bit intermediate.
        data = data.astype(np.int32) * 257 - 32768
        data = data.astype(np.int16)
    else:
        raise ValueError(
            "Audio data cannot be converted automatically from "
            f"{data.dtype} to 16-bit int format."
        )
    return data

View File

@ -97,6 +97,26 @@ class TestAudioPreprocessing(unittest.TestCase):
self.assertTrue(os.path.exists("test_audio_to_file"))
os.remove("test_audio_to_file")
def test_convert_to_16_bit_wav(self):
    """Float and int16 samples should come back as matching int16 audio."""
    # Random low-amplitude samples, with two large values pinned at the
    # front so the largest magnitude in the signal is fixed at 32767.
    reference = np.random.randint(-100, 100, size=(100), dtype="int16")
    reference[0] = -32767
    reference[1] = 32766

    # The same samples expressed as floats must convert back to int16.
    for float_dtype in ("float64", "float32"):
        converted = gr.processing_utils.convert_to_16_bit_wav(
            reference.astype(float_dtype)
        )
        assert np.allclose(reference, converted)
        assert converted.dtype == "int16"

    # Samples that are already int16 must keep their values and dtype.
    unchanged = gr.processing_utils.convert_to_16_bit_wav(reference)
    assert np.allclose(reference, unchanged)
    assert unchanged.dtype == "int16"
class TestOutputPreprocessing(unittest.TestCase):
def test_decode_base64_to_binary(self):