mirror of
https://github.com/gradio-app/gradio.git
synced 2024-11-27 01:40:20 +08:00
Adds support for audio samples in float64
, float16
, or uint16
formats (#2545)
* fixed audio * added some unit tests * formatting * formatting
This commit is contained in:
parent
5c80b544ab
commit
76512378a8
19
CHANGELOG.md
19
CHANGELOG.md
@ -48,9 +48,23 @@ In the example above, 16 requests could be processed in parallel (for a total in
|
||||
time of 5 seconds), instead of each request being processed separately (for a total
|
||||
inference time of 80 seconds).
|
||||
|
||||
### Load Event
|
||||
### Upload Event
|
||||
|
||||
`Video`, `Audio`, `Image`, and `File` components now support a `upload` event that is triggered when a user uploads a file into any of these components.
|
||||
`Video`, `Audio`, `Image`, and `File` components now support an `upload()` event that is triggered when a user uploads a file into any of these components.
|
||||
|
||||
Example usage:
|
||||
|
||||
```py
|
||||
import gradio as gr
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
with gr.Row():
|
||||
input_video = gr.Video()
|
||||
output_video = gr.Video()
|
||||
|
||||
# Clears the output video when an input video is uploaded
|
||||
input_video.upload(lambda : None, None, output_video)
|
||||
```
|
||||
|
||||
|
||||
## Bug Fixes:
|
||||
@ -79,6 +93,7 @@ No changes to highlight.
|
||||
* Changes websocket path for Spaces as it is no longer necessary to have a different URL for websocket connections on Spaces by [@abidlabs](https://github.com/abidlabs) in [PR 2528](https://github.com/gradio-app/gradio/pull/2528)
|
||||
* Clearer error message when events are defined outside of a Blocks scope, and a warning if you
|
||||
try to use `Series` or `Parallel` with `Blocks` by [@abidlabs](https://github.com/abidlabs) in [PR 2543](https://github.com/gradio-app/gradio/pull/2543)
|
||||
* Adds support for audio samples that are in `float64`, `float16`, or `uint16` formats by [@abidlabs](https://github.com/abidlabs) in [PR 2545](https://github.com/gradio-app/gradio/pull/2545)
|
||||
|
||||
|
||||
## Contributors Shoutout:
|
||||
|
@ -220,32 +220,31 @@ def audio_to_file(sample_rate, data, filename):
|
||||
|
||||
def convert_to_16_bit_wav(data):
    """Convert an array of audio samples to 16-bit signed integer format.

    Supported input dtypes are float64/float32/float16 (peak-normalized and
    scaled to +/-32767), int32 (scaled down by 2**16), uint16 and uint8
    (re-centered around zero), and int16 (returned unchanged). A warning is
    emitted whenever a conversion actually takes place.

    Parameters:
        data: numpy array of audio samples.
    Returns:
        A numpy array with dtype int16.
    Raises:
        ValueError: if the dtype of ``data`` is not one of the supported ones.
    """
    # Based on: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.write.html
    warning = "Trying to convert audio automatically from {} to 16-bit int format."
    if data.dtype in (np.float64, np.float32, np.float16):
        warnings.warn(warning.format(data.dtype))
        # Work in float64: float16 cannot represent 32767 (it rounds to
        # 32768), which would overflow int16 for peak-amplitude samples.
        samples = data.astype(np.float64)
        # Empty input has no maximum; treat it like silence.
        peak = np.abs(samples).max() if samples.size else 0.0
        # Only normalize when there is signal, so silent audio stays at
        # zero instead of becoming NaN through a division by zero.
        if peak > 0:
            samples = samples / peak
        data = (samples * 32767).astype(np.int16)
    elif data.dtype == np.int32:
        warnings.warn(warning.format(data.dtype))
        # int32 spans 2**32 values and int16 spans 2**16, so the scale
        # factor between the two ranges is 2**16 = 65536.
        data = data / 65536
        data = data.astype(np.int16)
    elif data.dtype == np.int16:
        pass
    elif data.dtype == np.uint16:
        warnings.warn(warning.format(data.dtype))
        # Widen before shifting so the result does not depend on unsigned
        # wrap-around.
        data = (data.astype(np.int32) - 32768).astype(np.int16)
    elif data.dtype == np.uint8:
        warnings.warn(warning.format(data.dtype))
        # Widen first: 257 does not fit in uint8, so multiplying the raw
        # array either relies on value-based promotion or raises, depending
        # on the NumPy version. 0..255 maps onto -32768..32767.
        data = (data.astype(np.int32) * 257 - 32768).astype(np.int16)
    else:
        raise ValueError(
            "Audio data cannot be converted automatically from "
            f"{data.dtype} to 16-bit int format."
        )
    return data
|
||||
|
||||
|
||||
|
@ -97,6 +97,26 @@ class TestAudioPreprocessing(unittest.TestCase):
|
||||
self.assertTrue(os.path.exists("test_audio_to_file"))
|
||||
os.remove("test_audio_to_file")
|
||||
|
||||
def test_convert_to_16_bit_wav(self):
    """Check that int16 audio survives conversion from float64, float32 and int16."""
    # Random low-amplitude samples, with two large values pinned in so the
    # peak magnitude of the signal is 32767.
    reference = np.random.randint(-100, 100, size=(100), dtype="int16")
    reference[0] = -32767
    reference[1] = 32766

    # Floating-point copies of the signal should convert back to the
    # original int16 samples.
    for source_dtype in ("float64", "float32"):
        converted = gr.processing_utils.convert_to_16_bit_wav(
            reference.astype(source_dtype)
        )
        assert np.allclose(reference, converted)
        assert converted.dtype == "int16"

    # Input that is already int16 should come back with the same values.
    unchanged = gr.processing_utils.convert_to_16_bit_wav(reference)
    assert np.allclose(reference, unchanged)
    assert unchanged.dtype == "int16"
|
||||
|
||||
|
||||
class TestOutputPreprocessing(unittest.TestCase):
|
||||
def test_decode_base64_to_binary(self):
|
||||
|
Loading…
Reference in New Issue
Block a user