From e84c5be03d73ea41e879208a64aeb22590234f95 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Wed, 2 Feb 2022 18:17:50 -0500
Subject: [PATCH 1/2] add audio-to-audio

---
 gradio/external.py    |  9 +++++++++
 test/test_external.py | 12 ++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/gradio/external.py b/gradio/external.py
index da8b5ca59f..1fb6f79500 100644
--- a/gradio/external.py
+++ b/gradio/external.py
@@ -44,6 +44,15 @@ def get_huggingface_interface(model_name, api_key, alias):
                 i["label"].split(", ")[0]: i["score"] for i in r.json()
             },
         },
+        "audio-to-audio": {
+            # example model: https://hf.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition
+            "inputs": inputs.Audio(label="Input", source="upload", type="filepath"),
+            "outputs": outputs.Audio(label="Output"),
+            "preprocess": lambda i: base64.b64decode(
+                i["data"].split(",")[1]
+            ),  # convert the base64 representation to binary
+            "postprocess": encode_to_base64,
+        },
         "automatic-speech-recognition": {
             # example model: https://hf.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
             "inputs": inputs.Audio(label="Input", source="upload", type="filepath"),
diff --git a/test/test_external.py b/test/test_external.py
index b47ddda599..aaf23babb3 100644
--- a/test/test_external.py
+++ b/test/test_external.py
@@ -16,6 +16,18 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
 
 class TestHuggingFaceModelAPI(unittest.TestCase):
+    def test_audio_to_audio(self):
+        model_type = "audio-to-audio"
+        interface_info = gr.external.get_huggingface_interface(
+            "facebook/xm_transformer_600m-es_en-multi_domain",
+            api_key=None,
+            alias=model_type,
+        )
+        self.assertEqual(interface_info["fn"].__name__, model_type)
+        self.assertIsInstance(interface_info["inputs"], gr.inputs.Audio)
+        self.assertIsInstance(interface_info["outputs"], gr.outputs.Audio)
+
+
     def test_question_answering(self):
         model_type = "question-answering"
         interface_info = gr.external.get_huggingface_interface(

From 8e632b84dc3bac9d829d3a0bc51f33df91780454 Mon Sep 17 00:00:00 2001
From: Abubakar Abid
Date: Thu, 3 Feb 2022 08:33:20 -0500
Subject: [PATCH 2/2] Fixed audio to audio & better error messaging if Space is
 not loading

---
 gradio/external.py    | 26 ++++++++++++++++++++++----
 test/test_external.py |  2 +-
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/gradio/external.py b/gradio/external.py
index 1fb6f79500..f49f321ab4 100644
--- a/gradio/external.py
+++ b/gradio/external.py
@@ -2,6 +2,7 @@ import base64
 import json
 import re
 import tempfile
+from pydantic import MissingError
 
 import requests
 
@@ -24,13 +25,27 @@ def get_huggingface_interface(model_name, api_key, alias):
     p = response.json().get("pipeline_tag")
 
     def encode_to_base64(r: requests.Response) -> str:
+        # Handles the different ways HF API returns the prediction
         base64_repr = base64.b64encode(r.content).decode("utf-8")
         data_prefix = ";base64,"
+        # Case 1: base64 representation already includes data prefix
         if data_prefix in base64_repr:
             return base64_repr
         else:
             content_type = r.headers.get("content-type")
-            return "data:{};base64,".format(content_type) + base64_repr
+            # Case 2: the data prefix is a key in the response
+            if content_type == "application/json":
+                try:
+                    content_type = r.json()[0]["content-type"]
+                    base64_repr = r.json()[0]["blob"]
+                except KeyError:
+                    raise ValueError("Cannot determine content type returned "
+                                     "by external API.")
+            # Case 3: the data prefix is included in the response headers
+            else:
+                pass
+            new_base64 = "data:{};base64,".format(content_type) + base64_repr
+            return new_base64
 
     pipelines = {
         "audio-classification": {
@@ -45,7 +60,7 @@ def get_huggingface_interface(model_name, api_key, alias):
             },
         },
         "audio-to-audio": {
-            # example model: https://hf.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition
+            # example model: https://hf.co/speechbrain/mtl-mimic-voicebank
             "inputs": inputs.Audio(label="Input", source="upload", type="filepath"),
             "outputs": outputs.Audio(label="Output"),
             "preprocess": lambda i: base64.b64decode(
@@ -192,7 +207,7 @@ def get_huggingface_interface(model_name, api_key, alias):
     }
 
     if p is None or not (p in pipelines):
-        raise ValueError("Unsupported pipeline type: {}".format(type(p)))
+        raise ValueError("Unsupported pipeline type: {}".format(p))
 
     pipeline = pipelines[p]
 
@@ -282,7 +297,10 @@ def get_spaces_interface(model_name, api_key, alias):
     result = re.search(
         "window.gradio_config = (.*?);", r.text
     )  # some basic regex to extract the config
-    config = json.loads(result.group(1))
+    try:
+        config = json.loads(result.group(1))
+    except AttributeError:
+        raise ValueError("Could not load the Space: {}".format(model_name))
     interface_info = interface_params_from_config(config)
 
     # The function should call the API with preprocessed data
diff --git a/test/test_external.py b/test/test_external.py
index aaf23babb3..353e4cdab7 100644
--- a/test/test_external.py
+++ b/test/test_external.py
@@ -19,7 +19,7 @@ class TestHuggingFaceModelAPI(unittest.TestCase):
     def test_audio_to_audio(self):
         model_type = "audio-to-audio"
         interface_info = gr.external.get_huggingface_interface(
-            "facebook/xm_transformer_600m-es_en-multi_domain",
+            "speechbrain/mtl-mimic-voicebank",
             api_key=None,
             alias=model_type,
         )
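
A minimal usage sketch, not taken from the patches above, showing how the new
"audio-to-audio" pipeline would be exercised from the caller's side. It assumes
that this Gradio version exposes gr.Interface.load() with the "huggingface/"
prefix (which dispatches to gr.external.get_huggingface_interface) and that the
speechbrain/mtl-mimic-voicebank model used in the updated test is served by the
hosted Inference API.

    import gradio as gr

    # "huggingface/<model_id>" is resolved through gradio/external.py; the
    # pipeline_tag reported by the Hub ("audio-to-audio") selects the new
    # pipeline entry added in this PR.
    iface = gr.Interface.load("huggingface/speechbrain/mtl-mimic-voicebank")

    # Launch a local demo: upload an audio file, receive processed audio back.
    iface.launch()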