Blocks-Components

- move audio component
Ömer Faruk Özdemir 2022-03-15 12:41:15 +03:00
parent cf32e0097c
commit 9e45418227
3 changed files with 292 additions and 271 deletions

gradio/components.py

@@ -1230,7 +1230,7 @@ class Video(Component):
type: Optional[str] = None,
source: str = "upload",
label: Optional[str] = None,
optional: bool = False,
**kwargs
):
"""
Parameters:
@@ -1242,7 +1242,7 @@ class Video(Component):
"""
self.type = type
self.source = source
super().__init__(label=label, optional=optional)
super().__init__(label=label, **kwargs)
@classmethod
def get_shortcut_implementations(cls):
@@ -1301,9 +1301,6 @@ class Video(Component):
return self.save_flagged_file(
dir, label, None if data is None else data["data"], encryption_key
)
# TODO: Might need to converge these two
# Output save_flagged:
# return self.save_flagged_file(dir, label, data["data"], encryption_key)
def generate_sample(self):
return test_data.BASE64_VIDEO
@@ -1331,3 +1328,244 @@ class Video(Component):
def restore_flagged(self, dir, data, encryption_key):
return self.restore_flagged_file(dir, data, encryption_key)
class Audio(Component):
"""
Component accepts audio input files or creates an audio player that plays the output audio.
Input type: Union[Tuple[int, numpy.array], file-object, numpy.array]
Output type: Union[Tuple[int, numpy.array], str]
Demos: main_note, generate_tone, reverse_audio, spectogram
"""
def __init__(
self,
default="",
*,
source: str = "upload",
type: str = "numpy",
label: str = None,
**kwargs
):
"""
Parameters:
source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "filepath" returns the path directly.
label (str): component name in interface.
"""
self.source = source
requires_permissions = source == "microphone"
self.type = type
self.test_input = test_data.BASE64_AUDIO
self.interpret_by_tokens = True
super().__init__(label=label, requires_permissions=requires_permissions, **kwargs)
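# A minimal usage sketch of the unified Audio component, assuming the Interface
# API of this branch; `echo`, the labels, and the launch call are illustrative,
# not part of this diff.
import gradio as gr
from gradio.components import Audio

def echo(audio_path):
    return audio_path                                  # hand the recorded file straight back

demo = gr.Interface(
    fn=echo,
    inputs=Audio(source="microphone", type="filepath"),
    outputs=Audio(type="auto"),                        # "auto" accepts a filepath or a (rate, array) tuple
)
# demo.launch()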
def get_template_context(self):
return {
"source": self.source, # TODO: This did not exist in output template, careful here if an error arrives
**super().get_template_context(),
}
@classmethod
def get_shortcut_implementations(cls):
return {
"audio": {},
"microphone": {"source": "microphone"},
"mic": {"source": "microphone"},
}
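# Sketch of the shortcut strings above in use, assuming Interface still resolves
# them during the Blocks transition: "mic" behaves like Audio(source="microphone"),
# "audio" like Audio() with defaults.
import gradio as gr

demo = gr.Interface(fn=lambda a: a, inputs="mic", outputs="audio")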
def preprocess_example(self, x):
return {"name": x, "data": None, "is_example": True}
def preprocess(self, x: Dict[str, str] | None) -> Tuple[int, np.array] | str | None:
"""
Parameters:
x (Dict[name: str, data: str]): JSON object with filename as 'name' property and base64 data as 'data' property
Returns:
(Union[Tuple[int, numpy.array], str, numpy.array]): audio in requested format
"""
if x is None:
return x
file_name, file_data, is_example = (
x["name"],
x["data"],
x.get("is_example", False),
)
crop_min, crop_max = x.get("crop_min", 0), x.get("crop_max", 100)
if is_example:
file_obj = processing_utils.create_tmp_copy_of_file(file_name)
else:
file_obj = processing_utils.decode_base64_to_file(
file_data, file_path=file_name
)
if crop_min != 0 or crop_max != 100:
sample_rate, data = processing_utils.audio_from_file(
file_obj.name, crop_min=crop_min, crop_max=crop_max
)
processing_utils.audio_to_file(sample_rate, data, file_obj.name)
if self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
return file_obj
elif self.type == "filepath":
return file_obj.name
elif self.type == "numpy":
return processing_utils.audio_from_file(file_obj.name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
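# A sketch of the front-end payload preprocess() receives and what it returns for
# each `type`; the base64 body below is truncated and purely illustrative.
payload = {
    "name": "sample.wav",
    "data": "data:audio/wav;base64,UklGR...",   # truncated
    "is_example": False,
    "crop_min": 0,                               # optional crop bounds, in percent
    "crop_max": 100,
}
# type="numpy"    -> (sample_rate: int, data: numpy.ndarray)
# type="filepath" -> path of the decoded temporary copy
# type="file"     -> the temporary file object itself (deprecated above)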
def serialize(self, x, called_directly):
if x is None:
return None
if self.type == "filepath" or called_directly:
name = x
elif self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
name = x.name
elif self.type == "numpy":
file = tempfile.NamedTemporaryFile(delete=False)
name = file.name
processing_utils.audio_to_file(x[0], x[1], name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
file_data = processing_utils.encode_url_or_file_to_base64(name)
return {"name": name, "data": file_data, "is_example": False}
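# serialize() is the reverse direction: turning a local value back into the
# {"name", "data", "is_example"} payload. A sketch with a numpy value, assuming
# the Audio class above is importable from gradio.components:
import numpy as np
from gradio.components import Audio

silence = (44100, np.zeros((44100, 2), dtype=np.int16))   # one second of stereo silence
wire = Audio(type="numpy").serialize(silence, called_directly=False)
# wire -> {"name": "/tmp/...", "data": "data:audio/wav;base64,...", "is_example": False}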
def set_interpret_parameters(self, segments=8):
"""
Calculates interpretation score of audio subsections by splitting the audio into subsections, then using a "leave one out" method to calculate the score of each subsection by removing the subsection and measuring the delta of the output value.
Parameters:
segments (int): Number of interpretation segments to split audio into.
"""
self.interpretation_segments = segments
return self
def tokenize(self, x):
if x.get("is_example"):
sample_rate, data = processing_utils.audio_from_file(x["name"])
else:
file_obj = processing_utils.decode_base64_to_file(x["data"])
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
leave_one_out_sets = []
tokens = []
masks = []
duration = data.shape[0]
boundaries = np.linspace(0, duration, self.interpretation_segments + 1).tolist()
boundaries = [round(boundary) for boundary in boundaries]
for index in range(len(boundaries) - 1):
start, stop = boundaries[index], boundaries[index + 1]
masks.append((start, stop))
# Handle the leave one outs
leave_one_out_data = np.copy(data)
leave_one_out_data[start:stop] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, leave_one_out_data, file.name)
out_data = processing_utils.encode_file_to_base64(file.name)
leave_one_out_sets.append(out_data)
file.close()
os.unlink(file.name)
# Handle the tokens
token = np.copy(data)
token[0:start] = 0
token[stop:] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, token, file.name)
token_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
tokens.append(token_data)
tokens = [{"name": "token.wav", "data": token} for token in tokens]
leave_one_out_sets = [
{"name": "loo.wav", "data": loo_set} for loo_set in leave_one_out_sets
]
return tokens, leave_one_out_sets, masks
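# Worked example of the segmentation above: with interpretation_segments=8 and an
# 80_000-sample clip, the same linspace/round recipe yields 8 equal (start, stop) masks.
import numpy as np

segments, duration = 8, 80_000
boundaries = [round(b) for b in np.linspace(0, duration, segments + 1)]
masks = list(zip(boundaries[:-1], boundaries[1:]))
# masks == [(0, 10000), (10000, 20000), ..., (70000, 80000)]
# Each "token" keeps exactly one of these windows and zeroes the rest of the
# waveform; each leave-one-out clip zeroes exactly that window instead.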
def get_masked_inputs(self, tokens, binary_mask_matrix):
# create a "zero input" vector and get sample rate
x = tokens[0]["data"]
file_obj = processing_utils.decode_base64_to_file(x)
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
zero_input = np.zeros_like(data, dtype="int16")
# decode all of the tokens
token_data = []
for token in tokens:
file_obj = processing_utils.decode_base64_to_file(token["data"])
_, data = processing_utils.audio_from_file(file_obj.name)
token_data.append(data)
# construct the masked version
masked_inputs = []
for binary_mask_vector in binary_mask_matrix:
masked_input = np.copy(zero_input)
for t, b in zip(token_data, binary_mask_vector):
masked_input = masked_input + t * int(b)
file = tempfile.NamedTemporaryFile(delete=False)
processing_utils.audio_to_file(sample_rate, masked_input, file.name)
masked_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
masked_inputs.append(masked_data)
return masked_inputs
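# Sketch of the mask arithmetic above: because each token keeps a single window of
# the waveform (zeros elsewhere), summing the tokens selected by one binary mask
# row rebuilds a clip in which only those windows are audible. A toy case:
import numpy as np

token_1 = np.array([5, 0, 0, 0], dtype="int16")   # window 1 only (illustrative values)
token_2 = np.array([0, 7, 0, 0], dtype="int16")   # window 2 only
mask_row = [1, 0]                                  # keep window 1, silence window 2
masked = np.zeros_like(token_1)
for t, b in zip([token_1, token_2], mask_row):
    masked = masked + t * int(b)
# masked -> array([5, 0, 0, 0], dtype=int16)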
def get_interpretation_scores(self, x, neighbors, scores, masks=None, tokens=None):
"""
Returns:
(List[float]): Each value represents the interpretation score corresponding to an evenly spaced subsection of audio.
"""
return list(scores)
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (str) path to audio file
"""
return self.save_flagged_file(
dir, label, None if data is None else data["data"], encryption_key
)
def generate_sample(self):
return test_data.BASE64_AUDIO
def postprocess(self, y):
"""
Parameters:
y (Union[Tuple[int, numpy.array], str]): audio data in requested format
Returns:
(str): base64 url data
"""
if self.type in ["numpy", "file", "auto"]:
if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
sample_rate, data = y
file = tempfile.NamedTemporaryFile(
prefix="sample", suffix=".wav", delete=False
)
processing_utils.audio_to_file(sample_rate, data, file.name)
y = file.name
return processing_utils.encode_url_or_file_to_base64(y)
else:
raise ValueError(
"Unknown type: " + self.type + ". Please choose from: 'numpy', 'file'."
)
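# Output-side sketch: a function that returns (sample_rate, data) feeds postprocess(),
# which writes a temporary .wav and hands the front end a base64 data URL. A
# one-second 440 Hz tone as an illustrative value:
import numpy as np

sr = 44100
t = np.linspace(0, 1, sr, endpoint=False)
tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
# Audio(type="numpy").postprocess((sr, tone)) -> "data:audio/wav;base64,..."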
def deserialize(self, x):
return processing_utils.decode_base64_to_file(x).name
def restore_flagged(self, dir, data, encryption_key):
return self.restore_flagged_file(dir, data, encryption_key)["data"]

gradio/inputs.py

@@ -28,6 +28,7 @@ from gradio.components import (
Radio,
Slider,
Textbox,
Audio
)
if TYPE_CHECKING: # Only import for type checking (is False at runtime).
@@ -323,6 +324,34 @@ class Video(Component):
super().__init__(type=type, source=source, label=label, optional=optional)
class Audio(Audio):
"""
Component accepts audio input files.
Input type: Union[Tuple[int, numpy.array], file-object, numpy.array]
Demos: main_note, reverse_audio, spectogram
"""
def __init__(
self,
source: str = "upload",
type: str = "numpy",
label: str = None,
optional: bool = False,
):
"""
Parameters:
source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "filepath" returns the path directly.
label (str): component name in interface.
optional (bool): If True, the interface can be submitted with no uploaded audio, in which case the input value is None.
"""
warnings.warn(
"Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your components from gradio.components",
DeprecationWarning,
)
super().__init__(source=source, type=type, label=label, optional=optional)
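# Migration sketch for the deprecation above: the old gradio.inputs.Audio still
# works but warns at construction; building the unified component directly is the
# preferred path after this change (label text is illustrative).
from gradio.components import Audio

mic_input = Audio(source="microphone", type="filepath", label="Say something")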
class InputComponent(Component):
"""
Input Component. All input components subclass this.
@@ -406,217 +435,6 @@ class InputComponent(Component):
}
class Audio(InputComponent):
"""
Component accepts audio input files.
Input type: Union[Tuple[int, numpy.array], file-object, numpy.array]
Demos: main_note, reverse_audio, spectogram
"""
def __init__(
self,
source: str = "upload",
type: str = "numpy",
label: str = None,
optional: bool = False,
):
"""
Parameters:
source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "filepath" returns the path directly.
label (str): component name in interface.
optional (bool): If True, the interface can be submitted with no uploaded audio, in which case the input value is None.
"""
self.source = source
requires_permissions = source == "microphone"
self.type = type
self.test_input = test_data.BASE64_AUDIO
self.interpret_by_tokens = True
super().__init__(label, requires_permissions, optional=optional)
def get_template_context(self):
return {
"source": self.source,
"optional": self.optional,
**super().get_template_context(),
}
@classmethod
def get_shortcut_implementations(cls):
return {
"audio": {},
"microphone": {"source": "microphone"},
"mic": {"source": "microphone"},
}
def preprocess_example(self, x):
return {"name": x, "data": None, "is_example": True}
def preprocess(self, x: Dict[str, str] | None) -> Tuple[int, np.array] | str | None:
"""
Parameters:
x (Dict[name: str, data: str]): JSON object with filename as 'name' property and base64 data as 'data' property
Returns:
(Union[Tuple[int, numpy.array], str, numpy.array]): audio in requested format
"""
if x is None:
return x
file_name, file_data, is_example = (
x["name"],
x["data"],
x.get("is_example", False),
)
crop_min, crop_max = x.get("crop_min", 0), x.get("crop_max", 100)
if is_example:
file_obj = processing_utils.create_tmp_copy_of_file(file_name)
else:
file_obj = processing_utils.decode_base64_to_file(
file_data, file_path=file_name
)
if crop_min != 0 or crop_max != 100:
sample_rate, data = processing_utils.audio_from_file(
file_obj.name, crop_min=crop_min, crop_max=crop_max
)
processing_utils.audio_to_file(sample_rate, data, file_obj.name)
if self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
return file_obj
elif self.type == "filepath":
return file_obj.name
elif self.type == "numpy":
return processing_utils.audio_from_file(file_obj.name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
def serialize(self, x, called_directly):
if x is None:
return None
if self.type == "filepath" or called_directly:
name = x
elif self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
name = x.name
elif self.type == "numpy":
file = tempfile.NamedTemporaryFile(delete=False)
name = file.name
processing_utils.audio_to_file(x[0], x[1], name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
file_data = processing_utils.encode_url_or_file_to_base64(name)
return {"name": name, "data": file_data, "is_example": False}
def set_interpret_parameters(self, segments=8):
"""
Calculates interpretation score of audio subsections by splitting the audio into subsections, then using a "leave one out" method to calculate the score of each subsection by removing the subsection and measuring the delta of the output value.
Parameters:
segments (int): Number of interpretation segments to split audio into.
"""
self.interpretation_segments = segments
return self
def tokenize(self, x):
if x.get("is_example"):
sample_rate, data = processing_utils.audio_from_file(x["name"])
else:
file_obj = processing_utils.decode_base64_to_file(x["data"])
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
leave_one_out_sets = []
tokens = []
masks = []
duration = data.shape[0]
boundaries = np.linspace(0, duration, self.interpretation_segments + 1).tolist()
boundaries = [round(boundary) for boundary in boundaries]
for index in range(len(boundaries) - 1):
start, stop = boundaries[index], boundaries[index + 1]
masks.append((start, stop))
# Handle the leave one outs
leave_one_out_data = np.copy(data)
leave_one_out_data[start:stop] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, leave_one_out_data, file.name)
out_data = processing_utils.encode_file_to_base64(file.name)
leave_one_out_sets.append(out_data)
file.close()
os.unlink(file.name)
# Handle the tokens
token = np.copy(data)
token[0:start] = 0
token[stop:] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, token, file.name)
token_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
tokens.append(token_data)
tokens = [{"name": "token.wav", "data": token} for token in tokens]
leave_one_out_sets = [
{"name": "loo.wav", "data": loo_set} for loo_set in leave_one_out_sets
]
return tokens, leave_one_out_sets, masks
def get_masked_inputs(self, tokens, binary_mask_matrix):
# create a "zero input" vector and get sample rate
x = tokens[0]["data"]
file_obj = processing_utils.decode_base64_to_file(x)
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
zero_input = np.zeros_like(data, dtype="int16")
# decode all of the tokens
token_data = []
for token in tokens:
file_obj = processing_utils.decode_base64_to_file(token["data"])
_, data = processing_utils.audio_from_file(file_obj.name)
token_data.append(data)
# construct the masked version
masked_inputs = []
for binary_mask_vector in binary_mask_matrix:
masked_input = np.copy(zero_input)
for t, b in zip(token_data, binary_mask_vector):
masked_input = masked_input + t * int(b)
file = tempfile.NamedTemporaryFile(delete=False)
processing_utils.audio_to_file(sample_rate, masked_input, file.name)
masked_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
masked_inputs.append(masked_data)
return masked_inputs
def get_interpretation_scores(self, x, neighbors, scores, masks=None, tokens=None):
"""
Returns:
(List[float]): Each value represents the interpretation score corresponding to an evenly spaced subsection of audio.
"""
return list(scores)
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (str) path to audio file
"""
return self.save_flagged_file(
dir, label, None if data is None else data["data"], encryption_key
)
def generate_sample(self):
return test_data.BASE64_AUDIO
class File(InputComponent):
"""
Component accepts generic file uploads.

gradio/outputs.py

@@ -21,7 +21,7 @@ import PIL
from ffmpy import FFmpeg
from gradio import processing_utils
from gradio.components import Component, Image, Textbox, Video
from gradio.components import Component, Image, Textbox, Video, Audio
if TYPE_CHECKING: # Only import for type checking (is False at runtime).
from gradio import Interface
@@ -83,6 +83,25 @@ class Video(Video):
DeprecationWarning,
)
super().__init__(label=label, type=type)
class Audio(Audio):
"""
Creates an audio player that plays the output audio.
Output type: Union[Tuple[int, numpy.array], str]
Demos: generate_tone, reverse_audio
"""
def __init__(self, type: str = "auto", label: Optional[str] = None):
"""
Parameters:
type (str): Type of value to be passed to component. "numpy" returns a 2-set tuple with an integer sample_rate and the data as 16-bit int numpy.array of shape (samples, 2), "file" returns a temporary file path to the saved wav audio file, "auto" detects return type.
label (str): component name in interface.
"""
warnings.warn(
"Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components",
DeprecationWarning,
)
super().__init__(type=type, label=label)
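# Same pattern on the output side: gradio.outputs.Audio now only forwards to the
# unified component. A sketch of constructing it directly, with "auto" letting
# postprocess() accept either a filepath or a (sample_rate, array) tuple:
from gradio.components import Audio

audio_output = Audio(type="auto", label="Result")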
class OutputComponent(Component):
@@ -309,60 +328,6 @@ class HighlightedText(OutputComponent):
return json.loads(data)
class Audio(OutputComponent):
"""
Creates an audio player that plays the output audio.
Output type: Union[Tuple[int, numpy.array], str]
Demos: generate_tone, reverse_audio
"""
def __init__(self, type: str = "auto", label: Optional[str] = None):
"""
Parameters:
type (str): Type of value to be passed to component. "numpy" returns a 2-set tuple with an integer sample_rate and the data as 16-bit int numpy.array of shape (samples, 2), "file" returns a temporary file path to the saved wav audio file, "auto" detects return type.
label (str): component name in interface.
"""
self.type = type
super().__init__(label)
def get_template_context(self):
return {**super().get_template_context()}
@classmethod
def get_shortcut_implementations(cls):
return {
"audio": {},
}
def postprocess(self, y):
"""
Parameters:
y (Union[Tuple[int, numpy.array], str]): audio data in requested format
Returns:
(str): base64 url data
"""
if self.type in ["numpy", "file", "auto"]:
if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
sample_rate, data = y
file = tempfile.NamedTemporaryFile(
prefix="sample", suffix=".wav", delete=False
)
processing_utils.audio_to_file(sample_rate, data, file.name)
y = file.name
return processing_utils.encode_url_or_file_to_base64(y)
else:
raise ValueError(
"Unknown type: " + self.type + ". Please choose from: 'numpy', 'file'."
)
def deserialize(self, x):
return processing_utils.decode_base64_to_file(x).name
def save_flagged(self, dir, label, data, encryption_key):
return self.save_flagged_file(dir, label, data, encryption_key)
def restore_flagged(self, dir, data, encryption_key):
return self.restore_flagged_file(dir, data, encryption_key)["data"]
class JSON(OutputComponent):