Blocks-Components

- move audio component
Ömer Faruk Özdemir 2022-03-15 12:41:15 +03:00
parent cf32e0097c
commit 9e45418227
3 changed files with 292 additions and 271 deletions

gradio/components.py

@@ -1230,7 +1230,7 @@ class Video(Component):
type: Optional[str] = None,
source: str = "upload",
label: Optional[str] = None,
optional: bool = False,
**kwargs
):
"""
Parameters:
@@ -1242,7 +1242,7 @@ class Video(Component):
"""
self.type = type
self.source = source
super().__init__(label=label, optional=optional)
super().__init__(label=label, **kwargs)
@classmethod
def get_shortcut_implementations(cls):
@@ -1301,9 +1301,6 @@ class Video(Component):
return self.save_flagged_file(
dir, label, None if data is None else data["data"], encryption_key
)
# TODO: Might need to converge these two
# Output save_flagged:
# return self.save_flagged_file(dir, label, data["data"], encryption_key)
def generate_sample(self):
return test_data.BASE64_VIDEO
@@ -1331,3 +1328,244 @@ class Video(Component):
def restore_flagged(self, dir, data, encryption_key):
return self.restore_flagged_file(dir, data, encryption_key)
class Audio(Component):
"""
Component accepts audio input files or creates an audio player that plays the output audio.
Input type: Union[Tuple[int, numpy.array], file-object, numpy.array]
Output type: Union[Tuple[int, numpy.array], str]
Demos: main_note, generate_tone, reverse_audio, spectogram
"""
def __init__(
self,
default="",
*,
source: str = "upload",
type: str = "numpy",
label: str = None,
**kwargs
):
"""
Parameters:
source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "filepath" returns the path directly.
label (str): component name in interface.
"""
self.source = source
requires_permissions = source == "microphone"
self.type = type
self.test_input = test_data.BASE64_AUDIO
self.interpret_by_tokens = True
super().__init__(label=label, requires_permissions=requires_permissions, **kwargs)
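# A minimal usage sketch of the unified Audio component, assuming the Interface
# API of this branch; `echo`, the labels, and the launch call are illustrative,
# not part of this diff.
import gradio as gr
from gradio.components import Audio

def echo(audio_path):
    return audio_path                                  # hand the recorded file straight back

demo = gr.Interface(
    fn=echo,
    inputs=Audio(source="microphone", type="filepath"),
    outputs=Audio(type="auto"),                        # "auto" accepts a filepath or a (rate, array) tuple
)
# demo.launch()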
def get_template_context(self):
return {
"source": self.source, # TODO: This did not exist in output template, careful here if an error arrives
**super().get_template_context(),
}
@classmethod
def get_shortcut_implementations(cls):
return {
"audio": {},
"microphone": {"source": "microphone"},
"mic": {"source": "microphone"},
}
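# Sketch of the shortcut strings above in use, assuming Interface still resolves
# them during the Blocks transition: "mic" behaves like Audio(source="microphone"),
# "audio" like Audio() with defaults.
import gradio as gr

demo = gr.Interface(fn=lambda a: a, inputs="mic", outputs="audio")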
def preprocess_example(self, x):
return {"name": x, "data": None, "is_example": True}
def preprocess(self, x: Dict[str, str] | None) -> Tuple[int, np.array] | str | None:
"""
Parameters:
x (Dict[name: str, data: str]): JSON object with filename as 'name' property and base64 data as 'data' property
Returns:
(Union[Tuple[int, numpy.array], str, numpy.array]): audio in requested format
"""
if x is None:
return x
file_name, file_data, is_example = (
x["name"],
x["data"],
x.get("is_example", False),
)
crop_min, crop_max = x.get("crop_min", 0), x.get("crop_max", 100)
if is_example:
file_obj = processing_utils.create_tmp_copy_of_file(file_name)
else:
file_obj = processing_utils.decode_base64_to_file(
file_data, file_path=file_name
)
if crop_min != 0 or crop_max != 100:
sample_rate, data = processing_utils.audio_from_file(
file_obj.name, crop_min=crop_min, crop_max=crop_max
)
processing_utils.audio_to_file(sample_rate, data, file_obj.name)
if self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
return file_obj
elif self.type == "filepath":
return file_obj.name
elif self.type == "numpy":
return processing_utils.audio_from_file(file_obj.name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
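# A sketch of the front-end payload preprocess() receives and what it returns for
# each `type`; the base64 body below is truncated and purely illustrative.
payload = {
    "name": "sample.wav",
    "data": "data:audio/wav;base64,UklGR...",   # truncated
    "is_example": False,
    "crop_min": 0,                               # optional crop bounds, in percent
    "crop_max": 100,
}
# type="numpy"    -> (sample_rate: int, data: numpy.ndarray)
# type="filepath" -> path of the decoded temporary copy
# type="file"     -> the temporary file object itself (deprecated above)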
def serialize(self, x, called_directly):
if x is None:
return None
if self.type == "filepath" or called_directly:
name = x
elif self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
name = x.name
elif self.type == "numpy":
file = tempfile.NamedTemporaryFile(delete=False)
name = file.name
processing_utils.audio_to_file(x[0], x[1], name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
file_data = processing_utils.encode_url_or_file_to_base64(name)
return {"name": name, "data": file_data, "is_example": False}
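# serialize() is the reverse direction: turning a local value back into the
# {"name", "data", "is_example"} payload. A sketch with a numpy value, assuming
# the Audio class above is importable from gradio.components:
import numpy as np
from gradio.components import Audio

silence = (44100, np.zeros((44100, 2), dtype=np.int16))   # one second of stereo silence
wire = Audio(type="numpy").serialize(silence, called_directly=False)
# wire -> {"name": "/tmp/...", "data": "data:audio/wav;base64,...", "is_example": False}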
def set_interpret_parameters(self, segments=8):
"""
Calculates interpretation score of audio subsections by splitting the audio into subsections, then using a "leave one out" method to calculate the score of each subsection by removing the subsection and measuring the delta of the output value.
Parameters:
segments (int): Number of interpretation segments to split audio into.
"""
self.interpretation_segments = segments
return self
def tokenize(self, x):
if x.get("is_example"):
sample_rate, data = processing_utils.audio_from_file(x["name"])
else:
file_obj = processing_utils.decode_base64_to_file(x["data"])
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
leave_one_out_sets = []
tokens = []
masks = []
duration = data.shape[0]
boundaries = np.linspace(0, duration, self.interpretation_segments + 1).tolist()
boundaries = [round(boundary) for boundary in boundaries]
for index in range(len(boundaries) - 1):
start, stop = boundaries[index], boundaries[index + 1]
masks.append((start, stop))
# Handle the leave one outs
leave_one_out_data = np.copy(data)
leave_one_out_data[start:stop] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, leave_one_out_data, file.name)
out_data = processing_utils.encode_file_to_base64(file.name)
leave_one_out_sets.append(out_data)
file.close()
os.unlink(file.name)
# Handle the tokens
token = np.copy(data)
token[0:start] = 0
token[stop:] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, token, file.name)
token_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
tokens.append(token_data)
tokens = [{"name": "token.wav", "data": token} for token in tokens]
leave_one_out_sets = [
{"name": "loo.wav", "data": loo_set} for loo_set in leave_one_out_sets
]
return tokens, leave_one_out_sets, masks
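# Worked example of the segmentation above: with interpretation_segments=8 and an
# 80_000-sample clip, the same linspace/round recipe yields 8 equal (start, stop) masks.
import numpy as np

segments, duration = 8, 80_000
boundaries = [round(b) for b in np.linspace(0, duration, segments + 1)]
masks = list(zip(boundaries[:-1], boundaries[1:]))
# masks == [(0, 10000), (10000, 20000), ..., (70000, 80000)]
# Each "token" keeps exactly one of these windows and zeroes the rest of the
# waveform; each leave-one-out clip zeroes exactly that window instead.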
def get_masked_inputs(self, tokens, binary_mask_matrix):
# create a "zero input" vector and get sample rate
x = tokens[0]["data"]
file_obj = processing_utils.decode_base64_to_file(x)
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
zero_input = np.zeros_like(data, dtype="int16")
# decode all of the tokens
token_data = []
for token in tokens:
file_obj = processing_utils.decode_base64_to_file(token["data"])
_, data = processing_utils.audio_from_file(file_obj.name)
token_data.append(data)
# construct the masked version
masked_inputs = []
for binary_mask_vector in binary_mask_matrix:
masked_input = np.copy(zero_input)
for t, b in zip(token_data, binary_mask_vector):
masked_input = masked_input + t * int(b)
file = tempfile.NamedTemporaryFile(delete=False)
processing_utils.audio_to_file(sample_rate, masked_input, file.name)
masked_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
masked_inputs.append(masked_data)
return masked_inputs
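# Sketch of the mask arithmetic above: because each token keeps a single window of
# the waveform (zeros elsewhere), summing the tokens selected by one binary mask
# row rebuilds a clip in which only those windows are audible. A toy case:
import numpy as np

token_1 = np.array([5, 0, 0, 0], dtype="int16")   # window 1 only (illustrative values)
token_2 = np.array([0, 7, 0, 0], dtype="int16")   # window 2 only
mask_row = [1, 0]                                  # keep window 1, silence window 2
masked = np.zeros_like(token_1)
for t, b in zip([token_1, token_2], mask_row):
    masked = masked + t * int(b)
# masked -> array([5, 0, 0, 0], dtype=int16)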
def get_interpretation_scores(self, x, neighbors, scores, masks=None, tokens=None):
"""
Returns:
(List[float]): Each value represents the interpretation score corresponding to an evenly spaced subsection of audio.
"""
return list(scores)
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (str) path to audio file
"""
return self.save_flagged_file(
dir, label, None if data is None else data["data"], encryption_key
)
def generate_sample(self):
return test_data.BASE64_AUDIO
def postprocess(self, y):
"""
Parameters:
y (Union[Tuple[int, numpy.array], str]): audio data in requested format
Returns:
(str): base64 url data
"""
if self.type in ["numpy", "file", "auto"]:
if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
sample_rate, data = y
file = tempfile.NamedTemporaryFile(
prefix="sample", suffix=".wav", delete=False
)
processing_utils.audio_to_file(sample_rate, data, file.name)
y = file.name
return processing_utils.encode_url_or_file_to_base64(y)
else:
raise ValueError(
"Unknown type: " + self.type + ". Please choose from: 'numpy', 'file'."
)
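# Output-side sketch: a function that returns (sample_rate, data) feeds postprocess(),
# which writes a temporary .wav and hands the front end a base64 data URL. A
# one-second 440 Hz tone as an illustrative value:
import numpy as np

sr = 44100
t = np.linspace(0, 1, sr, endpoint=False)
tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
# Audio(type="numpy").postprocess((sr, tone)) -> "data:audio/wav;base64,..."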
def deserialize(self, x):
return processing_utils.decode_base64_to_file(x).name
def restore_flagged(self, dir, data, encryption_key):
return self.restore_flagged_file(dir, data, encryption_key)["data"]

gradio/inputs.py

@@ -28,6 +28,7 @@ from gradio.components import (
Radio,
Slider,
Textbox,
Audio
)
if TYPE_CHECKING: # Only import for type checking (is False at runtime).
@@ -323,6 +324,34 @@ class Video(Component):
super().__init__(type=type, source=source, label=label, optional=optional)
class Audio(Audio):
"""
Component accepts audio input files.
Input type: Union[Tuple[int, numpy.array], file-object, numpy.array]
Demos: main_note, reverse_audio, spectogram
"""
def __init__(
self,
source: str = "upload",
type: str = "numpy",
label: str = None,
optional: bool = False,
):
"""
Parameters:
source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "filepath" returns the path directly.
label (str): component name in interface.
optional (bool): If True, the interface can be submitted with no uploaded audio, in which case the input value is None.
"""
warnings.warn(
"Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your components from gradio.components",
DeprecationWarning,
)
super().__init__(source=source, type=type, label=label, optional=optional)
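# Migration sketch for the deprecation above: the old gradio.inputs.Audio still
# works but warns at construction; building the unified component directly is the
# preferred path after this change (label text is illustrative).
from gradio.components import Audio

mic_input = Audio(source="microphone", type="filepath", label="Say something")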
class InputComponent(Component):
"""
Input Component. All input components subclass this.
@@ -406,217 +435,6 @@ class InputComponent(Component):
}
class Audio(InputComponent):
"""
Component accepts audio input files.
Input type: Union[Tuple[int, numpy.array], file-object, numpy.array]
Demos: main_note, reverse_audio, spectogram
"""
def __init__(
self,
source: str = "upload",
type: str = "numpy",
label: str = None,
optional: bool = False,
):
"""
Parameters:
source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "filepath" returns the path directly.
label (str): component name in interface.
optional (bool): If True, the interface can be submitted with no uploaded audio, in which case the input value is None.
"""
self.source = source
requires_permissions = source == "microphone"
self.type = type
self.test_input = test_data.BASE64_AUDIO
self.interpret_by_tokens = True
super().__init__(label, requires_permissions, optional=optional)
def get_template_context(self):
return {
"source": self.source,
"optional": self.optional,
**super().get_template_context(),
}
@classmethod
def get_shortcut_implementations(cls):
return {
"audio": {},
"microphone": {"source": "microphone"},
"mic": {"source": "microphone"},
}
def preprocess_example(self, x):
return {"name": x, "data": None, "is_example": True}
def preprocess(self, x: Dict[str, str] | None) -> Tuple[int, np.array] | str | None:
"""
Parameters:
x (Dict[name: str, data: str]): JSON object with filename as 'name' property and base64 data as 'data' property
Returns:
(Union[Tuple[int, numpy.array], str, numpy.array]): audio in requested format
"""
if x is None:
return x
file_name, file_data, is_example = (
x["name"],
x["data"],
x.get("is_example", False),
)
crop_min, crop_max = x.get("crop_min", 0), x.get("crop_max", 100)
if is_example:
file_obj = processing_utils.create_tmp_copy_of_file(file_name)
else:
file_obj = processing_utils.decode_base64_to_file(
file_data, file_path=file_name
)
if crop_min != 0 or crop_max != 100:
sample_rate, data = processing_utils.audio_from_file(
file_obj.name, crop_min=crop_min, crop_max=crop_max
)
processing_utils.audio_to_file(sample_rate, data, file_obj.name)
if self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
return file_obj
elif self.type == "filepath":
return file_obj.name
elif self.type == "numpy":
return processing_utils.audio_from_file(file_obj.name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
def serialize(self, x, called_directly):
if x is None:
return None
if self.type == "filepath" or called_directly:
name = x
elif self.type == "file":
warnings.warn(
"The 'file' type has been deprecated. Set parameter 'type' to 'filepath' instead.",
DeprecationWarning,
)
name = x.name
elif self.type == "numpy":
file = tempfile.NamedTemporaryFile(delete=False)
name = file.name
processing_utils.audio_to_file(x[0], x[1], name)
else:
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'numpy', 'filepath'."
)
file_data = processing_utils.encode_url_or_file_to_base64(name)
return {"name": name, "data": file_data, "is_example": False}
def set_interpret_parameters(self, segments=8):
"""
Calculates interpretation score of audio subsections by splitting the audio into subsections, then using a "leave one out" method to calculate the score of each subsection by removing the subsection and measuring the delta of the output value.
Parameters:
segments (int): Number of interpretation segments to split audio into.
"""
self.interpretation_segments = segments
return self
def tokenize(self, x):
if x.get("is_example"):
sample_rate, data = processing_utils.audio_from_file(x["name"])
else:
file_obj = processing_utils.decode_base64_to_file(x["data"])
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
leave_one_out_sets = []
tokens = []
masks = []
duration = data.shape[0]
boundaries = np.linspace(0, duration, self.interpretation_segments + 1).tolist()
boundaries = [round(boundary) for boundary in boundaries]
for index in range(len(boundaries) - 1):
start, stop = boundaries[index], boundaries[index + 1]
masks.append((start, stop))
# Handle the leave one outs
leave_one_out_data = np.copy(data)
leave_one_out_data[start:stop] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, leave_one_out_data, file.name)
out_data = processing_utils.encode_file_to_base64(file.name)
leave_one_out_sets.append(out_data)
file.close()
os.unlink(file.name)
# Handle the tokens
token = np.copy(data)
token[0:start] = 0
token[stop:] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
processing_utils.audio_to_file(sample_rate, token, file.name)
token_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
tokens.append(token_data)
tokens = [{"name": "token.wav", "data": token} for token in tokens]
leave_one_out_sets = [
{"name": "loo.wav", "data": loo_set} for loo_set in leave_one_out_sets
]
return tokens, leave_one_out_sets, masks
def get_masked_inputs(self, tokens, binary_mask_matrix):
# create a "zero input" vector and get sample rate
x = tokens[0]["data"]
file_obj = processing_utils.decode_base64_to_file(x)
sample_rate, data = processing_utils.audio_from_file(file_obj.name)
zero_input = np.zeros_like(data, dtype="int16")
# decode all of the tokens
token_data = []
for token in tokens:
file_obj = processing_utils.decode_base64_to_file(token["data"])
_, data = processing_utils.audio_from_file(file_obj.name)
token_data.append(data)
# construct the masked version
masked_inputs = []
for binary_mask_vector in binary_mask_matrix:
masked_input = np.copy(zero_input)
for t, b in zip(token_data, binary_mask_vector):
masked_input = masked_input + t * int(b)
file = tempfile.NamedTemporaryFile(delete=False)
processing_utils.audio_to_file(sample_rate, masked_input, file.name)
masked_data = processing_utils.encode_file_to_base64(file.name)
file.close()
os.unlink(file.name)
masked_inputs.append(masked_data)
return masked_inputs
def get_interpretation_scores(self, x, neighbors, scores, masks=None, tokens=None):
"""
Returns:
(List[float]): Each value represents the interpretation score corresponding to an evenly spaced subsection of audio.
"""
return list(scores)
def save_flagged(self, dir, label, data, encryption_key):
"""
Returns: (str) path to audio file
"""
return self.save_flagged_file(
dir, label, None if data is None else data["data"], encryption_key
)
def generate_sample(self):
return test_data.BASE64_AUDIO
class File(InputComponent):
"""
Component accepts generic file uploads.

gradio/outputs.py

@@ -21,7 +21,7 @@ import PIL
from ffmpy import FFmpeg
from gradio import processing_utils
from gradio.components import Component, Image, Textbox, Video
from gradio.components import Component, Image, Textbox, Video, Audio
if TYPE_CHECKING: # Only import for type checking (is False at runtime).
from gradio import Interface
@@ -83,6 +83,25 @@ class Video(Video):
DeprecationWarning,
)
super().__init__(label=label, type=type)
class Audio(Audio):
"""
Creates an audio player that plays the output audio.
Output type: Union[Tuple[int, numpy.array], str]
Demos: generate_tone, reverse_audio
"""
def __init__(self, type: str = "auto", label: Optional[str] = None):
"""
Parameters:
type (str): Type of value to be passed to component. "numpy" returns a 2-set tuple with an integer sample_rate and the data as 16-bit int numpy.array of shape (samples, 2), "file" returns a temporary file path to the saved wav audio file, "auto" detects return type.
label (str): component name in interface.
"""
warnings.warn(
"Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components",
DeprecationWarning,
)
super().__init__(type=type, label=label)
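# Same pattern on the output side: gradio.outputs.Audio now only forwards to the
# unified component. A sketch of constructing it directly, with "auto" letting
# postprocess() accept either a filepath or a (sample_rate, array) tuple:
from gradio.components import Audio

audio_output = Audio(type="auto", label="Result")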
class OutputComponent(Component):
@@ -309,60 +328,6 @@ class HighlightedText(OutputComponent):
return json.loads(data)
class Audio(OutputComponent):
"""
Creates an audio player that plays the output audio.
Output type: Union[Tuple[int, numpy.array], str]
Demos: generate_tone, reverse_audio
"""
def __init__(self, type: str = "auto", label: Optional[str] = None):
"""
Parameters:
type (str): Type of value to be passed to component. "numpy" returns a 2-set tuple with an integer sample_rate and the data as 16-bit int numpy.array of shape (samples, 2), "file" returns a temporary file path to the saved wav audio file, "auto" detects return type.
label (str): component name in interface.
"""
self.type = type
super().__init__(label)
def get_template_context(self):
return {**super().get_template_context()}
@classmethod
def get_shortcut_implementations(cls):
return {
"audio": {},
}
def postprocess(self, y):
"""
Parameters:
y (Union[Tuple[int, numpy.array], str]): audio data in requested format
Returns:
(str): base64 url data
"""
if self.type in ["numpy", "file", "auto"]:
if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
sample_rate, data = y
file = tempfile.NamedTemporaryFile(
prefix="sample", suffix=".wav", delete=False
)
processing_utils.audio_to_file(sample_rate, data, file.name)
y = file.name
return processing_utils.encode_url_or_file_to_base64(y)
else:
raise ValueError(
"Unknown type: " + self.type + ". Please choose from: 'numpy', 'file'."
)
def deserialize(self, x):
return processing_utils.decode_base64_to_file(x).name
def save_flagged(self, dir, label, data, encryption_key):
return self.save_flagged_file(dir, label, data, encryption_key)
def restore_flagged(self, dir, data, encryption_key):
return self.restore_flagged_file(dir, data, encryption_key)["data"]
class JSON(OutputComponent):