diff --git a/build/lib/gradio/inputs.py b/build/lib/gradio/inputs.py
index b34f00a7f2..9e29b64ea5 100644
--- a/build/lib/gradio/inputs.py
+++ b/build/lib/gradio/inputs.py
@@ -146,7 +146,7 @@ class CheckboxGroup(InputComponent):
     Input type: Union[List[str], List[int]]
     """
 
-    def __init__(self, choices, type="choices", label=None):
+    def __init__(self, choices, type="value", label=None):
         '''
         Parameters:
         choices (List[str]): list of options to select from.
@@ -241,20 +241,18 @@ class Image(InputComponent):
     Input type: Union[numpy.array, PIL.Image, str]
     """
 
-    def __init__(self, shape=None, image_mode='RGB', source="upload", tools=["brush", "crop", "rotate", "undo", "filter"], type="numpy", label=None):
+    def __init__(self, shape=None, image_mode='RGB', source="upload", type="numpy", label=None):
         '''
         Parameters:
         shape (Tuple[int, int]): shape to crop and resize image to; if None, matches input image size.
         image_mode (str): "RGB" if color, or "L" if black and white.
         source (str): Source of image. "upload" creates a box where user can drop an image file, "webcam" allows user to take snapshot from their webcam, "canvas" defaults to a white image that can be edited and drawn upon with tools.
-        tools (List[str]): Tools available to user to edit images. "brush" allows user to draw on image, "crop" allows user to select portion of image, "rotate" allows user to rotate or flip image, "undo" allows user to revert changes, "filter" allows user to apply filters on image.
         type (str): Type of value to be returned by component. "numpy" returns a numpy array with shape (width, height, 3), "pil" returns a PIL image object, "file" returns a temporary file object whose path can be retrieved by file_obj.name.
         label (str): component name in interface.
         '''
         self.shape = shape
         self.image_mode = image_mode
         self.source = source
-        self.tools = tools
         self.type = type
         super().__init__(label)
 
@@ -263,15 +261,13 @@ class Image(InputComponent):
         return {
             "image": {},
             "webcam": {"source": "webcam"},
-            "sketchpad": {"image_mode": "L", "source": "canvas", "tools": ["brush"]},
-            "paint": {"source": "canvas", "tools": ["brush", "undo"]},
+            "sketchpad": {"image_mode": "L", "source": "canvas"},
         }
 
     def get_template_context(self):
         return {
             "image_mode": self.image_mode,
             "source": self.source,
-            "tools": self.tools,
             **super().get_template_context()
         }
 
@@ -292,7 +288,6 @@ class Image(InputComponent):
             im.save(file_obj.name)
             return file_obj
 
-
     def process_example(self, example):
         if os.path.exists(example):
             return processing_utils.encode_file_to_base64(example)
@@ -312,7 +307,7 @@ class Image(InputComponent):
 
 class Audio(InputComponent):
     """
-    Component accepts audio input files. Provides numpy array of shape `(samples, 2)` as an argument to the wrapped function.
+    Component accepts audio input files.
    Input type: Union[Tuple[int, numpy.array], str, numpy.array]
     """
 
@@ -327,6 +322,12 @@ class Audio(InputComponent):
         self.type = type
         super().__init__(label)
 
+    def get_template_context(self):
+        return {
+            "source": self.source,
+            **super().get_template_context()
+        }
+
     @classmethod
     def get_shortcut_implementations(cls):
         return {
@@ -434,7 +435,9 @@ class Dataframe(InputComponent):
         else:
             raise ValueError("Unknown type: " + self.type + ". Please choose from: 'pandas', 'numpy', 'array'.")
 
+#######################
 # DEPRECATED COMPONENTS
+#######################
 
 class Sketchpad(InputComponent):
     """
diff --git a/build/lib/gradio/outputs.py b/build/lib/gradio/outputs.py
index 5b5412efc3..071fc384e2 100644
--- a/build/lib/gradio/outputs.py
+++ b/build/lib/gradio/outputs.py
@@ -14,6 +14,7 @@ from numbers import Number
 import warnings
 import tempfile
 import scipy
+import os
 
 class OutputComponent(Component):
     """
@@ -314,14 +315,13 @@ class HTML(OutputComponent):
 
 class File(OutputComponent):
     '''
-    Used for file output. Expects a string path to a file if `return_path` is True.
-    Output type: Union[bytes, str]
+    Used for file output.
+    Output type: Union[file-like, str]
     '''
 
-    def __init__(self, type="file", label=None):
+    def __init__(self, label=None):
         '''
         Parameters:
-        type (str): Type of value to be passed to component. "file" expects a file path, "str" expects a string to be returned as a file, "binary" expects a bytes object to be returned as a file.
         label (str): component name in interface.
         '''
         super().__init__(label)
@@ -333,6 +333,13 @@ class File(OutputComponent):
             "file": {},
         }
 
+    def postprocess(self, y):
+        return {
+            "name": os.path.basename(y),
+            "size": os.path.getsize(y),
+            "data": processing_utils.encode_file_to_base64(y, header=False)
+        }
+
 
 class Dataframe(OutputComponent):
     """
diff --git a/build/lib/gradio/processing_utils.py b/build/lib/gradio/processing_utils.py
new file mode 100644
index 0000000000..6850c04aa3
--- /dev/null
+++ b/build/lib/gradio/processing_utils.py
@@ -0,0 +1,157 @@
+from PIL import Image, ImageOps
+from io import BytesIO
+import base64
+import tempfile
+import scipy.io.wavfile
+from scipy.fftpack import dct
+import numpy as np
+import skimage
+
+
+#########################
+# IMAGE PRE-PROCESSING
+#########################
+def decode_base64_to_image(encoding):
+    content = encoding.split(';')[1]
+    image_encoded = content.split(',')[1]
+    return Image.open(BytesIO(base64.b64decode(image_encoded)))
+
+
+def encode_file_to_base64(f, type="image", ext=None, header=True):
+    with open(f, "rb") as file:
+        encoded_string = base64.b64encode(file.read())
+        base64_str = str(encoded_string, 'utf-8')
+        if not header:
+            return base64_str
+        if ext is None:
+            ext = f.split(".")[-1]
+        return "data:" + type + "/" + ext + ";base64," + base64_str
+
+
+def encode_plot_to_base64(plt):
+    with BytesIO() as output_bytes:
+        plt.savefig(output_bytes, format="png")
+        bytes_data = output_bytes.getvalue()
+    plt.close()
+    base64_str = str(base64.b64encode(bytes_data), 'utf-8')
+    return "data:image/png;base64," + base64_str
+
+def encode_array_to_base64(image_array):
+    with BytesIO() as output_bytes:
+        PIL_image = Image.fromarray(skimage.img_as_ubyte(image_array))
+        PIL_image.save(output_bytes, 'PNG')
+        bytes_data = output_bytes.getvalue()
+    base64_str = str(base64.b64encode(bytes_data), 'utf-8')
+    return "data:image/png;base64," + base64_str
+
+
+def resize_and_crop(img, size, crop_type='center'):
+    """
+    Resize and crop an image to fit the specified size.
+    args:
+        size: `(width, height)` tuple.
+        crop_type: can be 'top' or 'center'; depending on this value, the
+            image will be cropped using the 'top/left' or the 'center' of
+            the image to fit the size.
+    raises:
+        ValueError: if an invalid `crop_type` is provided.
+ """ + if crop_type == "top": + center = (0, 0) + elif crop_type == "center": + center = (0.5, 0.5) + else: + raise ValueError + return ImageOps.fit(img, size, centering=center) + +################## +# OUTPUT +################## + +def decode_base64_to_binary(encoding): + inp = encoding.split(';')[1].split(',')[1] + return base64.b64decode(inp) + + +def decode_base64_to_file(encoding): + file_obj = tempfile.NamedTemporaryFile() + file_obj.write(decode_base64_to_binary(encoding)) + return file_obj + + +################## +# AUDIO FILES +################## + +def generate_mfcc_features_from_audio_file(wav_filename, + pre_emphasis=0.95, + frame_size= 0.025, + frame_stride=0.01, + NFFT=512, + nfilt=40, + num_ceps=12, + cep_lifter=22): + """ + Loads and preprocesses a .wav audio file into mfcc coefficients, the typical inputs to models. + Adapted from: https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html + :param wav_filename: string name of audio file to process. + :param pre_emphasis: a float factor, typically 0.95 or 0.97, which amplifies high frequencies. + :param frame_size: a float that is the length, in seconds, of time frame over which to take the fft. + :param frame_stride: a float that is the offset, in seconds, between consecutive time frames. + :param NFFT: The number of points in the short-time fft for each time frame. + :param nfilt: The number of filters on the Mel-scale to extract frequency bands. + :param num_ceps: the number of cepstral coefficients to retrain. + :param cep_lifter: the int factor, by which to de-emphasize higher-frequency. + :return: a numpy array of mfcc coefficients. + """ + sample_rate, signal = scipy.io.wavfile.read(wav_filename) + emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1]) + + frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate # Convert from seconds to samples + signal_length = len(emphasized_signal) + frame_length = int(round(frame_length)) + frame_step = int(round(frame_step)) + num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) # Make sure that we have at least 1 frame + + pad_signal_length = num_frames * frame_step + frame_length + z = np.zeros((pad_signal_length - signal_length)) + pad_signal = np.append(emphasized_signal, z) # Pad Signal to make sure that all frames have equal number of samples without truncating any samples from the original signal + + indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T + frames = pad_signal[indices.astype(np.int32, copy=False)] + + frames *= np.hamming(frame_length) + mag_frames = np.absolute(np.fft.rfft(frames, NFFT)) # Magnitude of the FFT + pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2)) # Power Spectrum + + low_freq_mel = 0 + high_freq_mel = (2595 * np.log10(1 + (sample_rate / 2) / 700)) # Convert Hz to Mel + mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2) # Equally spaced in Mel scale + hz_points = (700 * (10**(mel_points / 2595) - 1)) # Convert Mel to Hz + bin = np.floor((NFFT + 1) * hz_points / sample_rate) + + fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1)))) + for m in range(1, nfilt + 1): + f_m_minus = int(bin[m - 1]) # left + f_m = int(bin[m]) # center + f_m_plus = int(bin[m + 1]) # right + + for k in range(f_m_minus, f_m): + fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1]) + for k in range(f_m, f_m_plus): + fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m 
+ 1] - bin[m]) + filter_banks = np.dot(pow_frames, fbank.T) + filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks) # Numerical Stability + filter_banks = 20 * np.log10(filter_banks) # dB + + mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 0: (num_ceps + 1)] # Keep filters 1-13 by default. + (nframes, ncoeff) = mfcc.shape + n = np.arange(ncoeff) + lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter) + mfcc *= lift + + filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8) + mfcc -= (np.mean(mfcc, axis=0) + 1e-8) + return mfcc[np.newaxis, :, :] # Create a batch dimension. + + diff --git a/build/lib/gradio/static/css/gradio.css b/build/lib/gradio/static/css/gradio.css index f161ad94a5..0c8479bc13 100644 --- a/build/lib/gradio/static/css/gradio.css +++ b/build/lib/gradio/static/css/gradio.css @@ -27,10 +27,19 @@ flex-grow: 1; } .interface { - height: 360px; display: flex; flex-flow: column; } +.interface_box { + height: 360px; +} +.interface_mini_box { + height: 180px; +} +.interface_max_box { + overflow: auto; + max-height: 360px; +} .interface:not(*:last-child) { margin-bottom: 16px; } diff --git a/build/lib/gradio/static/css/interfaces/input/sketchpad.css b/build/lib/gradio/static/css/interfaces/input/sketchpad.css index de8d9064b6..8753456eae 100644 --- a/build/lib/gradio/static/css/interfaces/input/sketchpad.css +++ b/build/lib/gradio/static/css/interfaces/input/sketchpad.css @@ -37,10 +37,10 @@ .canvas_holder canvas { background-color: white; } -.canvas_holder, .saliency_holder { +.canvas_holder { text-align: center; width: 100%; - height: 100%; + height: calc(100% - 36px); } .saliency_holder { position: absolute; diff --git a/build/lib/gradio/static/css/interfaces/output/json.css b/build/lib/gradio/static/css/interfaces/output/json.css index cfaddb7191..e69de29bb2 100644 --- a/build/lib/gradio/static/css/interfaces/output/json.css +++ b/build/lib/gradio/static/css/interfaces/output/json.css @@ -1,15 +0,0 @@ -.output_text { - width: 100%; - font-size: 18px; - outline: none; - background-color: white; - border: solid 1px lightgray; - border-radius: 2px; - box-sizing: border-box; - padding: 4px; - min-height: 30px; - font-family: monospace; - white-space: pre-wrap; /* CSS3 */ - white-space: -moz-pre-wrap; /* Firefox */ - word-wrap: break-word; /* IE */ -} diff --git a/build/lib/gradio/static/css/interfaces/output/label.css b/build/lib/gradio/static/css/interfaces/output/label.css index 2fa65e1e42..3e3952e621 100644 --- a/build/lib/gradio/static/css/interfaces/output/label.css +++ b/build/lib/gradio/static/css/interfaces/output/label.css @@ -40,6 +40,7 @@ .output_class { font-weight: bold; font-size: 36px; + padding: 32px 16px;; flex-grow: 1; display: flex; align-items: center; diff --git a/build/lib/gradio/static/js/interfaces/input/audio.js b/build/lib/gradio/static/js/interfaces/input/audio.js index 3caae529e0..f84839b776 100644 --- a/build/lib/gradio/static/js/interfaces/input/audio.js +++ b/build/lib/gradio/static/js/interfaces/input/audio.js @@ -1,27 +1,40 @@ const audio_input = { html: ` -