Support video example preview

Ali Abid 2021-10-05 19:23:23 +00:00 committed by Ali Abid
parent 760bf48b24
commit 9b968d3fb5
26 changed files with 179 additions and 190 deletions

BIN demo/files/video.avi (new file, binary file not shown)
BIN demo/files/video.mp4 (new file, binary file not shown)

View File

@@ -4,7 +4,11 @@ def video_flip(video):
     return video
 
 iface = gr.Interface(
-    video_flip, "video", "playable_video", theme="huggingface")
+    video_flip, "video", "playable_video", theme="huggingface",
+    examples=[
+        ["files/video.avi"],
+        ["files/video.mp4"]
+    ])
 
 if __name__ == "__main__":
     iface.launch()
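For context, the updated demo script now reads end to end roughly as follows. This is a sketch assembled from the hunk above, assuming the usual import gradio as gr header that the hunk does not show:

import gradio as gr

def video_flip(video):
    # Identity function: the demo simply previews and plays back the video.
    return video

iface = gr.Interface(
    video_flip, "video", "playable_video", theme="huggingface",
    # Example paths resolve relative to the demo directory; the frontend
    # renders playable examples as hover-to-play previews.
    examples=[
        ["files/video.avi"],
        ["files/video.mp4"]
    ])

if __name__ == "__main__":
    iface.launch()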

View File

@@ -11666,11 +11666,18 @@
       "integrity": "sha512-QBmA/G2y+IfeS4oktet3qRZ+P5kPhCKRXxXnQEudYqUaEioAU1/Lq2us3D/t1Jfo4hE9REQPrbB7K5sOczJVIw=="
     },
     "mime-types": {
-      "version": "2.1.30",
-      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.30.tgz",
-      "integrity": "sha512-crmjA4bLtR8m9qLpHvgxSChT+XoSlZi8J4n/aIdn3z92e/U47Z0V/yl+Wh9W046GgFVAmoNR/fmdbZYcSSIUeg==",
+      "version": "2.1.33",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.33.tgz",
+      "integrity": "sha512-plLElXp7pRDd0bNZHw+nMd52vRYjLwQjygaNg7ddJ2uJtTlmnTCjWuPKxVu6//AdaRuME84SvLW91sIkBqGT0g==",
       "requires": {
-        "mime-db": "1.47.0"
+        "mime-db": "1.50.0"
       },
+      "dependencies": {
+        "mime-db": {
+          "version": "1.50.0",
+          "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.50.0.tgz",
+          "integrity": "sha512-9tMZCDlYHqeERXEHO9f/hKfNXhre5dK2eE/krIvUjZbS2KPcqGDfNShIWS1uW9XOTKQKqK6qbeOci18rbfW77A=="
+        }
+      }
     },
     "mimic-fn": {

View File

@@ -15,6 +15,7 @@
     "fabric": "^4.5.0",
     "html2canvas-objectfit-fix": "^1.2.0",
     "jspreadsheet-ce": "^4.7.3",
+    "mime-types": "^2.1.33",
     "plotly.js": "^2.3.1",
     "prettier-eslint": "^13.0.0",
     "prettier-eslint-cli": "^5.0.1",

View File

@@ -12,7 +12,11 @@ export default class ComponentExample extends React.Component {
 
 export class FileComponentExample extends ComponentExample {
   static async preprocess(x, examples_dir) {
-    return examples_dir + "/" + x;
+    return {
+      "name": x,
+      "data": examples_dir + "/" + x,
+      "is_example": true
+    }
   }
 }
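The object returned by FileComponentExample.preprocess is the same payload shape that the Python preprocess methods later in this commit destructure. For reference, a sketch of the two payload variants as Python dicts (field values are illustrative):

# A cached example: "name" carries the example's file path (the backend
# copies it via create_tmp_copy_of_file), "data" the URL the browser loads
# the preview from, and "is_example" switches the backend away from
# base64 decoding.
example_payload = {
    "name": "files/video.mp4",
    "data": "examples/files/video.mp4",  # examples_dir + "/" + name
    "is_example": True,
}

# A user upload: "data" holds the base64 data URL produced by
# FileReader.readAsDataURL in the browser.
upload_payload = {
    "name": "video.mp4",
    "data": "data:video/mp4;base64,AAAA...",  # truncated for illustration
    "is_example": False,
}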

View File

@@ -1,6 +1,6 @@
 import React from "react";
 import BaseComponent from "../base_component";
-import { DataURLComponentExample } from "../component_example";
+import { FileComponentExample } from "../component_example";
 import Recorder from "recorder-js";
 import { getSaliencyColor } from "../../utils";
@@ -147,12 +147,12 @@ class AudioInput extends BaseComponent {
       let file = files[0];
       ReaderObj.readAsDataURL(file);
       ReaderObj.onloadend = function () {
-        component.props.handleChange({ "name": file.name, "data": this.result });
+        component.props.handleChange({ "name": file.name, "data": this.result, "is_example": false });
       };
     };
   }
 
-class AudioInputExample extends DataURLComponentExample {
+class AudioInputExample extends FileComponentExample {
   render() {
     return <div className="input_audio_example">{this.props.value}</div>;
   }

View File

@@ -98,7 +98,7 @@ class FileInput extends BaseComponent {
         name: file.name,
         size: file.size,
         data: reader.result,
-        is_local_example: false
+        is_example: false
       });
       if (this.file_data.length === file_count) {
         this.handleChange(this.file_data);

View File

@@ -1,5 +1,6 @@
 import React from "react";
 import BaseComponent from "../base_component";
+import { FileComponentExample } from "../component_example";
 import ComponentExample from "../component_example";
 import { isPlayable } from "../../utils";
@@ -25,8 +26,7 @@ class VideoInput extends BaseComponent {
       evt.stopPropagation();
     };
     if (this.props.value != null) {
-      if (isPlayable("video", this.props.value["data"].substring(
-        5, this.props.value["data"].indexOf(";")))) {
+      if (isPlayable("video", this.props.value["name"])) {
         return (
           <div className="input_video">
             <div className="video_preview_holder">
@@ -93,15 +93,36 @@ class VideoInput extends BaseComponent {
       let file = files[0];
       ReaderObj.readAsDataURL(file);
       ReaderObj.onloadend = function () {
-        component.props.handleChange({ "name": file.name, "data": this.result });
+        component.props.handleChange({ "name": file.name, "data": this.result, "is_example": false });
       };
     }
   }
 
-class VideoInputExample extends ComponentExample {
+class VideoInputExample extends FileComponentExample {
+  constructor(props) {
+    super(props);
+    this.video = React.createRef();
+  }
   render() {
-    return <span className="input_video_example">{this.props.value}</span>;
+    if (isPlayable("video", this.props.value)) {
+      return <div className="input_video_example">
+        <div className="video_holder">
+          <video
+            ref={this.video}
+            className="video_preview"
+            onMouseOver={() => { this.video.current.play() }}
+            onMouseOut={() => { this.video.current.pause() }}
+            preload="metadata"
+          >
+            <source src={this.props.examples_dir + "/" + this.props.value}></source>
+          </video>
+        </div>
+      </div>
+    } else {
+      return <div className="input_video_example">{this.props.value}</div>
+    }
   }
 }
 
 export { VideoInput, VideoInputExample };

View File

@@ -6,8 +6,7 @@ import { isPlayable } from "../../utils";
 
 class VideoOutput extends BaseComponent {
   render() {
     if (this.props.value) {
-      if (isPlayable("video", this.props.value["data"].substring(
-        5, this.props.value["data"].indexOf(";")))) {
+      if (isPlayable("video", this.props.value["name"])) {
         return <div className="output_video">
           <video controls src={this.props.value["data"]}></video>
         </div>

View File

@@ -428,6 +428,14 @@
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-80;
     .upload_zone {

View File

@@ -380,6 +380,14 @@ html {
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-48;
     .upload_zone {

View File

@@ -370,6 +370,14 @@
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-80;
     .upload_zone {

View File

@@ -383,6 +383,14 @@
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-80;
     .upload_zone {

View File

@@ -1,3 +1,5 @@
+var mime = require('mime-types')
+
 export function prettyBytes(bytes) {
   let units = ["B", "KB", "MB", "GB", "PB"];
   let i = 0;
@@ -162,12 +164,14 @@ export function CSVToArray(strData, strDelimiter) {
   return arrData;
 }
 
-export function isPlayable(data_type, mime_type) {
+export function isPlayable(data_type, file_name) {
   if (data_type == "audio") {
     let audio_element = new Audio();
+    let mime_type = mime.lookup(file_name)
     return audio_element.canPlayType(mime_type) != "";
   } else {
     let video_element = document.createElement("video");
+    let mime_type = mime.lookup(file_name)
    return video_element.canPlayType(mime_type) != "";
   }
 }
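isPlayable now takes a file name and derives the MIME type from its extension via the mime-types package, instead of parsing a data-URL prefix. As a rough Python analogue of the lookup half of that check (the browser's canPlayType probe has no Python equivalent), the standard-library mimetypes module performs the same extension-to-MIME mapping:

import mimetypes

def lookup_mime(file_name):
    # Maps e.g. "files/video.mp4" -> "video/mp4"; returns None when the
    # extension is unknown, much like mime.lookup returning false.
    mime_type, _encoding = mimetypes.guess_type(file_name)
    return mime_type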

View File

@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: gradio
Version: 2.3.7b0
Version: 2.3.7b1
Summary: Python library for easily interacting with trained machine learning models
Home-page: https://github.com/gradio-app/gradio-UI
Author: Abubakar Abid

View File

@@ -11,5 +11,5 @@ pandas
 paramiko
 pillow
 pycryptodome
+pydub
 requests
-scipy

View File

@@ -1,6 +1,6 @@
 {
   "files": {
-    "main.css": "/static/css/main.61fa3417.css",
+    "main.css": "/static/css/main.dcd72078.css",
     "main.js": "/static/bundle.js",
     "index.html": "/index.html",
     "static/bundle.js.LICENSE.txt": "/static/bundle.js.LICENSE.txt",
@@ -11,7 +11,7 @@
   },
   "entrypoints": [
     "static/bundle.css",
-    "static/css/main.61fa3417.css",
+    "static/css/main.dcd72078.css",
     "static/bundle.js"
   ]
 }

View File

@@ -8,4 +8,4 @@
         window.config = {{ config|tojson }};
       } catch (e) {
         window.config = {};
-      }</script><script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script><title>Gradio</title><link href="static/bundle.css" rel="stylesheet"><link href="static/css/main.61fa3417.css" rel="stylesheet"></head><body style="height:100%"><div id="root" style="height:100%"></div><script src="static/bundle.js"></script></body></html>
+      }</script><script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script><title>Gradio</title><link href="static/bundle.css" rel="stylesheet"><link href="static/css/main.dcd72078.css" rel="stylesheet"></head><body style="height:100%"><div id="root" style="height:100%"></div><script src="static/bundle.js"></script></body></html>

View File

@@ -9,7 +9,7 @@ import warnings
 from gradio.component import Component
 import numpy as np
 import PIL
-import scipy.io.wavfile
+from pydub import AudioSegment
 from gradio import processing_utils, test_data
 import pandas as pd
 from ffmpy import FFmpeg
@@ -877,9 +877,12 @@ class Video(InputComponent):
     def preprocess(self, x):
         if x is None:
             return x
-        file_name, file_data = x["name"], x["data"]
-        file = processing_utils.decode_base64_to_file(
-            file_data, filename=file_name)
+        file_name, file_data, is_example = x["name"], x["data"], x["is_example"]
+        if is_example:
+            file = processing_utils.create_tmp_copy_of_file(file_name)
+        else:
+            file = processing_utils.decode_base64_to_file(
+                file_data, file_path=file_name)
         file_name = file.name
         uploaded_format = file_name.split(".")[-1].lower()
         if self.type is not None and uploaded_format != self.type:
@@ -915,7 +918,7 @@ class Audio(InputComponent):
         """
         Parameters:
         source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
-        type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "mfcc" returns the mfcc coefficients of the input audio.
+        type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name.
         label (str): component name in interface.
         optional (bool): If True, the interface can be submitted with no uploaded audio, in which case the input value is None.
         """
@@ -923,7 +926,8 @@
         requires_permissions = source == "microphone"
         self.type = type
         self.optional = optional
-        self.test_input = {"name": "sample.wav", "data": test_data.BASE64_AUDIO}
+        self.test_input = {"name": "sample.wav",
+                           "data": test_data.BASE64_AUDIO, "is_example": False}
         self.interpret_by_tokens = True
         super().__init__(label, requires_permissions)
@@ -947,14 +951,17 @@
         """
         if x is None:
             return x
-        file_name, file_data = x["name"], x["data"]
-        file_obj = processing_utils.decode_base64_to_file(file_data, filename=file_name)
+        file_name, file_data, is_example = x["name"], x["data"], x["is_example"]
+        if is_example:
+            file_obj = processing_utils.create_tmp_copy_of_file(file_name)
+        else:
+            file_obj = processing_utils.decode_base64_to_file(
+                file_data, file_path=file_name)
         if self.type == "file":
             return file_obj
         elif self.type == "numpy":
-            return scipy.io.wavfile.read(file_obj.name)
-        elif self.type == "mfcc":
-            return processing_utils.generate_mfcc_features_from_audio_file(file_obj.name)
+            audio_segment = AudioSegment.from_file(file_obj.name)
+            return audio_segment.frame_rate, np.array(audio_segment.get_array_of_samples())
 
     def preprocess_example(self, x):
         return processing_utils.encode_file_to_base64(x, type="audio")
@@ -970,8 +977,8 @@
     def tokenize(self, x):
         file_obj = processing_utils.decode_base64_to_file(x)
-        x = scipy.io.wavfile.read(file_obj.name)
-        sample_rate, data = x
+        x = AudioSegment.from_file(file_obj.name)
+        sample_rate, data = x.frame_rate, np.array(x.get_array_of_samples())
         leave_one_out_sets = []
         tokens = []
         masks = []
@@ -986,7 +993,12 @@
             leave_one_out_data = np.copy(data)
             leave_one_out_data[start:stop] = 0
             file = tempfile.NamedTemporaryFile(delete=False)
-            scipy.io.wavfile.write(file, sample_rate, leave_one_out_data)
+            audio_segment = AudioSegment(
+                leave_one_out_data.tobytes(),
+                frame_rate=sample_rate,
+                sample_width=leave_one_out_data.dtype.itemsize,
+                channels=len(leave_one_out_data.shape))
+            audio_segment.export(file.name)
             out_data = processing_utils.encode_file_to_base64(
                 file.name, type="audio", ext="wav")
             leave_one_out_sets.append(out_data)
@@ -995,7 +1007,12 @@
             token[0:start] = 0
             token[stop:] = 0
             file = tempfile.NamedTemporaryFile(delete=False)
-            scipy.io.wavfile.write(file, sample_rate, token)
+            audio_segment = AudioSegment(
+                token.tobytes(),
+                frame_rate=sample_rate,
+                sample_width=token.dtype.itemsize,
+                channels=len(token.shape))
+            audio_segment.export(file.name)
             token_data = processing_utils.encode_file_to_base64(
                 file.name, type="audio", ext="wav")
             tokens.append(token_data)
@@ -1005,13 +1022,15 @@
         # create a "zero input" vector and get sample rate
         x = tokens[0]
         file_obj = processing_utils.decode_base64_to_file(x)
-        sample_rate, data = scipy.io.wavfile.read(file_obj.name)
+        audio_segment = AudioSegment.from_file(file_obj.name)
+        sample_rate, data = audio_segment.frame_rate, np.array(audio_segment.get_array_of_samples())
         zero_input = np.zeros_like(data, dtype=int)
         # decode all of the tokens
         token_data = []
         for token in tokens:
             file_obj = processing_utils.decode_base64_to_file(token)
-            _, data = scipy.io.wavfile.read(file_obj.name)
+            audio_segment = AudioSegment.from_file(file_obj.name)
+            data = np.array(audio_segment.get_array_of_samples())
             token_data.append(data)
         # construct the masked version
         masked_inputs = []
@@ -1020,7 +1039,12 @@
             for t, b in zip(token_data, binary_mask_vector):
                 masked_input = masked_input + t*int(b)
             file = tempfile.NamedTemporaryFile(delete=False)
-            scipy.io.wavfile.write(file, sample_rate, masked_input)
+            audio_segment = AudioSegment(
+                masked_input.tobytes(),
+                frame_rate=sample_rate,
+                sample_width=masked_input.dtype.itemsize,
+                channels=len(masked_input.shape))
+            audio_segment.export(file.name)
             masked_data = processing_utils.encode_file_to_base64(
                 file.name, type="audio", ext="wav")
             masked_inputs.append(masked_data)
@@ -1033,27 +1057,6 @@
         """
         return list(scores)
 
-    def embed(self, x):
-        """
-        Resamples each audio signal to be 1,000 frames and then returns the flattened vectors
-        """
-        num_frames = 1000
-        if self.type == "file":
-            file_name = x.name
-            mfcc = processing_utils.generate_mfcc_features_from_audio_file(
-                file_name, downsample_to=num_frames)
-            return mfcc.flatten()
-        elif self.type == "numpy":
-            sample_rate, signal = x
-            mfcc = processing_utils.generate_mfcc_features_from_audio_file(
-                wav_filename=None, sample_rate=sample_rate, signal=signal, downsample_to=num_frames)
-            return mfcc.flatten()
-        elif self.type == "mfcc":
-            mfcc = scipy.signal.resample(x, num_frames, axis=1)
-            return mfcc.flatten()
-        else:
-            raise ValueError("Unknown type: " + str(self.type) +
-                             ". Please choose from: 'numpy', 'mfcc', 'file'.")
 
     def save_flagged(self, dir, label, data, encryption_key):
         """
@@ -1103,14 +1106,14 @@ class File(InputComponent):
             return None
 
         def process_single_file(f):
-            file_name, data, is_local_example = f["name"], f["data"], f["is_local_example"]
+            file_name, data, is_example = f["name"], f["data"], f["is_example"]
             if self.type == "file":
-                if is_local_example:
-                    return open(file_name)
+                if is_example:
+                    return processing_utils.create_tmp_copy_of_file(file_name)
                 else:
-                    return processing_utils.decode_base64_to_file(data, file_name=file_name)
+                    return processing_utils.decode_base64_to_file(data, file_path=file_name)
             elif self.type == "bytes":
-                if is_local_example:
+                if is_example:
                     with open(file_name, "rb") as file_data:
                         return file_data.read()
                 return processing_utils.decode_base64_to_binary(data)[0]
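The recurring change in this file is the replacement of scipy.io.wavfile with pydub: reads become AudioSegment.from_file, and writes become constructing an AudioSegment from raw bytes and exporting it. A minimal round-trip sketch of that pattern (file names are illustrative):

import numpy as np
from pydub import AudioSegment

# Read: replaces scipy.io.wavfile.read(path).
audio = AudioSegment.from_file("sample.wav")
sample_rate = audio.frame_rate
data = np.array(audio.get_array_of_samples())

# Write: replaces scipy.io.wavfile.write(file, sample_rate, data).
segment = AudioSegment(
    data.tobytes(),
    frame_rate=sample_rate,
    sample_width=data.dtype.itemsize,  # bytes per sample
    channels=len(data.shape))          # a 1-D array is mono
segment.export("roundtrip.wav", format="wav")  # explicit format; pydub's export defaults to mp3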

View File

@@ -269,24 +269,6 @@ def update_embeddings():
     return jsonify({"sample_embedding_2d": sample_embedding_2d})
 
-@app.route("/api/predict_examples/", methods=["POST"])
-@login_check
-def predict_examples():
-    example_ids = request.json["data"]
-    predictions_set = {}
-    for example_id in example_ids:
-        example_set = app.interface.examples[example_id]
-        processed_example_set = [iface.preprocess_example(example)
-                                 for iface, example in zip(app.interface.input_components, example_set)]
-        try:
-            predictions, _ = app.interface.process(processed_example_set)
-        except:
-            continue
-        predictions_set[example_id] = predictions
-    output = {"data": predictions_set}
-    return jsonify(output)
-
 
 def flag_data(input_data, output_data, flag_option=None, flag_index=None, username=None):
     flag_path = os.path.join(app.cwd, app.interface.flagging_dir)
     log_fp = "{}/log.csv".format(flag_path)

View File

@@ -13,7 +13,7 @@ import operator
 from numbers import Number
 import warnings
 import tempfile
-import scipy
+from pydub import AudioSegment
 import os
 import pandas as pd
 import PIL
@@ -361,8 +361,14 @@ class Audio(OutputComponent):
     def postprocess(self, y):
         if self.type in ["numpy", "file", "auto"]:
             if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
+                sample_rate, data = y
                 file = tempfile.NamedTemporaryFile(delete=False)
-                scipy.io.wavfile.write(file, y[0], y[1])
+                audio_segment = AudioSegment(
+                    data.tobytes(),
+                    frame_rate=sample_rate,
+                    sample_width=data.dtype.itemsize,
+                    channels=len(data.shape))
+                audio_segment.export(file.name)
                 y = file.name
             return processing_utils.encode_file_to_base64(y, type="audio", ext="wav")
         else:
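A quick way to exercise this numpy branch is an Interface whose function returns a (sample_rate, samples) tuple, which the postprocess above converts to base64-encoded WAV. A hedged sketch against the 2.3.x-era API (the tone generator is illustrative, not from this commit):

import numpy as np
import gradio as gr

def make_tone(duration):
    # duration seconds of a 440 Hz sine wave as 16-bit PCM.
    sample_rate = 44100
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
    samples = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
    return sample_rate, samples  # handled by Audio.postprocess above

iface = gr.Interface(
    make_tone, gr.inputs.Slider(1, 5), gr.outputs.Audio(type="numpy"))

if __name__ == "__main__":
    iface.launch()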

View File

@@ -2,8 +2,8 @@ from PIL import Image, ImageOps
 from io import BytesIO
 import base64
 import tempfile
-import scipy.io.wavfile
-from scipy.fftpack import dct
+import shutil
+import os
 import numpy as np
 from gradio import encryptor
@@ -79,12 +79,15 @@ def decode_base64_to_binary(encoding):
         data = encoding
     return base64.b64decode(data), extension
 
-def decode_base64_to_file(encoding, encryption_key=None, filename=None):
+def decode_base64_to_file(encoding, encryption_key=None, file_path=None):
     data, mime_extension = decode_base64_to_binary(encoding)
     prefix, extension = None, None
-    if filename is not None and "." in filename:
-        prefix = filename[0: filename.index(".")]
-        extension = filename[filename.index(".") + 1:]
+    if file_path is not None:
+        filename = os.path.basename(file_path)
+        prefix = filename
+        if "." in filename:
+            prefix = filename[0: filename.index(".")]
+            extension = filename[filename.index(".") + 1:]
     if extension is None:
         extension = mime_extension
     if extension is None:
@@ -97,6 +100,19 @@
         file_obj.flush()
     return file_obj
 
+def create_tmp_copy_of_file(file_path):
+    file_name = os.path.basename(file_path)
+    prefix, extension = file_name, None
+    if "." in file_name:
+        prefix = file_name[0: file_name.index(".")]
+        extension = file_name[file_name.index(".") + 1:]
+    if extension is None:
+        file_obj = tempfile.NamedTemporaryFile(delete=False, prefix=prefix)
+    else:
+        file_obj = tempfile.NamedTemporaryFile(delete=False, prefix=prefix, suffix="."+extension)
+    shutil.copy2(file_path, file_obj.name)
+    return file_obj
+
 def _convert(image, dtype, force_copy=False, uniform=False):
     """
     Adapted from: https://github.com/scikit-image/scikit-image/blob/main/skimage/util/dtype.py#L510-L531
@@ -379,94 +395,3 @@ def _convert(image, dtype, force_copy=False, uniform=False):
         image = _scale(image, 8 * itemsize_in, 8 * itemsize_out, copy=False)
         image += imin_out
     return image.astype(dtype_out)
-
-##################
-# AUDIO FILES
-##################
-
-def generate_mfcc_features_from_audio_file(wav_filename=None,
-                                           pre_emphasis=0.95,
-                                           frame_size=0.025,
-                                           frame_stride=0.01,
-                                           NFFT=512,
-                                           nfilt=40,
-                                           num_ceps=12,
-                                           cep_lifter=22,
-                                           sample_rate=None,
-                                           signal=None,
-                                           downsample_to=None):
-    """
-    Loads and preprocesses a .wav audio file (or alternatively, a sample rate & signal) into mfcc coefficients, the typical inputs to models.
-    Adapted from: https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html
-    :param wav_filename: string name of audio file to process.
-    :param pre_emphasis: a float factor, typically 0.95 or 0.97, which amplifies high frequencies.
-    :param frame_size: a float that is the length, in seconds, of time frame over which to take the fft.
-    :param frame_stride: a float that is the offset, in seconds, between consecutive time frames.
-    :param NFFT: The number of points in the short-time fft for each time frame.
-    :param nfilt: The number of filters on the Mel-scale to extract frequency bands.
-    :param num_ceps: the number of cepstral coefficients to retrain.
-    :param cep_lifter: the int factor, by which to de-emphasize higher-frequency.
-    :param sample_rate: optional param represnting sample rate that is used if `wav_filename` is not provided
-    :param signal: optional param representing sample data that is used if `wav_filename` is not provided
-    :param downsample_to: optional param. If provided, audio file is downsampled to this many frames.
-    :return: a 3D numpy array of mfcc coefficients, of the shape 1 x num_frames x num_coeffs.
-    """
-    if (wav_filename is None) and (sample_rate is None or signal is None):
-        raise ValueError("Either a wav_filename must be provdied or a sample_rate and signal")
-    elif wav_filename is None:
-        pass
-    else:
-        sample_rate, signal = scipy.io.wavfile.read(wav_filename)
-    if not(downsample_to is None):
-        signal = scipy.signal.resample(signal, downsample_to)
-    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
-    frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate  # Convert from seconds to samples
-    signal_length = len(emphasized_signal)
-    frame_length = int(round(frame_length))
-    frame_step = int(round(frame_step))
-    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))  # Make sure that we have at least 1 frame
-    pad_signal_length = num_frames * frame_step + frame_length
-    z = np.zeros((pad_signal_length - signal_length))
-    pad_signal = np.append(emphasized_signal, z)  # Pad Signal to make sure that all frames have equal number of samples without truncating any samples from the original signal
-    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
-    frames = pad_signal[indices.astype(np.int32, copy=False)]
-    frames *= np.hamming(frame_length)
-    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))  # Magnitude of the FFT
-    pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))  # Power Spectrum
-    low_freq_mel = 0
-    high_freq_mel = (2595 * np.log10(1 + (sample_rate / 2) / 700))  # Convert Hz to Mel
-    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # Equally spaced in Mel scale
-    hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
-    bin = np.floor((NFFT + 1) * hz_points / sample_rate)
-    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
-    for m in range(1, nfilt + 1):
-        f_m_minus = int(bin[m - 1])  # left
-        f_m = int(bin[m])  # center
-        f_m_plus = int(bin[m + 1])  # right
-        for k in range(f_m_minus, f_m):
-            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
-        for k in range(f_m, f_m_plus):
-            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
-    filter_banks = np.dot(pow_frames, fbank.T)
-    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # Numerical Stability
-    filter_banks = 20 * np.log10(filter_banks)  # dB
-    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 0: (num_ceps + 1)]  # Keep filters 1-13 by default.
-    (nframes, ncoeff) = mfcc.shape
-    n = np.arange(ncoeff)
-    lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)
-    mfcc *= lift
-    filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
-    mfcc -= (np.mean(mfcc, axis=0) + 1e-8)
-    return mfcc[np.newaxis, :, :]  # Create a batch dimension.
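The new create_tmp_copy_of_file helper is what lets cached example files reach user code without touching the originals: it copies the file into a NamedTemporaryFile whose prefix and suffix mirror the source name, so extension-sniffing code downstream (such as Video.preprocess) keeps working. A small usage sketch (paths illustrative):

import os
from gradio import processing_utils

tmp = processing_utils.create_tmp_copy_of_file("demo/files/video.mp4")
print(tmp.name)  # e.g. /tmp/videoXXXXXXXX.mp4, suffix preserved
assert os.path.exists("demo/files/video.mp4")  # the original is untouched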

View File

@@ -1 +1 @@
-2.3.7b
+2.3.7b1

View File

@@ -5,7 +5,7 @@ except ImportError:
 setup(
     name='gradio',
-    version='2.3.7b',
+    version='2.3.7b1',
     include_package_data=True,
     description='Python library for easily interacting with trained machine learning models',
     author='Abubakar Abid',
@@ -16,7 +16,7 @@ setup(
     keywords=['machine learning', 'visualization', 'reproducibility'],
     install_requires=[
         'numpy',
-        'scipy',
+        'pydub',
         'matplotlib',
         'pandas',
         'pillow',

View File

@@ -2,7 +2,7 @@ import unittest
 import gradio as gr
 import PIL
 import numpy as np
-import scipy
+from pydub import AudioSegment
 import os
 
 class TestTextbox(unittest.TestCase):
@@ -97,7 +97,8 @@ class TestAudio(unittest.TestCase):
     def test_in_interface(self):
         x_wav = gr.test_data.BASE64_AUDIO
 
         def max_amplitude_from_wav_file(wav_file):
-            _, data = scipy.io.wavfile.read(wav_file.name)
+            audio_segment = AudioSegment.from_file(wav_file.name)
+            data = np.array(audio_segment.get_array_of_samples())
             return np.max(data)
 
         iface = gr.Interface(
@@ -111,7 +112,7 @@ class TestFile(unittest.TestCase):
         x_file = {
             "name": "audio.wav",
             "data": gr.test_data.BASE64_AUDIO,
-            "is_local_example": False
+            "is_example": False
         }
 
         def get_size_of_file(file_obj):
             return os.path.getsize(file_obj.name)