mirror of https://github.com/gradio-app/gradio.git
synced 2024-12-15 02:11:15 +08:00

Support video example preview

This commit is contained in:
parent 760bf48b24
commit 9b968d3fb5
BIN demo/files/video.avi (Normal file)
Binary file not shown.

BIN demo/files/video.mp4 (Normal file)
Binary file not shown.
@@ -4,7 +4,11 @@ def video_flip(video):
     return video
 
 iface = gr.Interface(
-    video_flip, "video", "playable_video", theme="huggingface")
+    video_flip, "video", "playable_video", theme="huggingface",
+    examples=[
+        ["files/video.avi"],
+        ["files/video.mp4"]
+    ])
 
 if __name__ == "__main__":
     iface.launch()
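For orientation, the demo script after this hunk reads roughly as below. This is a reassembly of the visible hunk only; the import line sits above the hunk and is assumed, and any function body beyond the visible `return video` is not shown here.

import gradio as gr  # assumed: the import is outside the hunk

def video_flip(video):
    # the hunk shows only this return; any other body lines are not visible
    return video

iface = gr.Interface(
    video_flip, "video", "playable_video", theme="huggingface",
    examples=[
        ["files/video.avi"],  # typically not browser-playable: shown as a text example
        ["files/video.mp4"]   # browser-playable: shown as a hover-to-play preview
    ])

if __name__ == "__main__":
    iface.launch()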
15 frontend/package-lock.json (generated)
@@ -11666,11 +11666,18 @@
       "integrity": "sha512-QBmA/G2y+IfeS4oktet3qRZ+P5kPhCKRXxXnQEudYqUaEioAU1/Lq2us3D/t1Jfo4hE9REQPrbB7K5sOczJVIw=="
     },
     "mime-types": {
-      "version": "2.1.30",
-      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.30.tgz",
-      "integrity": "sha512-crmjA4bLtR8m9qLpHvgxSChT+XoSlZi8J4n/aIdn3z92e/U47Z0V/yl+Wh9W046GgFVAmoNR/fmdbZYcSSIUeg==",
+      "version": "2.1.33",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.33.tgz",
+      "integrity": "sha512-plLElXp7pRDd0bNZHw+nMd52vRYjLwQjygaNg7ddJ2uJtTlmnTCjWuPKxVu6//AdaRuME84SvLW91sIkBqGT0g==",
       "requires": {
-        "mime-db": "1.47.0"
+        "mime-db": "1.50.0"
       },
+      "dependencies": {
+        "mime-db": {
+          "version": "1.50.0",
+          "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.50.0.tgz",
+          "integrity": "sha512-9tMZCDlYHqeERXEHO9f/hKfNXhre5dK2eE/krIvUjZbS2KPcqGDfNShIWS1uW9XOTKQKqK6qbeOci18rbfW77A=="
+        }
+      }
     },
     "mimic-fn": {
@@ -15,6 +15,7 @@
     "fabric": "^4.5.0",
     "html2canvas-objectfit-fix": "^1.2.0",
     "jspreadsheet-ce": "^4.7.3",
+    "mime-types": "^2.1.33",
     "plotly.js": "^2.3.1",
     "prettier-eslint": "^13.0.0",
     "prettier-eslint-cli": "^5.0.1",
@@ -12,7 +12,11 @@ export default class ComponentExample extends React.Component {
 
 export class FileComponentExample extends ComponentExample {
   static async preprocess(x, examples_dir) {
-    return examples_dir + "/" + x;
+    return {
+      "name": x,
+      "data": examples_dir + "/" + x,
+      "is_example": true
+    }
   }
 }
 
@@ -1,6 +1,6 @@
 import React from "react";
 import BaseComponent from "../base_component";
-import { DataURLComponentExample } from "../component_example";
+import { FileComponentExample } from "../component_example";
 import Recorder from "recorder-js";
 import { getSaliencyColor } from "../../utils";
 
@@ -147,12 +147,12 @@ class AudioInput extends BaseComponent {
       let file = files[0];
       ReaderObj.readAsDataURL(file);
       ReaderObj.onloadend = function () {
-        component.props.handleChange({ "name": file.name, "data": this.result });
+        component.props.handleChange({ "name": file.name, "data": this.result, "is_example": false });
       };
     };
   }
 
-class AudioInputExample extends DataURLComponentExample {
+class AudioInputExample extends FileComponentExample {
   render() {
     return <div className="input_audio_example">{this.props.value}</div>;
   }
@@ -98,7 +98,7 @@ class FileInput extends BaseComponent {
           name: file.name,
           size: file.size,
           data: reader.result,
-          is_local_example: false
+          is_example: false
         });
         if (this.file_data.length === file_count) {
           this.handleChange(this.file_data);
@@ -1,5 +1,6 @@
 import React from "react";
 import BaseComponent from "../base_component";
+import { FileComponentExample } from "../component_example";
 import ComponentExample from "../component_example";
 import { isPlayable } from "../../utils";
 
@@ -25,8 +26,7 @@ class VideoInput extends BaseComponent {
       evt.stopPropagation();
     };
     if (this.props.value != null) {
-      if (isPlayable("video", this.props.value["data"].substring(
-        5, this.props.value["data"].indexOf(";")))) {
+      if (isPlayable("video", this.props.value["name"])) {
         return (
           <div className="input_video">
             <div className="video_preview_holder">
@@ -93,15 +93,36 @@ class VideoInput extends BaseComponent {
       let file = files[0];
       ReaderObj.readAsDataURL(file);
       ReaderObj.onloadend = function () {
-        component.props.handleChange({ "name": file.name, "data": this.result });
+        component.props.handleChange({ "name": file.name, "data": this.result, "is_example": false });
       };
     }
   }
 
-class VideoInputExample extends ComponentExample {
+class VideoInputExample extends FileComponentExample {
+  constructor(props) {
+    super(props);
+    this.video = React.createRef();
+  }
   render() {
-    return <span className="input_video_example">{this.props.value}</span>;
+    if (isPlayable("video", this.props.value)) {
+      return <div className="input_video_example">
+        <div className="video_holder">
+          <video
+            ref={this.video}
+            className="video_preview"
+            onMouseOver={() => { this.video.current.play() }}
+            onMouseOut={() => { this.video.current.pause() }}
+            preload="metadata"
+          >
+            <source src={this.props.examples_dir + "/" + this.props.value}></source>
+          </video>
+        </div>
+      </div>
+    } else {
+      return <div className="input_video_example">{this.props.value}</div>
+    }
   }
 }
 
 export { VideoInput, VideoInputExample };
@@ -6,8 +6,7 @@ import { isPlayable } from "../../utils";
 class VideoOutput extends BaseComponent {
   render() {
     if (this.props.value) {
-      if (isPlayable("video", this.props.value["data"].substring(
-        5, this.props.value["data"].indexOf(";")))) {
+      if (isPlayable("video", this.props.value["name"])) {
         return <div className="output_video">
           <video controls src={this.props.value["data"]}></video>
         </div>
@@ -428,6 +428,14 @@
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-80;
     .upload_zone {
@@ -380,6 +380,14 @@ html {
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-48;
     .upload_zone {
@@ -370,6 +370,14 @@
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-80;
     .upload_zone {
@@ -383,6 +383,14 @@
       @apply w-full h-full object-contain;
     }
   }
+  .input_video_example {
+    .video_holder {
+      @apply h-36 object-contain flex justify-center;
+    }
+    .video_preview {
+      @apply w-full;
+    }
+  }
   .input_file {
     @apply w-full h-80;
     .upload_zone {
@@ -1,3 +1,5 @@
+var mime = require('mime-types')
+
 export function prettyBytes(bytes) {
   let units = ["B", "KB", "MB", "GB", "PB"];
   let i = 0;
@@ -162,12 +164,14 @@ export function CSVToArray(strData, strDelimiter) {
   return arrData;
 }
 
-export function isPlayable(data_type, mime_type) {
+export function isPlayable(data_type, file_name) {
   if (data_type == "audio") {
     let audio_element = new Audio();
+    let mime_type = mime.lookup(file_name)
     return audio_element.canPlayType(mime_type) != "";
   } else {
     let video_element = document.createElement("video");
+    let mime_type = mime.lookup(file_name)
     return video_element.canPlayType(mime_type) != "";
   }
 }
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: gradio
-Version: 2.3.7b0
+Version: 2.3.7b1
 Summary: Python library for easily interacting with trained machine learning models
 Home-page: https://github.com/gradio-app/gradio-UI
 Author: Abubakar Abid
@@ -11,5 +11,5 @@ pandas
 paramiko
 pillow
 pycryptodome
+pydub
 requests
-scipy
@@ -1,6 +1,6 @@
 {
   "files": {
-    "main.css": "/static/css/main.61fa3417.css",
+    "main.css": "/static/css/main.dcd72078.css",
     "main.js": "/static/bundle.js",
     "index.html": "/index.html",
     "static/bundle.js.LICENSE.txt": "/static/bundle.js.LICENSE.txt",
@@ -11,7 +11,7 @@
   },
   "entrypoints": [
     "static/bundle.css",
-    "static/css/main.61fa3417.css",
+    "static/css/main.dcd72078.css",
     "static/bundle.js"
   ]
 }
@@ -8,4 +8,4 @@
     window.config = {{ config|tojson }};
   } catch (e) {
     window.config = {};
-  }</script><script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script><title>Gradio</title><link href="static/bundle.css" rel="stylesheet"><link href="static/css/main.61fa3417.css" rel="stylesheet"></head><body style="height:100%"><div id="root" style="height:100%"></div><script src="static/bundle.js"></script></body></html>
+  }</script><script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script><title>Gradio</title><link href="static/bundle.css" rel="stylesheet"><link href="static/css/main.dcd72078.css" rel="stylesheet"></head><body style="height:100%"><div id="root" style="height:100%"></div><script src="static/bundle.js"></script></body></html>
@@ -9,7 +9,7 @@ import warnings
 from gradio.component import Component
 import numpy as np
 import PIL
-import scipy.io.wavfile
+from pydub import AudioSegment
 from gradio import processing_utils, test_data
 import pandas as pd
 from ffmpy import FFmpeg
@@ -877,9 +877,12 @@ class Video(InputComponent):
     def preprocess(self, x):
         if x is None:
             return x
-        file_name, file_data = x["name"], x["data"]
-        file = processing_utils.decode_base64_to_file(
-            file_data, filename=file_name)
+        file_name, file_data, is_example = x["name"], x["data"], x["is_example"]
+        if is_example:
+            file = processing_utils.create_tmp_copy_of_file(file_name)
+        else:
+            file = processing_utils.decode_base64_to_file(
+                file_data, file_path=file_name)
         file_name = file.name
         uploaded_format = file_name.split(".")[-1].lower()
         if self.type is not None and uploaded_format != self.type:
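With the new `is_example` flag, `preprocess` receives one of two payload shapes from the frontend. A minimal sketch (field names taken from the hunks above; the concrete values are illustrative):

# Clicked example: "name" is a path relative to the examples directory, and
# preprocess copies that local file rather than base64-decoding "data".
example_payload = {"name": "files/video.mp4",
                   "data": "examples/files/video.mp4",  # illustrative
                   "is_example": True}

# User upload: "data" carries the base64-encoded file contents.
upload_payload = {"name": "video.mp4",
                  "data": "data:video/mp4;base64,AAAA...",  # truncated, illustrative
                  "is_example": False}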
@@ -915,7 +918,7 @@ class Audio(InputComponent):
         """
         Parameters:
         source (str): Source of audio. "upload" creates a box where user can drop an audio file, "microphone" creates a microphone input.
-        type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name, "mfcc" returns the mfcc coefficients of the input audio.
+        type (str): Type of value to be returned by component. "numpy" returns a 2-set tuple with an integer sample_rate and the data numpy.array of shape (samples, 2), "file" returns a temporary file object whose path can be retrieved by file_obj.name.
         label (str): component name in interface.
         optional (bool): If True, the interface can be submitted with no uploaded audio, in which case the input value is None.
         """
@@ -923,7 +926,8 @@ class Audio(InputComponent):
         requires_permissions = source == "microphone"
         self.type = type
         self.optional = optional
-        self.test_input = {"name": "sample.wav", "data": test_data.BASE64_AUDIO}
+        self.test_input = {"name": "sample.wav",
+                           "data": test_data.BASE64_AUDIO, "is_example": False}
         self.interpret_by_tokens = True
         super().__init__(label, requires_permissions)
@@ -947,14 +951,17 @@
         """
         if x is None:
             return x
-        file_name, file_data = x["name"], x["data"]
-        file_obj = processing_utils.decode_base64_to_file(file_data, filename=file_name)
+        file_name, file_data, is_example = x["name"], x["data"], x["is_example"]
+        if is_example:
+            file_obj = processing_utils.create_tmp_copy_of_file(file_name)
+        else:
+            file_obj = processing_utils.decode_base64_to_file(
+                file_data, file_path=file_name)
         if self.type == "file":
             return file_obj
         elif self.type == "numpy":
-            return scipy.io.wavfile.read(file_obj.name)
-        elif self.type == "mfcc":
-            return processing_utils.generate_mfcc_features_from_audio_file(file_obj.name)
+            audio_segment = AudioSegment.from_file(file_obj.name)
+            return audio_segment.frame_rate, np.array(audio_segment.get_array_of_samples())
 
     def preprocess_example(self, x):
         return processing_utils.encode_file_to_base64(x, type="audio")
@@ -970,8 +977,8 @@
 
     def tokenize(self, x):
         file_obj = processing_utils.decode_base64_to_file(x)
-        x = scipy.io.wavfile.read(file_obj.name)
-        sample_rate, data = x
+        x = AudioSegment.from_file(file_obj.name)
+        sample_rate, data = x.frame_rate, np.array(x.get_array_of_samples())
         leave_one_out_sets = []
         tokens = []
         masks = []
@@ -986,7 +993,12 @@
             leave_one_out_data = np.copy(data)
             leave_one_out_data[start:stop] = 0
             file = tempfile.NamedTemporaryFile(delete=False)
-            scipy.io.wavfile.write(file, sample_rate, leave_one_out_data)
+            audio_segment = AudioSegment(
+                leave_one_out_data.tobytes(),
+                frame_rate=sample_rate,
+                sample_width=leave_one_out_data.dtype.itemsize,
+                channels=len(leave_one_out_data.shape))
+            audio_segment.export(file.name)
             out_data = processing_utils.encode_file_to_base64(
                 file.name, type="audio", ext="wav")
             leave_one_out_sets.append(out_data)
@@ -995,7 +1007,12 @@
             token[0:start] = 0
             token[stop:] = 0
             file = tempfile.NamedTemporaryFile(delete=False)
-            scipy.io.wavfile.write(file, sample_rate, token)
+            audio_segment = AudioSegment(
+                token.tobytes(),
+                frame_rate=sample_rate,
+                sample_width=token.dtype.itemsize,
+                channels=len(token.shape))
+            audio_segment.export(file.name)
             token_data = processing_utils.encode_file_to_base64(
                 file.name, type="audio", ext="wav")
             tokens.append(token_data)
@@ -1005,13 +1022,15 @@
         # create a "zero input" vector and get sample rate
         x = tokens[0]
         file_obj = processing_utils.decode_base64_to_file(x)
-        sample_rate, data = scipy.io.wavfile.read(file_obj.name)
+        audio_segment = AudioSegment.from_file(file_obj.name)
+        sample_rate, data = audio_segment.frame_rate, np.array(audio_segment.get_array_of_samples())
         zero_input = np.zeros_like(data, dtype=int)
         # decode all of the tokens
         token_data = []
         for token in tokens:
             file_obj = processing_utils.decode_base64_to_file(token)
-            _, data = scipy.io.wavfile.read(file_obj.name)
+            audio_segment = AudioSegment.from_file(file_obj.name)
+            data = np.array(audio_segment.get_array_of_samples())
             token_data.append(data)
         # construct the masked version
         masked_inputs = []
@@ -1020,7 +1039,12 @@
         for t, b in zip(token_data, binary_mask_vector):
             masked_input = masked_input + t*int(b)
         file = tempfile.NamedTemporaryFile(delete=False)
-        scipy.io.wavfile.write(file, sample_rate, masked_input)
+        audio_segment = AudioSegment(
+            masked_input.tobytes(),
+            frame_rate=sample_rate,
+            sample_width=masked_input.dtype.itemsize,
+            channels=len(masked_input.shape))
+        audio_segment.export(file.name)
         masked_data = processing_utils.encode_file_to_base64(
             file.name, type="audio", ext="wav")
         masked_inputs.append(masked_data)
@@ -1033,27 +1057,6 @@
         """
         return list(scores)
 
-    def embed(self, x):
-        """
-        Resamples each audio signal to be 1,000 frames and then returns the flattened vectors
-        """
-        num_frames = 1000
-        if self.type == "file":
-            file_name = x.name
-            mfcc = processing_utils.generate_mfcc_features_from_audio_file(
-                file_name, downsample_to=num_frames)
-            return mfcc.flatten()
-        elif self.type == "numpy":
-            sample_rate, signal = x
-            mfcc = processing_utils.generate_mfcc_features_from_audio_file(
-                wav_filename=None, sample_rate=sample_rate, signal=signal, downsample_to=num_frames)
-            return mfcc.flatten()
-        elif self.type == "mfcc":
-            mfcc = scipy.signal.resample(x, num_frames, axis=1)
-            return mfcc.flatten()
-        else:
-            raise ValueError("Unknown type: " + str(self.type) +
-                             ". Please choose from: 'numpy', 'mfcc', 'file'.")
-
     def save_flagged(self, dir, label, data, encryption_key):
         """
@@ -1103,14 +1106,14 @@ class File(InputComponent):
             return None
 
         def process_single_file(f):
-            file_name, data, is_local_example = f["name"], f["data"], f["is_local_example"]
+            file_name, data, is_example = f["name"], f["data"], f["is_example"]
             if self.type == "file":
-                if is_local_example:
-                    return open(file_name)
+                if is_example:
+                    return processing_utils.create_tmp_copy_of_file(file_name)
                 else:
-                    return processing_utils.decode_base64_to_file(data, file_name=file_name)
+                    return processing_utils.decode_base64_to_file(data, file_path=file_name)
             elif self.type == "bytes":
-                if is_local_example:
+                if is_example:
                     with open(file_name, "rb") as file_data:
                         return file_data.read()
                 return processing_utils.decode_base64_to_binary(data)[0]
@@ -269,24 +269,6 @@ def update_embeddings():
     return jsonify({"sample_embedding_2d": sample_embedding_2d})
 
 
-@app.route("/api/predict_examples/", methods=["POST"])
-@login_check
-def predict_examples():
-    example_ids = request.json["data"]
-    predictions_set = {}
-    for example_id in example_ids:
-        example_set = app.interface.examples[example_id]
-        processed_example_set = [iface.preprocess_example(example)
-                                 for iface, example in zip(app.interface.input_components, example_set)]
-        try:
-            predictions, _ = app.interface.process(processed_example_set)
-        except:
-            continue
-        predictions_set[example_id] = predictions
-    output = {"data": predictions_set}
-    return jsonify(output)
-
-
 def flag_data(input_data, output_data, flag_option=None, flag_index=None, username=None):
     flag_path = os.path.join(app.cwd, app.interface.flagging_dir)
     log_fp = "{}/log.csv".format(flag_path)
@@ -13,7 +13,7 @@ import operator
 from numbers import Number
 import warnings
 import tempfile
-import scipy
+from pydub import AudioSegment
 import os
 import pandas as pd
 import PIL
@@ -361,8 +361,14 @@ class Audio(OutputComponent):
     def postprocess(self, y):
         if self.type in ["numpy", "file", "auto"]:
             if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
+                sample_rate, data = y
                 file = tempfile.NamedTemporaryFile(delete=False)
-                scipy.io.wavfile.write(file, y[0], y[1])
+                audio_segment = AudioSegment(
+                    data.tobytes(),
+                    frame_rate=sample_rate,
+                    sample_width=data.dtype.itemsize,
+                    channels=len(data.shape))
+                audio_segment.export(file.name)
                 y = file.name
             return processing_utils.encode_file_to_base64(y, type="audio", ext="wav")
         else:
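Every scipy.io.wavfile call above is swapped for the same pydub pattern: a raw numpy buffer plus its dtype and shape fully describe the PCM stream. A self-contained sketch of the round trip (assumes pydub and numpy are installed; the file name is illustrative):

import numpy as np
from pydub import AudioSegment

sample_rate = 8000
# one second of a 440 Hz tone as 16-bit mono PCM
data = (np.sin(2 * np.pi * 440 * np.arange(sample_rate) / sample_rate)
        * 32767).astype(np.int16)

segment = AudioSegment(
    data.tobytes(),                    # raw interleaved samples
    frame_rate=sample_rate,            # samples per second
    sample_width=data.dtype.itemsize,  # bytes per sample (2 for int16)
    channels=len(data.shape))          # 1 for a mono (samples,) array
segment.export("tone.wav", format="wav")  # wav export needs no ffmpeg

# Reading back mirrors the new AudioSegment.from_file call sites above.
restored = AudioSegment.from_file("tone.wav")
assert restored.frame_rate == sample_rate
assert np.array_equal(np.array(restored.get_array_of_samples()), data)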
@@ -2,8 +2,8 @@ from PIL import Image, ImageOps
 from io import BytesIO
 import base64
 import tempfile
 import scipy.io.wavfile
-from scipy.fftpack import dct
+import shutil
 import os
 import numpy as np
 from gradio import encryptor
@@ -79,12 +79,15 @@ def decode_base64_to_binary(encoding):
         data = encoding
     return base64.b64decode(data), extension
 
-def decode_base64_to_file(encoding, encryption_key=None, filename=None):
+def decode_base64_to_file(encoding, encryption_key=None, file_path=None):
     data, mime_extension = decode_base64_to_binary(encoding)
     prefix, extension = None, None
-    if filename is not None and "." in filename:
-        prefix = filename[0: filename.index(".")]
-        extension = filename[filename.index(".") + 1:]
+    if file_path is not None:
+        filename = os.path.basename(file_path)
+        prefix = filename
+        if "." in filename:
+            prefix = filename[0: filename.index(".")]
+            extension = filename[filename.index(".") + 1:]
     if extension is None:
         extension = mime_extension
     if extension is None:
@@ -97,6 +100,19 @@ def decode_base64_to_file(encoding, encryption_key=None, filename=None):
     file_obj.flush()
     return file_obj
 
+def create_tmp_copy_of_file(file_path):
+    file_name = os.path.basename(file_path)
+    prefix, extension = file_name, None
+    if "." in file_name:
+        prefix = file_name[0: file_name.index(".")]
+        extension = file_name[file_name.index(".") + 1:]
+    if extension is None:
+        file_obj = tempfile.NamedTemporaryFile(delete=False, prefix=prefix)
+    else:
+        file_obj = tempfile.NamedTemporaryFile(delete=False, prefix=prefix, suffix="."+extension)
+    shutil.copy2(file_path, file_obj.name)
+    return file_obj
+
 def _convert(image, dtype, force_copy=False, uniform=False):
     """
     Adapted from: https://github.com/scikit-image/scikit-image/blob/main/skimage/util/dtype.py#L510-L531
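A quick sketch of what the new helper does, with behavior read off the hunk above (the input path is illustrative):

import os
from gradio import processing_utils

# Copies the file into a NamedTemporaryFile that keeps the original
# prefix ("video") and suffix (".mp4"), then returns the open file object.
tmp = processing_utils.create_tmp_copy_of_file("files/video.mp4")
print(tmp.name)  # e.g. /tmp/videoa1b2c3.mp4 (random middle part)
assert os.path.getsize(tmp.name) == os.path.getsize("files/video.mp4")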
@@ -379,94 +395,3 @@ def _convert(image, dtype, force_copy=False, uniform=False):
     image = _scale(image, 8 * itemsize_in, 8 * itemsize_out, copy=False)
     image += imin_out
     return image.astype(dtype_out)
-
-
-##################
-# AUDIO FILES
-##################
-
-def generate_mfcc_features_from_audio_file(wav_filename=None,
-                                           pre_emphasis=0.95,
-                                           frame_size= 0.025,
-                                           frame_stride=0.01,
-                                           NFFT=512,
-                                           nfilt=40,
-                                           num_ceps=12,
-                                           cep_lifter=22,
-                                           sample_rate=None,
-                                           signal=None,
-                                           downsample_to=None):
-    """
-    Loads and preprocesses a .wav audio file (or alternatively, a sample rate & signal) into mfcc coefficients, the typical inputs to models.
-    Adapted from: https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html
-    :param wav_filename: string name of audio file to process.
-    :param pre_emphasis: a float factor, typically 0.95 or 0.97, which amplifies high frequencies.
-    :param frame_size: a float that is the length, in seconds, of time frame over which to take the fft.
-    :param frame_stride: a float that is the offset, in seconds, between consecutive time frames.
-    :param NFFT: The number of points in the short-time fft for each time frame.
-    :param nfilt: The number of filters on the Mel-scale to extract frequency bands.
-    :param num_ceps: the number of cepstral coefficients to retrain.
-    :param cep_lifter: the int factor, by which to de-emphasize higher-frequency.
-    :param sample_rate: optional param represnting sample rate that is used if `wav_filename` is not provided
-    :param signal: optional param representing sample data that is used if `wav_filename` is not provided
-    :param downsample_to: optional param. If provided, audio file is downsampled to this many frames.
-    :return: a 3D numpy array of mfcc coefficients, of the shape 1 x num_frames x num_coeffs.
-    """
-    if (wav_filename is None) and (sample_rate is None or signal is None):
-        raise ValueError("Either a wav_filename must be provdied or a sample_rate and signal")
-    elif wav_filename is None:
-        pass
-    else:
-        sample_rate, signal = scipy.io.wavfile.read(wav_filename)
-
-    if not(downsample_to is None):
-        signal = scipy.signal.resample(signal, downsample_to)
-
-    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
-
-    frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate  # Convert from seconds to samples
-    signal_length = len(emphasized_signal)
-    frame_length = int(round(frame_length))
-    frame_step = int(round(frame_step))
-    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))  # Make sure that we have at least 1 frame
-
-    pad_signal_length = num_frames * frame_step + frame_length
-    z = np.zeros((pad_signal_length - signal_length))
-    pad_signal = np.append(emphasized_signal, z)  # Pad Signal to make sure that all frames have equal number of samples without truncating any samples from the original signal
-
-    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
-    frames = pad_signal[indices.astype(np.int32, copy=False)]
-
-    frames *= np.hamming(frame_length)
-    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))  # Magnitude of the FFT
-    pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))  # Power Spectrum
-
-    low_freq_mel = 0
-    high_freq_mel = (2595 * np.log10(1 + (sample_rate / 2) / 700))  # Convert Hz to Mel
-    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # Equally spaced in Mel scale
-    hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
-    bin = np.floor((NFFT + 1) * hz_points / sample_rate)
-
-    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
-    for m in range(1, nfilt + 1):
-        f_m_minus = int(bin[m - 1])  # left
-        f_m = int(bin[m])  # center
-        f_m_plus = int(bin[m + 1])  # right
-
-        for k in range(f_m_minus, f_m):
-            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
-        for k in range(f_m, f_m_plus):
-            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
-    filter_banks = np.dot(pow_frames, fbank.T)
-    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # Numerical Stability
-    filter_banks = 20 * np.log10(filter_banks)  # dB
-
-    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 0: (num_ceps + 1)]  # Keep filters 1-13 by default.
-    (nframes, ncoeff) = mfcc.shape
-    n = np.arange(ncoeff)
-    lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)
-    mfcc *= lift
-
-    filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
-    mfcc -= (np.mean(mfcc, axis=0) + 1e-8)
-    return mfcc[np.newaxis, :, :]  # Create a batch dimension.
@@ -1 +1 @@
-2.3.7b
+2.3.7b1
4 setup.py
@@ -5,7 +5,7 @@ except ImportError:
 
 setup(
     name='gradio',
-    version='2.3.7b',
+    version='2.3.7b1',
     include_package_data=True,
     description='Python library for easily interacting with trained machine learning models',
     author='Abubakar Abid',
@@ -16,7 +16,7 @@ setup(
     keywords=['machine learning', 'visualization', 'reproducibility'],
     install_requires=[
         'numpy',
-        'scipy',
+        'pydub',
         'matplotlib',
         'pandas',
         'pillow',
@@ -2,7 +2,7 @@ import unittest
 import gradio as gr
 import PIL
 import numpy as np
-import scipy
+from pydub import AudioSegment
 import os
 
 class TestTextbox(unittest.TestCase):
@@ -97,7 +97,8 @@ class TestAudio(unittest.TestCase):
     def test_in_interface(self):
         x_wav = gr.test_data.BASE64_AUDIO
         def max_amplitude_from_wav_file(wav_file):
-            _, data = scipy.io.wavfile.read(wav_file.name)
+            audio_segment = AudioSegment.from_file(wav_file.name)
+            data = np.array(audio_segment.get_array_of_samples())
             return np.max(data)
 
         iface = gr.Interface(
@@ -111,7 +112,7 @@ class TestFile(unittest.TestCase):
         x_file = {
             "name": "audio.wav",
             "data": gr.test_data.BASE64_AUDIO,
-            "is_local_example": False
+            "is_example": False
         }
         def get_size_of_file(file_obj):
             return os.path.getsize(file_obj.name)