Mirror of https://github.com/gradio-app/gradio.git (synced 2024-11-21 01:01:05 +08:00)
New Blocks Demo: neural instrument cloning (#975)
* cleaning up launchable
* removing launchables broken
* deleted launchables
* relaunch parameters
* fixed close method
* formatting
* renamed blocks demos
* creating a new blocks demo
* added neural blocks demo
* format backend
* fixed naming bug i introduced
parent a1509850dc
commit 65f5733ab4
BIN demo/blocks_neural_instrument_coding/new-sax-1.mp3 (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/new-sax-1.wav (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/new-sax.wav (new file; binary not shown)
demo/blocks_neural_instrument_coding/run.py (new file, 121 lines)
@@ -0,0 +1,121 @@

# A Blocks implementation of https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6

import datetime
import random

import gradio as gr
from gradio.components import Markdown as m


def get_time():
    now = datetime.datetime.now()
    return now.strftime("%m/%d/%Y, %H:%M:%S")


def generate_recording():
    return random.choice(["new-sax-1.mp3", "new-sax-1.wav"])


def reconstruct(audio):
    return random.choice(["new-sax-1.mp3", "new-sax-1.wav"])


io1 = gr.Interface(
    lambda x, y, z: "sax.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io2 = gr.Interface(
    lambda x, y, z: "flute.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io3 = gr.Interface(
    lambda x, y, z: "trombone.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io4 = gr.Interface(
    lambda x, y, z: "sax2.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

demo = gr.Blocks()

with demo:
    m(
        """
## Neural Instrument Cloning from Very Few Samples
<center><img src="https://media.istockphoto.com/photos/brass-trombone-picture-id490455809?k=20&m=490455809&s=612x612&w=0&h=l9KJvH_25z0QTLggHrcH_MsR4gPLH7uXwDPUAZ_C5zk=" width="400px"></center>"""
    )
    m(
        """
This Blocks implementation is an adaptation of [a report written](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6) by Nicolas Jonason and Bob L.T. Sturm.

I've implemented it in Blocks to show off some cool features, such as embedding live ML demos. More on that ahead...

### What does this machine learning model do?
It combines techniques from neural voice cloning with musical instrument synthesis. This makes it possible to produce neural instrument synthesisers from just seconds of target instrument audio.

### Audio Examples
Here are some **real** 16-second saxophone recordings:
"""
    )
    gr.Audio("sax.wav", label="Here is a real 16 second saxophone recording:")
    gr.Audio("sax.wav")

    m("""\nHere is a **generated** saxophone recording:""")
    a = gr.Audio("new-sax.wav")

    # Wire the button to the otherwise-unused generate_recording helper
    generate = gr.Button("Generate a new saxophone recording")
    generate.click(generate_recording, [], a)

    m(
        """
### Inputs to the model
The inputs to the model are:
* pitch
* loudness
* base audio file
"""
    )

    m("""Try the model live!""")

    gr.TabbedInterface(
        [io1, io2, io3, io4], ["Saxophone", "Flute", "Trombone", "Another Saxophone"]
    )

    m(
        """
### Using the model for cloning
You can also use this model a different way: simply upload an audio file and have it
reconstructed using machine learning. Here's a demo of that below:
"""
    )

    a2 = gr.Audio()
    a2.change(reconstruct, a2, a2)

    m(
        """
Thanks for reading this! As you may have realized, all of the "models" in this demo are fake. They are just designed to show you what is possible using Blocks 🤗.

For details of the model, read the [original report here](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6).

*Details for nerds*: this report was "launched" on:
"""
    )

    t = gr.Textbox(label="timestamp")
    demo.load(get_time, [], t)


if __name__ == "__main__":
    demo.launch(share=True)
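The feature this demo leans on hardest is mounting full Interfaces inside a larger page via gr.TabbedInterface. Below is a minimal, self-contained sketch of that pattern, assuming a current `pip install gradio`; the echo/shout Interfaces are placeholders of mine, not part of this commit.

import gradio as gr

# Two toy Interfaces standing in for the demo's four fake synthesisers
echo = gr.Interface(lambda s: s, gr.Textbox(), gr.Textbox())
shout = gr.Interface(lambda s: s.upper(), gr.Textbox(), gr.Textbox())

# TabbedInterface renders each Interface under its own named tab
tabs = gr.TabbedInterface([echo, shout], ["Echo", "Shout"])

if __name__ == "__main__":
    tabs.launch()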
BIN demo/blocks_neural_instrument_coding/sax.wav (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/sax2.wav (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/trombone.wav (new file; binary not shown)
@@ -1761,7 +1761,9 @@ class Audio(Component):
         (str): base64 url data
         """
         if self.output_type in ["numpy", "file", "auto"]:
-            if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
+            if self.output_type == "numpy" or (
+                self.output_type == "auto" and isinstance(y, tuple)
+            ):
                 sample_rate, data = y
                 file = tempfile.NamedTemporaryFile(
                     prefix="sample", suffix=".wav", delete=False
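The hunk above is the "fixed naming bug i introduced" item from the commit message: the branch consulted self.type (the input-side setting) where it should have used self.output_type. A simplified sketch of the corrected dispatch, with names of my own choosing; the real method goes on to encode the file as base64 url data:

# Simplified sketch of the corrected Audio postprocess dispatch; `y` is either
# a filepath string or a (sample_rate, numpy_array) tuple.
def resolve_output(output_type, y):
    if output_type in ["numpy", "file", "auto"]:
        if output_type == "numpy" or (output_type == "auto" and isinstance(y, tuple)):
            return "tuple: write (sample_rate, data) to a temp .wav, then encode"
        return "filepath: encode the existing audio file"
    raise ValueError(f"Unexpected output_type: {output_type}")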
@@ -2948,28 +2950,23 @@ class Interpretation(Component):
         }


-# TODO: (faruk) does this take component or interface as a input?
-# see this line in Carousel
-# self.components = [get_component_instance(component) for component in components]
-def get_component_instance(iface: Component):
-    # TODO: function may not work properly, and it needs updates regarding its design. See:
-    # https://github.com/gradio-app/gradio/issues/731
-    if isinstance(iface, str):
-        shortcut = Component.get_all_shortcut_implementations()[iface]
+def get_component_instance(comp: str | dict | Component):
+    if isinstance(comp, str):
+        shortcut = Component.get_all_shortcut_implementations()[comp]
         return shortcut[0](**shortcut[1], without_rendering=True)
     elif isinstance(
-        iface, dict
+        comp, dict
     ):  # a dict with `name` as the input component type and other keys as parameters
-        name = iface.pop("name")
+        name = comp.pop("name")
         for component in Component.__subclasses__():
             if component.__name__.lower() == name:
                 break
         else:
             raise ValueError(f"No such Component: {name}")
-        return component(**iface, without_rendering=True)
-    elif isinstance(iface, Component):
-        return iface
+        return component(**comp, without_rendering=True)
+    elif isinstance(comp, Component):
+        return comp
     else:
         raise ValueError(
-            f"Input interface must be of type `str` or `dict` or `InputComponent` but is {iface}"
+            f"Component must be provided as a `str` or `dict` or `Component` but is {comp}"
         )
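After the rename from iface to comp, the helper's contract is easier to state: it accepts a shortcut string, a dict whose "name" key selects the Component subclass (remaining keys become constructor kwargs), or a ready-made Component instance. A hedged usage sketch, assuming the gradio version in this commit; the "textbox" shortcut key and the import path are my assumptions, not shown in the diff:

from gradio.components import Textbox, get_component_instance  # assumed import path

get_component_instance("textbox")                             # str: looked up in the shortcut table
get_component_instance({"name": "textbox", "label": "Name"})  # dict: "name" picks the subclass
get_component_instance(Textbox(label="Name"))                 # Component: returned as-is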