Mirror of https://github.com/gradio-app/gradio.git (synced 2024-11-21 01:01:05 +08:00)
New Blocks Demo: neural instrument cloning (#975)
* cleaning up launchable
* removing launchables broken
* deleted launchables
* relaunch parameters
* fixed close method
* formatting
* renamed blocks demos
* creating a new blocks demo
* added neural blocks demo
* format backend
* fixed naming bug i introduced
parent a1509850dc
commit 65f5733ab4
BIN demo/blocks_neural_instrument_coding/new-sax-1.mp3 (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/new-sax-1.wav (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/new-sax.wav (new file; binary not shown)
demo/blocks_neural_instrument_coding/run.py (new file, 121 lines)
@@ -0,0 +1,121 @@

# A Blocks implementation of https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6

import datetime
import random

import gradio as gr
from gradio.components import Markdown as m


def get_time():
    now = datetime.datetime.now()
    return now.strftime("%m/%d/%Y, %H:%M:%S")


def generate_recording():
    return random.choice(["new-sax-1.mp3", "new-sax-1.wav"])


def reconstruct(audio):
    return random.choice(["new-sax-1.mp3", "new-sax-1.wav"])


io1 = gr.Interface(
    lambda x, y, z: "sax.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io2 = gr.Interface(
    lambda x, y, z: "flute.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io3 = gr.Interface(
    lambda x, y, z: "trombone.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io4 = gr.Interface(
    lambda x, y, z: "sax2.wav",
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

demo = gr.Blocks()

with demo:
    m(
        """
## Neural Instrument Cloning from Very Few Samples
<center><img src="https://media.istockphoto.com/photos/brass-trombone-picture-id490455809?k=20&m=490455809&s=612x612&w=0&h=l9KJvH_25z0QTLggHrcH_MsR4gPLH7uXwDPUAZ_C5zk=" width="400px"></center>"""
    )
    m(
        """
This Blocks implementation is an adaptation of [a report written](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6) by Nicolas Jonason and Bob L.T. Sturm.

I've implemented it in Blocks to show off some cool features, such as embedding live ML demos. More on that ahead...

### What does this machine learning model do?
It combines techniques from neural voice cloning with musical instrument synthesis. This makes it possible to produce neural instrument synthesisers from just seconds of target instrument audio.

### Audio Examples
Here are some **real** 16-second saxophone recordings:
"""
    )
    gr.Audio("sax.wav", label="Here is a real 16 second saxophone recording:")
    gr.Audio("sax.wav")

    m("""\nHere is a **generated** saxophone recording:""")
    a = gr.Audio("new-sax.wav")

    # Wire the button to the otherwise-unused generate_recording helper
    generate = gr.Button("Generate a new saxophone recording")
    generate.click(generate_recording, [], a)

    m(
        """
### Inputs to the model
The inputs to the model are:
* pitch
* loudness
* base audio file
"""
    )

    m("""Try the model live!""")

    gr.TabbedInterface(
        [io1, io2, io3, io4], ["Saxophone", "Flute", "Trombone", "Another Saxophone"]
    )

    m(
        """
### Using the model for cloning
You can also use this model a different way: simply upload an audio file and have it
reconstructed using machine learning. Here's a demo of that below:
"""
    )

    a2 = gr.Audio()
    a2.change(reconstruct, a2, a2)

    m(
        """
Thanks for reading this! As you may have realized, all of the "models" in this demo are fake. They are just designed to show you what is possible using Blocks 🤗.

For details of the model, read the [original report here](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6).

*Details for nerds*: this report was "launched" on:
"""
    )

    t = gr.Textbox(label="timestamp")
    demo.load(get_time, [], t)


if __name__ == "__main__":
    demo.launch(share=True)
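The feature this demo leans on hardest is mounting full Interfaces inside a larger page via gr.TabbedInterface. Below is a minimal, self-contained sketch of that pattern, assuming a current `pip install gradio`; the echo/shout Interfaces are placeholders of mine, not part of this commit.

import gradio as gr

# Two toy Interfaces standing in for the demo's four fake synthesisers
echo = gr.Interface(lambda s: s, gr.Textbox(), gr.Textbox())
shout = gr.Interface(lambda s: s.upper(), gr.Textbox(), gr.Textbox())

# TabbedInterface renders each Interface under its own named tab
tabs = gr.TabbedInterface([echo, shout], ["Echo", "Shout"])

if __name__ == "__main__":
    tabs.launch()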
BIN demo/blocks_neural_instrument_coding/sax.wav (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/sax2.wav (new file; binary not shown)
BIN demo/blocks_neural_instrument_coding/trombone.wav (new file; binary not shown)
@@ -1761,7 +1761,9 @@ class Audio(Component):
         (str): base64 url data
         """
         if self.output_type in ["numpy", "file", "auto"]:
-            if self.type == "numpy" or (self.type == "auto" and isinstance(y, tuple)):
+            if self.output_type == "numpy" or (
+                self.output_type == "auto" and isinstance(y, tuple)
+            ):
                 sample_rate, data = y
                 file = tempfile.NamedTemporaryFile(
                     prefix="sample", suffix=".wav", delete=False
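The hunk above is the "fixed naming bug i introduced" item from the commit message: the branch consulted self.type (the input-side setting) where it should have used self.output_type. A simplified sketch of the corrected dispatch, with names of my own choosing; the real method goes on to encode the file as base64 url data:

# Simplified sketch of the corrected Audio postprocess dispatch; `y` is either
# a filepath string or a (sample_rate, numpy_array) tuple.
def resolve_output(output_type, y):
    if output_type in ["numpy", "file", "auto"]:
        if output_type == "numpy" or (output_type == "auto" and isinstance(y, tuple)):
            return "tuple: write (sample_rate, data) to a temp .wav, then encode"
        return "filepath: encode the existing audio file"
    raise ValueError(f"Unexpected output_type: {output_type}")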
@@ -2948,28 +2950,23 @@ class Interpretation(Component):
         }


-# TODO: (faruk) does this take component or interface as a input?
-# see this line in Carousel
-# self.components = [get_component_instance(component) for component in components]
-def get_component_instance(iface: Component):
-    # TODO: function may not work properly, and it needs updates regarding its design. See:
-    # https://github.com/gradio-app/gradio/issues/731
-    if isinstance(iface, str):
-        shortcut = Component.get_all_shortcut_implementations()[iface]
+def get_component_instance(comp: str | dict | Component):
+    if isinstance(comp, str):
+        shortcut = Component.get_all_shortcut_implementations()[comp]
         return shortcut[0](**shortcut[1], without_rendering=True)
     elif isinstance(
-        iface, dict
+        comp, dict
     ):  # a dict with `name` as the input component type and other keys as parameters
-        name = iface.pop("name")
+        name = comp.pop("name")
         for component in Component.__subclasses__():
             if component.__name__.lower() == name:
                 break
         else:
             raise ValueError(f"No such Component: {name}")
-        return component(**iface, without_rendering=True)
-    elif isinstance(iface, Component):
-        return iface
+        return component(**comp, without_rendering=True)
+    elif isinstance(comp, Component):
+        return comp
     else:
         raise ValueError(
-            f"Input interface must be of type `str` or `dict` or `InputComponent` but is {iface}"
+            f"Component must be provided as a `str` or `dict` or `Component` but is {comp}"
         )
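After the rename from iface to comp, the helper's contract is easier to state: it accepts a shortcut string, a dict whose "name" key selects the Component subclass (remaining keys become constructor kwargs), or a ready-made Component instance. A hedged usage sketch, assuming the gradio version in this commit; the "textbox" shortcut key and the import path are my assumptions, not shown in the diff:

from gradio.components import Textbox, get_component_instance  # assumed import path

get_component_instance("textbox")                             # str: looked up in the shortcut table
get_component_instance({"name": "textbox", "label": "Name"})  # dict: "name" picks the subclass
get_component_instance(Textbox(label="Name"))                 # Component: returned as-is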