gradio/demo/main_note/run.py

56 lines
1.2 KiB
Python
Raw Normal View History

from math import log2, pow
import matplotlib.pyplot as plt
2020-10-14 23:25:58 +08:00
import numpy as np
from scipy.fftpack import fft
import gradio as gr
2020-10-14 23:25:58 +08:00
# Reference tuning: A4 at 440 Hz.
A4 = 440
# C0 lies 4.75 octaves (57 semitones) below A4.
C0 = A4 * pow(2, -4.75)
# Pitch-class names, indexed by semitone offset from C (0..11).
name = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


def get_pitch(freq):
    """Return the pitch-class name (e.g. ``"A"``, ``"C#"``) closest to ``freq``.

    The frequency is converted to a semitone distance from ``C0``, rounded to
    the nearest whole semitone, and folded into a single octave.

    Args:
        freq: Frequency in the same unit as ``C0``. Must be positive;
            ``math.log2`` raises ``ValueError`` otherwise.

    Returns:
        One of the twelve entries of ``name``; the octave is discarded.
    """
    semitones_from_c0 = 12 * log2(freq / C0)
    # Python's modulo is non-negative for a positive divisor, so frequencies
    # below C0 (negative semitone counts) still index into ``name`` correctly.
    return name[round(semitones_from_c0) % 12]
2020-10-14 23:25:58 +08:00
def main_note(audio):
    """Measure how a recording's spectral magnitude is spread over pitch classes.

    Args:
        audio: A ``(rate, samples)`` pair. ``rate`` is the sampling rate and
            ``samples`` is a NumPy array. For a 2-D array only ``samples.T[0]``
            is analysed (presumably the first channel of a
            ``(n_samples, n_channels)`` array -- confirm against the caller).

    Returns:
        A dict mapping pitch-class names (as returned by ``get_pitch``) to the
        share of the non-DC spectral magnitude that falls on that pitch class.
        Shares sum to 1 for non-silent input and are all ``0.0`` when the
        spectrum carries no energy outside the DC bin. The dict is empty when
        there are no non-DC bins at all (fewer than four samples).
    """
    rate, y = audio
    if y.ndim == 2:
        y = y.T[0]
    n_samples = len(y)
    half = n_samples // 2

    spectrum = fft(y)
    # One-sided magnitude spectrum.
    magnitudes = 2.0 / n_samples * np.abs(spectrum[:half])
    # DFT bin k sits at k * rate / n_samples. (A linspace up to rate / 2 over
    # ``half`` points would stretch the axis by roughly a factor 1 + 2/n.)
    freqs = np.arange(half) * (rate / n_samples)

    # Bin 0 is the DC offset: it has no pitch, so exclude it from both the
    # accumulation and the normaliser so that the shares add up to 1.
    freqs = freqs[1:]
    magnitudes = magnitudes[1:]

    total_volume = float(np.sum(magnitudes))
    # Silent input: report zeros instead of dividing 0 by 0 into NaNs.
    scale = 1.0 / total_volume if total_volume > 0 else 0.0

    volume_per_pitch = {}
    for freq, volume in zip(freqs, magnitudes):
        pitch = get_pitch(freq)
        # float() keeps the values plain Python floats regardless of the
        # dtype produced by the FFT.
        volume_per_pitch[pitch] = (
            volume_per_pitch.get(pitch, 0.0) + float(volume) * scale
        )
    return volume_per_pitch
2020-11-11 22:15:53 +08:00
# Demo UI: an audio input fed to ``main_note``, with the result rendered as a
# label widget limited to the four highest-scoring pitch classes.
iface = gr.Interface(
    fn=main_note,
    inputs="audio",
    outputs=gr.outputs.Label(num_top_classes=4),
    examples=[["audio/recording1.wav"], ["audio/cantina.wav"]],
    interpretation="default",
)


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()