gradio/demo/musical_instrument_identification/data_setups.py
Ali Abdalla 597337dcb8
Adding a Playground Tab to the Website (#1860)
* added playground with 12 demos

* change name to recipes, restyle navbar

* add explanatory text to page

* fix demo mapping

* categorize demos, clean up design

* styling

* category naming and emojis

* refactor and add text demos

* add view code button

* remove opening slash in embed

* styling

* add image demos

* adding plot demos

* remove see code button

* removed submodules

* changes

* add audio models

* remove fun section

* remove tests in image segmentation demo repo

* requested changes

* add outbreak_forecast

* fix broken demos

* remove images and models, add new demos

* remove readmes, change to run.py, add description as comment

* move to /demos folder, clean up dict

* add upload_to_spaces script

* fix script, clean repos, and add to docker file

* fix python versioning issue

* env variable

* fix

* env fixes

* spaces instead of tabs

* revert to original networking.py

* fix rate limiting in asr and autocomplete

* change name to demos

* clean up navbar

* move url and description, remove code comments

* add tabs to demos

* remove margins and footer from embedded demo

* font consistency

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
2022-09-15 08:24:10 -07:00

80 lines
2.4 KiB
Python

# Data setup helpers: class discovery and audio-to-image (mel spectrogram) preprocessing
import os
import librosa
import torch
import numpy as np
from torchaudio.transforms import Resample
# Default sample rate handed to compute_melspec (presumably in Hz)
SAMPLE_RATE = 44100
# NOTE(review): presumably the clip length in seconds; not referenced in the
# code visible here -- confirm where it is used
AUDIO_LEN = 2.90
# Parameters to control the MelSpec generation
# (each one is forwarded to librosa.feature.melspectrogram in compute_melspec)
N_MELS = 128  # passed as n_mels: number of mel bands
F_MIN = 20  # passed as fmin: lowest frequency of the mel filter bank
F_MAX = 16000  # passed as fmax: highest frequency of the mel filter bank
N_FFT = 1024  # passed as n_fft: FFT window length
HOP_LEN = 512  # passed as hop_length: samples between successive frames
# Make function to find classes in target directory
def find_classes(directory: str):
    """Discover class names from the sub-directories of ``directory``.

    Args:
        directory: Path whose immediate sub-directories are each treated as
            one class.

    Returns:
        A tuple ``(classes, class_to_idx)``: the sorted list of sub-directory
        names, and a dict mapping each name to its position in that list.

    Raises:
        FileNotFoundError: If ``directory`` contains no sub-directories.
    """
    # Only directories count as classes; plain files are ignored.
    class_names = [item.name for item in os.scandir(directory) if item.is_dir()]
    class_names.sort()

    if len(class_names) == 0:
        raise FileNotFoundError(f"Couldn't find any classes in {directory}.")

    # Give every class name an integer label matching its sorted position.
    index_by_class = dict(zip(class_names, range(len(class_names))))
    return class_names, index_by_class
def resample(wav, sample_rate, new_sample_rate):
    """Collapse a waveform to mono and downsample it when necessary.

    Args:
        wav: Audio tensor. Assumed to be laid out as ``(channels, samples)``,
            or ``(samples,)`` when it is already mono.
        sample_rate: Sampling rate of ``wav``.
        new_sample_rate: Target sampling rate.

    Returns:
        The waveform with its leading channel axis removed. It is resampled
        to ``new_sample_rate`` only when ``sample_rate`` is higher than the
        target; audio at or below the target rate keeps its original rate.
    """
    # Only tensors with a channel axis need collapsing. A 1-D tensor is
    # already mono: treating its first (sample) axis as channels would
    # average the whole signal into a single scalar.
    if wav.dim() > 1:
        if wav.shape[0] >= 2:
            # Multi-channel: average the channels into one mono track.
            wav = torch.mean(wav, dim=0)
        else:
            # Single channel: just drop the channel axis.
            wav = wav.squeeze(0)

    # Downsample only; lower-rate audio is never upsampled here.
    if sample_rate > new_sample_rate:
        resampler = Resample(sample_rate, new_sample_rate)
        wav = resampler(wav)
    return wav
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Turn a single-channel array into a 3-channel ``uint8`` image.

    The input is replicated three times along a new last axis, standardized,
    and then min-max scaled to the range [0, 255].

    Args:
        X: Array to convert.
        eps: Small constant added to ``std`` to avoid division by zero; also
            the minimum value range required before scaling is attempted.
        mean: Value subtracted during standardization. Defaults to the mean
            of the stacked array. May be a scalar or an array that broadcasts
            against the stacked image.
        std: Value the centered data is divided by. Defaults to the standard
            deviation of the stacked array. Scalar or broadcastable array.

    Returns:
        A ``uint8`` array of shape ``X.shape + (3,)``. It is all zeros when
        the standardized values span no more than ``eps``.
    """
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Test against None instead of truthiness so an explicit 0
    # is honoured and array-valued statistics do not raise an
    # "ambiguous truth value" error.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    # Normalize to [0, 255]
    _min, _max = X.min(), X.max()
    if (_max - _min) > eps:
        # Every value already lies in [_min, _max], so no clipping is needed.
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        # (Near-)constant input: there is no contrast to stretch.
        V = np.zeros_like(X, dtype=np.uint8)
    return V
def normalize(image, mean=None, std=None):
    """Scale an image to [0, 1] and return it channels-first as ``float32``.

    Args:
        image: Array whose axis 2 holds the channels and whose values are on
            a 0-255 scale.
        mean: Optional value subtracted after scaling.
        std: Optional value the centered image is divided by.

    Returns:
        A ``float32`` array with axis 2 moved to the front. ``mean`` and
        ``std`` are applied only when both are provided.
    """
    scaled = image / 255.0

    # Standardization needs both statistics; with either missing it is skipped.
    if mean is None or std is None:
        standardized = scaled
    else:
        standardized = (scaled - mean) / std

    channels_first = np.moveaxis(standardized, 2, 0)
    return channels_first.astype(np.float32)
def compute_melspec(wav, sample_rate=SAMPLE_RATE):
    """Compute a decibel-scaled mel spectrogram of a waveform.

    Args:
        wav: Audio samples, passed to librosa as ``y``.
        sample_rate: Sampling rate of ``wav``; defaults to ``SAMPLE_RATE``.

    Returns:
        The ``float32`` result of ``librosa.power_to_db`` applied to the mel
        spectrogram built with the module-level ``N_FFT``, ``HOP_LEN``,
        ``N_MELS``, ``F_MIN`` and ``F_MAX`` settings.
    """
    # Gather the module-level spectrogram settings in one place.
    mel_settings = dict(
        n_fft=N_FFT,
        hop_length=HOP_LEN,
        n_mels=N_MELS,
        fmin=F_MIN,
        fmax=F_MAX,
    )
    mel_power = librosa.feature.melspectrogram(y=wav, sr=sample_rate, **mel_settings)

    # Convert the spectrogram to decibels before casting down to float32.
    mel_db = librosa.power_to_db(mel_power)
    return mel_db.astype(np.float32)
def audio_preprocess(wav, sample_rate):
    """Convert a waveform tensor into an image-like tensor.

    The waveform goes through these steps: tensor -> NumPy array -> mel
    spectrogram (``compute_melspec``) -> 3-channel ``uint8`` image
    (``mono_to_color``) -> channels-first ``float32`` array (``normalize``)
    -> tensor.

    Args:
        wav: Waveform as a torch tensor (converted with ``.numpy()``).
        sample_rate: Sampling rate of ``wav``.

    Returns:
        A torch tensor built from the normalized spectrogram image.
    """
    samples = wav.numpy()
    spectrogram = compute_melspec(samples, sample_rate)
    rgb = mono_to_color(spectrogram)
    # No mean/std is supplied, so normalize() only rescales by 1/255 and
    # moves the channel axis to the front.
    scaled = normalize(rgb, mean=None, std=None)
    return torch.from_numpy(scaled)