mirror of
https://github.com/gradio-app/gradio.git
synced 2025-01-12 10:34:32 +08:00
597337dcb8
* added playground with 12 demos * change name to recipes, restyle navbar * add explanatory text to page * fix demo mapping * categorize demos, clean up design * styling * cateogry naming and emojis * refactor and add text demos * add view code button * remove opening slash in embed * styling * add image demos * adding plot demos * remove see code button * removed submodules * changes * add audio models * remove fun section * remove tests in image semgentation demo repo * requested changes * add outbreak_forecast * fix broken demos * remove images and models, add new demos * remove readmes, change to run.py, add description as comment * move to /demos folder, clean up dict * add upload_to_spaces script * fix script, clean repos, and add to docker file * fix python versioning issue * env variable * fix * env fixes * spaces instead of tabs * revert to original networking.py * fix rate limiting in asr and autocomplete * change name to demos * clean up navbar * move url and description, remove code comments * add tabs to demos * remove margins and footer from embedded demo * font consistency Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
80 lines
2.4 KiB
Python
80 lines
2.4 KiB
Python
# Make function to find classes in target directory
|
|
import os
|
|
import librosa
|
|
import torch
|
|
import numpy as np
|
|
from torchaudio.transforms import Resample
|
|
|
|
# Default sampling rate; compute_melspec forwards it to librosa as `sr`.
SAMPLE_RATE = 44_100
# NOTE(review): not referenced anywhere in the visible code; presumably a clip
# length in seconds -- confirm against the callers.
AUDIO_LEN = 2.9

# Parameters to control the MelSpec generation
# (each one is forwarded by keyword to librosa.feature.melspectrogram).
N_MELS = 128     # n_mels
F_MIN = 20       # fmin
F_MAX = 16_000   # fmax
N_FFT = 1024     # n_fft
HOP_LEN = 512    # hop_length
|
|
|
|
# Make function to find classes in target directory
|
|
def find_classes(directory: str):
    """Derive class labels from the sub-directories of ``directory``.

    Args:
        directory: Path that is scanned (non-recursively) with ``os.scandir``.

    Returns:
        A tuple ``(classes, class_to_idx)``: ``classes`` is the sorted list of
        immediate sub-directory names and ``class_to_idx`` maps every name to
        its position in that list.

    Raises:
        FileNotFoundError: If ``directory`` holds no sub-directories.
    """
    # Only directories count as classes; plain files are ignored.
    names = [item.name for item in os.scandir(directory) if item.is_dir()]
    names.sort()

    if len(names) == 0:
        raise FileNotFoundError(f"Couldn't find any classes in {directory}.")

    # Numeric labels follow the sorted order of the class names.
    index_of = dict(zip(names, range(len(names))))
    return names, index_of
|
|
|
|
def resample(wav, sample_rate, new_sample_rate):
    """Reduce ``wav`` along its first axis and downsample it when required.

    The first dimension is treated as the channel axis (assumes ``wav`` is a
    tensor laid out as (channels, samples) -- TODO confirm with callers):
    two or more entries are averaged together, otherwise the axis is
    squeezed away.

    Resampling happens only when ``sample_rate`` is strictly greater than
    ``new_sample_rate``; in every other case the single-channel signal is
    returned at its original rate.
    """
    channel_count = wav.shape[0]
    mono = wav.squeeze(0) if channel_count < 2 else torch.mean(wav, dim=0)

    # Never upsample: equal or lower input rates pass straight through.
    if sample_rate <= new_sample_rate:
        return mono

    return Resample(sample_rate, new_sample_rate)(mono)
|
|
|
|
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Convert a single-channel array into a 3-channel ``uint8`` image.

    ``X`` is replicated three times along a new last axis, standardized with
    ``mean``/``std`` and finally min-max rescaled to the range [0, 255].

    Args:
        X: Numeric array to convert.
        eps: Added to ``std`` before dividing; also the minimum value range
            below which the input is treated as constant.
        mean: Value subtracted during standardization. Scalar or NumPy array
            broadcastable against the stacked image (e.g. one value per
            channel). Defaults to the mean of the stacked image.
        std: Divisor used during standardization (same kinds as ``mean``).
            Defaults to the standard deviation of the stacked image.

    Returns:
        ``uint8`` array with the shape of ``X`` plus a trailing axis of
        length 3. All zeros when the standardized range does not exceed
        ``eps``.
    """
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Test against None explicitly: a truthiness test would
    # throw away an explicit 0 and raise on array-valued statistics.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    # Normalize to [0, 255]
    _min, _max = X.min(), X.max()
    if (_max - _min) > eps:
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        # (Nearly) constant input: avoid dividing by a vanishing range.
        V = np.zeros_like(X, dtype=np.uint8)
    return V
|
|
|
|
def normalize(image, mean=None, std=None):
    """Scale an image by 1/255 and move axis 2 to the front as ``float32``.

    Args:
        image: Array whose axis 2 is moved to position 0 (for a 3-D input
            this turns (H, W, C) into (C, H, W)).
        mean: Optional value subtracted after scaling.
        std: Optional divisor applied after subtracting ``mean``.

    Standardization is applied only when BOTH ``mean`` and ``std`` are
    given; supplying just one of them has no effect.
    """
    scaled = image / 255.0

    standardize = not (mean is None or std is None)
    if standardize:
        scaled = (scaled - mean) / std

    channels_first = np.moveaxis(scaled, 2, 0)
    return channels_first.astype(np.float32)
|
|
|
|
def compute_melspec(wav, sample_rate=SAMPLE_RATE):
    """Compute a mel spectrogram of ``wav`` and return it in dB as ``float32``.

    Args:
        wav: Signal handed to ``librosa.feature.melspectrogram`` as ``y``.
        sample_rate: Sampling rate handed to librosa as ``sr``; defaults to
            the module-level ``SAMPLE_RATE``.

    The FFT size, hop length, number of mel bands and frequency limits come
    from the module-level constants.
    """
    mel_options = {
        "n_fft": N_FFT,
        "hop_length": HOP_LEN,
        "n_mels": N_MELS,
        "fmin": F_MIN,
        "fmax": F_MAX,
    }
    power_spec = librosa.feature.melspectrogram(y=wav, sr=sample_rate, **mel_options)

    # Convert the power spectrogram to decibels before the dtype cast.
    db_spec = librosa.power_to_db(power_spec)
    return db_spec.astype(np.float32)
|
|
|
|
def audio_preprocess(wav, sample_rate):
    """Turn a waveform tensor into an image-like tensor for a model.

    Pipeline: tensor -> NumPy array -> dB mel spectrogram
    (``compute_melspec``) -> 3-channel uint8 image (``mono_to_color``) ->
    scaled float32 array with axis 2 moved to the front (``normalize``)
    -> tensor.

    Args:
        wav: Object exposing ``.numpy()`` (presumably a CPU ``torch.Tensor``
            holding a single-channel signal -- confirm with callers).
        sample_rate: Sampling rate forwarded to ``compute_melspec``.
    """
    samples = wav.numpy()
    spectrogram = compute_melspec(samples, sample_rate)
    rgb = mono_to_color(spectrogram)

    # No mean/std supplied, so normalize() only rescales by 1/255.
    chw = normalize(rgb)
    return torch.from_numpy(chw)