mirror of
https://github.com/gradio-app/gradio.git
synced 2025-03-25 12:10:31 +08:00
added transformers based demo for sst
This commit is contained in:
parent
55c69891e6
commit
0e220e51b1
1
demo/streaming_wav2vec/requirements.txt
Normal file
1
demo/streaming_wav2vec/requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
deepspeech==0.8.2
|
43
demo/streaming_wav2vec/run.py
Normal file
43
demo/streaming_wav2vec/run.py
Normal file
@ -0,0 +1,43 @@
|
||||
from deepspeech import Model
|
||||
import gradio as gr
|
||||
import scipy.io.wavfile
|
||||
import numpy as np
|
||||
|
||||
# Decoder tuning values handed to the DeepSpeech model below.
beam_width = 100
lm_alpha, lm_beta = 0.93, 1.18

# Files read at start-up; the paths carry no directory component.
model_file_path = "deepspeech-0.8.2-models.pbmm"
lm_file_path = "deepspeech-0.8.2-models.scorer"

# Build the shared model once at import time. The external scorer is
# attached first, then its alpha/beta weights and the beam width are set.
model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
|
||||
|
||||
|
||||
def reformat_freq(sr, y):
    """Bring an audio chunk to the 16 kHz rate that DeepSpeech accepts.

    Args:
        sr: Sample rate of ``y``; only 48000 and 16000 are accepted.
        y: NumPy array of audio samples (treated as a 1-D sequence).

    Returns:
        A ``(sr, y)`` tuple. 16 kHz input is returned untouched. 48 kHz
        input is peak-normalised to the int16 range, averaged in groups of
        three samples (48k -> 16k) and returned as an ``int16`` array with
        ``sr`` set to 16000.

    Raises:
        ValueError: If ``sr`` is neither 48000 nor 16000.
    """
    if sr not in (48000, 16000):
        # Deepspeech only supports 16k, (we convert 48k -> 16k)
        raise ValueError("Unsupported rate", sr)

    if sr == 48000:
        # Work in float so abs() of the most negative int16 cannot overflow.
        samples = np.asarray(y, dtype=np.float64)

        # Groups of three are averaged, so drop the 1-2 trailing samples of a
        # chunk whose length is not a multiple of three instead of failing
        # in reshape().
        usable = len(samples) - len(samples) % 3
        samples = samples[:usable]

        # Normalise by the absolute peak: a chunk whose largest excursion is
        # negative must not be scaled past the int16 range. The floor of 1
        # avoids dividing by zero on silence; an empty chunk has no peak.
        peak = max(np.max(np.abs(samples)), 1) if usable else 1

        y = (
            (samples / peak * 32767)
            .reshape((-1, 3))
            .mean(axis=1)
            .astype("int16")
        )
        sr = 16000
    return sr, y
|
||||
|
||||
|
||||
def transcribe(speech, stream):
    """Feed one audio chunk into a DeepSpeech stream and return the text so far.

    Args:
        speech: ``(sample_rate, samples)`` pair, unpacked straight into
            ``reformat_freq``; ``None`` when no audio was delivered.
        stream: Stream object returned by a previous call, or ``None`` on the
            first call, in which case a new one is created from ``model``.

    Returns:
        A ``(text, stream)`` tuple: the intermediate transcript and the
        stream to pass back in on the next call.
    """
    if speech is None:
        # Nothing to feed: report what has been decoded so far (if anything)
        # and leave the stream untouched rather than failing on unpacking.
        text = "" if stream is None else stream.intermediateDecode()
        return text, stream

    _, y = reformat_freq(*speech)
    if stream is None:
        stream = model.createStream()
    stream.feedAudioContent(y)
    text = stream.intermediateDecode()
    return text, stream
|
||||
|
||||
# Live microphone demo: "state" carries the stream object from one
# transcribe() call to the next.
iface = gr.Interface(
    fn=transcribe,
    inputs=["microphone", "state"],
    outputs=["text", "state"],
    live=True,
)
iface.launch()
|
Loading…
x
Reference in New Issue
Block a user