mirror of
https://github.com/gradio-app/gradio.git
synced 2024-12-21 02:19:59 +08:00
b888db4a9a
* Update real-time-speech-recognition.md added necessary dependency * Update run.py updated code to handle cases with stereo microphone * Update real-time-speech-recognition.md improved english * Update run.py updated code for streaming * Update run.py
27 lines
566 B
Python
27 lines
566 B
Python
import gradio as gr
|
|
from transformers import pipeline
|
|
import numpy as np
|
|
|
|
# Speech-to-text pipeline, created once at import time and reused for every
# call to transcribe().
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
|
|
|
|
def transcribe(audio):
    """Transcribe one recording to text using the module-level ``transcriber``.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, where ``samples`` is a
            NumPy array that is either one-dimensional or has its channels
            along axis 1. ``None`` is accepted and treated as "no recording".

    Returns:
        The recognised text, or an empty string when there is no audio to
        transcribe (``audio`` is ``None`` or the sample array is empty).
    """
    # Nothing was recorded: return an empty transcript instead of failing on
    # the tuple unpacking below.
    if audio is None:
        return ""

    sr, y = audio

    # np.max() raises ValueError on an empty array, so bail out early.
    if y.size == 0:
        return ""

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, but skip it for an all-zero (silent) signal: dividing
    # by a zero peak would fill the array with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]  # type: ignore
|
|
|
|
# Wire the UI together: audio captured from the microphone goes into
# transcribe(), and its return value is shown as text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone"),
    outputs="text",
)

# Launch the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()
|