mirror of
https://github.com/gradio-app/gradio.git
synced 2024-11-27 01:40:20 +08:00
b888db4a9a
* Update real-time-speech-recognition.md added necessary dependency * Update run.py updated code to handle cases with stereo microphone * Update real-time-speech-recognition.md improved english * Update run.py updated code for streaming * Update run.py
32 lines
751 B
Python
32 lines
751 B
Python
import gradio as gr
|
|
from transformers import pipeline
|
|
import numpy as np
|
|
|
|
# Build the speech-recognition pipeline once, at import time, so that every
# call to transcribe() reuses the same pipeline object.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
|
|
|
|
def transcribe(stream, new_chunk):
    """Append a new audio chunk to the running stream and transcribe all of it.

    Args:
        stream: Audio accumulated by earlier calls (a 1-D float32 array), or
            ``None`` on the first call.
        new_chunk: ``(sampling_rate, samples)`` pair for the newest chunk.
            ``samples`` is a NumPy array; when it has more than one
            dimension it is averaged over axis 1 (assumed to be the channel
            axis -- confirm against the audio source).

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio, to be
        passed back in on the next call, and the transcription of that audio.
    """
    sr, y = new_chunk

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise the chunk. A silent (all-zero) chunk has a peak of 0 and
    # dividing by it would fill the chunk -- and from then on the whole
    # accumulated stream -- with NaN; an empty chunk has no peak at all.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Nothing has been recorded yet, so there is nothing to transcribe.
    if stream.size == 0:
        return stream, ""

    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]  # type: ignore
|
|
|
|
# Live interface around transcribe(): the "state" input/output pair carries
# the accumulated audio from one call to the next, the streaming microphone
# component supplies each new chunk, and the "text" output shows the result.
demo = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    live=True,
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|