Mirror of https://github.com/gradio-app/gradio.git, synced 2025-04-12 12:40:29 +08:00
Fix blocks_kitchen_sink and streaming_stt demos (#4699)
* Add code
* Add json file
* Remove streaming_stt demo
* Undo generate_notebooks
* Add blocks_kitchen_sink
parent 29c916c9a6
commit b7072b1535
demo/streaming_stt/.gitignore (vendored), 2 lines
@@ -1,2 +0,0 @@
*.pbmm
*.scorer
@@ -1 +0,0 @@
deepspeech==0.9.3
@@ -1 +0,0 @@
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: streaming_stt"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio deepspeech==0.9.3"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["from deepspeech import Model\n", "import gradio as gr\n", "import numpy as np\n", "import urllib.request\n", "\n", "model_file_path = \"deepspeech-0.9.3-models.pbmm\"\n", "lm_file_path = \"deepspeech-0.9.3-models.scorer\"\n", "url = \"https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/\"\n", "\n", "urllib.request.urlretrieve(url + model_file_path, filename=model_file_path)\n", "urllib.request.urlretrieve(url + lm_file_path, filename=lm_file_path)\n", "\n", "beam_width = 100\n", "lm_alpha = 0.93\n", "lm_beta = 1.18\n", "\n", "model = Model(model_file_path)\n", "model.enableExternalScorer(lm_file_path)\n", "model.setScorerAlphaBeta(lm_alpha, lm_beta)\n", "model.setBeamWidth(beam_width)\n", "\n", "\n", "def reformat_freq(sr, y):\n", " if sr not in (\n", " 48000,\n", " 16000,\n", " ): # Deepspeech only supports 16k, (we convert 48k -> 16k)\n", " raise ValueError(\"Unsupported rate\", sr)\n", " if sr == 48000:\n", " y = (\n", " ((y / max(np.max(y), 1)) * 32767)\n", " .reshape((-1, 3))\n", " .mean(axis=1)\n", " .astype(\"int16\")\n", " )\n", " sr = 16000\n", " return sr, y\n", "\n", "\n", "def transcribe(speech, stream):\n", " _, y = reformat_freq(*speech)\n", " if stream is None:\n", " stream = model.createStream()\n", " stream.feedAudioContent(y)\n", " text = stream.intermediateDecode()\n", " return text, stream\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [gr.Audio(source=\"microphone\", streaming=True), \"state\"],\n", " [\"text\", \"state\"],\n", " live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -1,57 +0,0 @@
from deepspeech import Model
import gradio as gr
import numpy as np
import urllib.request

model_file_path = "deepspeech-0.9.3-models.pbmm"
lm_file_path = "deepspeech-0.9.3-models.scorer"
url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"

urllib.request.urlretrieve(url + model_file_path, filename=model_file_path)
urllib.request.urlretrieve(url + lm_file_path, filename=lm_file_path)

beam_width = 100
lm_alpha = 0.93
lm_beta = 1.18

model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)


def reformat_freq(sr, y):
    if sr not in (
        48000,
        16000,
    ):  # Deepspeech only supports 16k, (we convert 48k -> 16k)
        raise ValueError("Unsupported rate", sr)
    if sr == 48000:
        y = (
            ((y / max(np.max(y), 1)) * 32767)
            .reshape((-1, 3))
            .mean(axis=1)
            .astype("int16")
        )
        sr = 16000
    return sr, y


def transcribe(speech, stream):
    _, y = reformat_freq(*speech)
    if stream is None:
        stream = model.createStream()
    stream.feedAudioContent(y)
    text = stream.intermediateDecode()
    return text, stream


demo = gr.Interface(
    transcribe,
    [gr.Audio(source="microphone", streaming=True), "state"],
    ["text", "state"],
    live=True,
)

if __name__ == "__main__":
    demo.launch()
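Note on the removed run.py above: reformat_freq converts 48 kHz microphone audio to the 16 kHz that DeepSpeech expects by normalizing the signal to the int16 range and averaging every three samples, a plain 3:1 decimation. A minimal standalone sketch of that step, with an illustrative helper name and test signal that are not part of the demo:

import numpy as np

def downsample_48k_to_16k(y: np.ndarray) -> np.ndarray:
    """Crude 3:1 decimation: scale to int16 range, then average each group of 3 samples."""
    scaled = (y / max(np.max(y), 1)) * 32767           # normalize peak to int16 range
    return scaled.reshape((-1, 3)).mean(axis=1).astype("int16")

# Hypothetical test signal: one second of a 440 Hz tone sampled at 48 kHz.
t = np.linspace(0, 1, 48000, endpoint=False)
tone_48k = (np.sin(2 * np.pi * 440 * t) * 1000).astype("int16")

tone_16k = downsample_48k_to_16k(tone_48k)
print(tone_48k.shape, tone_16k.shape)  # (48000,) (16000,)

Block averaging like this applies no anti-aliasing filter; that is acceptable for a quick demo, but a production pipeline would use a proper resampler.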
@@ -1,3 +0,0 @@
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.2/deepspeech-0.8.2-models.pbmm
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.2/deepspeech-0.8.2-models.scorer
apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
@@ -22,8 +22,8 @@ gradio_version = gradio_version.strip()
 # 1. all_demos includes all demos and is for testing PRs
 # 2. reset_components includes media files that are only present in all_demos (only for PRs)
 # 3. custom_path doesn't have .launch since the point is to show how to launch with uvicorn
-# 4. The same reason as 2 for kitchen_sink_random
-DEMOS_TO_SKIP = {"all_demos", "reset_components", "custom_path", "kitchen_sink_random"}
+# 4. The same reason as 2 for kitchen_sink_random and blocks_kitchen_sink
+DEMOS_TO_SKIP = {"all_demos", "reset_components", "custom_path", "kitchen_sink_random", "blocks_kitchen_sink"}
 
 
 def upload_demo_to_space(
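On the DEMOS_TO_SKIP change above: the hunk only extends the skip set; the loop that consumes it falls outside this diff. A rough sketch of the presumed usage, where the demo/ directory layout and the filtering helper are assumptions rather than code from this file:

import os

# Assumed layout: each demo lives in its own directory under demo/.
DEMO_ROOT = "demo"
DEMOS_TO_SKIP = {"all_demos", "reset_components", "custom_path", "kitchen_sink_random", "blocks_kitchen_sink"}

def demos_to_upload(root: str = DEMO_ROOT) -> list[str]:
    """Return demo directory names that should be uploaded as Spaces."""
    return sorted(
        name
        for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name)) and name not in DEMOS_TO_SKIP
    )

for demo_name in demos_to_upload():
    # The real script would call upload_demo_to_space(demo_name, ...) here.
    print("would upload:", demo_name)

Per the comments in the hunk, blocks_kitchen_sink is skipped for the same reason as kitchen_sink_random: it depends on media files that are only present in all_demos, so it cannot be uploaded as a standalone Space.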