Mirror of https://github.com/gradio-app/gradio.git, synced 2025-04-12 12:40:29 +08:00
Fix blocks_kitchen_sink and streaming_stt demos (#4699)
* Add code
* Add json file
* Remove streaming_stt demo
* Undo generate_notebooks
* Add blocks_kitchen_sink
parent 29c916c9a6
commit b7072b1535
demo/streaming_stt/.gitignore (vendored), 2 lines
@@ -1,2 +0,0 @@
*.pbmm
*.scorer
@@ -1 +0,0 @@
deepspeech==0.9.3
@@ -1 +0,0 @@
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: streaming_stt"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio deepspeech==0.9.3"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["from deepspeech import Model\n", "import gradio as gr\n", "import numpy as np\n", "import urllib.request\n", "\n", "model_file_path = \"deepspeech-0.9.3-models.pbmm\"\n", "lm_file_path = \"deepspeech-0.9.3-models.scorer\"\n", "url = \"https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/\"\n", "\n", "urllib.request.urlretrieve(url + model_file_path, filename=model_file_path)\n", "urllib.request.urlretrieve(url + lm_file_path, filename=lm_file_path)\n", "\n", "beam_width = 100\n", "lm_alpha = 0.93\n", "lm_beta = 1.18\n", "\n", "model = Model(model_file_path)\n", "model.enableExternalScorer(lm_file_path)\n", "model.setScorerAlphaBeta(lm_alpha, lm_beta)\n", "model.setBeamWidth(beam_width)\n", "\n", "\n", "def reformat_freq(sr, y):\n", " if sr not in (\n", " 48000,\n", " 16000,\n", " ): # Deepspeech only supports 16k, (we convert 48k -> 16k)\n", " raise ValueError(\"Unsupported rate\", sr)\n", " if sr == 48000:\n", " y = (\n", " ((y / max(np.max(y), 1)) * 32767)\n", " .reshape((-1, 3))\n", " .mean(axis=1)\n", " .astype(\"int16\")\n", " )\n", " sr = 16000\n", " return sr, y\n", "\n", "\n", "def transcribe(speech, stream):\n", " _, y = reformat_freq(*speech)\n", " if stream is None:\n", " stream = model.createStream()\n", " stream.feedAudioContent(y)\n", " text = stream.intermediateDecode()\n", " return text, stream\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [gr.Audio(source=\"microphone\", streaming=True), \"state\"],\n", " [\"text\", \"state\"],\n", " live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
@@ -1,57 +0,0 @@
from deepspeech import Model
import gradio as gr
import numpy as np
import urllib.request

model_file_path = "deepspeech-0.9.3-models.pbmm"
lm_file_path = "deepspeech-0.9.3-models.scorer"
url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"

urllib.request.urlretrieve(url + model_file_path, filename=model_file_path)
urllib.request.urlretrieve(url + lm_file_path, filename=lm_file_path)

beam_width = 100
lm_alpha = 0.93
lm_beta = 1.18

model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)


def reformat_freq(sr, y):
    if sr not in (
        48000,
        16000,
    ):  # Deepspeech only supports 16k, (we convert 48k -> 16k)
        raise ValueError("Unsupported rate", sr)
    if sr == 48000:
        y = (
            ((y / max(np.max(y), 1)) * 32767)
            .reshape((-1, 3))
            .mean(axis=1)
            .astype("int16")
        )
        sr = 16000
    return sr, y


def transcribe(speech, stream):
    _, y = reformat_freq(*speech)
    if stream is None:
        stream = model.createStream()
    stream.feedAudioContent(y)
    text = stream.intermediateDecode()
    return text, stream


demo = gr.Interface(
    transcribe,
    [gr.Audio(source="microphone", streaming=True), "state"],
    ["text", "state"],
    live=True,
)

if __name__ == "__main__":
    demo.launch()
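Note on the removed run.py above: reformat_freq converts 48 kHz microphone audio to the 16 kHz that DeepSpeech expects by normalizing the signal to the int16 range and averaging every three samples, a plain 3:1 decimation. A minimal standalone sketch of that step, with an illustrative helper name and test signal that are not part of the demo:

import numpy as np

def downsample_48k_to_16k(y: np.ndarray) -> np.ndarray:
    """Crude 3:1 decimation: scale to int16 range, then average each group of 3 samples."""
    scaled = (y / max(np.max(y), 1)) * 32767           # normalize peak to int16 range
    return scaled.reshape((-1, 3)).mean(axis=1).astype("int16")

# Hypothetical test signal: one second of a 440 Hz tone sampled at 48 kHz.
t = np.linspace(0, 1, 48000, endpoint=False)
tone_48k = (np.sin(2 * np.pi * 440 * t) * 1000).astype("int16")

tone_16k = downsample_48k_to_16k(tone_48k)
print(tone_48k.shape, tone_16k.shape)  # (48000,) (16000,)

Block averaging like this applies no anti-aliasing filter; that is acceptable for a quick demo, but a production pipeline would use a proper resampler.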
@@ -1,3 +0,0 @@
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.2/deepspeech-0.8.2-models.pbmm
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.2/deepspeech-0.8.2-models.scorer
apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
@@ -22,8 +22,8 @@ gradio_version = gradio_version.strip()
 # 1. all_demos includes all demos and is for testing PRs
 # 2. reset_components includes media files that are only present in all_demos (only for PRs)
 # 3. custom_path doesn't have .launch since the point is to show how to launch with uvicorn
-# 4. The same reason as 2 for kitchen_sink_random
-DEMOS_TO_SKIP = {"all_demos", "reset_components", "custom_path", "kitchen_sink_random"}
+# 4. The same reason as 2 for kitchen_sink_random and blocks_kitchen_sink
+DEMOS_TO_SKIP = {"all_demos", "reset_components", "custom_path", "kitchen_sink_random", "blocks_kitchen_sink"}
 
 
 def upload_demo_to_space(
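On the DEMOS_TO_SKIP change above: the hunk only extends the skip set; the loop that consumes it falls outside this diff. A rough sketch of the presumed usage, where the demo/ directory layout and the filtering helper are assumptions rather than code from this file:

import os

# Assumed layout: each demo lives in its own directory under demo/.
DEMO_ROOT = "demo"
DEMOS_TO_SKIP = {"all_demos", "reset_components", "custom_path", "kitchen_sink_random", "blocks_kitchen_sink"}

def demos_to_upload(root: str = DEMO_ROOT) -> list[str]:
    """Return demo directory names that should be uploaded as Spaces."""
    return sorted(
        name
        for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name)) and name not in DEMOS_TO_SKIP
    )

for demo_name in demos_to_upload():
    # The real script would call upload_demo_to_space(demo_name, ...) here.
    print("would upload:", demo_name)

Per the comments in the hunk, blocks_kitchen_sink is skipped for the same reason as kitchen_sink_random: it depends on media files that are only present in all_demos, so it cannot be uploaded as a standalone Space.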