From 10be5e7a2c6d1fc2ed35df1249aaa3147a91fba6 Mon Sep 17 00:00:00 2001 From: Ali Abid Date: Wed, 2 Mar 2022 14:40:59 -0600 Subject: [PATCH 1/7] first commit --- demo/streaming_stt/requirements.txt | 1 + ui/packages/app/src/Interface.svelte | 96 ++++++++++--------- .../src/components/input/Audio/Audio.svelte | 46 ++++++--- 3 files changed, 81 insertions(+), 62 deletions(-) create mode 100644 demo/streaming_stt/requirements.txt diff --git a/demo/streaming_stt/requirements.txt b/demo/streaming_stt/requirements.txt new file mode 100644 index 0000000000..407657ed66 --- /dev/null +++ b/demo/streaming_stt/requirements.txt @@ -0,0 +1 @@ +deepspeech diff --git a/ui/packages/app/src/Interface.svelte b/ui/packages/app/src/Interface.svelte index 3d1314bd2a..8165c6e0e5 100644 --- a/ui/packages/app/src/Interface.svelte +++ b/ui/packages/app/src/Interface.svelte @@ -50,7 +50,7 @@ let avg_duration = Array.isArray(avg_durations) ? avg_durations[0] : null; let expected_duration: number | null = null; - const setValues = (index: number, value: unknown) => { + const setValues = async (index: number, value: unknown) => { has_changed = true; input_values[index] = value; if (live && state !== "PENDING") { @@ -85,7 +85,7 @@ clearInterval(timer); }; - const submit = () => { + const submit = async () => { if (state === "PENDING") { return; } @@ -103,53 +103,55 @@ has_changed = false; let submission_count_at_click = submission_count; startTimer(); - fn("predict", { data: input_values }, queue, queueCallback) - .then((output) => { - if ( - state !== "PENDING" || - submission_count_at_click !== submission_count - ) { - return; - } - stopTimer(); - output_values = output["data"]; - for (let [i, value] of output_values.entries()) { - if (output_components[i].name === "state") { - for (let [j, input_component] of input_components.entries()) { - if (input_component.name === "state") { - input_values[j] = value; - } - } + let output: any; + try { + output = await fn( + "predict", + { data: input_values }, + queue, + queueCallback + ); + } catch (e) { + if ( + state !== "PENDING" || + submission_count_at_click !== submission_count + ) { + return; + } + stopTimer(); + console.error(e); + state = "ERROR"; + output_values = deepCopy(default_outputs); + } + if (state !== "PENDING" || submission_count_at_click !== submission_count) { + return; + } + stopTimer(); + output_values = output["data"]; + for (let [i, value] of output_values.entries()) { + if (output_components[i].name === "state") { + for (let [j, input_component] of input_components.entries()) { + if (input_component.name === "state") { + input_values[j] = value; } } - if ("durations" in output) { - last_duration = output["durations"][0]; - } - if ("avg_durations" in output) { - avg_duration = output["avg_durations"][0]; - if (queue && initial_queue_index) { - expected_duration = avg_duration * (initial_queue_index + 1); - } else { - expected_duration = avg_duration; - } - } - state = "COMPLETE"; - if (live && has_changed) { - submit(); - } - }) - .catch((e) => { - if ( - state !== "PENDING" || - submission_count_at_click !== submission_count - ) { - return; - } - stopTimer(); - console.error(e); - state = "ERROR"; - output_values = deepCopy(default_outputs); - }); + } + } + if ("durations" in output) { + last_duration = output["durations"][0]; + } + if ("avg_durations" in output) { + avg_duration = output["avg_durations"][0]; + if (queue && initial_queue_index) { + expected_duration = avg_duration * (initial_queue_index + 1); + } else { + expected_duration = avg_duration; + } + } + state = "COMPLETE"; + if (live && has_changed) { + await submit(); + } }; const clear = () => { input_values = deepCopy(default_inputs); diff --git a/ui/packages/app/src/components/input/Audio/Audio.svelte b/ui/packages/app/src/components/input/Audio/Audio.svelte index fb483a8774..5ec6c7f6e6 100644 --- a/ui/packages/app/src/components/input/Audio/Audio.svelte +++ b/ui/packages/app/src/components/input/Audio/Audio.svelte @@ -9,6 +9,7 @@ import { _ } from "svelte-i18n"; export let value: null | Value; + export let live: boolean; export let setValue: (val: typeof value) => typeof value; export let theme: string; export let name: string; @@ -24,33 +25,48 @@ let player; let inited = false; let crop_values = [0, 100]; + let converting_blob = false; - function blob_to_data_url(blob: Blob): Promise { - return new Promise((fulfill, reject) => { - let reader = new FileReader(); - reader.onerror = reject; - reader.onload = (e) => fulfill(reader.result as string); - reader.readAsDataURL(blob); - }); + async function generate_data(): Promise<{ + data: string; + name: string; + is_example: boolean; + }> { + function blob_to_data_url(blob: Blob): Promise { + return new Promise((fulfill, reject) => { + let reader = new FileReader(); + reader.onerror = reject; + reader.onload = (e) => fulfill(reader.result as string); + reader.readAsDataURL(blob); + }); + } + audio_blob = new Blob(audio_chunks, { type: "audio/wav" }); + return { + data: await blob_to_data_url(audio_blob), + name, + is_example + }; } async function prepare_audio() { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); recorder = new MediaRecorder(stream); - recorder.addEventListener("dataavailable", (event) => { + recorder.addEventListener("dataavailable", async (event) => { audio_chunks.push(event.data); + if (live && !converting_blob) { + converting_blob = true; + await setValue(await generate_data()); + converting_blob = false; + } }); recorder.addEventListener("stop", async () => { recording = false; - audio_blob = new Blob(audio_chunks, { type: "audio/wav" }); - setValue({ - data: await blob_to_data_url(audio_blob), - name, - is_example - }); + if (!live) { + setValue(await generate_data()); + } }); } @@ -117,7 +133,7 @@
- {#if value === null} + {#if value === null || (source === "microphone" && live)} {#if source === "microphone"} {#if recording} - + {#if !live} + + {/if}
{#if state !== "START"}
diff --git a/ui/packages/app/src/components/input/Audio/Audio.svelte b/ui/packages/app/src/components/input/Audio/Audio.svelte index 5ec6c7f6e6..9f52fd09ea 100644 --- a/ui/packages/app/src/components/input/Audio/Audio.svelte +++ b/ui/packages/app/src/components/input/Audio/Audio.svelte @@ -25,7 +25,8 @@ let player; let inited = false; let crop_values = [0, 100]; - let converting_blob = false; + let submitting_data = false; + let record_interval; async function generate_data(): Promise<{ data: string; @@ -54,16 +55,15 @@ recorder.addEventListener("dataavailable", async (event) => { audio_chunks.push(event.data); - if (live && !converting_blob) { - converting_blob = true; + if (live && !submitting_data) { + submitting_data = true; await setValue(await generate_data()); - converting_blob = false; + submitting_data = false; + audio_chunks = []; } }); recorder.addEventListener("stop", async () => { - recording = false; - if (!live) { setValue(await generate_data()); } @@ -77,6 +77,12 @@ if (!inited) await prepare_audio(); recorder.start(); + if (live) { + record_interval = setInterval(() => { + recorder.stop(); + recorder.start(); + }, 1000) + } } onDestroy(() => { @@ -86,7 +92,11 @@ }); const stop = () => { + recording = false; recorder.stop(); + if (live) { + clearInterval(record_interval); + } }; function clear() { From 0e220e51b1317e322d65a69813a479dd918c67a6 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 15 Mar 2022 14:23:38 -0700 Subject: [PATCH 5/7] added transformers based demo for sst --- demo/streaming_wav2vec/requirements.txt | 1 + demo/streaming_wav2vec/run.py | 43 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 demo/streaming_wav2vec/requirements.txt create mode 100644 demo/streaming_wav2vec/run.py diff --git a/demo/streaming_wav2vec/requirements.txt b/demo/streaming_wav2vec/requirements.txt new file mode 100644 index 0000000000..19c989b6cd --- /dev/null +++ b/demo/streaming_wav2vec/requirements.txt @@ -0,0 +1 @@ +deepspeech==0.8.2 diff --git a/demo/streaming_wav2vec/run.py b/demo/streaming_wav2vec/run.py new file mode 100644 index 0000000000..da14e22e7a --- /dev/null +++ b/demo/streaming_wav2vec/run.py @@ -0,0 +1,43 @@ +from deepspeech import Model +import gradio as gr +import scipy.io.wavfile +import numpy as np + +model_file_path = "deepspeech-0.8.2-models.pbmm" +lm_file_path = "deepspeech-0.8.2-models.scorer" +beam_width = 100 +lm_alpha = 0.93 +lm_beta = 1.18 + +model = Model(model_file_path) +model.enableExternalScorer(lm_file_path) +model.setScorerAlphaBeta(lm_alpha, lm_beta) +model.setBeamWidth(beam_width) + + +def reformat_freq(sr, y): + if sr not in ( + 48000, + 16000, + ): # Deepspeech only supports 16k, (we convert 48k -> 16k) + raise ValueError("Unsupported rate", sr) + if sr == 48000: + y = ( + ((y / max(np.max(y), 1)) * 32767) + .reshape((-1, 3)) + .mean(axis=1) + .astype("int16") + ) + sr = 16000 + return sr, y + + +def transcribe(speech, stream): + _, y = reformat_freq(*speech) + if stream is None: + stream = model.createStream() + stream.feedAudioContent(y) + text = stream.intermediateDecode() + return text, stream + +gr.Interface(transcribe, ["microphone", "state"], ["text", "state"], live=True).launch() From a4ddcefb23ab6e3662f7dffc74cc2f827db9b12f Mon Sep 17 00:00:00 2001 From: Ali Abid Date: Tue, 29 Mar 2022 02:15:32 +0200 Subject: [PATCH 6/7] fix streaming --- gradio.egg-info/PKG-INFO | 7 +++++-- gradio.egg-info/requires.txt | 4 ++-- ui/packages/app/src/Interface.svelte | 4 ++-- ui/packages/app/src/components/input/Audio/Audio.svelte | 4 +++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/gradio.egg-info/PKG-INFO b/gradio.egg-info/PKG-INFO index 62f767ad93..25e021d808 100644 --- a/gradio.egg-info/PKG-INFO +++ b/gradio.egg-info/PKG-INFO @@ -1,4 +1,4 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: gradio Version: 2.8.14 Summary: Python library for easily interacting with trained machine learning models @@ -6,6 +6,9 @@ Home-page: https://github.com/gradio-app/gradio-UI Author: Abubakar Abid, Ali Abid, Ali Abdalla, Dawood Khan, Ahsen Khaliq Author-email: team@gradio.app License: Apache License 2.0 -Description: UNKNOWN Keywords: machine learning,visualization,reproducibility Platform: UNKNOWN +License-File: LICENSE + +UNKNOWN + diff --git a/gradio.egg-info/requires.txt b/gradio.egg-info/requires.txt index abbeed4a16..004eeeac3e 100644 --- a/gradio.egg-info/requires.txt +++ b/gradio.egg-info/requires.txt @@ -1,5 +1,5 @@ -aiohttp analytics-python +aiohttp fastapi ffmpy markdown-it-py[linkify,plugins] @@ -10,7 +10,7 @@ pandas paramiko pillow pycryptodome -pydub python-multipart +pydub requests uvicorn diff --git a/ui/packages/app/src/Interface.svelte b/ui/packages/app/src/Interface.svelte index f9526c0d7b..37d6a34d62 100644 --- a/ui/packages/app/src/Interface.svelte +++ b/ui/packages/app/src/Interface.svelte @@ -53,12 +53,12 @@ let expected_duration: number | null = null; let example_id: number | null = null; - const setValues = (index: number, value: unknown) => { + const setValues = async (index: number, value: unknown) => { example_id = null; has_changed = true; input_values[index] = value; if (live && state !== "PENDING") { - submit(); + await submit(); } }; diff --git a/ui/packages/app/src/components/input/Audio/Audio.svelte b/ui/packages/app/src/components/input/Audio/Audio.svelte index 9f52fd09ea..91d9d346d8 100644 --- a/ui/packages/app/src/components/input/Audio/Audio.svelte +++ b/ui/packages/app/src/components/input/Audio/Audio.svelte @@ -21,6 +21,7 @@ let recorder: MediaRecorder; let mode = ""; let audio_chunks: Array = []; + let chunks_at_submit: number = 0; let audio_blob; let player; let inited = false; @@ -57,9 +58,10 @@ audio_chunks.push(event.data); if (live && !submitting_data) { submitting_data = true; + chunks_at_submit = audio_chunks.length; await setValue(await generate_data()); submitting_data = false; - audio_chunks = []; + audio_chunks = audio_chunks.slice(chunks_at_submit); } }); From fd53b54c753f8b66ae004084dfc620dc07936605 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 29 Mar 2022 00:06:35 -0700 Subject: [PATCH 7/7] fix formatting --- ui/packages/app/src/Interface.svelte | 6 +++++- ui/packages/app/src/components/input/Audio/Audio.svelte | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ui/packages/app/src/Interface.svelte b/ui/packages/app/src/Interface.svelte index 37d6a34d62..d5010f8ba6 100644 --- a/ui/packages/app/src/Interface.svelte +++ b/ui/packages/app/src/Interface.svelte @@ -113,7 +113,11 @@ try { output = await fn( "predict", - { data: input_values, cleared: cleared_since_last_submit, example_id: example_id }, + { + data: input_values, + cleared: cleared_since_last_submit, + example_id: example_id + }, queue, queueCallback ); diff --git a/ui/packages/app/src/components/input/Audio/Audio.svelte b/ui/packages/app/src/components/input/Audio/Audio.svelte index 91d9d346d8..0afc164ba0 100644 --- a/ui/packages/app/src/components/input/Audio/Audio.svelte +++ b/ui/packages/app/src/components/input/Audio/Audio.svelte @@ -83,7 +83,7 @@ record_interval = setInterval(() => { recorder.stop(); recorder.start(); - }, 1000) + }, 1000); } }