Add Microphone Input to MultimodalTextbox (#10186)

* microphone

* add changeset

* undo css changes

* notebook

* css fix

* fixes

* add changeset

* fixes

* pr fixes

* guides

* format

* a11y ignore

* type fix

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
Dawood Khan 2024-12-17 17:15:16 -05:00 committed by GitHub
parent a95f8ef3ee
commit 9b17032a65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 323 additions and 131 deletions

View File

@ -0,0 +1,7 @@
---
"@gradio/audio": minor
"@gradio/multimodaltextbox": minor
"gradio": minor
---
feat:Add Microphone Input to MultimodalTextbox

View File

@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/tuples_testcase.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", " print(x.index, x.value, x.liked)\n", "\n", "\n", "def add_message(history, message):\n", " for x in message[\"files\"]:\n", " history.append({\"role\": \"user\", \"content\": {\"path\": x}})\n", " if message[\"text\"] is not None:\n", " history.append({\"role\": \"user\", \"content\": message[\"text\"]})\n", " return history, gr.MultimodalTextbox(value=None, interactive=False)\n", "\n", "\n", "def bot(history: list):\n", " response = \"**That's cool!**\"\n", " history.append({\"role\": \"assistant\", \"content\": \"\"})\n", " for character in response:\n", " history[-1][\"content\"] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot(elem_id=\"chatbot\", bubble_full_width=False, type=\"messages\")\n", "\n", " chat_input = gr.MultimodalTextbox(\n", " interactive=True,\n", " file_count=\"multiple\",\n", " placeholder=\"Enter message or upload file...\",\n", " show_label=False,\n", " )\n", "\n", " chat_msg = 
chat_input.submit(\n", " add_message, [chatbot, chat_input], [chatbot, chat_input]\n", " )\n", " bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name=\"bot_response\")\n", " bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])\n", "\n", " chatbot.like(print_like_dislike, None, None, like_user_message=True)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/tuples_testcase.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", " print(x.index, x.value, x.liked)\n", "\n", "\n", "def add_message(history, message):\n", " for x in message[\"files\"]:\n", " history.append({\"role\": \"user\", \"content\": {\"path\": x}})\n", " if message[\"text\"] is not None:\n", " history.append({\"role\": \"user\", \"content\": message[\"text\"]})\n", " return history, gr.MultimodalTextbox(value=None, interactive=False)\n", "\n", "\n", "def bot(history: list):\n", " response = \"**That's cool!**\"\n", " history.append({\"role\": \"assistant\", \"content\": \"\"})\n", " for character in response:\n", " history[-1][\"content\"] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot(elem_id=\"chatbot\", bubble_full_width=False, type=\"messages\")\n", "\n", " chat_input = gr.MultimodalTextbox(\n", " interactive=True,\n", " file_count=\"multiple\",\n", " placeholder=\"Enter message or upload file...\",\n", " show_label=False,\n", " sources=[\"microphone\", 
\"upload\"],\n", " )\n", "\n", " chat_msg = chat_input.submit(\n", " add_message, [chatbot, chat_input], [chatbot, chat_input]\n", " )\n", " bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name=\"bot_response\")\n", " bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])\n", "\n", " chatbot.like(print_like_dislike, None, None, like_user_message=True)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

View File

@ -33,6 +33,7 @@ with gr.Blocks() as demo:
file_count="multiple",
placeholder="Enter message or upload file...",
show_label=False,
sources=["microphone", "upload"],
)
chat_msg = chat_input.submit(

View File

@ -61,6 +61,9 @@ class MultimodalTextbox(FormComponent):
self,
value: str | dict[str, str | list] | Callable | None = None,
*,
sources: list[Literal["upload", "microphone"]]
| Literal["upload", "microphone"]
| None = None,
file_types: list[str] | None = None,
file_count: Literal["single", "multiple", "directory"] = "single",
lines: int = 1,
@ -91,6 +94,7 @@ class MultimodalTextbox(FormComponent):
"""
Parameters:
value: Default value to show in MultimodalTextbox. A string value, or a dictionary of the form {"text": "sample text", "files": [{path: "files/file.jpg", orig_name: "file.jpg", url: "http://image_url.jpg", size: 100}]}. If callable, the function will be called whenever the app loads to set the initial value of the component.
sources: A list of sources permitted. "upload" creates a button where users can click to upload or drop files, "microphone" creates a microphone input. If None, defaults to ["upload"].
file_count: if single, allows user to upload one file. If "multiple", user uploads multiple files. If "directory", user uploads all files in selected directory. Return type will be list for each file in case of "multiple" or "directory".
file_types: List of file extensions or types of files to be uploaded (e.g. ['image', '.json', '.mp4']). "file" allows any file to be uploaded, "image" allows only image files to be uploaded, "audio" allows only audio files to be uploaded, "video" allows only video files to be uploaded, "text" allows only text files to be uploaded.
lines: minimum number of line rows to provide in textarea.
@ -118,6 +122,22 @@ class MultimodalTextbox(FormComponent):
stop_btn: If True, will show a stop button (useful for streaming demos). If a string, will use that string as the stop button text.
max_plain_text_length: Maximum length of plain text in the textbox. If the text exceeds this length, the text will be pasted as a file. Default is 1000.
"""
valid_sources: list[Literal["upload", "microphone"]] = ["upload", "microphone"]
if sources is None:
self.sources = ["upload"]
elif isinstance(sources, str) and sources in valid_sources:
self.sources = [sources]
elif isinstance(sources, list):
self.sources = sources
else:
raise ValueError(
f"`sources` must be a list consisting of elements in {valid_sources}"
)
for source in self.sources:
if source not in valid_sources:
raise ValueError(
f"`sources` must a list consisting of elements in {valid_sources}"
)
self.file_types = file_types
self.file_count = file_count
if file_types is not None and not isinstance(file_types, list):

View File

@ -194,7 +194,7 @@ This second parameter of your chat function, `history`, will be in the same open
The return type of your chat function does *not change* when setting `multimodal=True` (i.e. in the simplest case, you should still return a string value). We discuss more complex cases, e.g. returning files [below](#returning-complex-responses).
If you are customizing a multimodal chat interface, you should pass in an instance of `gr.MultimodalTextbox` to the `textbox` parameter. Here's an example that illustrates how to set up and customize and multimodal chat interface:
If you are customizing a multimodal chat interface, you should pass in an instance of `gr.MultimodalTextbox` to the `textbox` parameter. You can customize the `MultimodalTextbox` further by passing in the `sources` parameter, which is a list of sources to enable. Here's an example that illustrates how to set up and customize a multimodal chat interface:
```python
@ -215,7 +215,7 @@ demo = gr.ChatInterface(
{"text": "No files", "files": []}
],
multimodal=True,
textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"])
textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"], sources=["upload", "microphone"])
)
demo.launch()

View File

@ -70,7 +70,7 @@ def bot(history):
return history
```
In addition, it can handle media files, such as images, audio, and video. You can use the `MultimodalTextbox` component to easily upload all types of media files to your chatbot. To pass in a media file, we must pass in the file a dictionary with a `path` key pointing to a local file and an `alt_text` key. The `alt_text` is optional, so you can also just pass in a tuple with a single element `{"path": "filepath"}`, like this:
In addition, it can handle media files, such as images, audio, and video. You can use the `MultimodalTextbox` component to easily upload all types of media files to your chatbot. You can customize the `MultimodalTextbox` further by passing in the `sources` parameter, which is a list of sources to enable. To pass in a media file, we must pass in the file as a dictionary with a `path` key pointing to a local file and an `alt_text` key. The `alt_text` is optional, so you can also just pass in a dictionary with a single key, `{"path": "filepath"}`, like this:
```python
def add_message(history, message):
@ -78,7 +78,7 @@ def add_message(history, message):
history.append({"role": "user", "content": {"path": x}})
if message["text"] is not None:
history.append({"role": "user", "content": message["text"]})
return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=["image"])
return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=["image"], sources=["upload", "microphone"])
```
Putting this together, we can create a _multimodal_ chatbot with a multimodal textbox for a user to submit text and media files. The rest of the code looks pretty much the same as before:

View File

@ -41,6 +41,7 @@
export let stream_every: number;
export let uploading = false;
export let recording = false;
export let class_name = "";
let time_limit: number | null = null;
let stream_state: "open" | "waiting" | "closed" = "closed";
@ -246,7 +247,7 @@
float={active_source === "upload" && value === null}
label={label || i18n("audio.audio")}
/>
<div class="audio-container">
<div class="audio-container {class_name}">
<StreamingBar {time_limit} />
{#if value === null || streaming}
{#if active_source === "microphone"}
@ -329,4 +330,30 @@
flex-direction: column;
justify-content: space-between;
}
.audio-container.compact-audio {
margin-top: calc(var(--size-8) * -1);
height: auto;
padding: 0px;
gap: var(--size-2);
min-height: var(--size-5);
}
.compact-audio :global(.audio-player) {
padding: 0px;
}
.compact-audio :global(.controls) {
gap: 0px;
padding: 0px;
}
.compact-audio :global(.waveform-container) {
height: var(--size-12) !important;
}
.compact-audio :global(.player-container) {
min-height: unset;
height: auto;
}
</style>

View File

@ -12,6 +12,8 @@
import { StatusTracker } from "@gradio/statustracker";
import type { LoadingStatus } from "@gradio/statustracker";
import type { FileData } from "@gradio/client";
import { onMount } from "svelte";
import type { WaveformOptions } from "../audio/shared/types";
export let gradio: Gradio<{
change: typeof value;
@ -23,6 +25,11 @@
focus: never;
error: string;
clear_status: LoadingStatus;
start_recording: never;
pause_recording: never;
stop_recording: never;
upload: FileData[] | FileData;
clear: undefined;
}>;
export let elem_id = "";
export let elem_classes: string[] = [];
@ -38,7 +45,6 @@
export let info: string | undefined = undefined;
export let show_label: boolean;
export let max_lines: number;
export let container = true;
export let scale: number | null = null;
export let min_width: number | undefined = undefined;
export let submit_btn: string | boolean | null = null;
@ -53,8 +59,52 @@
export let root: string;
export let file_count: "single" | "multiple" | "directory";
export let max_plain_text_length: number;
export let sources: ["microphone" | "upload"] = ["upload"];
export let waveform_options: WaveformOptions = {};
let dragging: boolean;
let active_source: "microphone" | null = null;
let waveform_settings: Record<string, any>;
let color_accent = "darkorange";
onMount(() => {
color_accent = getComputedStyle(document?.documentElement).getPropertyValue(
"--color-accent"
);
set_trim_region_colour();
waveform_settings.waveColor = waveform_options.waveform_color || "#9ca3af";
waveform_settings.progressColor =
waveform_options.waveform_progress_color || color_accent;
waveform_settings.mediaControls = waveform_options.show_controls;
waveform_settings.sampleRate = waveform_options.sample_rate || 44100;
});
$: waveform_settings = {
height: 50,
barWidth: 2,
barGap: 3,
cursorWidth: 2,
cursorColor: "#ddd5e9",
autoplay: false,
barRadius: 10,
dragToSeek: true,
normalize: true,
minPxPerSec: 20
};
const trim_region_settings = {
color: waveform_options.trim_region_color,
drag: true,
resize: true
};
function set_trim_region_colour(): void {
document.documentElement.style.setProperty(
"--trim-region-color",
trim_region_settings.color || color_accent
);
}
</script>
<Block
@ -80,6 +130,7 @@
bind:value
bind:value_is_output
bind:dragging
bind:active_source
{file_types}
{root}
{label}
@ -88,14 +139,16 @@
{lines}
{rtl}
{text_align}
{waveform_settings}
i18n={gradio.i18n}
max_lines={!max_lines ? lines + 1 : max_lines}
{placeholder}
{submit_btn}
{stop_btn}
{autofocus}
{container}
{autoscroll}
{file_count}
{sources}
max_file_size={gradio.max_file_size}
on:change={() => gradio.dispatch("change", value)}
on:input={() => gradio.dispatch("input")}
@ -107,6 +160,11 @@
on:error={({ detail }) => {
gradio.dispatch("error", detail);
}}
on:start_recording={() => gradio.dispatch("start_recording")}
on:pause_recording={() => gradio.dispatch("pause_recording")}
on:stop_recording={() => gradio.dispatch("stop_recording")}
on:upload={(e) => gradio.dispatch("upload", e.detail)}
on:clear={() => gradio.dispatch("clear")}
disabled={!interactive}
upload={(...args) => gradio.client.upload(...args)}
stream_handler={(...args) => gradio.client.stream(...args)}

View File

@ -42,6 +42,12 @@
description: "Whether to render right-to-left",
control: { type: "boolean" },
defaultValue: false
},
sources: {
options: ["upload", "microphone"],
description: "The sources to enable",
control: { type: "select" },
defaultValue: ["upload", "microphone"]
}
}}
/>
@ -87,3 +93,7 @@
}
}}
/>
<Story
name="MultimodalTextbox with microphone input"
args={{ sources: ["microphone"] }}
/>

View File

@ -10,7 +10,9 @@
import { BlockTitle } from "@gradio/atoms";
import { Upload } from "@gradio/upload";
import { Image } from "@gradio/image/shared";
import type { I18nFormatter } from "js/core/src/gradio_helper";
import type { FileData, Client } from "@gradio/client";
import type { WaveformOptions } from "../../audio/shared/types";
import {
Clear,
File,
@ -18,9 +20,11 @@
Paperclip,
Video,
Send,
Square
Square,
Microphone
} from "@gradio/icons";
import type { SelectData } from "@gradio/utils";
import InteractiveAudio from "../../audio/interactive/InteractiveAudio.svelte";
export let value: { text: string; files: FileData[] } = {
text: "",
@ -29,12 +33,12 @@
export let value_is_output = false;
export let lines = 1;
export let i18n: I18nFormatter;
export let placeholder = "Type here...";
export let disabled = false;
export let label: string;
export let info: string | undefined = undefined;
export let show_label = true;
export let container = true;
export let max_lines: number;
export let submit_btn: string | boolean | null = null;
export let stop_btn: string | boolean | null = null;
@ -49,7 +53,10 @@
export let stream_handler: Client["stream"];
export let file_count: "single" | "multiple" | "directory" = "multiple";
export let max_plain_text_length = 1000;
export let waveform_settings: Record<string, any>;
export let waveform_options: WaveformOptions = {};
export let sources: ["microphone" | "upload"] = ["upload"];
export let active_source: "microphone" | null = null;
let upload_component: Upload;
let hidden_upload: HTMLInputElement;
let el: HTMLTextAreaElement | HTMLInputElement;
@ -59,7 +66,9 @@
export let dragging = false;
let uploading = false;
let oldValue = value.text;
let recording = false;
$: dispatch("drag", dragging);
let mic_audio: FileData | null = null;
let full_container: HTMLDivElement;
@ -84,6 +93,9 @@
clear: undefined;
load: FileData[] | FileData;
error: string;
start_recording: undefined;
pause_recording: undefined;
stop_recording: undefined;
}>();
beforeUpdate(() => {
@ -141,6 +153,11 @@
) {
e.preventDefault();
dispatch("submit");
active_source = null;
if (mic_audio) {
value.files.push(mic_audio);
value = value;
}
}
}
@ -161,7 +178,7 @@
async function handle_upload({
detail
}: CustomEvent<FileData | FileData[]>): Promise<void> {
}: CustomEvent<FileData>): Promise<void> {
handle_change();
if (Array.isArray(detail)) {
for (let file of detail) {
@ -197,6 +214,11 @@
function handle_submit(): void {
dispatch("submit");
active_source = null;
if (mic_audio) {
value.files.push(mic_audio);
value = value;
}
}
async function handle_paste(event: ClipboardEvent): Promise<void> {
@ -289,127 +311,167 @@
role="group"
aria-label="Multimedia input field"
>
<!-- svelte-ignore a11y-autofocus -->
<label class:container>
<BlockTitle {root} {show_label} {info}>{label}</BlockTitle>
{#if value.files.length > 0 || uploading}
<div
class="thumbnails scroll-hide"
aria-label="Uploaded files"
data-testid="container_el"
style="display: {value.files.length > 0 || uploading
? 'flex'
: 'none'};"
>
{#each value.files as file, index}
<span role="listitem" aria-label="File thumbnail">
<button class="thumbnail-item thumbnail-small">
<button
class:disabled
class="delete-button"
on:click={(event) => remove_thumbnail(event, index)}
><Clear /></button
>
{#if file.mime_type && file.mime_type.includes("image")}
<Image
src={file.url}
title={null}
alt=""
loading="lazy"
class={"thumbnail-image"}
/>
{:else if file.mime_type && file.mime_type.includes("audio")}
<Music />
{:else if file.mime_type && file.mime_type.includes("video")}
<Video />
{:else}
<File />
{/if}
</button>
</span>
{/each}
{#if uploading}
<div class="loader" role="status" aria-label="Uploading"></div>
{/if}
</div>
{/if}
<div class="input-container">
{#if !disabled && !(file_count === "single" && value.files.length > 0)}
<Upload
bind:this={upload_component}
on:load={handle_upload}
{file_count}
filetype={file_types}
{root}
{max_file_size}
bind:dragging
bind:uploading
show_progress={false}
disable_click={true}
bind:hidden_upload
on:error
hidden={true}
{upload}
{stream_handler}
></Upload>
<button
data-testid="upload-button"
class="upload-button"
on:click={handle_upload_click}><Paperclip /></button
>
{/if}
<textarea
data-testid="textbox"
use:text_area_resize={{
text: value.text,
lines: lines,
max_lines: max_lines
}}
class="scroll-hide"
class:no-label={!show_label}
dir={rtl ? "rtl" : "ltr"}
bind:value={value.text}
bind:this={el}
{placeholder}
rows={lines}
{disabled}
{autofocus}
on:keypress={handle_keypress}
on:blur
on:select={handle_select}
on:focus
on:scroll={handle_scroll}
on:paste={handle_paste}
style={text_align ? "text-align: " + text_align : ""}
/>
{#if submit_btn}
<button
class="submit-button"
class:padded-button={submit_btn !== true}
on:click={handle_submit}
>
{#if submit_btn === true}
<Send />
{:else}
{submit_btn}
{/if}
</button>
{/if}
{#if stop_btn}
<button
class="stop-button"
class:padded-button={stop_btn !== true}
on:click={handle_stop}
>
{#if stop_btn === true}
<Square fill={"none"} stroke_width={2.5} />
{:else}
{stop_btn}
{/if}
</button>
<BlockTitle {root} {show_label} {info}>{label}</BlockTitle>
{#if value.files.length > 0 || uploading}
<div
class="thumbnails scroll-hide"
aria-label="Uploaded files"
data-testid="container_el"
style="display: {value.files.length > 0 || uploading ? 'flex' : 'none'};"
>
{#each value.files as file, index}
<span role="listitem" aria-label="File thumbnail">
<button class="thumbnail-item thumbnail-small">
<button
class:disabled
class="delete-button"
on:click={(event) => remove_thumbnail(event, index)}
><Clear /></button
>
{#if file.mime_type && file.mime_type.includes("image")}
<Image
src={file.url}
title={null}
alt=""
loading="lazy"
class={"thumbnail-image"}
/>
{:else if file.mime_type && file.mime_type.includes("audio")}
<Music />
{:else if file.mime_type && file.mime_type.includes("video")}
<Video />
{:else}
<File />
{/if}
</button>
</span>
{/each}
{#if uploading}
<div class="loader" role="status" aria-label="Uploading"></div>
{/if}
</div>
</label>
{/if}
{#if sources && sources.includes("microphone") && active_source === "microphone"}
<InteractiveAudio
on:change={({ detail }) => {
if (detail !== null) {
mic_audio = detail;
}
}}
on:clear={() => {
active_source = null;
}}
on:start_recording={() => dispatch("start_recording")}
on:pause_recording={() => dispatch("pause_recording")}
on:stop_recording={() => dispatch("stop_recording")}
sources={["microphone"]}
class_name="compact-audio"
{recording}
{waveform_settings}
{waveform_options}
{i18n}
{active_source}
{upload}
{stream_handler}
stream_every={1}
editable={true}
{label}
{root}
loop={false}
show_label={false}
show_download_button={false}
dragging={false}
/>
{/if}
<div class="input-container">
{#if sources && sources.includes("upload") && !disabled && !(file_count === "single" && value.files.length > 0)}
<Upload
bind:this={upload_component}
on:load={handle_upload}
{file_count}
filetype={file_types}
{root}
{max_file_size}
bind:dragging
bind:uploading
show_progress={false}
disable_click={true}
bind:hidden_upload
on:error
hidden={true}
{upload}
{stream_handler}
/>
<button
data-testid="upload-button"
class="upload-button"
on:click={handle_upload_click}><Paperclip /></button
>
{/if}
{#if sources && sources.includes("microphone")}
<button
data-testid="microphone-button"
class="microphone-button"
class:recording
on:click={() => {
active_source = active_source !== "microphone" ? "microphone" : null;
}}
>
<Microphone />
</button>
{/if}
<!-- svelte-ignore a11y-autofocus -->
<textarea
data-testid="textbox"
use:text_area_resize={{
text: value.text,
lines: lines,
max_lines: max_lines
}}
class="scroll-hide"
class:no-label={!show_label}
dir={rtl ? "rtl" : "ltr"}
bind:value={value.text}
bind:this={el}
{placeholder}
rows={lines}
{disabled}
{autofocus}
on:keypress={handle_keypress}
on:blur
on:select={handle_select}
on:focus
on:scroll={handle_scroll}
on:paste={handle_paste}
style={text_align ? "text-align: " + text_align : ""}
/>
{#if submit_btn}
<button
class="submit-button"
class:padded-button={submit_btn !== true}
on:click={handle_submit}
>
{#if submit_btn === true}
<Send />
{:else}
{submit_btn}
{/if}
</button>
{/if}
{#if stop_btn}
<button
class="stop-button"
class:padded-button={stop_btn !== true}
on:click={handle_stop}
>
{#if stop_btn === true}
<Square fill={"none"} stroke_width={2.5} />
{:else}
{stop_btn}
{/if}
</button>
{/if}
</div>
</div>
<style>
@ -471,6 +533,7 @@
color: var(--input-placeholder-color);
}
.microphone-button,
.upload-button,
.submit-button,
.stop-button {
@ -487,29 +550,34 @@
justify-content: center;
align-items: center;
z-index: var(--layer-1);
margin-left: var(--spacing-sm);
}
.padded-button {
padding: 0 10px;
}
.microphone-button,
.stop-button,
.upload-button,
.submit-button {
background: var(--button-secondary-background-fill);
}
.microphone-button:hover,
.stop-button:hover,
.upload-button:hover,
.submit-button:hover {
background: var(--button-secondary-background-fill-hover);
}
.microphone-button:disabled,
.stop-button:disabled,
.upload-button:disabled,
.submit-button:disabled {
background: var(--button-secondary-background-fill);
cursor: initial;
}
.microphone-button:active,
.stop-button:active,
.upload-button:active,
.submit-button:active {
@ -520,6 +588,7 @@
height: 22px;
width: 22px;
}
.microphone-button :global(svg),
.upload-button :global(svg) {
height: 17px;
width: 17px;