Bugfix: Allow multiple files to be uploaded via multimodal textbox (#8608)

* Add code * Add file_count parameter to MultimodalTextbox * add changeset * Update fresh-years-feel.md --------- Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
2025-04-06 12:30:29 +08:00 · 2024-06-26 17:14:25 +01:00 · 2024-06-26 17:14:25 +01:00 · c12f82a36d
commit c12f82a36d
parent a4897d62c0
10 changed files with 40 additions and 6 deletions
--- a/.changeset/fresh-years-feel.md
+++ b/.changeset/fresh-years-feel.md
@ -0,0 +1,8 @@
+---
+"@gradio/file": patch
+"@gradio/multimodaltextbox": patch
+"@gradio/upload": patch
+"gradio": patch
+---
+
+fix:Bugfix: Add a `file_count` parameter to `gr.MultimodalTextbox`. Multiple files cab be uploaded by setting `file_count="multiple"`. Default is `"single"` to preserve the previous behavior.
--- a/demo/chatbot_multimodal/run.ipynb
+++ b/demo/chatbot_multimodal/run.ipynb
@ -1 +1 @@
-{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio plotly"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('files')\n", "!wget -q -O files/avatar.png https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/avatar.png"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import plotly.express as px\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "def random_plot():\n", "    df = px.data.iris()\n", "    fig = px.scatter(df, x=\"sepal_width\", y=\"sepal_length\", color=\"species\",\n", "                    size='petal_length', hover_data=['petal_width'])\n", "    return fig\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", "    print(x.index, x.value, x.liked)\n", "\n", "def add_message(history, message):\n", "    for x in message[\"files\"]:\n", "        history.append(((x,), None))\n", "    if message[\"text\"] is not None:\n", "        history.append((message[\"text\"], None))\n", "    return history, gr.MultimodalTextbox(value=None, interactive=False)\n", "\n", "def bot(history):\n", "    history[-1][1] = \"Cool!\"\n", "    return history\n", "\n", "fig = random_plot()\n", "\n", "with gr.Blocks(fill_height=True) as demo:\n", "    chatbot = gr.Chatbot(\n", "        elem_id=\"chatbot\",\n", "        bubble_full_width=False,\n", "        scale=1,\n", "    )\n", "\n", "    chat_input = gr.MultimodalTextbox(interactive=True, placeholder=\"Enter message or upload file...\", show_label=False)\n", "\n", "    chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])\n", "    bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name=\"bot_response\")\n", "    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])\n", "\n", "    chatbot.like(print_like_dislike, None, None)\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", "    demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio plotly"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('files')\n", "!wget -q -O files/avatar.png https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/avatar.png"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import plotly.express as px\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "def random_plot():\n", "    df = px.data.iris()\n", "    fig = px.scatter(df, x=\"sepal_width\", y=\"sepal_length\", color=\"species\",\n", "                    size='petal_length', hover_data=['petal_width'])\n", "    return fig\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", "    print(x.index, x.value, x.liked)\n", "\n", "def add_message(history, message):\n", "    for x in message[\"files\"]:\n", "        history.append(((x,), None))\n", "    if message[\"text\"] is not None:\n", "        history.append((message[\"text\"], None))\n", "    return history, gr.MultimodalTextbox(value=None, interactive=False)\n", "\n", "def bot(history):\n", "    history[-1][1] = \"Cool!\"\n", "    return history\n", "\n", "fig = random_plot()\n", "\n", "with gr.Blocks(fill_height=True) as demo:\n", "    chatbot = gr.Chatbot(\n", "        elem_id=\"chatbot\",\n", "        bubble_full_width=False,\n", "        scale=1,\n", "    )\n", "\n", "    chat_input = gr.MultimodalTextbox(interactive=True,\n", "                                      file_count=\"multiple\",\n", "                                      placeholder=\"Enter message or upload file...\", show_label=False)\n", "\n", "    chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])\n", "    bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name=\"bot_response\")\n", "    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])\n", "\n", "    chatbot.like(print_like_dislike, None, None)\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", "    demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
--- a/demo/chatbot_multimodal/run.py
+++ b/demo/chatbot_multimodal/run.py
@ -33,7 +33,9 @@ with gr.Blocks(fill_height=True) as demo:
        scale=1,
    )

-    chat_input = gr.MultimodalTextbox(interactive=True, placeholder="Enter message or upload file...", show_label=False)
+    chat_input = gr.MultimodalTextbox(interactive=True,
+                                      file_count="multiple",
+                                      placeholder="Enter message or upload file...", show_label=False)

    chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
    bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
--- a/gradio/components/multimodal_textbox.py
+++ b/gradio/components/multimodal_textbox.py
@ -55,6 +55,7 @@ class MultimodalTextbox(FormComponent):
        value: dict[str, str | list] | Callable | None = None,
        *,
        file_types: list[str] | None = None,
+        file_count: Literal["single", "multiple", "directory"] = "single",
        lines: int = 1,
        max_lines: int = 20,
        placeholder: str | None = None,
@ -80,6 +81,7 @@ class MultimodalTextbox(FormComponent):
        """
        Parameters:
            value: Default value to show in MultimodalTextbox. A dictionary of the form {"text": "sample text", "files": [{path: "files/file.jpg", orig_name: "file.jpg", url: "http://image_url.jpg", size: 100}]}. If callable, the function will be called whenever the app loads to set the initial value of the component.
+            file_count: if single, allows user to upload one file. If "multiple", user uploads multiple files. If "directory", user uploads all files in selected directory. Return type will be list for each file in case of "multiple" or "directory".
            file_types: List of file extensions or types of files to be uploaded (e.g. ['image', '.json', '.mp4']). "file" allows any file to be uploaded, "image" allows only image files to be uploaded, "audio" allows only audio files to be uploaded, "video" allows only video files to be uploaded, "text" allows only text files to be uploaded.
            lines: minimum number of line rows to provide in textarea.
            max_lines: maximum number of line rows to provide in textarea.
@ -104,6 +106,7 @@ class MultimodalTextbox(FormComponent):
            submit_btn: If False, will not show a submit button. If a string, will use that string as the submit button text.
        """
        self.file_types = file_types
+        self.file_count = file_count
        if value is None:
            value = {"text": "", "files": []}
        if file_types is not None and not isinstance(file_types, list):
--- a/js/app/test/chatbot_multimodal.spec.ts
+++ b/js/app/test/chatbot_multimodal.spec.ts
@ -52,6 +52,23 @@ test("images uploaded by a user should be shown in the chat", async ({
 	expect(bot_message).toBeTruthy();
 });

+test("Users can upload multiple images and they will be shown as thumbnails", async ({
+	page
+}) => {
+	const fileChooserPromise = page.waitForEvent("filechooser");
+	await page.getByTestId("upload-button").click();
+	const fileChooser = await fileChooserPromise;
+	await fileChooser.setFiles([
+		"./test/files/cheetah1.jpg",
+		"./test/files/cheetah1.jpg"
+	]);
+	expect
+		.poll(async () => await page.locator("thumbnail-image").count(), {
+			timeout: 5000
+		})
+		.toEqual(2);
+});
+
 test("audio uploaded by a user should be shown in the chatbot", async ({
 	page
 }) => {
--- a/js/file/Index.svelte
+++ b/js/file/Index.svelte
@ -42,7 +42,7 @@
 		clear_status: LoadingStatus;
 		delete: FileData;
 	}>;
-	export let file_count: string;
+	export let file_count: "single" | "multiple" | "directory";
 	export let file_types: string[] = ["file"];

 	let old_value = value;
--- a/js/file/shared/FileUpload.svelte
+++ b/js/file/shared/FileUpload.svelte
@ -12,7 +12,7 @@

 	export let label: string;
 	export let show_label = true;
-	export let file_count = "single";
+	export let file_count: "single" | "multiple" | "directory" = "single";
 	export let file_types: string[] | null = null;
 	export let selectable = false;
 	export let root: string;
--- a/js/multimodaltextbox/Index.svelte
+++ b/js/multimodaltextbox/Index.svelte
@ -49,6 +49,7 @@
 	export let autoscroll = true;
 	export let interactive: boolean;
 	export let root: string;
+	export let file_count: "single" | "multiple" | "directory";
 </script>

 <Block
@ -86,6 +87,7 @@
 		{autofocus}
 		{container}
 		{autoscroll}
+		{file_count}
 		max_file_size={gradio.max_file_size}
 		on:change={() => gradio.dispatch("change", value)}
 		on:input={() => gradio.dispatch("input")}
--- a/js/multimodaltextbox/shared/MultimodalTextbox.svelte
+++ b/js/multimodaltextbox/shared/MultimodalTextbox.svelte
@ -37,6 +37,7 @@
 	export let max_file_size: number | null = null;
 	export let upload: Client["upload"];
 	export let stream_handler: Client["stream"];
+	export let file_count: "single" | "multiple" | "directory" = "multiple";

 	let upload_component: Upload;
 	let hidden_upload: HTMLInputElement;
@ -161,6 +162,7 @@
 			for (let file of detail) {
 				value.files.push(file);
 			}
+			value = value;
 		} else {
 			value.files.push(detail);
 			value = value;
@ -244,7 +246,7 @@
 		<Upload
 			bind:this={upload_component}
 			on:load={handle_upload}
-			filetype={accept_file_types}
+			{file_count}
 			{root}
 			{max_file_size}
 			bind:dragging
--- a/js/upload/src/Upload.svelte
+++ b/js/upload/src/Upload.svelte
@ -10,7 +10,7 @@
 	export let boundedheight = true;
 	export let center = true;
 	export let flex = true;
-	export let file_count = "single";
+	export let file_count: "single" | "multiple" | "directory" = "single";
 	export let disable_click = false;
 	export let root: string;
 	export let hidden = false;