Multimodal Textbox (Chat Input Component) (#7420)

* first pass

* multimodal textbox

* add changeset

* remove file

* more changes

* changes

* add changeset

* revert demo

* doc strings fix

* update demo

* file icons

* more updates

* format

* add story

* remove doc line

* type fixes

* chat interface

* new demo

* image upload fix

* ui changes

* addressing PR comments

* format

* type check

* more pr fixes

* format

* format

* test fixes

* test fixes

* Streaming fixes + other stuff

* optional keys to dict value

* final fixes

* notebook

* format

* Update guides/04_chatbots/01_creating-a-chatbot-fast.md

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>

* Update guides/04_chatbots/01_creating-a-chatbot-fast.md

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>

* Update guides/04_chatbots/01_creating-a-chatbot-fast.md

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>

* merge

* backend fixes

* story fix

* ui test fix

* format

* story

* format

* demo fix

* streaming test fix

* stories fix

* stories fix

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
This commit is contained in:
Dawood Khan 2024-03-19 16:16:05 -04:00 committed by GitHub
parent c9aba8d8a5
commit 15da39fca0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 1291 additions and 76 deletions

View File

@ -0,0 +1,8 @@
---
"@gradio/app": minor
"@gradio/multimodaltextbox": minor
"@gradio/upload": minor
"gradio": minor
---
feat: Multimodal Textbox (Chat Input Component)

View File

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/avatar.png"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", " print(x.index, x.value, x.liked)\n", "\n", "\n", "def add_text(history, text):\n", " history = history + [(text, None)]\n", " return history, gr.Textbox(value=\"\", interactive=False)\n", "\n", "\n", "def add_file(history, file):\n", " history = history + [((file.name,), None)]\n", " return history\n", "\n", "\n", "def bot(history):\n", " response = \"**That's cool!**\"\n", " history[-1][1] = \"\"\n", " for character in response:\n", " history[-1][1] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot(\n", " [],\n", " elem_id=\"chatbot\",\n", " bubble_full_width=False,\n", " avatar_images=(None, (os.path.join(os.path.abspath(''), \"avatar.png\"))),\n", " )\n", "\n", " with gr.Row():\n", " txt = gr.Textbox(\n", " scale=4,\n", " show_label=False,\n", " placeholder=\"Enter text and press enter, or upload an image\",\n", " container=False,\n", " )\n", " btn = gr.UploadButton(\"\ud83d\udcc1\", 
file_types=[\"image\", \"video\", \"audio\"])\n", "\n", " txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(\n", " bot, chatbot, chatbot, api_name=\"bot_response\"\n", " )\n", " txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)\n", " file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(\n", " bot, chatbot, chatbot\n", " )\n", "\n", " chatbot.like(print_like_dislike, None, None)\n", "\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('files')\n", "!wget -q -O files/avatar.png https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/avatar.png\n", "!wget -q -O files/lion.jpg https://github.com/gradio-app/gradio/raw/main/demo/chatbot_multimodal/files/lion.jpg"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "import time\n", "\n", "# Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). 
Plus shows support for streaming text.\n", "\n", "\n", "def print_like_dislike(x: gr.LikeData):\n", " print(x.index, x.value, x.liked)\n", "\n", "def add_message(history, message):\n", " for x in message[\"files\"]:\n", " history.append(((x[\"path\"],), None)) \n", " if message[\"text\"] is not None:\n", " history.append((message[\"text\"], None))\n", " return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=[\"image\"])\n", "\n", "def bot(history):\n", " response = \"**That's cool!**\"\n", " history[-1][1] = \"\"\n", " for character in response:\n", " history[-1][1] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot(\n", " [],\n", " elem_id=\"chatbot\",\n", " bubble_full_width=False,\n", " avatar_images=(None, (os.path.join(os.path.abspath(''), \"files/avatar.png\"))),\n", " )\n", "\n", " chat_input = gr.MultimodalTextbox(interactive=True, file_types=[\"image\"], placeholder=\"Enter message or upload file...\", show_label=False)\n", " chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input], queue=False).then(\n", " bot, chatbot, chatbot, api_name=\"bot_response\"\n", " )\n", " chat_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input], queue=False)\n", " chatbot.like(print_like_dislike, None, None)\n", "\n", "demo.queue()\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

View File

@ -8,16 +8,12 @@ import time
def print_like_dislike(x: gr.LikeData):
print(x.index, x.value, x.liked)
def add_text(history, text):
history = history + [(text, None)]
return history, gr.Textbox(value="", interactive=False)
def add_file(history, file):
history = history + [((file.name,), None)]
return history
def add_message(history, message):
for x in message["files"]:
history.append(((x["path"],), None))
if message["text"] is not None:
history.append((message["text"], None))
return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=["image"])
def bot(history):
response = "**That's cool!**"
@ -33,29 +29,16 @@ with gr.Blocks() as demo:
[],
elem_id="chatbot",
bubble_full_width=False,
avatar_images=(None, (os.path.join(os.path.dirname(__file__), "avatar.png"))),
avatar_images=(None, (os.path.join(os.path.dirname(__file__), "files/avatar.png"))),
)
with gr.Row():
txt = gr.Textbox(
scale=4,
show_label=False,
placeholder="Enter text and press enter, or upload an image",
container=False,
)
btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])
txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input], queue=False).then(
bot, chatbot, chatbot, api_name="bot_response"
)
txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(
bot, chatbot, chatbot
)
chat_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input], queue=False)
chatbot.like(print_like_dislike, None, None)
demo.queue()
if __name__ == "__main__":
demo.launch()

View File

@ -0,0 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatinterface_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "def echo(message, history):\n", " return message[\"text\"]\n", "\n", "demo = gr.ChatInterface(fn=echo, examples=[{\"text\": \"hello\"}, {\"text\": \"hola\"}, {\"text\": \"merhaba\"}], title=\"Echo Bot\", multimodal=True)\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

View File

@ -0,0 +1,7 @@
import gradio as gr
def echo(message, history):
return message["text"]
demo = gr.ChatInterface(fn=echo, examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}], title="Echo Bot", multimodal=True)
demo.launch()

View File

@ -0,0 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: multimodaltextbox_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "with gr.Blocks() as demo:\n", " gr.MultimodalTextbox(interactive=True)\n", "\n", "demo.launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

View File

@ -0,0 +1,6 @@
import gradio as gr
with gr.Blocks() as demo:
gr.MultimodalTextbox(interactive=True)
demo.launch()

View File

@ -44,6 +44,7 @@ from gradio.components import (
LogoutButton,
Markdown,
Model3D,
MultimodalTextbox,
Number,
ParamViewer,
Plot,

View File

@ -18,6 +18,7 @@ from gradio.components import (
Chatbot,
Component,
Markdown,
MultimodalTextbox,
State,
Textbox,
get_component_instance,
@ -47,7 +48,7 @@ class ChatInterface(Blocks):
demo = gr.ChatInterface(fn=echo, examples=["hello", "hola", "merhaba"], title="Echo Bot")
demo.launch()
Demos: chatinterface_random_response, chatinterface_streaming_echo
Demos: chatinterface_multimodal, chatinterface_random_response, chatinterface_streaming_echo
Guides: creating-a-chatbot-fast, sharing-your-app
"""
@ -55,12 +56,13 @@ class ChatInterface(Blocks):
self,
fn: Callable,
*,
multimodal: bool = False,
chatbot: Chatbot | None = None,
textbox: Textbox | None = None,
textbox: Textbox | MultimodalTextbox | None = None,
additional_inputs: str | Component | list[str | Component] | None = None,
additional_inputs_accordion_name: str | None = None,
additional_inputs_accordion: str | Accordion | None = None,
examples: list[str] | None = None,
examples: list[str] | list[dict[str, str | list]] | None = None,
cache_examples: bool | None = None,
title: str | None = None,
description: str | None = None,
@ -82,8 +84,9 @@ class ChatInterface(Blocks):
"""
Parameters:
fn: The function to wrap the chat interface around. Should accept two parameters: a string input message and list of two-element lists of the form [[user_message, bot_message], ...] representing the chat history, and return a string response. See the Chatbot documentation for more information on the chat history format.
multimodal: If True, the chat interface will use a gr.MultimodalTextbox component for the input, which allows for the uploading of multimedia files. If False, the chat interface will use a gr.Textbox component for the input.
chatbot: An instance of the gr.Chatbot component to use for the chat interface, if you would like to customize the chatbot properties. If not provided, a default gr.Chatbot component will be created.
textbox: An instance of the gr.Textbox component to use for the chat interface, if you would like to customize the textbox properties. If not provided, a default gr.Textbox component will be created.
textbox: An instance of the gr.Textbox or gr.MultimodalTextbox component to use for the chat interface, if you would like to customize the textbox properties. If not provided, a default gr.Textbox or gr.MultimodalTextbox component will be created.
additional_inputs: An instance or list of instances of gradio components (or their string shortcuts) to use as additional inputs to the chatbot. If components are not already rendered in a surrounding Blocks, then the components will be displayed under the chatbot, in an accordion.
additional_inputs_accordion_name: Deprecated. Will be removed in a future version of Gradio. Use the `additional_inputs_accordion` parameter instead.
additional_inputs_accordion: If a string is provided, this is the label of the `gr.Accordion` to use to contain additional inputs. A `gr.Accordion` object can be provided as well to configure other properties of the container holding the additional inputs. Defaults to a `gr.Accordion(label="Additional Inputs", open=False)`. This parameter is only used if `additional_inputs` is provided.
@ -117,6 +120,7 @@ class ChatInterface(Blocks):
fill_height=fill_height,
delete_cache=delete_cache,
)
self.multimodal = multimodal
self.concurrency_limit = concurrency_limit
self.fn = fn
self.is_async = inspect.iscoroutinefunction(
@ -202,11 +206,22 @@ class ChatInterface(Blocks):
textbox.container = False
textbox.show_label = False
textbox_ = textbox.render()
if not isinstance(textbox_, Textbox):
if not isinstance(textbox_, Textbox) or not isinstance(
textbox_, MultimodalTextbox
):
raise TypeError(
f"Expected a gr.Textbox, but got {type(textbox_)}"
f"Expected a gr.Textbox or gr.MultimodalTextbox component, but got {type(textbox_)}"
)
self.textbox = textbox_
elif self.multimodal:
submit_btn = None
self.textbox = MultimodalTextbox(
show_label=False,
label="Message",
placeholder="Type a message...",
scale=7,
autofocus=autofocus,
)
else:
self.textbox = Textbox(
container=False,
@ -216,7 +231,7 @@ class ChatInterface(Blocks):
scale=7,
autofocus=autofocus,
)
if submit_btn is not None:
if submit_btn is not None and not multimodal:
if isinstance(submit_btn, Button):
submit_btn.render()
elif isinstance(submit_btn, str):
@ -331,7 +346,7 @@ class ChatInterface(Blocks):
retry_event = (
self.retry_btn.click(
self._delete_prev_fn,
[self.chatbot_state],
[self.saved_input, self.chatbot_state],
[self.chatbot, self.saved_input, self.chatbot_state],
show_api=False,
queue=False,
@ -358,7 +373,7 @@ class ChatInterface(Blocks):
if self.undo_btn:
self.undo_btn.click(
self._delete_prev_fn,
[self.chatbot_state],
[self.saved_input, self.chatbot_state],
[self.chatbot, self.saved_input, self.chatbot_state],
show_api=False,
queue=False,
@ -439,23 +454,48 @@ class ChatInterface(Blocks):
),
)
def _clear_and_save_textbox(self, message: str) -> tuple[str, str]:
return "", message
def _clear_and_save_textbox(self, message: str) -> tuple[str | dict, str]:
if self.multimodal:
return {"text": "", "files": []}, message
else:
return "", message
def _append_multimodal_history(
self,
message: dict[str, list],
response: str | None,
history: list[list[str | tuple | None]],
):
for x in message["files"]:
history.append([(x["path"],), None])
if message["text"] is not None and isinstance(message["text"], str):
history.append([message["text"], response])
def _display_input(
self, message: str, history: list[list[str | None]]
) -> tuple[list[list[str | None]], list[list[str | None]]]:
history.append([message, None])
self, message: str | dict[str, list], history: list[list[str | tuple | None]]
) -> tuple[list[list[str | tuple | None]], list[list[str | tuple | None]]]:
if self.multimodal and isinstance(message, dict):
self._append_multimodal_history(message, None, history)
elif isinstance(message, str):
history.append([message, None])
return history, history
async def _submit_fn(
self,
message: str,
history_with_input: list[list[str | None]],
message: str | dict[str, list],
history_with_input: list[list[str | tuple | None]],
request: Request,
*args,
) -> tuple[list[list[str | None]], list[list[str | None]]]:
history = history_with_input[:-1]
) -> tuple[list[list[str | tuple | None]], list[list[str | tuple | None]]]:
if self.multimodal and isinstance(message, dict):
remove_input = (
len(message["files"]) + 1
if message["text"] is not None
else len(message["files"])
)
history = history_with_input[:-remove_input]
else:
history = history_with_input[:-1]
inputs, _, _ = special_args(
self.fn, inputs=[message, history, *args], request=request
)
@ -467,17 +507,28 @@ class ChatInterface(Blocks):
self.fn, *inputs, limiter=self.limiter
)
history.append([message, response])
if self.multimodal and isinstance(message, dict):
self._append_multimodal_history(message, response, history)
elif isinstance(message, str):
history.append([message, response])
return history, history
async def _stream_fn(
self,
message: str,
history_with_input: list[list[str | None]],
message: str | dict[str, list],
history_with_input: list[list[str | tuple | None]],
request: Request,
*args,
) -> AsyncGenerator:
history = history_with_input[:-1]
if self.multimodal and isinstance(message, dict):
remove_input = (
len(message["files"]) + 1
if message["text"] is not None
else len(message["files"])
)
history = history_with_input[:-remove_input]
else:
history = history_with_input[:-1]
inputs, _, _ = special_args(
self.fn, inputs=[message, history, *args], request=request
)
@ -491,14 +542,28 @@ class ChatInterface(Blocks):
generator = SyncToAsyncIterator(generator, self.limiter)
try:
first_response = await async_iteration(generator)
update = history + [[message, first_response]]
yield update, update
if self.multimodal and isinstance(message, dict):
for x in message["files"]:
history.append([(x["path"],), None])
update = history + [[message["text"], first_response]]
yield update, update
else:
update = history + [[message, first_response]]
yield update, update
except StopIteration:
update = history + [[message, None]]
yield update, update
if self.multimodal and isinstance(message, dict):
self._append_multimodal_history(message, None, history)
yield history, history
else:
update = history + [[message, None]]
yield update, update
async for response in generator:
update = history + [[message, response]]
yield update, update
if self.multimodal and isinstance(message, dict):
update = history + [[message["text"], response]]
yield update, update
else:
update = history + [[message, response]]
yield update, update
async def _api_submit_fn(
self, message: str, history: list[list[str | None]], request: Request, *args
@ -567,10 +632,21 @@ class ChatInterface(Blocks):
yield [[message, response]]
def _delete_prev_fn(
self, history: list[list[str | None]]
) -> tuple[list[list[str | None]], str, list[list[str | None]]]:
try:
message, _ = history.pop()
except IndexError:
message = ""
self,
message: str | dict[str, list],
history: list[list[str | tuple | None]],
) -> tuple[
list[list[str | tuple | None]],
str | dict[str, list],
list[list[str | tuple | None]],
]:
if self.multimodal and isinstance(message, dict):
remove_input = (
len(message["files"]) + 1
if message["text"] is not None
else len(message["files"])
)
history = history[:-remove_input]
else:
history = history[:-1]
return history, message or "", history

View File

@ -37,6 +37,7 @@ from gradio.components.login_button import LoginButton
from gradio.components.logout_button import LogoutButton
from gradio.components.markdown import Markdown
from gradio.components.model3d import Model3D
from gradio.components.multimodal_textbox import MultimodalTextbox
from gradio.components.number import Number
from gradio.components.paramviewer import ParamViewer
from gradio.components.plot import Plot
@ -114,4 +115,5 @@ __all__ = [
"StreamingOutput",
"ImageEditor",
"ParamViewer",
"MultimodalTextbox",
]

View File

@ -0,0 +1,183 @@
"""gr.MultimodalTextbox() component."""
from __future__ import annotations
from pathlib import Path
from typing import Any, Callable, List, Literal, Optional, TypedDict
import gradio_client.utils as client_utils
from gradio_client.documentation import document
from pydantic import Field
from gradio.components.base import FormComponent
from gradio.data_classes import FileData, GradioModel
from gradio.events import Events
class MultimodalData(GradioModel):
    # Wire-format value for MultimodalTextbox: optional text plus a list of
    # uploaded files. Used as the component's `data_model` (see MultimodalTextbox).
    text: Optional[str] = None
    # default_factory so each instance gets its own list (no shared mutable default)
    files: Optional[List[FileData]] = Field(default_factory=list)
class MultimodalPostprocess(TypedDict):
    # Plain-dict shape of a multimodal value ({"text": ..., "files": [...]}).
    # NOTE(review): not referenced elsewhere in this file — confirm it is used
    # by other modules before removing.
    text: str
    files: List[FileData]
@document()
class MultimodalTextbox(FormComponent):
    """
    Creates a textarea for users to enter string input or display string output and also allows for the uploading of multimedia files.

    Demos: chatbot_multimodal
    Guides: creating-a-chatbot
    """

    # Pydantic model describing the payload exchanged with the frontend.
    data_model = MultimodalData

    # Events this component can emit; wired up by the base class machinery.
    EVENTS = [
        Events.change,
        Events.input,
        Events.select,
        Events.submit,
        Events.focus,
        Events.blur,
    ]

    def __init__(
        self,
        value: dict[str, str | list] | Callable | None = None,
        *,
        file_types: list[str] | None = None,
        lines: int = 1,
        max_lines: int = 20,
        placeholder: str | None = None,
        label: str | None = None,
        info: str | None = None,
        every: float | None = None,
        show_label: bool | None = None,
        container: bool = True,
        scale: int | None = None,
        min_width: int = 160,
        interactive: bool | None = None,
        visible: bool = True,
        elem_id: str | None = None,
        autofocus: bool = False,
        autoscroll: bool = True,
        elem_classes: list[str] | str | None = None,
        render: bool = True,
        text_align: Literal["left", "right"] | None = None,
        rtl: bool = False,
        submit_btn: str | Literal[False] = "",
    ):
        """
        Parameters:
            value: Default value to show in MultimodalTextbox. A dictionary of the form {"text": "sample text", "files": [{path: "files/file.jpg", orig_name: "file.jpg", url: "http://image_url.jpg", size: 100}]}. If callable, the function will be called whenever the app loads to set the initial value of the component.
            file_types: List of file extensions or types of files to be uploaded (e.g. ['image', '.json', '.mp4']). "file" allows any file to be uploaded, "image" allows only image files to be uploaded, "audio" allows only audio files to be uploaded, "video" allows only video files to be uploaded, "text" allows only text files to be uploaded.
            lines: minimum number of line rows to provide in textarea.
            max_lines: maximum number of line rows to provide in textarea.
            placeholder: placeholder hint to provide behind textarea.
            label: The label for this component. Appears above the component and is also used as the header if there is a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.
            info: additional component description.
            every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open. Has no effect otherwise. The event can be accessed (e.g. to cancel it) via this component's .load_event attribute.
            show_label: if True, will display label.
            container: If True, will place the component in a container - providing some extra padding around the border.
            scale: relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True.
            min_width: minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first.
            interactive: if True, will be rendered as an editable textbox; if False, editing will be disabled. If not provided, this is inferred based on whether the component is used as an input or output.
            visible: If False, component will be hidden.
            autofocus: If True, will focus on the textbox when the page loads. Use this carefully, as it can cause usability issues for sighted and non-sighted users.
            elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.
            elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.
            render: If False, component will not be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later.
            text_align: How to align the text in the textbox, can be: "left", "right", or None (default). If None, the alignment is left if `rtl` is False, or right if `rtl` is True. Can only be changed if `type` is "text".
            rtl: If True and `type` is "text", sets the direction of the text to right-to-left (cursor appears on the left of the text). Default is False, which renders cursor on the right.
            autoscroll: If True, will automatically scroll to the bottom of the textbox when the value changes, unless the user scrolls up. If False, will not scroll to the bottom of the textbox when the value changes.
            submit_btn: If False, will not show a submit button. If a string, will use that string as the submit button text. Only applies if `interactive` is True.
        """
        self.file_types = file_types
        # Normalize a missing value into the empty multimodal payload so the
        # frontend always receives a {"text", "files"} dict.
        if value is None:
            value = {"text": "", "files": []}
        if file_types is not None and not isinstance(file_types, list):
            raise ValueError(
                f"Parameter file_types must be a list. Received {file_types.__class__.__name__}"
            )
        self.lines = lines
        # Guarantee max_lines is never smaller than the minimum line count.
        self.max_lines = max(lines, max_lines)
        self.placeholder = placeholder
        self.submit_btn = submit_btn
        self.autofocus = autofocus
        self.autoscroll = autoscroll
        super().__init__(
            label=label,
            info=info,
            every=every,
            show_label=show_label,
            container=container,
            scale=scale,
            min_width=min_width,
            interactive=interactive,
            visible=visible,
            elem_id=elem_id,
            elem_classes=elem_classes,
            render=render,
            value=value,
        )
        self.rtl = rtl
        self.text_align = text_align

    def preprocess(
        self, payload: MultimodalData | None
    ) -> dict[str, str | list] | None:
        """
        Parameters:
            payload: the text and list of file(s) entered in the multimodal textbox.
        Returns:
            Passes text value and list of file(s) as a {dict} into the function.
        """
        # model_dump() converts the pydantic model (including nested FileData)
        # into plain dicts/lists for the user's function.
        return None if payload is None else payload.model_dump()

    def postprocess(self, value: dict[str, str | list] | None) -> MultimodalData:
        """
        Parameters:
            value: Expects a {dict} with "text" and "files", both optional. The files array is a list of file paths or URLs.
        Returns:
            The value to display in the multimodal textbox. Files information as a list of FileData objects.
        """
        if value is None:
            return MultimodalData(text="", files=[])
        if not isinstance(value, dict):
            raise ValueError(
                f"MultimodalTextbox expects a dictionary with optional keys 'text' and 'files'. Received {value.__class__.__name__}"
            )
        # Coerce each entry of "files" into a FileData, accepting either an
        # existing FileData, a dict with optional keys, or a bare string.
        # NOTE(review): for a bare string entry, `"path" in file` is a substring
        # test and `Path(file).stat()` requires the path to exist on disk —
        # URL strings would raise OSError here; confirm intended behavior.
        if "files" in value and isinstance(value["files"], list):
            value["files"] = [
                file
                if isinstance(file, FileData)
                else FileData(
                    path=file["path"] if "path" in file else file,
                    mime_type=file["mime_type"]
                    if "mime_type" in file
                    else client_utils.get_mimetype(file),
                    orig_name=file["orig_name"]
                    if "orig_name" in file
                    else Path(file).name,
                    size=file["size"] if "size" in file else Path(file).stat().st_size,
                )
                for file in value["files"]
            ]
        text = value.get("text", "")
        files = value.get("files", [])
        if not isinstance(text, str):
            raise TypeError(
                f"Expected 'text' to be a string, but got {type(text).__name__}"
            )
        if not isinstance(files, list):
            raise TypeError(
                f"Expected 'files' to be a list, but got {type(files).__name__}"
            )
        return MultimodalData(text=text, files=files)

    def example_inputs(self) -> Any:
        # Sample payload used when auto-generating API/example documentation.
        return {"text": "sample text", "files": []}

View File

@ -124,6 +124,35 @@ gr.ChatInterface(
).launch()
```
## Add Multimodal Capability to your chatbot
You may want to add multimodal capability to your chatbot. For example, you may want users to be able to easily upload images or files to your chatbot and ask questions about them. You can make your chatbot "multimodal" by using the `gr.MultimodalTextbox` component for input, or — if you are using `gr.ChatInterface` — simply by passing a single parameter (`multimodal=True`).
```python
import gradio as gr
chat_input = gr.MultimodalTextbox(file_types=["image"], placeholder="Enter message or upload file...")
```
`gr.ChatInterface` also supports multimodality, simply pass in the `multimodal` parameter as `True`:
```python
import gradio as gr
import time
def echo(message, history):
    t = message["text"]
    for i in range(len(t)):
        time.sleep(0.5)
        yield t[:i+1]
demo = gr.ChatInterface(fn=echo, examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}], title="Echo Bot", multimodal=True)
demo.launch()
```
When `multimodal=True`, the first parameter of your function should receive a dictionary consisting of the submitted text and uploaded files that looks like this: `{"text": "user input", "files": ["file_path1", "file_path2", ...]}`.
## Additional Inputs
You may want to add additional parameters to your chatbot and expose them to your users through the Chatbot UI. For example, suppose you want to add a textbox for a system prompt, or a slider that sets the number of tokens in the chatbot's response. The `ChatInterface` class supports an `additional_inputs` parameter which can be used to add additional input components.

View File

@ -91,15 +91,18 @@ def bot(history):
return history
```
In addition, it can handle media files, such as images, audio, and video. To pass in a media file, we must pass in the file as a tuple of two strings, like this: `(filepath, alt_text)`. The `alt_text` is optional, so you can also just pass in a tuple with a single element `(filepath,)`, like this:
In addition, it can handle media files, such as images, audio, and video. You can use the `MultimodalTextbox` component to easily upload all types of media files to your chatbot. To pass in a media file, we must pass in the file as a tuple of two strings, like this: `(filepath, alt_text)`. The `alt_text` is optional, so you can also just pass in a tuple with a single element `(filepath,)`, like this:
```python
def add_file(history, file):
history = history + [((file.name,), None)]
return history
def add_message(history, message):
for x in message["files"]:
history.append(((x["path"],), None))
if message["text"] is not None:
history.append((message["text"], None))
return history, gr.MultimodalTextbox(value=None, interactive=False, file_types=["image"])
```
Putting this together, we can create a _multimodal_ chatbot with a textbox for a user to submit text and an file upload button to submit images / audio / video files. The rest of the code looks pretty much the same as before:
Putting this together, we can create a _multimodal_ chatbot with a multimodal textbox for a user to submit text and media files. The rest of the code looks pretty much the same as before:
$code_chatbot_multimodal
$demo_chatbot_multimodal

View File

@ -55,6 +55,7 @@
"@gradio/label": "workspace:^",
"@gradio/markdown": "workspace:^",
"@gradio/model3d": "workspace:^",
"@gradio/multimodaltextbox": "workspace:^",
"@gradio/number": "workspace:^",
"@gradio/paramviewer": "workspace:^",
"@gradio/plot": "workspace:^",

View File

@ -24,9 +24,10 @@ test("images uploaded by a user should be shown in the chat", async ({
page
}) => {
const fileChooserPromise = page.waitForEvent("filechooser");
await page.getByRole("button", { name: "📁" }).click();
await page.getByRole("button", { name: "+", exact: true }).click();
const fileChooser = await fileChooserPromise;
await fileChooser.setFiles("./test/files/cheetah1.jpg");
await page.getByTestId("textbox").click();
await page.keyboard.press("Enter");
const user_message = await page.getByTestId("user").first().getByRole("img");
@ -45,9 +46,10 @@ test("audio uploaded by a user should be shown in the chatbot", async ({
page
}) => {
const fileChooserPromise = page.waitForEvent("filechooser");
await page.getByRole("button", { name: "📁" }).click();
await page.getByRole("button", { name: "+" }).click();
const fileChooser = await fileChooserPromise;
await fileChooser.setFiles("../../test/test_files/audio_sample.wav");
await page.getByTestId("textbox").click();
await page.keyboard.press("Enter");
const user_message = await page.getByTestId("user").first().locator("audio");
@ -65,9 +67,10 @@ test("videos uploaded by a user should be shown in the chatbot", async ({
page
}) => {
const fileChooserPromise = page.waitForEvent("filechooser");
await page.getByRole("button", { name: "📁" }).click();
await page.getByRole("button", { name: "+" }).click();
const fileChooser = await fileChooserPromise;
await fileChooser.setFiles("../../test/test_files/video_sample.mp4");
await page.getByTestId("textbox").click();
await page.keyboard.press("Enter");
const user_message = await page.getByTestId("user").first().locator("video");

View File

@ -40,7 +40,7 @@ test("chatinterface works with streaming functions and all buttons behave as exp
await retry_button.click();
const expected_text_el_2 = page.locator(".bot p", {
hasText: "Run 3 - You typed: hello"
hasText: ""
});
await expect(expected_text_el_2).toBeVisible();

View File

@ -0,0 +1,72 @@
<script lang="ts">
	import { onMount } from "svelte";
	import { Image } from "@gradio/image/shared";
	import type { FileData } from "@gradio/client";

	/** Example value: the text plus any attached files to preview. */
	export let value: { text: string; files: FileData[] } = {
		text: "",
		files: []
	};
	/** Where the example is rendered: a gallery card or a table cell. */
	export let type: "gallery" | "table";
	/** Whether this example is the currently selected one. */
	export let selected = false;

	let size: number;
	let el: HTMLDivElement;

	/**
	 * Constrain the text width to the measured container width (capped at
	 * 200px) and allow wrapping. Fix: operate on the `element` argument
	 * rather than the captured `el`, so the helper does what its signature
	 * promises for any element passed in.
	 */
	function set_styles(element: HTMLElement, el_width: number): void {
		if (!element || !el_width) return;
		element.style.setProperty(
			"--local-text-width",
			`${el_width < 150 ? el_width : 200}px`
		);
		element.style.whiteSpace = "unset";
	}

	onMount(() => {
		set_styles(el, size);
	});
</script>

<div
	bind:clientWidth={size}
	bind:this={el}
	class:table={type === "table"}
	class:gallery={type === "gallery"}
	class:selected
>
	<p>{value.text ? value.text : ""}</p>
	{#each value.files as file}
		{#if file.mime_type && file.mime_type.includes("image")}
			<Image src={file.url} alt="" />
		{:else}
			{file.path}
		{/if}
	{/each}
</div>

<style>
	.gallery {
		padding: var(--size-1) var(--size-2);
		display: flex;
		align-items: center;
		gap: 20px;
		overflow-x: auto;
	}
	div {
		overflow: hidden;
		min-width: var(--local-text-width);
		white-space: nowrap;
	}
	/* Scope to this component's images: a bare `:global(img)` rule would
	   leak the 100px sizing to every image on the page. */
	div :global(img) {
		width: 100px;
		height: 100px;
	}
	div > :global(p) {
		font-size: var(--text-lg);
		white-space: normal;
	}
</style>

View File

@ -0,0 +1,94 @@
<svelte:options accessors={true} />
<!--
  Top-level Gradio wrapper for MultimodalTextbox: renders the shared
  textbox inside a Block, shows loading status, and re-dispatches the
  textbox's events through the Gradio event bridge.
-->
<script context="module" lang="ts">
// Re-export the bare building blocks for consumers outside the app shell.
export { default as BaseMultimodalTextbox } from "./shared/MultimodalTextbox.svelte";
export { default as BaseExample } from "./Example.svelte";
</script>
<script lang="ts">
import type { Gradio, SelectData } from "@gradio/utils";
import MultimodalTextbox from "./shared/MultimodalTextbox.svelte";
import { Block } from "@gradio/atoms";
import { StatusTracker } from "@gradio/statustracker";
import type { LoadingStatus } from "@gradio/statustracker";
import type { FileData } from "@gradio/client";
// Gradio event bridge; the type parameter enumerates every event this
// component may dispatch and its payload type.
export let gradio: Gradio<{
change: typeof value;
submit: never;
blur: never;
select: SelectData;
input: never;
focus: never;
}>;
export let elem_id = "";
export let elem_classes: string[] = [];
export let visible = true;
// Component value: free-form text plus the files attached by the user.
export let value: { text: string; files: FileData[] } = {
text: "",
files: []
};
// Accepted upload types (extensions or MIME families); null = unrestricted.
export let file_types: string[] | null = null;
export let lines: number;
export let placeholder = "";
export let label = "MultimodalTextbox";
export let info: string | undefined = undefined;
export let show_label: boolean;
export let max_lines: number;
export let container = true;
export let scale: number | null = null;
export let min_width: number | undefined = undefined;
// Label rendered on the submit button.
export let submit_btn = "⌲";
export let loading_status: LoadingStatus | undefined = undefined;
// True when the value came from the backend rather than user typing;
// the child component uses this to suppress the "input" event.
export let value_is_output = false;
export let rtl = false;
export let text_align: "left" | "right" | undefined = undefined;
export let autofocus = false;
export let autoscroll = true;
// When false, the textbox is rendered disabled.
export let interactive: boolean;
// Root URL the upload component uses to resolve file routes.
export let root: string;
</script>
<Block
{visible}
{elem_id}
{elem_classes}
{scale}
{min_width}
allow_overflow={false}
padding={container}
>
{#if loading_status}
<StatusTracker
autoscroll={gradio.autoscroll}
i18n={gradio.i18n}
{...loading_status}
/>
{/if}
<!-- Forward every child event to the Gradio app; when max_lines is unset
     (0/undefined) default it to one more than `lines`. -->
<MultimodalTextbox
bind:value
bind:value_is_output
{file_types}
{root}
{label}
{info}
{show_label}
{lines}
{rtl}
{text_align}
max_lines={!max_lines ? lines + 1 : max_lines}
{placeholder}
{submit_btn}
{autofocus}
{container}
{autoscroll}
on:change={() => gradio.dispatch("change", value)}
on:input={() => gradio.dispatch("input")}
on:submit={() => gradio.dispatch("submit")}
on:blur={() => gradio.dispatch("blur")}
on:select={(e) => gradio.dispatch("select", e.detail)}
on:focus={() => gradio.dispatch("focus")}
disabled={!interactive}
/>
</Block>

View File

@ -0,0 +1,75 @@
<!-- Storybook stories for the MultimodalTextbox Gradio component. -->
<script>
import { Meta, Template, Story } from "@storybook/addon-svelte-csf";
import MultimodalTextbox from "./Index.svelte";
</script>
<!-- Controls exposed in the Storybook UI for interactive tweaking. -->
<Meta
title="Components/MultimodalTextbox"
component={MultimodalTextbox}
argTypes={{
label: {
control: "text",
description: "The textbox label",
name: "label"
},
show_label: {
options: [true, false],
description: "Whether to show the label",
control: { type: "boolean" },
defaultValue: true
},
text_align: {
options: ["left", "right"],
description: "Whether to align the text left or right",
control: { type: "select" },
defaultValue: "left"
},
lines: {
options: [1, 5, 10, 20],
description: "The number of lines to display in the textbox",
control: { type: "select" },
defaultValue: 1
},
max_lines: {
options: [1, 5, 10, 20],
description:
"The maximum number of lines to allow users to type in the textbox",
control: { type: "select" },
defaultValue: 1
},
rtl: {
options: [true, false],
description: "Whether to render right-to-left",
control: { type: "boolean" },
defaultValue: false
}
}}
/>
<!-- Shared template: each story spreads its args onto the component. -->
<Template let:args>
<MultimodalTextbox {...args} />
</Template>
<!-- Pre-populated value with both text and an attached image file. -->
<Story
name="MultimodalTextbox with file and label"
args={{
value: {
text: "sample text",
files: [
{
path: "https://gradio-builds.s3.amazonaws.com/demo-files/ghepardo-primo-piano.jpg",
url: "https://gradio-builds.s3.amazonaws.com/demo-files/ghepardo-primo-piano.jpg",
orig_name: "cheetah.jpg"
}
]
},
label: "My simple label",
show_label: true
}}
/>
<Story
name="MultimodalTextbox with 5 lines and max 5 lines"
args={{ lines: 5, max_lines: 5 }}
/>
<Story name="Right aligned textbox" args={{ text_align: "right" }} />
<Story name="RTL textbox" args={{ rtl: true }} />

View File

@ -0,0 +1,69 @@
// Unit tests for the MultimodalTextbox Gradio component (vitest).
import { test, describe, assert, afterEach } from "vitest";
import { spy } from "tinyspy";
import { cleanup, fireEvent, render, get_text, wait } from "@gradio/tootils";
import event from "@testing-library/user-event";
import MultimodalTextbox from "./Index.svelte";
import type { LoadingStatus } from "@gradio/statustracker";
// Shared loading-status fixture passed to every render.
const loading_status: LoadingStatus = {
eta: 0,
queue_position: 1,
queue_size: 1,
status: "complete" as LoadingStatus["status"],
scroll_to_output: false,
visible: true,
fn_index: 0,
show_progress: "full"
};
describe("MultimodalTextbox", () => {
// Unmount rendered components between tests to avoid cross-test leakage.
afterEach(() => cleanup());
test("renders provided value", async () => {
const { getByDisplayValue } = await render(MultimodalTextbox, {
show_label: true,
max_lines: 1,
loading_status,
lines: 1,
value: { text: "hello world", files: [] },
label: "Textbox",
interactive: false,
root: ""
});
const item: HTMLInputElement = getByDisplayValue(
"hello world"
) as HTMLInputElement;
assert.equal(item.value, "hello world");
});
test("changing the text should update the value", async () => {
const { component, getByDisplayValue, listen } = await render(
MultimodalTextbox,
{
show_label: true,
max_lines: 10,
loading_status,
lines: 1,
value: { text: "hi ", files: [] },
label: "MultimodalTextbox",
interactive: true,
root: ""
}
);
// NOTE(review): the rendered value is "hi " (trailing space) — this
// lookup relies on testing-library's whitespace normalization.
const item: HTMLInputElement = getByDisplayValue("hi") as HTMLInputElement;
const mock = listen("change");
item.focus();
await event.keyboard("some text");
assert.equal(item.value, "hi some text");
assert.equal(component.value.text, "hi some text");
// One "change" event per typed character ("some text" = 9 characters).
assert.equal(mock.callCount, 9);
assert.equal(mock.calls[8][0].detail.data.text, "hi some text");
assert.equal(mock.calls[8][0].detail.data.files.length, 0);
});
});

View File

@ -0,0 +1,27 @@
<!-- Storybook stories for the MultimodalTextbox example (gallery/table) view. -->
<script>
import { Meta, Template, Story } from "@storybook/addon-svelte-csf";
import MultimodalTextbox from "./Example.svelte";
</script>
<Meta
title="Components/MultimodalTextbox/Example"
component={MultimodalTextbox}
/>
<!-- Shared template: each story spreads its args onto the component. -->
<Template let:args>
<MultimodalTextbox {...args} />
</Template>
<Story
name="Text value"
args={{
value: { text: "the quick brown fox", files: [] }
}}
/>
<Story
name="Empty Value"
args={{
value: { text: "", files: [] }
}}
/>

View File

@ -0,0 +1,34 @@
# `@gradio/multimodaltextbox`
```html
<script>
import { BaseMultimodalTextbox, BaseExample } from "@gradio/multimodaltextbox";
</script>
```
BaseMultimodalTextbox
```javascript
export let value: { text: string; files: FileData[] } = { text: "", files: [] };
export let value_is_output = false;
export let lines = 1;
export let placeholder = "Type here...";
export let disabled = false;
export let label: string;
export let info: string | undefined = undefined;
export let show_label = true;
export let container = true;
export let max_lines: number;
export let submit_btn = "⌲";
export let rtl = false;
export let autofocus = false;
export let text_align: "left" | "right" | undefined = undefined;
export let autoscroll = true;
export let root: string;
export let file_types: string[] | null = null;
```
BaseExample
```javascript
export let value: { text: string; files: FileData[] };
export let type: "gallery" | "table";
export let selected = false;
```

View File

@ -0,0 +1,25 @@
{
"name": "@gradio/multimodaltextbox",
"version": "0.1.0",
"description": "Multimodal textbox (text + file upload) component for Gradio",
"type": "module",
"author": "",
"license": "ISC",
"private": false,
"main_changeset": true,
"main": "Index.svelte",
"exports": {
".": "./Index.svelte",
"./example": "./Example.svelte",
"./package.json": "./package.json"
},
"dependencies": {
"@gradio/atoms": "workspace:^",
"@gradio/icons": "workspace:^",
"@gradio/statustracker": "workspace:^",
"@gradio/utils": "workspace:^",
"@gradio/upload": "workspace:^",
"@gradio/image": "workspace:^",
"@gradio/client": "workspace:^"
}
}

View File

@ -0,0 +1,421 @@
<script lang="ts">
import {
beforeUpdate,
afterUpdate,
createEventDispatcher,
tick
} from "svelte";
import { text_area_resize, resize } from "../shared/utils";
import { BlockTitle } from "@gradio/atoms";
import { Upload } from "@gradio/upload";
import { Image } from "@gradio/image/shared";
import type { FileData } from "@gradio/client";
import { Clear, File, Music, Video } from "@gradio/icons";
import type { SelectData } from "@gradio/utils";
// Component value: free text plus the list of files the user attached.
export let value: { text: string; files: FileData[] } = {
text: "",
files: []
};
// True when the value came from the backend (suppresses "input" events).
export let value_is_output = false;
export let lines = 1;
export let placeholder = "Type here...";
export let disabled = false;
export let label: string;
export let info: string | undefined = undefined;
export let show_label = true;
export let container = true;
export let max_lines: number;
// Label rendered on the submit button; falsy hides the button.
export let submit_btn = "⌲";
export let rtl = false;
export let autofocus = false;
export let text_align: "left" | "right" | undefined = undefined;
// When true, keep the view pinned to the bottom as content grows
// (unless the user has scrolled up).
export let autoscroll = true;
export let root: string;
// Accepted upload types (extensions or MIME families); null = unrestricted.
export let file_types: string[] | null = null;
let el: HTMLTextAreaElement | HTMLInputElement;
let can_scroll: boolean;
let previous_scroll_top = 0;
let user_has_scrolled_up = false;
let dragging = false;
// Last seen text, used to detect text edits in the reactive block below.
let oldValue = value.text;
$: dispatch("drag", dragging);
// Emit "change" whenever the text portion changes; file changes are
// dispatched from the upload/remove handlers instead.
$: if (oldValue !== value.text) {
dispatch("change", value);
oldValue = value.text;
}
// Build the <input accept="..."> string: extensions (".pdf") pass
// through, bare type families ("image") become MIME wildcards ("image/*").
let accept_file_types: string | null;
if (file_types == null) {
accept_file_types = null;
} else {
file_types = file_types.map((x) => {
if (x.startsWith(".")) {
return x;
}
return x + "/*";
});
accept_file_types = file_types.join(", ");
}
// Normalize a null value (e.g. cleared from the backend) to the empty shape.
$: if (value === null) value = { text: "", files: [] };
// Re-fit the textarea height whenever the value changes (skipped when the
// height is fixed because lines === max_lines).
$: value, el && lines !== max_lines && resize(el, lines, max_lines);
const dispatch = createEventDispatcher<{
change: typeof value;
submit: undefined;
blur: undefined;
select: SelectData;
input: undefined;
focus: undefined;
drag: boolean;
upload: FileData[] | FileData;
clear: undefined;
load: FileData[] | FileData;
error: string;
}>();
// Before the DOM updates, remember whether the view is near the bottom
// (within 100px) so we only auto-scroll when already following the end.
beforeUpdate(() => {
can_scroll = el && el.offsetHeight + el.scrollTop > el.scrollHeight - 100;
});
const scroll = (): void => {
if (can_scroll && autoscroll && !user_has_scrolled_up) {
el.scrollTo(0, el.scrollHeight);
}
};
// Emit "change"; "input" is only emitted for user-initiated edits.
async function handle_change(): Promise<void> {
dispatch("change", value);
if (!value_is_output) {
dispatch("input");
}
}
afterUpdate(() => {
if (autofocus && el !== null) {
el.focus();
}
if (can_scroll && autoscroll) {
scroll();
}
// Reset the backend-output flag so subsequent edits count as user input.
value_is_output = false;
});
// Report the user's text selection (selected substring plus its
// [start, end] character range) via a "select" event.
function handle_select(event: Event): void {
	const input = event.target as HTMLTextAreaElement | HTMLInputElement;
	const start = input.selectionStart as number;
	const end = input.selectionEnd as number;
	dispatch("select", {
		value: input.value.substring(start, end),
		index: [start, end]
	});
}
// Submit on Enter: plain Enter submits a single-line box (when a
// max_lines is in effect), Shift+Enter submits a multi-line box.
async function handle_keypress(e: KeyboardEvent): Promise<void> {
	await tick();
	if (e.key !== "Enter") return;
	const multiline_submit = e.shiftKey && lines > 1;
	const single_line_submit = !e.shiftKey && lines === 1 && max_lines >= 1;
	if (multiline_submit || single_line_submit) {
		e.preventDefault();
		dispatch("submit");
	}
}
// Track whether the user has scrolled away from the bottom: scrolling
// upward disables auto-follow; reaching the bottom re-enables it.
function handle_scroll(event: Event): void {
	const scrolled = event.target as HTMLElement;
	const scroll_top = scrolled.scrollTop;
	if (scroll_top < previous_scroll_top) {
		user_has_scrolled_up = true;
	}
	previous_scroll_top = scroll_top;
	const at_bottom =
		scroll_top >= scrolled.scrollHeight - scrolled.clientHeight;
	if (at_bottom) {
		user_has_scrolled_up = false;
	}
}
// Append newly uploaded file(s) to the value and notify listeners.
// Fixes: (1) the multi-file branch mutated value.files without
// reassigning `value`, so Svelte reactivity never fired for array
// uploads; (2) handle_change() was called BEFORE the mutation,
// dispatching a stale duplicate "change" event.
async function handle_upload({
	detail
}: CustomEvent<FileData | FileData[]>): Promise<void> {
	if (Array.isArray(detail)) {
		value.files.push(...detail);
	} else {
		value.files.push(detail);
	}
	// Reassign so Svelte notices the in-place array mutation.
	value = value;
	await tick();
	// Dispatch "change" (and "input" for user edits) with the updated value.
	handle_change();
	dispatch("upload", detail);
}
// Remove the file at `index` from the attached files and notify
// listeners. Fix: handle_change() was called BEFORE the splice, so the
// "change" event carried the pre-removal value; it now fires after.
function remove_thumbnail(event: MouseEvent, index: number): void {
	// Don't let the click bubble to the surrounding thumbnail/upload area.
	event.stopPropagation();
	value.files.splice(index, 1);
	// Reassign so Svelte notices the in-place array mutation.
	value = value;
	handle_change();
}
// Hidden <input type="file"> owned by the Upload child; the "+" button
// forwards its click here because Upload's own click handling is disabled.
let hidden_upload: HTMLInputElement;
function handle_upload_click(): void {
if (hidden_upload) {
hidden_upload.click();
}
}
// Submit-button handler: delegates to the "submit" event.
async function handle_submit(): Promise<void> {
dispatch("submit");
}
</script>
<!-- svelte-ignore a11y-autofocus -->
<label class:container>
<BlockTitle {show_label} {info}>{label}</BlockTitle>
<div class="input-container">
<!-- Upload wraps the whole input area so files can be dropped anywhere
     on it; direct clicking is disabled and delegated to the "+" button. -->
<Upload
on:load={handle_upload}
filetype={accept_file_types}
{root}
bind:dragging
disable_click={true}
bind:hidden_upload
>
{#if submit_btn}
<button class:disabled class="submit-button" on:click={handle_submit}
>{submit_btn}</button
>
{/if}
<button class:disabled class="plus-button" on:click={handle_upload_click}
>+</button
>
<!-- Thumbnails of attached files with per-file delete buttons.
     NOTE(review): a <button> nested inside another <button> is invalid
     HTML and can break click/keyboard handling in some browsers —
     consider making the outer element a <span>/<div>.
     NOTE(review): the inline display style is redundant with the
     surrounding {#if value.files.length > 0} guard. -->
{#if value.files.length > 0}
<div
class="thumbnails scroll-hide"
data-testid="container_el"
style="display: {value.files.length > 0 ? 'flex' : 'none'};"
>
{#each value.files as file, index}
<button class="thumbnail-item thumbnail-small">
<button
class:disabled
class="delete-button"
on:click={(event) => remove_thumbnail(event, index)}
><Clear /></button
>
{#if file.mime_type && file.mime_type.includes("image")}
<Image
src={file.url}
title={null}
alt=""
loading="lazy"
class={"thumbnail-image"}
/>
{:else if file.mime_type && file.mime_type.includes("audio")}
<Music />
{:else if file.mime_type && file.mime_type.includes("video")}
<Video />
{:else}
<File />
{/if}
</button>
{/each}
</div>
{/if}
<!-- The text input; auto-resized between `lines` and `max_lines` by the
     text_area_resize action. -->
<textarea
data-testid="textbox"
use:text_area_resize={{
text: value.text,
lines: lines,
max_lines: max_lines
}}
class="scroll-hide"
dir={rtl ? "rtl" : "ltr"}
bind:value={value.text}
bind:this={el}
{placeholder}
rows={lines}
{disabled}
{autofocus}
on:keypress={handle_keypress}
on:blur
on:select={handle_select}
on:focus
on:scroll={handle_scroll}
style={text_align ? "text-align: " + text_align : ""}
/>
</Upload>
</div>
</label>
<style>
.input-container {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
textarea {
align-self: flex-start;
outline: none !important;
background: var(--input-background-fill);
padding: var(--input-padding);
width: 90%;
max-height: 100%;
height: 25px;
color: var(--body-text-color);
font-weight: var(--input-text-weight);
font-size: var(--input-text-size);
line-height: var(--line-sm);
border: none;
margin-top: 0px;
margin-bottom: 0px;
margin-left: 30px;
padding-top: 12px;
}
textarea:disabled {
-webkit-text-fill-color: var(--body-text-color);
-webkit-opacity: 1;
opacity: 1;
width: 100%;
margin-left: 0px;
}
textarea::placeholder {
color: var(--input-placeholder-color);
}
.plus-button,
.submit-button {
position: absolute;
background: var(--button-secondary-background-fill);
color: var(--button-secondary-text-color);
border: none;
text-align: center;
text-decoration: none;
font-size: 20px;
cursor: pointer;
border-radius: 50%;
width: 30px;
height: 30px;
bottom: 15px;
}
.plus-button:hover,
.submit-button:hover {
background: var(--button-secondary-background-fill-hover);
}
.plus-button:active,
.submit-button:active {
box-shadow: var(--button-shadow-active);
}
.submit-button {
right: 10px;
margin-left: 5px;
padding-bottom: 5px;
padding-left: 2px;
}
.plus-button {
left: 10px;
margin-right: 5px;
}
.thumbnails :global(img) {
width: var(--size-full);
height: var(--size-full);
object-fit: cover;
border-radius: var(--radius-lg);
}
.thumbnails {
align-self: flex-start;
display: flex;
justify-content: left;
align-items: center;
gap: var(--spacing-lg);
}
.thumbnail-item {
display: flex;
justify-content: center;
align-items: center;
--ring-color: transparent;
position: relative;
box-shadow:
0 0 0 2px var(--ring-color),
var(--shadow-drop);
border: 1px solid var(--border-color-primary);
border-radius: var(--radius-lg);
background: var(--background-fill-secondary);
aspect-ratio: var(--ratio-square);
width: var(--size-full);
height: var(--size-full);
cursor: default;
}
.thumbnail-small {
flex: none;
transform: scale(0.9);
transition: 0.075s;
width: var(--size-12);
height: var(--size-12);
}
.thumbnail-item :global(svg) {
width: 30px;
height: 30px;
}
.delete-button {
display: flex;
justify-content: center;
align-items: center;
position: absolute;
right: -7px;
top: -7px;
color: var(--button-secondary-text-color);
background: var(--button-secondary-background-fill);
border: none;
text-align: center;
text-decoration: none;
font-size: 10px;
cursor: pointer;
border-radius: 50%;
width: 20px;
height: 20px;
}
.disabled {
display: none;
}
.delete-button :global(svg) {
width: 12px;
height: 12px;
}
.delete-button:hover {
filter: brightness(1.2);
border: 0.8px solid var(--color-grey-500);
}
</style>

View File

@ -0,0 +1,58 @@
import { tick } from "svelte";
interface Value {
lines: number;
max_lines: number;
text: string;
}
/**
 * Auto-size a textarea/input between `lines` and `max_lines` rows.
 *
 * Heights are computed as 21px per row plus one extra row of padding
 * (21 * (n + 1)). An undefined `max_lines` means no upper bound.
 * No-op when `lines === max_lines` (the height is fixed).
 *
 * Fix: the original ternary tested `max_lines === undefined` twice,
 * leaving a dead `21 * 11` default branch that could never be taken;
 * the unreachable branch has been removed without changing behavior.
 */
export async function resize(
	target: HTMLTextAreaElement | HTMLInputElement,
	lines: number,
	max_lines: number
): Promise<void> {
	// Wait for pending DOM updates so scrollHeight reflects the new text.
	await tick();
	if (lines === max_lines) return;

	// Upper bound in px, or `false` for "no limit".
	let max = max_lines === undefined ? false : 21 * (max_lines + 1);
	let min = 21 * (lines + 1);

	// Collapse first so scrollHeight reports the content's natural height.
	target.style.height = "1px";

	let scroll_height;
	if (max && target.scrollHeight > max) {
		scroll_height = max;
	} else if (target.scrollHeight < min) {
		scroll_height = min;
	} else {
		scroll_height = target.scrollHeight;
	}

	target.style.height = `${scroll_height}px`;
}
/**
 * Svelte action: keep a textarea auto-sized as the user types.
 *
 * Fix: the original `destroy` passed a NEW arrow function to
 * removeEventListener, which never matches the listener added with
 * addEventListener, so the handler was never removed (leak). It also
 * returned no `destroy` at all when the initial text was blank. Both
 * paths now share one named handler and always return a cleanup.
 */
export function text_area_resize(
	_el: HTMLTextAreaElement,
	_value: Value
): any | undefined {
	// Fixed height: nothing to manage.
	if (_value.lines === _value.max_lines) return;

	_el.style.overflowY = "scroll";

	// Single handler reference so add/remove operate on the same function.
	const on_input = (event: Event): void => {
		resize(event.target as HTMLTextAreaElement, _value.lines, _value.max_lines);
	};
	_el.addEventListener("input", on_input);

	// Size once up-front only when there is initial text to fit.
	if (_value.text.trim()) {
		resize(_el, _value.lines, _value.max_lines);
	}

	return {
		destroy: () => _el.removeEventListener("input", on_input)
	};
}

View File

@ -16,6 +16,7 @@
export let hidden = false;
export let format: "blob" | "file" = "file";
export let uploading = false;
export let hidden_upload: HTMLInputElement | null = null;
let upload_id: string;
let file_data: FileData[];
@ -24,7 +25,6 @@
// Needed for wasm support
const upload_fn = getContext<typeof upload_files>("upload_files");
let hidden_upload: HTMLInputElement;
const dispatch = createEventDispatcher();
const validFileTypes = ["image", "video", "audio", "text", "file"];
const processFileType = (type: string): string => {
@ -70,8 +70,10 @@
export function open_file_upload(): void {
if (disable_click) return;
hidden_upload.value = "";
hidden_upload.click();
if (hidden_upload) {
hidden_upload.value = "";
hidden_upload.click();
}
}
async function handle_upload(
@ -92,7 +94,9 @@
if (!files.length) {
return;
}
let _files: File[] = files.map((f) => new File([f], f.name));
let _files: File[] = files.map(
(f) => new File([f], f.name, { type: f.type })
);
file_data = await prepare_files(_files);
return await handle_upload(file_data);
}
@ -191,6 +195,7 @@
class:center
class:boundedheight
class:flex
class:disable_click
style:height="100%"
tabindex={hidden ? -1 : 0}
on:drag|preventDefault|stopPropagation
@ -212,7 +217,7 @@
type="file"
bind:this={hidden_upload}
on:change={load_files_from_upload}
accept={accept_file_types}
accept={accept_file_types || undefined}
multiple={file_count === "multiple" || undefined}
webkitdirectory={file_count === "directory" || undefined}
mozdirectory={file_count === "directory" || undefined}
@ -240,9 +245,13 @@
}
.flex {
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
.disable_click {
cursor: default;
}
input {
display: none;

27
pnpm-lock.yaml generated
View File

@ -453,6 +453,9 @@ importers:
'@gradio/model3d':
specifier: workspace:^
version: link:../model3D
'@gradio/multimodaltextbox':
specifier: workspace:^
version: link:../multimodaltextbox
'@gradio/number':
specifier: workspace:^
version: link:../number
@ -1193,6 +1196,30 @@ importers:
specifier: ^1.0.5
version: 1.0.5
js/multimodaltextbox:
dependencies:
'@gradio/atoms':
specifier: workspace:^
version: link:../atoms
'@gradio/client':
specifier: workspace:^
version: link:../../client/js
'@gradio/icons':
specifier: workspace:^
version: link:../icons
'@gradio/image':
specifier: workspace:^
version: link:../image
'@gradio/statustracker':
specifier: workspace:^
version: link:../statustracker
'@gradio/upload':
specifier: workspace:^
version: link:../upload
'@gradio/utils':
specifier: workspace:^
version: link:../utils
js/number:
dependencies:
'@gradio/atoms':