Mirror of https://github.com/gradio-app/gradio.git (synced 2025-03-31 12:20:26 +08:00)
gr.load_chat: Allow loading any openai-compatible server immediately as a ChatInterface (#10222)
* changes
* add changeset
* add changeset
* Update gradio/external.py
  Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
* changes
* changes
* Update guides/05_chatbots/01_creating-a-chatbot-fast.md
  Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
* changes

---------

Co-authored-by: Ali Abid <aliabid94@gmail.com>
Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
parent 64d1864f8f
commit 9c6d83d12e
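For orientation before the diffs: the commit adds `gr.load_chat`, which wraps any OpenAI-compatible chat endpoint in a `ChatInterface`. A minimal usage sketch, reusing the placeholder Ollama URL and model from the guide change below; the `system_message` and `streaming` keywords come from the new function signature:

```python
import gradio as gr

# Placeholder endpoint/model (a local Ollama server, as in the guide example below).
demo = gr.load_chat(
    "http://localhost:11434/v1/",
    model="llama3.2",
    token=None,  # supply an API key if your server requires one
    system_message="You are a helpful assistant.",  # optional, per the new signature
    streaming=True,  # stream partial responses into the chat window
)
demo.launch()
```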
.changeset/thick-dingos-help.md (new file, 5 lines)
@@ -0,0 +1,5 @@
+---
+"gradio": minor
+---
+
+feat:gr.load_chat: Allow loading any openai-compatible server immediately as a ChatInterface
@@ -77,7 +77,7 @@ from gradio.events import (
     on,
 )
 from gradio.exceptions import Error
-from gradio.external import load
+from gradio.external import load, load_chat
 from gradio.flagging import (
     CSVLogger,
     FlaggingCallback,
@@ -155,7 +155,10 @@ class ChatInterface(Blocks):
         self.type = type
         self.multimodal = multimodal
         self.concurrency_limit = concurrency_limit
-        self.fn = fn
+        if isinstance(fn, ChatInterface):
+            self.fn = fn.fn
+        else:
+            self.fn = fn
         self.is_async = inspect.iscoroutinefunction(
             self.fn
         ) or inspect.isasyncgenfunction(self.fn)
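This hunk (in `ChatInterface.__init__`, going by the class named in the hunk header) unwraps `fn` when it is itself a `ChatInterface`, so the interface returned by `gr.load_chat` can be handed back to `gr.ChatInterface` for further customization. A hedged sketch of that pattern; the endpoint, model, and UI options below are illustrative, not part of this commit:

```python
import gradio as gr

# gr.load_chat returns a ChatInterface; with the change above, passing it as `fn`
# reuses its underlying chat function rather than nesting interfaces.
base = gr.load_chat("http://localhost:11434/v1/", model="llama3.2", token=None)

demo = gr.ChatInterface(
    base,                # ChatInterface instance; its .fn is picked up internally
    type="messages",
    title="Local chat",  # ordinary ChatInterface options layered on top
    description="Backed by an OpenAI-compatible server via gr.load_chat.",
)
demo.launch()
```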
@@ -8,7 +8,7 @@ import os
 import re
 import tempfile
 import warnings
-from collections.abc import Callable
+from collections.abc import Callable, Generator
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal
 
@@ -30,6 +30,7 @@ from gradio.processing_utils import save_base64_to_cache, to_binary
 
 if TYPE_CHECKING:
     from gradio.blocks import Blocks
+    from gradio.chat_interface import ChatInterface
     from gradio.interface import Interface
 
 
@@ -581,3 +582,66 @@ def from_spaces_interface(
         kwargs["_api_mode"] = True
     interface = gradio.Interface(**kwargs)
     return interface
+
+
+@document()
+def load_chat(
+    base_url: str,
+    model: str,
+    token: str | None = None,
+    *,
+    system_message: str | None = None,
+    streaming: bool = True,
+) -> ChatInterface:
+    """
+    Load a chat interface from an OpenAI API chat compatible endpoint.
+    Parameters:
+        base_url: The base URL of the endpoint.
+        model: The model name.
+        token: The API token.
+        system_message: The system message for the conversation, if any.
+        streaming: Whether the response should be streamed.
+    """
+    try:
+        from openai import OpenAI
+    except ImportError as e:
+        raise ImportError(
+            "To use OpenAI API Client, you must install the `openai` package. You can install it with `pip install openai`."
+        ) from e
+    from gradio.chat_interface import ChatInterface
+
+    client = OpenAI(api_key=token, base_url=base_url)
+    start_message = (
+        [{"role": "system", "content": system_message}] if system_message else []
+    )
+
+    def open_api(message: str, history: list | None) -> str:
+        history = history or start_message
+        if len(history) > 0 and isinstance(history[0], (list, tuple)):
+            history = ChatInterface._tuples_to_messages(history)
+        return (
+            client.chat.completions.create(
+                model=model,
+                messages=history + [{"role": "user", "content": message}],
+            )
+            .choices[0]
+            .message.content
+        )
+
+    def open_api_stream(
+        message: str, history: list | None
+    ) -> Generator[str, None, None]:
+        history = history or start_message
+        if len(history) > 0 and isinstance(history[0], (list, tuple)):
+            history = ChatInterface._tuples_to_messages(history)
+        stream = client.chat.completions.create(
+            model=model,
+            messages=history + [{"role": "user", "content": message}],
+            stream=True,
+        )
+        response = ""
+        for chunk in stream:
+            response += chunk.choices[0].delta.content
+            yield response
+
+    return ChatInterface(open_api_stream if streaming else open_api, type="messages")
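To make the mechanics of the helper above concrete without gradio internals, here is a rough standalone equivalent of its streaming path, written against the public `openai` client. The endpoint, model, and `api_key` value are placeholders, the legacy tuple-history conversion (`ChatInterface._tuples_to_messages`) is omitted, and the empty-string guard on `delta.content` is an extra precaution not present in the committed code:

```python
from collections.abc import Iterator

import gradio as gr
from openai import OpenAI

# Placeholder endpoint/model; servers like Ollama accept any non-empty api_key.
client = OpenAI(api_key="not-needed", base_url="http://localhost:11434/v1/")

def chat(message: str, history: list) -> Iterator[str]:
    # With type="messages", history is already a list of {"role", "content"} dicts
    # in the shape the OpenAI chat-completions API expects.
    stream = client.chat.completions.create(
        model="llama3.2",
        messages=history + [{"role": "user", "content": message}],
        stream=True,
    )
    response = ""
    for chunk in stream:
        # Some chunks (e.g. the final one) may carry no text in delta.content.
        response += chunk.choices[0].delta.content or ""
        yield response

gr.ChatInterface(chat, type="messages").launch()
```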
@@ -14,6 +14,16 @@ This tutorial uses `gr.ChatInterface()`, which is a high-level abstraction that
 $ pip install --upgrade gradio
 ```
 
+## Quickly loading from Ollama or any OpenAI-API compatible endpoint
+
+If you have a chat server serving an OpenAI API compatible endpoint (skip ahead if you don't), you can spin up a ChatInterface in a single line. First, also run `pip install openai`. Then, with your own URL, model, and optional token:
+
+```python
+import gradio as gr
+
+gr.load_chat("http://localhost:11434/v1/", model="llama3.2", token=None).launch()
+```
+
 ## Defining a chat function
 
 When working with `gr.ChatInterface()`, the first thing you should do is define your **chat function**. In the simplest case, your chat function should accept two arguments: `message` and `history` (the arguments can be named anything, but must be in this order).
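The guide continues past this hunk; as a quick illustration of the chat-function contract it introduces (a `message` argument followed by `history`), a toy function might look like the following. The echo behavior is purely illustrative and not part of the commit:

```python
import gradio as gr

def echo(message, history):
    # `message` is the user's latest text; `history` is the prior conversation.
    # Whatever string is returned becomes the assistant's reply.
    return f"You said: {message}"

gr.ChatInterface(echo, type="messages").launch()
```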