diff --git a/README.md b/README.md index fce62ba37..0c06b73f0 100644 --- a/README.md +++ b/README.md @@ -96,10 +96,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev** - [x] Artifacts: Easily preview, copy and share generated content/webpages through a separate window [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092) - [x] Plugins: support network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353) - [x] network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353) +- [x] Supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672) - [ ] local knowledge base ## What's New - +- 🚀 v2.15.8 Now supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672) - 🚀 v2.15.4 The Application supports using Tauri fetch LLM API, MORE SECURITY! [#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379) - 🚀 v2.15.0 Now supports Plugins! Read this: [NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins) - 🚀 v2.14.0 Now supports Artifacts & SD @@ -134,10 +135,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev** - [x] Artifacts: 通过独立窗口,轻松预览、复制和分享生成的内容/可交互网页 [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092) - [x] 插件机制,支持`联网搜索`、`计算器`、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353) - [x] 支持联网搜索、计算器、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353) + - [x] 支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672) - [ ] 本地知识库 ## 最新动态 - +- 🚀 v2.15.8 现在支持Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672) - 🚀 v2.15.4 客户端支持Tauri本地直接调用大模型API,更安全 - 🚀 v2.15.0 现在支持插件功能了!了解更多:[NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins) - 🚀 v2.14.0 现在支持 Artifacts & SD 了。 diff --git a/app/api/common.ts b/app/api/common.ts index b4c792d6f..495a12ccd 100644 --- a/app/api/common.ts +++ b/app/api/common.ts @@ -1,8 +1,8 @@ import { NextRequest, NextResponse } from "next/server"; import { getServerSideConfig } from "../config/server"; import { OPENAI_BASE_URL, ServiceProvider } from "../constant"; -import { isModelAvailableInServer } from "../utils/model"; import { cloudflareAIGatewayUrl } from "../utils/cloudflare"; +import { getModelProvider, isModelAvailableInServer } from "../utils/model"; const serverConfig = getServerSideConfig(); @@ -71,7 +71,7 @@ export async function requestOpenai(req: NextRequest) { .filter((v) => !!v && !v.startsWith("-") && v.includes(modelName)) .forEach((m) => { const [fullName, displayName] = m.split("="); - const [_, providerName] = fullName.split("@"); + const [_, providerName] = getModelProvider(fullName); if (providerName === "azure" && !displayName) { const [_, deployId] = (serverConfig?.azureUrl ?? "").split( "deployments/", diff --git a/app/api/proxy.ts b/app/api/proxy.ts index 731003aa1..b3e5e7b7b 100644 --- a/app/api/proxy.ts +++ b/app/api/proxy.ts @@ -1,4 +1,5 @@ import { NextRequest, NextResponse } from "next/server"; +import { getServerSideConfig } from "@/app/config/server"; export async function handle( req: NextRequest, @@ -9,6 +10,7 @@ export async function handle( if (req.method === "OPTIONS") { return NextResponse.json({ body: "OK" }, { status: 200 }); } + const serverConfig = getServerSideConfig(); // remove path params from searchParams req.nextUrl.searchParams.delete("path"); @@ -31,6 +33,18 @@ export async function handle( return true; }), ); + // if dalle3 use openai api key + const baseUrl = req.headers.get("x-base-url"); + if (baseUrl?.includes("api.openai.com")) { + if (!serverConfig.apiKey) { + return NextResponse.json( + { error: "OpenAI API key not configured" }, + { status: 500 }, + ); + } + headers.set("Authorization", `Bearer ${serverConfig.apiKey}`); + } + const controller = new AbortController(); const fetchOptions: RequestInit = { headers, diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index 6e893ed14..7c1588440 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -65,6 +65,7 @@ export interface RequestPayload { frequency_penalty: number; top_p: number; max_tokens?: number; + max_completion_tokens?: number; } export interface DalleRequestPayload { @@ -233,6 +234,11 @@ export class ChatGPTApi implements LLMApi { // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore. }; + // O1 使用 max_completion_tokens 控制token数 (https://platform.openai.com/docs/guides/reasoning#controlling-costs) + if (isO1) { + requestPayload["max_completion_tokens"] = modelConfig.max_tokens; + } + // add max_tokens to vision model if (visionModel) { requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000); diff --git a/app/components/auth.tsx b/app/components/auth.tsx index 539a52eec..5375bda3f 100644 --- a/app/components/auth.tsx +++ b/app/components/auth.tsx @@ -18,6 +18,8 @@ import { trackSettingsPageGuideToCPaymentClick, trackAuthorizationPageButtonToCPaymentClick, } from "../utils/auth-settings-events"; +import clsx from "clsx"; + const storage = safeLocalStorage(); export function AuthPage() { @@ -54,7 +56,7 @@ export function AuthPage() { onClick={() => navigate(Path.Home)} > -
(list: T[], startIndex: number, endIndex: number): T[] {
@@ -588,7 +589,7 @@ export function MaskPage() {
diff --git a/app/components/realtime-chat/index.ts b/app/components/realtime-chat/index.ts new file mode 100644 index 000000000..fdf090f41 --- /dev/null +++ b/app/components/realtime-chat/index.ts @@ -0,0 +1 @@ +export * from "./realtime-chat"; diff --git a/app/components/realtime-chat/realtime-chat.module.scss b/app/components/realtime-chat/realtime-chat.module.scss new file mode 100644 index 000000000..ef58bebb6 --- /dev/null +++ b/app/components/realtime-chat/realtime-chat.module.scss @@ -0,0 +1,74 @@ +.realtime-chat { + width: 100%; + justify-content: center; + align-items: center; + position: relative; + display: flex; + flex-direction: column; + height: 100%; + padding: 20px; + box-sizing: border-box; + .circle-mic { + width: 150px; + height: 150px; + border-radius: 50%; + background: linear-gradient(to bottom right, #a0d8ef, #f0f8ff); + display: flex; + justify-content: center; + align-items: center; + } + .icon-center { + font-size: 24px; + } + + .bottom-icons { + display: flex; + justify-content: space-between; + align-items: center; + width: 100%; + position: absolute; + bottom: 20px; + box-sizing: border-box; + padding: 0 20px; + } + + .icon-left, + .icon-right { + width: 46px; + height: 46px; + font-size: 36px; + background: var(--second); + border-radius: 50%; + padding: 2px; + display: flex; + justify-content: center; + align-items: center; + cursor: pointer; + &:hover { + opacity: 0.8; + } + } + + &.mobile { + display: none; + } +} + +.pulse { + animation: pulse 1.5s infinite; +} + +@keyframes pulse { + 0% { + transform: scale(1); + opacity: 0.7; + } + 50% { + transform: scale(1.1); + opacity: 1; + } + 100% { + transform: scale(1); + opacity: 0.7; + } +} diff --git a/app/components/realtime-chat/realtime-chat.tsx b/app/components/realtime-chat/realtime-chat.tsx new file mode 100644 index 000000000..faa36373a --- /dev/null +++ b/app/components/realtime-chat/realtime-chat.tsx @@ -0,0 +1,359 @@ +import VoiceIcon from "@/app/icons/voice.svg"; +import VoiceOffIcon from "@/app/icons/voice-off.svg"; +import PowerIcon from "@/app/icons/power.svg"; + +import styles from "./realtime-chat.module.scss"; +import clsx from "clsx"; + +import { useState, useRef, useEffect } from "react"; + +import { useChatStore, createMessage, useAppConfig } from "@/app/store"; + +import { IconButton } from "@/app/components/button"; + +import { + Modality, + RTClient, + RTInputAudioItem, + RTResponse, + TurnDetection, +} from "rt-client"; +import { AudioHandler } from "@/app/lib/audio"; +import { uploadImage } from "@/app/utils/chat"; +import { VoicePrint } from "@/app/components/voice-print"; + +interface RealtimeChatProps { + onClose?: () => void; + onStartVoice?: () => void; + onPausedVoice?: () => void; +} + +export function RealtimeChat({ + onClose, + onStartVoice, + onPausedVoice, +}: RealtimeChatProps) { + const chatStore = useChatStore(); + const session = chatStore.currentSession(); + const config = useAppConfig(); + const [status, setStatus] = useState(""); + const [isRecording, setIsRecording] = useState(false); + const [isConnected, setIsConnected] = useState(false); + const [isConnecting, setIsConnecting] = useState(false); + const [modality, setModality] = useState("audio"); + const [useVAD, setUseVAD] = useState(true); + const [frequencies, setFrequencies] = useState(); + + const clientRef = useRef (null); + const audioHandlerRef = useRef (null); + const initRef = useRef(false); + + const temperature = config.realtimeConfig.temperature; + const apiKey = config.realtimeConfig.apiKey; + const model = config.realtimeConfig.model; + const azure = config.realtimeConfig.provider === "Azure"; + const azureEndpoint = config.realtimeConfig.azure.endpoint; + const azureDeployment = config.realtimeConfig.azure.deployment; + const voice = config.realtimeConfig.voice; + + const handleConnect = async () => { + if (isConnecting) return; + if (!isConnected) { + try { + setIsConnecting(true); + clientRef.current = azure + ? new RTClient( + new URL(azureEndpoint), + { key: apiKey }, + { deployment: azureDeployment }, + ) + : new RTClient({ key: apiKey }, { model }); + const modalities: Modality[] = + modality === "audio" ? ["text", "audio"] : ["text"]; + const turnDetection: TurnDetection = useVAD + ? { type: "server_vad" } + : null; + await clientRef.current.configure({ + instructions: "", + voice, + input_audio_transcription: { model: "whisper-1" }, + turn_detection: turnDetection, + tools: [], + temperature, + modalities, + }); + startResponseListener(); + + setIsConnected(true); + // TODO + // try { + // const recentMessages = chatStore.getMessagesWithMemory(); + // for (const message of recentMessages) { + // const { role, content } = message; + // if (typeof content === "string") { + // await clientRef.current.sendItem({ + // type: "message", + // role: role as any, + // content: [ + // { + // type: (role === "assistant" ? "text" : "input_text") as any, + // text: content as string, + // }, + // ], + // }); + // } + // } + // // await clientRef.current.generateResponse(); + // } catch (error) { + // console.error("Set message failed:", error); + // } + } catch (error) { + console.error("Connection failed:", error); + setStatus("Connection failed"); + } finally { + setIsConnecting(false); + } + } else { + await disconnect(); + } + }; + + const disconnect = async () => { + if (clientRef.current) { + try { + await clientRef.current.close(); + clientRef.current = null; + setIsConnected(false); + } catch (error) { + console.error("Disconnect failed:", error); + } + } + }; + + const startResponseListener = async () => { + if (!clientRef.current) return; + + try { + for await (const serverEvent of clientRef.current.events()) { + if (serverEvent.type === "response") { + await handleResponse(serverEvent); + } else if (serverEvent.type === "input_audio") { + await handleInputAudio(serverEvent); + } + } + } catch (error) { + if (clientRef.current) { + console.error("Response iteration error:", error); + } + } + }; + + const handleResponse = async (response: RTResponse) => { + for await (const item of response) { + if (item.type === "message" && item.role === "assistant") { + const botMessage = createMessage({ + role: item.role, + content: "", + }); + // add bot message first + chatStore.updateTargetSession(session, (session) => { + session.messages = session.messages.concat([botMessage]); + }); + let hasAudio = false; + for await (const content of item) { + if (content.type === "text") { + for await (const text of content.textChunks()) { + botMessage.content += text; + } + } else if (content.type === "audio") { + const textTask = async () => { + for await (const text of content.transcriptChunks()) { + botMessage.content += text; + } + }; + const audioTask = async () => { + audioHandlerRef.current?.startStreamingPlayback(); + for await (const audio of content.audioChunks()) { + hasAudio = true; + audioHandlerRef.current?.playChunk(audio); + } + }; + await Promise.all([textTask(), audioTask()]); + } + // update message.content + chatStore.updateTargetSession(session, (session) => { + session.messages = session.messages.concat(); + }); + } + if (hasAudio) { + // upload audio get audio_url + const blob = audioHandlerRef.current?.savePlayFile(); + uploadImage(blob!).then((audio_url) => { + botMessage.audio_url = audio_url; + // update text and audio_url + chatStore.updateTargetSession(session, (session) => { + session.messages = session.messages.concat(); + }); + }); + } + } + } + }; + + const handleInputAudio = async (item: RTInputAudioItem) => { + await item.waitForCompletion(); + if (item.transcription) { + const userMessage = createMessage({ + role: "user", + content: item.transcription, + }); + chatStore.updateTargetSession(session, (session) => { + session.messages = session.messages.concat([userMessage]); + }); + // save input audio_url, and update session + const { audioStartMillis, audioEndMillis } = item; + // upload audio get audio_url + const blob = audioHandlerRef.current?.saveRecordFile( + audioStartMillis, + audioEndMillis, + ); + uploadImage(blob!).then((audio_url) => { + userMessage.audio_url = audio_url; + chatStore.updateTargetSession(session, (session) => { + session.messages = session.messages.concat(); + }); + }); + } + // stop streaming play after get input audio. + audioHandlerRef.current?.stopStreamingPlayback(); + }; + + const toggleRecording = async () => { + if (!isRecording && clientRef.current) { + try { + if (!audioHandlerRef.current) { + audioHandlerRef.current = new AudioHandler(); + await audioHandlerRef.current.initialize(); + } + await audioHandlerRef.current.startRecording(async (chunk) => { + await clientRef.current?.sendAudio(chunk); + }); + setIsRecording(true); + } catch (error) { + console.error("Failed to start recording:", error); + } + } else if (audioHandlerRef.current) { + try { + audioHandlerRef.current.stopRecording(); + if (!useVAD) { + const inputAudio = await clientRef.current?.commitAudio(); + await handleInputAudio(inputAudio!); + await clientRef.current?.generateResponse(); + } + setIsRecording(false); + } catch (error) { + console.error("Failed to stop recording:", error); + } + } + }; + + useEffect(() => { + // 防止重复初始化 + if (initRef.current) return; + initRef.current = true; + + const initAudioHandler = async () => { + const handler = new AudioHandler(); + await handler.initialize(); + audioHandlerRef.current = handler; + await handleConnect(); + await toggleRecording(); + }; + + initAudioHandler().catch((error) => { + setStatus(error); + console.error(error); + }); + + return () => { + if (isRecording) { + toggleRecording(); + } + audioHandlerRef.current?.close().catch(console.error); + disconnect(); + }; + }, []); + + useEffect(() => { + let animationFrameId: number; + + if (isConnected && isRecording) { + const animationFrame = () => { + if (audioHandlerRef.current) { + const freqData = audioHandlerRef.current.getByteFrequencyData(); + setFrequencies(freqData); + } + animationFrameId = requestAnimationFrame(animationFrame); + }; + + animationFrameId = requestAnimationFrame(animationFrame); + } else { + setFrequencies(undefined); + } + + return () => { + if (animationFrameId) { + cancelAnimationFrame(animationFrameId); + } + }; + }, [isConnected, isRecording]); + + // update session params + useEffect(() => { + clientRef.current?.configure({ voice }); + }, [voice]); + useEffect(() => { + clientRef.current?.configure({ temperature }); + }, [temperature]); + + const handleClose = async () => { + onClose?.(); + if (isRecording) { + await toggleRecording(); + } + disconnect().catch(console.error); + }; + + return ( + ++ ); +} diff --git a/app/components/realtime-chat/realtime-config.tsx b/app/components/realtime-chat/realtime-config.tsx new file mode 100644 index 000000000..08809afda --- /dev/null +++ b/app/components/realtime-chat/realtime-config.tsx @@ -0,0 +1,173 @@ +import { RealtimeConfig } from "@/app/store"; + +import Locale from "@/app/locales"; +import { ListItem, Select, PasswordInput } from "@/app/components/ui-lib"; + +import { InputRange } from "@/app/components/input-range"; +import { Voice } from "rt-client"; +import { ServiceProvider } from "@/app/constant"; + +const providers = [ServiceProvider.OpenAI, ServiceProvider.Azure]; + +const models = ["gpt-4o-realtime-preview-2024-10-01"]; + +const voice = ["alloy", "shimmer", "echo"]; + +export function RealtimeConfigList(props: { + realtimeConfig: RealtimeConfig; + updateConfig: (updater: (config: RealtimeConfig) => void) => void; +}) { + const azureConfigComponent = props.realtimeConfig.provider === + ServiceProvider.Azure && ( + <> +++ ++ ++++: } + onClick={toggleRecording} + disabled={!isConnected} + shadow + bordered + /> + {status}+++} + onClick={handleClose} + shadow + bordered + /> + + { + props.updateConfig( + (config) => (config.azure.endpoint = e.currentTarget.value), + ); + }} + /> + ++ { + props.updateConfig( + (config) => (config.azure.deployment = e.currentTarget.value), + ); + }} + /> + + > + ); + + return ( + <> ++ + props.updateConfig( + (config) => (config.enable = e.currentTarget.checked), + ) + } + > + + + {props.realtimeConfig.enable && ( + <> ++ + ++ + ++ + {azureConfigComponent} +{ + props.updateConfig( + (config) => (config.apiKey = e.currentTarget.value), + ); + }} + /> + + + ++ + > + )} + > + ); +} diff --git a/app/components/sd/sd-panel.tsx b/app/components/sd/sd-panel.tsx index a71e560dd..15aff0ab6 100644 --- a/app/components/sd/sd-panel.tsx +++ b/app/components/sd/sd-panel.tsx @@ -4,6 +4,7 @@ import { Select } from "@/app/components/ui-lib"; import { IconButton } from "@/app/components/button"; import Locale from "@/app/locales"; import { useSdStore } from "@/app/store/sd"; +import clsx from "clsx"; export const params = [ { @@ -136,7 +137,7 @@ export function ControlParamItem(props: { className?: string; }) { return ( -{ + props.updateConfig( + (config) => + (config.temperature = e.currentTarget.valueAsNumber), + ); + }} + > ++diff --git a/app/components/sd/sd.tsx b/app/components/sd/sd.tsx index 0ace62a83..1ccc0647e 100644 --- a/app/components/sd/sd.tsx +++ b/app/components/sd/sd.tsx @@ -36,6 +36,7 @@ import { removeImage } from "@/app/utils/chat"; import { SideBar } from "./sd-sidebar"; import { WindowContent } from "@/app/components/home"; import { params } from "./sd-panel"; +import clsx from "clsx"; function getSdTaskStatus(item: any) { let s: string; @@ -104,7 +105,7 @@ export function Sd() { return ( <> -+ @@ -121,7 +122,10 @@ export function Sd() {)}Stability AIdiff --git a/app/components/settings.tsx b/app/components/settings.tsx index e2666b551..ddbda1b73 100644 --- a/app/components/settings.tsx +++ b/app/components/settings.tsx @@ -85,6 +85,7 @@ import { nanoid } from "nanoid"; import { useMaskStore } from "../store/mask"; import { ProviderType } from "../utils/cloud"; import { TTSConfigList } from "./tts-config"; +import { RealtimeConfigList } from "./realtime-chat/realtime-config"; function EditPromptModal(props: { id: string; onClose: () => void }) { const promptStore = usePromptStore(); @@ -1799,7 +1800,18 @@ export function Settings() { {shouldShowPromptModal && (setShowPromptModal(false)} /> )} - + +
{ + const realtimeConfig = { ...config.realtimeConfig }; + updater(realtimeConfig); + config.update( + (config) => (config.realtimeConfig = realtimeConfig), + ); + }} + /> +
(await import("./chat-list")).ChatList, { loading: () => null, @@ -141,9 +142,9 @@ export function SideBarContainer(props: { const { children, className, onDragStart, shouldNarrow } = props; return ( {children} @@ -286,7 +287,7 @@ export function SideBar(props: { className?: string }) {-@@ -182,7 +183,7 @@ export function SideBarHeader(props: {{subTitle}{logo}+{logo}- +} onClick={async () => { diff --git a/app/components/ui-lib.tsx b/app/components/ui-lib.tsx index 4af37dbba..a64265235 100644 --- a/app/components/ui-lib.tsx +++ b/app/components/ui-lib.tsx @@ -23,6 +23,7 @@ import React, { useRef, } from "react"; import { IconButton } from "./button"; +import clsx from "clsx"; export function Popover(props: { children: JSX.Element; @@ -45,7 +46,7 @@ export function Popover(props: { export function Card(props: { children: JSX.Element[]; className?: string }) { return ( - {props.children}+{props.children}); } @@ -60,11 +61,13 @@ export function ListItem(props: { }) { return (@@ -135,9 +138,9 @@ export function Modal(props: ModalProps) { return ({props.title}@@ -260,7 +263,7 @@ export function Input(props: InputProps) { return ( ); } @@ -301,9 +304,13 @@ export function Select( const { className, children, align, ...otherProps } = props; return (