Mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git (synced 2025-04-12 19:00:28 +08:00)
feat(alibaba): Added alibaba vision model and omni model support
This commit is contained in:
parent f5f3ce94f6
commit b709ee3983
@@ -40,6 +40,11 @@ export interface MultimodalContent {
   };
 }
 
+export interface MultimodalContentForAlibaba {
+  text?: string;
+  image?: string;
+}
+
 export interface RequestMessage {
   role: MessageRole;
   content: string | MultimodalContent[];
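The new interface (this hunk appears to be in app/client/api.ts) models DashScope-style multimodal parts, where each entry carries either a text or an image field instead of OpenAI's type/image_url shape. A minimal sketch of a content array built from it, with made-up values:

// Hypothetical example values; only the shape comes from the interface above.
const alibabaContent: MultimodalContentForAlibaba[] = [
  { text: "Describe this picture." },              // plain text part
  { image: "data:image/jpeg;base64,/9j/4AAQ..." }, // base64 data URL produced client-side
];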
@@ -7,7 +7,10 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import {
+  preProcessImageContentForAlibabaDashScope,
+  streamWithThink,
+} from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
@@ -15,12 +18,14 @@ import {
   LLMModel,
   SpeechOptions,
   MultimodalContent,
+  MultimodalContentForAlibaba,
 } from "../api";
 import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
   getTimeoutMSByModel,
+  isVisionModel,
 } from "@/app/utils";
 import { fetch } from "@/app/utils/stream";
@@ -89,14 +94,6 @@ export class QwenApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content:
-        v.role === "assistant"
-          ? getMessageTextContentWithoutThinking(v)
-          : getMessageTextContent(v),
-    }));
-
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
       ...useChatStore.getState().currentSession().mask.modelConfig,
@@ -105,6 +102,21 @@ export class QwenApi implements LLMApi {
       },
     };
 
+    const visionModel = isVisionModel(options.config.model);
+
+    const messages: ChatOptions["messages"] = [];
+    for (const v of options.messages) {
+      const content = (
+        visionModel
+          ? await preProcessImageContentForAlibabaDashScope(v.content)
+          : v.role === "assistant"
+            ? getMessageTextContentWithoutThinking(v)
+            : getMessageTextContent(v)
+      ) as any;
+
+      messages.push({ role: v.role, content });
+    }
+
     const shouldStream = !!options.config.stream;
     const requestPayload: RequestPayload = {
       model: modelConfig.model,
@@ -129,7 +141,7 @@ export class QwenApi implements LLMApi {
       "X-DashScope-SSE": shouldStream ? "enable" : "disable",
     };
 
-    const chatPath = this.path(Alibaba.ChatPath);
+    const chatPath = this.path(Alibaba.ChatPath(modelConfig.model));
     const chatPayload = {
       method: "POST",
       body: JSON.stringify(requestPayload),
@@ -162,7 +174,7 @@ export class QwenApi implements LLMApi {
             const json = JSON.parse(text);
             const choices = json.output.choices as Array<{
               message: {
-                content: string | null;
+                content: string | null | MultimodalContentForAlibaba[];
                 tool_calls: ChatMessageTool[];
                 reasoning_content: string | null;
              };
@@ -212,7 +224,9 @@ export class QwenApi implements LLMApi {
             } else if (content && content.length > 0) {
               return {
                 isThinking: false,
-                content: content,
+                content: Array.isArray(content)
+                  ? content.map((item) => item.text).join(",")
+                  : content,
               };
             }
 
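Multimodal responses come back as an array of parts rather than a string, so the stream handler now flattens arrays by joining each part's text. A tiny sketch of the behavior with invented sample data:

const content: string | MultimodalContentForAlibaba[] = [
  { text: "Hello" },
  { text: "world" },
];
const flattened = Array.isArray(content)
  ? content.map((item) => item.text).join(",") // "Hello,world"
  : content;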
@@ -221,7 +221,12 @@ export const ByteDance = {
 
 export const Alibaba = {
   ExampleEndpoint: ALIBABA_BASE_URL,
-  ChatPath: "v1/services/aigc/text-generation/generation",
+  ChatPath: (modelName: string) => {
+    if (modelName.includes("vl") || modelName.includes("omni")) {
+      return "v1/services/aigc/multimodal-generation/generation";
+    }
+    return `v1/services/aigc/text-generation/generation`;
+  },
 };
 
 export const Tencent = {
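ChatPath changes from a fixed string to a function of the model name, routing vision ("vl") and omni models to DashScope's multimodal-generation endpoint and everything else to text-generation. For example:

Alibaba.ChatPath("qwen-vl-plus");    // "v1/services/aigc/multimodal-generation/generation"
Alibaba.ChatPath("qwen-omni-turbo"); // "v1/services/aigc/multimodal-generation/generation"
Alibaba.ChatPath("qwen-max");        // "v1/services/aigc/text-generation/generation"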
@@ -568,6 +573,9 @@ const alibabaModes = [
   "qwen-max-0403",
   "qwen-max-0107",
   "qwen-max-longcontext",
+  "qwen-omni-turbo",
+  "qwen-vl-plus",
+  "qwen-vl-max",
 ];
 
 const tencentModels = [
@@ -92,6 +92,28 @@ export async function preProcessImageContent(
   return result;
 }
 
+export async function preProcessImageContentForAlibabaDashScope(
+  content: RequestMessage["content"],
+) {
+  if (typeof content === "string") {
+    return content;
+  }
+  const result = [];
+  for (const part of content) {
+    if (part?.type == "image_url" && part?.image_url?.url) {
+      try {
+        const url = await cacheImageToBase64Image(part?.image_url?.url);
+        result.push({ image: url });
+      } catch (error) {
+        console.error("Error processing image URL:", error);
+      }
+    } else {
+      result.push({ ...part });
+    }
+  }
+  return result;
+}
+
 const imageCaches: Record<string, string> = {};
 export function cacheImageToBase64Image(imageUrl: string) {
   if (imageUrl.includes(CACHE_URL_PREFIX)) {
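Unlike preProcessImageContent, which keeps OpenAI-style image_url parts, this variant rewrites image parts into DashScope's { image } shape and passes other parts through unchanged. A rough before/after sketch, assuming an async context; the URL and output values are made up:

const input: MultimodalContent[] = [
  { type: "text", text: "What is in this image?" },
  { type: "image_url", image_url: { url: "https://example.com/cat.png" } },
];
const output = await preProcessImageContentForAlibabaDashScope(input);
// output (roughly):
// [
//   { type: "text", text: "What is in this image?" }, // non-image parts pass through as-is
//   { image: "data:image/png;base64,..." },           // images become base64 data URLs via cacheImageToBase64Image
// ]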