Skip to content

Commit fd4e295

Browse files
committed
fix(qwen): use DashScope native image format for Qwen vision models
DashScope's OpenAI-compatible endpoint rejects the standard `image_url` content part type with 'Unexpected item type in content' for Qwen vision models. Convert to DashScope native format (`type: image` with direct data URI string) when the provider or baseUrl indicates a DashScope endpoint. Detection: provider includes 'dashscope', provider is 'qwen' or 'qwen-dashscope', or baseUrl includes 'dashscope.aliyuncs.com'. Closes #92688
1 parent 7994880 commit fd4e295

1 file changed

Lines changed: 42 additions & 9 deletions

File tree

src/llm/providers/openai-completions.ts

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,22 @@ function isImageContentBlock(block: { type: string }): block is ImageContent {
8181
return block.type === "image";
8282
}
8383

84+
/**
 * Reports whether `model` should be treated as an Alibaba Cloud DashScope
 * endpoint, based on its provider name or base URL.
 *
 * Matching is case-insensitive. A model is treated as DashScope when:
 * - its provider is exactly `qwen`, or contains `dashscope`
 *   (which also covers `qwen-dashscope`), or
 * - its base URL points at a DashScope host under `aliyuncs.com`, including
 *   region-suffixed hosts such as `dashscope-intl.aliyuncs.com` and
 *   `dashscope-us.aliyuncs.com`.
 *
 * @param model - Model descriptor; a missing `provider` or `baseUrl` is
 *   treated as an empty string.
 * @returns `true` when the model looks like a DashScope endpoint.
 */
function isDashScopeEndpoint(model: Model<"openai-completions">): boolean {
  const provider = model.provider?.toLowerCase() ?? "";
  const baseUrl = model.baseUrl?.toLowerCase() ?? "";

  if (provider === "qwen" || provider.includes("dashscope")) {
    return true;
  }

  // Regional deployments insert a suffix between "dashscope" and
  // ".aliyuncs.com" (e.g. "-intl", "-us"), so a plain substring check for
  // "dashscope.aliyuncs.com" would miss them.
  return /dashscope(?:-[a-z0-9]+)*\.aliyuncs\.com/.test(baseUrl);
}
94+
95+
/**
 * Image content part in the DashScope-specific shape used by this file:
 * a `type: "image"` tag with the image carried directly as a string in
 * `image` (the converters here pass a base64 `data:` URI), instead of the
 * OpenAI-style `{ type: "image_url", image_url: { url } }` nesting.
 */
type DashScopeImageContentPart = { type: "image"; image: string };
99+
84100
export interface OpenAICompletionsOptions extends StreamOptions {
85101
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
86102
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -950,6 +966,7 @@ export function convertMessages(
950966
content: sanitizeSurrogates(msg.content),
951967
});
952968
} else {
969+
const useDashScopeFormat = isDashScopeEndpoint(model);
953970
const content: ChatCompletionContentPart[] = msg.content.map(
954971
(item): ChatCompletionContentPart => {
955972
if (item.type === "text") {
@@ -958,6 +975,12 @@ export function convertMessages(
958975
text: sanitizeSurrogates(item.text),
959976
} satisfies ChatCompletionContentPartText;
960977
}
978+
if (useDashScopeFormat) {
979+
return {
980+
type: "image",
981+
image: `data:${item.mimeType};base64,${item.data}`,
982+
} as unknown as ChatCompletionContentPart;
983+
}
961984
return {
962985
type: "image_url",
963986
image_url: {
@@ -1079,7 +1102,10 @@ export function convertMessages(
10791102
}
10801103
params.push(assistantMsg);
10811104
} else if (msg.role === "toolResult") {
1082-
const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
1105+
const useDashScopeFormat = isDashScopeEndpoint(model);
1106+
const imageBlocks: Array<
1107+
{ type: "image_url"; image_url: { url: string } } | { type: "image"; image: string }
1108+
> = [];
10831109
let j = i;
10841110

10851111
for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
@@ -1108,12 +1134,19 @@ export function convertMessages(
11081134
if (hasImages && model.input.includes("image")) {
11091135
for (const block of toolMsg.content) {
11101136
if (isImageContentBlock(block)) {
1111-
imageBlocks.push({
1112-
type: "image_url",
1113-
image_url: {
1114-
url: `data:${block.mimeType};base64,${block.data}`,
1115-
},
1116-
});
1137+
if (useDashScopeFormat) {
1138+
imageBlocks.push({
1139+
type: "image",
1140+
image: `data:${block.mimeType};base64,${block.data}`,
1141+
});
1142+
} else {
1143+
imageBlocks.push({
1144+
type: "image_url",
1145+
image_url: {
1146+
url: `data:${block.mimeType};base64,${block.data}`,
1147+
},
1148+
});
1149+
}
11171150
}
11181151
}
11191152
}
@@ -1137,8 +1170,8 @@ export function convertMessages(
11371170
text: "Attached image(s) from tool result:",
11381171
},
11391172
...imageBlocks,
1140-
],
1141-
});
1173+
] as ChatCompletionContentPart[],
1174+
} as ChatCompletionMessageParam);
11421175
lastRole = "user";
11431176
} else {
11441177
lastRole = "toolResult";

0 commit comments

Comments
 (0)