解决方案
把 reasoning_content 中的内容移到 content 中,并在推理(reasoning)阶段手动加上 `<think>` 标签。具体做法是:截获第三方 API 返回的 stream,修改其中的内容,再移交给 Open WebUI 处理。
在 backend\open_webui\routers\openai.py 中找到 generate_chat_completion 函数。
在
r = None
session = None
streaming = False
response = None
下方增加截获函数。这个方案适用于火山引擎、百炼;vLLM 我没有试过,但思路是一样的:根据 vLLM 返回的 chunk 内容,把 reasoning 部分拼接到 content 中,就能正常显示了。
async def modify_stream_content(original_stream):
    """Intercept an OpenAI-compatible SSE byte stream and merge each delta's
    ``reasoning_content`` into ``content``, wrapped in ``<think>``/``</think>``
    tags so Open WebUI renders the reasoning inline.

    Args:
        original_stream: async iterable of raw SSE byte chunks, each shaped
            like ``b"data: {...json...}\\n\\n"``.

    Yields:
        bytes: re-encoded SSE chunks with reasoning merged into ``content``.
        Chunks that are not JSON (e.g. ``data: [DONE]``) pass through
        unmodified.
    """
    # True once we have emitted the opening <think> tag and not yet closed it.
    in_reasoning = False

    async for chunk in original_stream:
        try:
            # Strip the SSE "data: " prefix. Use removeprefix, NOT
            # lstrip('data: '): lstrip strips a *character set* ('d','a','t',
            # ' ',':') from the left and can eat leading payload bytes.
            decoded = chunk.decode("utf-8").strip()
            decoded = decoded.removeprefix("data: ").strip()
            payload = json.loads(decoded)

            choices = payload.get("choices")
            if choices:
                # .get() avoids the KeyError the original hit on role-only
                # deltas that carry no "content" key at all.
                delta = choices[0].get("delta", {})
                content = delta.get("content") or ""
                reasoning = delta.get("reasoning_content") or ""

                if reasoning and not content:
                    # Reasoning phase: open the tag exactly once, then keep
                    # appending reasoning text as plain content.
                    opener = "" if in_reasoning else "<think>"
                    in_reasoning = True
                    delta["content"] = opener + reasoning
                elif in_reasoning:
                    # First real content after reasoning: close the tag.
                    # Only close if we actually opened one — the original
                    # prepended "</think>" even for non-reasoning streams.
                    in_reasoning = False
                    delta["content"] = "</think>" + content
                else:
                    # Normal content chunk (also normalizes None -> "").
                    delta["content"] = content

            modified_chunk = f"data: {json.dumps(payload)}\n\n".encode("utf-8")
        except (json.JSONDecodeError, KeyError, UnicodeDecodeError):
            # Non-JSON events such as "data: [DONE]" — forward untouched.
            modified_chunk = chunk
        yield modified_chunk
在下方的 if "text/event-stream" in r.headers.get("Content-Type", ""): 分支里,调用截获函数并返回。
# Check if response is SSE
if "text/event-stream" in r.headers.get("Content-Type", ""):
    streaming = True
    # Wrap the upstream body in the interception generator so that
    # reasoning_content is merged into content before Open WebUI sees it.
    modified_stream = modify_stream_content(r.content)
    return StreamingResponse(
        # r.content,  # original argument, replaced by the intercepted stream
        modified_stream,
        status_code=r.status,
        headers=dict(r.headers),
        # Close the upstream response and session once the stream finishes
        # or the client disconnects.
        background=BackgroundTask(
            cleanup_response, response=r, session=session
        ),
    )