feat: 添加物流查询功能和完善 token 传递

- 添加 get_logistics 工具查询 Mall API /mall/api/order/parcel
- 修复 Cookie token 传递到 MCP 的问题
- 增强 LLM 客户端超时处理和日志
- 移除 MALL_API_TOKEN,使用用户登录 token
- 更新测试页面使用 setUser 设置用户属性
- 增强 webhook 调试日志
Author: wangliang
Date: 2026-01-16 18:36:17 +08:00
Parent: cd787d608b
Commit: c4e97cf312
Changes: 9 changed files with 334 additions and 111 deletions

View File

@@ -1,7 +1,8 @@
"""
ZhipuAI LLM Client for B2B Shopping AI Assistant
"""
from typing import Any, AsyncGenerator, Optional
import concurrent.futures
from typing import Any, Optional
from dataclasses import dataclass
from zhipuai import ZhipuAI
@@ -29,23 +30,21 @@ class LLMResponse:
class ZhipuLLMClient:
"""ZhipuAI LLM Client wrapper"""
DEFAULT_TIMEOUT = 30 # seconds
def __init__(
self,
api_key: Optional[str] = None,
model: Optional[str] = None
model: Optional[str] = None,
timeout: Optional[int] = None
):
"""Initialize ZhipuAI client
Args:
api_key: ZhipuAI API key, defaults to settings
model: Model name, defaults to settings
"""
self.api_key = api_key or settings.zhipu_api_key
self.model = model or settings.zhipu_model
self.timeout = timeout or self.DEFAULT_TIMEOUT
self._client = ZhipuAI(api_key=self.api_key)
logger.info("ZhipuAI client initialized", model=self.model)
logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout)
async def chat(
self,
messages: list[Message],
@@ -54,32 +53,21 @@ class ZhipuLLMClient:
top_p: float = 0.9,
**kwargs: Any
) -> LLMResponse:
"""Send chat completion request
Args:
messages: List of chat messages
temperature: Sampling temperature
max_tokens: Maximum tokens to generate
top_p: Top-p sampling parameter
**kwargs: Additional parameters
Returns:
LLM response with content and metadata
"""
"""Send chat completion request"""
formatted_messages = [
{"role": msg.role, "content": msg.content}
for msg in messages
]
logger.debug(
logger.info(
"Sending chat request",
model=self.model,
message_count=len(messages),
temperature=temperature
)
try:
response = self._client.chat.completions.create(
def _make_request():
return self._client.chat.completions.create(
model=self.model,
messages=formatted_messages,
temperature=temperature,
@@ -87,10 +75,27 @@ class ZhipuLLMClient:
top_p=top_p,
**kwargs
)
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(_make_request)
response = future.result(timeout=self.timeout)
choice = response.choices[0]
result = LLMResponse(
content=choice.message.content,
content = choice.message.content
logger.info(
"Chat response received",
finish_reason=choice.finish_reason,
content_length=len(content) if content else 0,
usage=response.usage.__dict__ if hasattr(response, 'usage') else {}
)
if not content:
logger.warning("LLM returned empty content")
return LLMResponse(
content=content or "",
finish_reason=choice.finish_reason,
usage={
"prompt_tokens": response.usage.prompt_tokens,
@@ -98,48 +103,34 @@ class ZhipuLLMClient:
"total_tokens": response.usage.total_tokens
}
)
logger.debug(
"Chat response received",
finish_reason=result.finish_reason,
total_tokens=result.usage["total_tokens"]
)
return result
except concurrent.futures.TimeoutError:
logger.error("Chat request timed out", timeout=self.timeout)
raise TimeoutError(f"Request timed out after {self.timeout} seconds")
except Exception as e:
logger.error("Chat request failed", error=str(e))
raise
async def chat_with_tools(
self,
messages: list[Message],
tools: list[dict[str, Any]],
temperature: float = 0.7,
**kwargs: Any
) -> tuple[LLMResponse, Optional[list[dict[str, Any]]]]:
"""Send chat completion request with tool calling
Args:
messages: List of chat messages
tools: List of tool definitions
temperature: Sampling temperature
**kwargs: Additional parameters
Returns:
Tuple of (LLM response, tool calls if any)
"""
) -> tuple[LLMResponse, None]:
"""Send chat completion request with tool calling"""
formatted_messages = [
{"role": msg.role, "content": msg.content}
for msg in messages
]
logger.debug(
logger.info(
"Sending chat request with tools",
model=self.model,
tool_count=len(tools)
)
try:
response = self._client.chat.completions.create(
model=self.model,
@@ -148,42 +139,25 @@ class ZhipuLLMClient:
temperature=temperature,
**kwargs
)
choice = response.choices[0]
result = LLMResponse(
content=choice.message.content or "",
content = choice.message.content or ""
return LLMResponse(
content=content,
finish_reason=choice.finish_reason,
usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
}
)
# Extract tool calls if present
tool_calls = None
if hasattr(choice.message, 'tool_calls') and choice.message.tool_calls:
tool_calls = [
{
"id": tc.id,
"type": tc.type,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments
}
}
for tc in choice.message.tool_calls
]
logger.debug("Tool calls received", tool_count=len(tool_calls))
return result, tool_calls
), None
except Exception as e:
logger.error("Chat with tools request failed", error=str(e))
raise
# Global LLM client instance
llm_client: Optional[ZhipuLLMClient] = None