feat: 添加物流查询功能和完善 token 传递

- 添加 get_logistics 工具查询 Mall API /mall/api/order/parcel
- 修复 Cookie token 传递到 MCP 的问题
- 增强 LLM 客户端超时处理和日志
- 移除 MALL_API_TOKEN,使用用户登录 token
- 更新测试页面使用 setUser 设置用户属性
- 增强 webhook 调试日志
Author: wangliang
Date: 2026-01-16 18:36:17 +08:00
Parent: cd787d608b
Commit: c4e97cf312
Changes: 9 changed files with 334 additions and 111 deletions

View File

@@ -1,7 +1,8 @@
"""
ZhipuAI LLM Client for B2B Shopping AI Assistant
"""
from typing import Any, AsyncGenerator, Optional
import concurrent.futures
from typing import Any, Optional
from dataclasses import dataclass
from zhipuai import ZhipuAI
@@ -29,23 +30,21 @@ class LLMResponse:
class ZhipuLLMClient:
"""ZhipuAI LLM Client wrapper"""
DEFAULT_TIMEOUT = 30 # seconds
def __init__(
self,
api_key: Optional[str] = None,
model: Optional[str] = None
model: Optional[str] = None,
timeout: Optional[int] = None
):
"""Initialize ZhipuAI client
Args:
api_key: ZhipuAI API key, defaults to settings
model: Model name, defaults to settings
"""
self.api_key = api_key or settings.zhipu_api_key
self.model = model or settings.zhipu_model
self.timeout = timeout or self.DEFAULT_TIMEOUT
self._client = ZhipuAI(api_key=self.api_key)
logger.info("ZhipuAI client initialized", model=self.model)
logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout)
async def chat(
self,
messages: list[Message],
@@ -54,32 +53,21 @@ class ZhipuLLMClient:
top_p: float = 0.9,
**kwargs: Any
) -> LLMResponse:
"""Send chat completion request
Args:
messages: List of chat messages
temperature: Sampling temperature
max_tokens: Maximum tokens to generate
top_p: Top-p sampling parameter
**kwargs: Additional parameters
Returns:
LLM response with content and metadata
"""
"""Send chat completion request"""
formatted_messages = [
{"role": msg.role, "content": msg.content}
for msg in messages
]
logger.debug(
logger.info(
"Sending chat request",
model=self.model,
message_count=len(messages),
temperature=temperature
)
try:
response = self._client.chat.completions.create(
def _make_request():
return self._client.chat.completions.create(
model=self.model,
messages=formatted_messages,
temperature=temperature,
@@ -87,10 +75,27 @@ class ZhipuLLMClient:
top_p=top_p,
**kwargs
)
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(_make_request)
response = future.result(timeout=self.timeout)
choice = response.choices[0]
result = LLMResponse(
content=choice.message.content,
content = choice.message.content
logger.info(
"Chat response received",
finish_reason=choice.finish_reason,
content_length=len(content) if content else 0,
usage=response.usage.__dict__ if hasattr(response, 'usage') else {}
)
if not content:
logger.warning("LLM returned empty content")
return LLMResponse(
content=content or "",
finish_reason=choice.finish_reason,
usage={
"prompt_tokens": response.usage.prompt_tokens,
@@ -98,48 +103,34 @@ class ZhipuLLMClient:
"total_tokens": response.usage.total_tokens
}
)
logger.debug(
"Chat response received",
finish_reason=result.finish_reason,
total_tokens=result.usage["total_tokens"]
)
return result
except concurrent.futures.TimeoutError:
logger.error("Chat request timed out", timeout=self.timeout)
raise TimeoutError(f"Request timed out after {self.timeout} seconds")
except Exception as e:
logger.error("Chat request failed", error=str(e))
raise
async def chat_with_tools(
self,
messages: list[Message],
tools: list[dict[str, Any]],
temperature: float = 0.7,
**kwargs: Any
) -> tuple[LLMResponse, Optional[list[dict[str, Any]]]]:
"""Send chat completion request with tool calling
Args:
messages: List of chat messages
tools: List of tool definitions
temperature: Sampling temperature
**kwargs: Additional parameters
Returns:
Tuple of (LLM response, tool calls if any)
"""
) -> tuple[LLMResponse, None]:
"""Send chat completion request with tool calling"""
formatted_messages = [
{"role": msg.role, "content": msg.content}
for msg in messages
]
logger.debug(
logger.info(
"Sending chat request with tools",
model=self.model,
tool_count=len(tools)
)
try:
response = self._client.chat.completions.create(
model=self.model,
@@ -148,42 +139,25 @@ class ZhipuLLMClient:
temperature=temperature,
**kwargs
)
choice = response.choices[0]
result = LLMResponse(
content=choice.message.content or "",
content = choice.message.content or ""
return LLMResponse(
content=content,
finish_reason=choice.finish_reason,
usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
}
)
# Extract tool calls if present
tool_calls = None
if hasattr(choice.message, 'tool_calls') and choice.message.tool_calls:
tool_calls = [
{
"id": tc.id,
"type": tc.type,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments
}
}
for tc in choice.message.tool_calls
]
logger.debug("Tool calls received", tool_count=len(tool_calls))
return result, tool_calls
), None
except Exception as e:
logger.error("Chat with tools request failed", error=str(e))
raise
# Global LLM client instance
llm_client: Optional[ZhipuLLMClient] = None