feat: 优化 FAQ 处理和系统稳定性

- 添加本地 FAQ 库快速路径（问候语等社交响应）
- 修复 Chatwoot 重启循环问题（PID 文件清理）
- 添加 LLM 响应缓存（Redis 缓存，提升性能）
- 添加智能推理模式（根据查询复杂度自动启用）
- 添加订单卡片消息功能（Chatwoot 富媒体）
- 增加 LLM 超时时间至 60 秒

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-20 14:51:30 +08:00
parent c4e97cf312
commit 6b6172d8f0
8 changed files with 684 additions and 18 deletions
--- a/agent/core/llm.py
+++ b/agent/core/llm.py
@@ -9,6 +9,7 @@ from zhipuai import ZhipuAI

 from config import settings
 from utils.logger import get_logger
+from utils.response_cache import get_response_cache

 logger = get_logger(__name__)

@@ -31,19 +32,80 @@ class LLMResponse:
 class ZhipuLLMClient:
    """ZhipuAI LLM Client wrapper"""

-    DEFAULT_TIMEOUT = 30  # seconds
+    DEFAULT_TIMEOUT = 60  # seconds (increased from 30 for better reliability)

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
-        timeout: Optional[int] = None
+        timeout: Optional[int] = None,
+        enable_reasoning: Optional[bool] = None
    ):
        self.api_key = api_key or settings.zhipu_api_key
        self.model = model or settings.zhipu_model
        self.timeout = timeout or self.DEFAULT_TIMEOUT
+        self.enable_reasoning = enable_reasoning if enable_reasoning is not None else settings.enable_reasoning_mode
        self._client = ZhipuAI(api_key=self.api_key)
-        logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout)
+        logger.info(
+            "ZhipuAI client initialized",
+            model=self.model,
+            timeout=self.timeout,
+            reasoning_mode=self.enable_reasoning
+        )
+
+    def _should_use_reasoning(self, messages: list[dict[str, str]]) -> bool:
+        """Decide whether reasoning mode should be used for this request.
+
+        Only the most recent user message is classified. ASCII keywords are
+        matched as whole words, so "hi" does not fire inside "this" or
+        "shipping"; Chinese keywords are matched as substrings. Simple-query
+        keywords take precedence over complex-query keywords.
+
+        Args:
+            messages: List of message dictionaries with "role"/"content" keys
+
+        Returns:
+            True if reasoning mode should be used
+        """
+        import re  # local import: the module's import block is not touched here
+
+        if not self.enable_reasoning:
+            return False
+
+        if not settings.reasoning_mode_for_complex:
+            # Smart mode is disabled: follow the global switch, which is
+            # known to be truthy at this point.
+            return self.enable_reasoning
+
+        # Smart mode: analyze the last user message ("content" may be None)
+        last_message = ""
+        for msg in reversed(messages):
+            if msg.get("role") == "user":
+                last_message = msg.get("content") or ""
+                break
+        text = last_message.lower()
+
+        def _matches(cjk_keywords: tuple[str, ...], ascii_words: str) -> bool:
+            # ASCII alternatives must not touch another letter on either side;
+            # text is already lower-cased, so [a-z] is sufficient.
+            ascii_hit = re.search(rf"(?<![a-z])(?:{ascii_words})(?![a-z])", text)
+            return bool(ascii_hit) or any(kw in text for kw in cjk_keywords)
+
+        # Simple queries that don't need reasoning
+        simple_cjk = ("你好", "嗨", "谢谢", "感谢", "再见", "拜拜",
+                      "退货政策", "营业时间", "联系方式", "发货", "配送", "物流")
+        if _matches(simple_cjk, r"hi|hello|thank[a-z]*|(?:good)?bye"):
+            logger.debug("Simple query detected, disabling reasoning", query=last_message[:50])
+            return False
+
+        # Complex queries that benefit from reasoning ("如何" also covers "如何选择")
+        complex_cjk = ("为什么", "如何", "推荐", "建议", "比较", "区别", "怎么样")
+        if _matches(complex_cjk, r"how|why|recommend[a-z]*|compare[a-z]*"):
+            logger.debug("Complex query detected, enabling reasoning", query=last_message[:50])
+            return True
+
+        # Default: disable reasoning for speed
+        return False

    async def chat(
        self,
@@ -51,14 +113,39 @@ class ZhipuLLMClient:
        temperature: float = 0.7,
        max_tokens: int = 2048,
        top_p: float = 0.9,
+        use_cache: bool = True,
+        enable_reasoning: Optional[bool] = None,
        **kwargs: Any
    ) -> LLMResponse:
-        """Send chat completion request"""
+        """Send chat completion request with caching support"""
        formatted_messages = [
            {"role": msg.role, "content": msg.content}
            for msg in messages
        ]

+        # Try cache first
+        if use_cache:
+            try:
+                cache = get_response_cache()
+                cached_response = await cache.get(
+                    model=self.model,
+                    messages=formatted_messages,
+                    temperature=temperature
+                )
+                if cached_response is not None:
+                    logger.info(
+                        "Returning cached response",
+                        model=self.model,
+                        response_length=len(cached_response)
+                    )
+                    return LLMResponse(
+                        content=cached_response,
+                        finish_reason="cache_hit",
+                        usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+                    )
+            except Exception as e:
+                logger.warning("Cache check failed", error=str(e))
+
        logger.info(
            "Sending chat request",
            model=self.model,
@@ -66,15 +153,32 @@ class ZhipuLLMClient:
            temperature=temperature
        )

+        # Determine if reasoning mode should be used
+        use_reasoning = enable_reasoning if enable_reasoning is not None else self._should_use_reasoning(formatted_messages)
+
+        if use_reasoning:
+            logger.info("Reasoning mode enabled for this request")
+
        def _make_request():
-            return self._client.chat.completions.create(
-                model=self.model,
-                messages=formatted_messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                top_p=top_p,
-                **kwargs
-            )
+            request_params = {
+                "model": self.model,
+                "messages": formatted_messages,
+                "temperature": temperature,
+                "max_tokens": max_tokens,
+                "top_p": top_p,
+            }
+
+            # Add thinking mode control
+            # Format: {"thinking": {"type": "disabled"}} or {"type": "enabled"}
+            if use_reasoning:
+                request_params["thinking"] = {"type": "enabled"}
+                logger.info("Thinking mode: enabled", request_params={"thinking": {"type": "enabled"}})
+            else:
+                request_params["thinking"] = {"type": "disabled"}
+                logger.info("Thinking mode: disabled", request_params={"thinking": {"type": "disabled"}})
+
+            request_params.update(kwargs)
+            return self._client.chat.completions.create(**request_params)

        try:
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
@@ -94,6 +198,19 @@ class ZhipuLLMClient:
            if not content:
                logger.warning("LLM returned empty content")

+            # Cache the response
+            if use_cache and content:
+                try:
+                    cache = get_response_cache()
+                    await cache.set(
+                        model=self.model,
+                        messages=formatted_messages,
+                        response=content,
+                        temperature=temperature
+                    )
+                except Exception as e:
+                    logger.warning("Failed to cache response", error=str(e))
+
            return LLMResponse(
                content=content or "",
                finish_reason=choice.finish_reason,