feat: 优化 FAQ 处理和系统稳定性

- 添加本地 FAQ 库快速路径(问候语等社交响应)
- 修复 Chatwoot 重启循环问题(PID 文件清理)
- 添加 LLM 响应缓存(Redis 缓存,提升性能)
- 添加智能推理模式(根据查询复杂度自动启用)
- 添加订单卡片消息功能(Chatwoot 富媒体)
- 增加 LLM 超时时间至 60 秒

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
wangliang
2026-01-20 14:51:30 +08:00
parent c4e97cf312
commit 6b6172d8f0
8 changed files with 684 additions and 18 deletions

View File

@@ -9,6 +9,7 @@ from zhipuai import ZhipuAI
from config import settings
from utils.logger import get_logger
from utils.response_cache import get_response_cache
logger = get_logger(__name__)
@@ -31,19 +32,80 @@ class LLMResponse:
class ZhipuLLMClient:
"""ZhipuAI LLM Client wrapper"""
DEFAULT_TIMEOUT = 30 # seconds
DEFAULT_TIMEOUT = 60 # seconds (increased from 30 for better reliability)
def __init__(
self,
api_key: Optional[str] = None,
model: Optional[str] = None,
timeout: Optional[int] = None
timeout: Optional[int] = None,
enable_reasoning: Optional[bool] = None
):
"""Create the ZhipuAI client wrapper.

Args:
    api_key: API key; a falsy value falls back to ``settings.zhipu_api_key``.
    model: Model name; a falsy value falls back to ``settings.zhipu_model``.
    timeout: Timeout in seconds; a falsy value (``None`` or ``0``) falls
        back to ``DEFAULT_TIMEOUT``.
    enable_reasoning: Reasoning-mode switch; ``None`` falls back to
        ``settings.enable_reasoning_mode``. An explicit ``False`` is kept.
"""
# `or` fallbacks: any falsy argument (None, "", 0) is replaced by the default.
self.api_key = api_key or settings.zhipu_api_key
self.model = model or settings.zhipu_model
self.timeout = timeout or self.DEFAULT_TIMEOUT
# `is not None` (not `or`) so that an explicit False overrides the setting.
self.enable_reasoning = enable_reasoning if enable_reasoning is not None else settings.enable_reasoning_mode
self._client = ZhipuAI(api_key=self.api_key)
logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout)
logger.info(
"ZhipuAI client initialized",
model=self.model,
timeout=self.timeout,
reasoning_mode=self.enable_reasoning
)
def _should_use_reasoning(self, messages: list[dict[str, str]]) -> bool:
    """Determine if reasoning mode should be used based on query complexity.

    The decision is made in three steps:

    1. If reasoning is disabled on this client, it is never used.
    2. If ``settings.reasoning_mode_for_complex`` (smart mode) is off, the
       client-level ``enable_reasoning`` value is returned unchanged.
    3. Otherwise the most recent ``"user"`` message is lower-cased and
       checked against keyword lists: a "simple" keyword disables
       reasoning, a "complex" keyword enables it, and anything else
       defaults to disabled for speed. Simple keywords are checked first
       and therefore win when both kinds are present.

    Args:
        messages: List of message dictionaries with ``"role"`` and
            ``"content"`` keys.

    Returns:
        True if reasoning mode should be used.
    """
    if not self.enable_reasoning:
        return False

    if not settings.reasoning_mode_for_complex:
        # If smart mode is disabled, use the global setting
        return self.enable_reasoning

    # Smart mode: analyze the last user message
    last_message = ""
    for msg in reversed(messages):
        if msg.get("role") == "user":
            # `or ""` guards against an explicit None content value.
            last_message = msg.get("content") or ""
            break

    last_message_lower = last_message.lower()

    # Simple queries that don't need reasoning.
    # The empty string must never be a pattern: `"" in s` is True for every
    # string, which would classify every query as simple and make the
    # complex-pattern check below unreachable.
    # NOTE: matching is by substring, so short ASCII patterns such as "hi"
    # also match inside longer words (e.g. "this").
    simple_patterns = (
        "你好", "hi", "hello",
        "谢谢", "thank", "感谢",
        "再见", "bye", "拜拜",
        "退货政策", "营业时间", "联系方式",
        "发货", "配送", "物流",
    )
    if any(pattern and pattern in last_message_lower for pattern in simple_patterns):
        logger.debug("Simple query detected, disabling reasoning", query=last_message[:50])
        return False

    # Complex queries that benefit from reasoning
    complex_patterns = (
        "为什么", "how", "why", "如何",
        "推荐", "recommend", "建议",
        "比较", "compare", "区别",
        "怎么样", "如何选择",
    )
    if any(pattern and pattern in last_message_lower for pattern in complex_patterns):
        logger.debug("Complex query detected, enabling reasoning", query=last_message[:50])
        return True

    # Default: disable reasoning for speed (also covers an empty or missing
    # user message, which matches no pattern).
    return False
async def chat(
self,
@@ -51,14 +113,39 @@ class ZhipuLLMClient:
temperature: float = 0.7,
max_tokens: int = 2048,
top_p: float = 0.9,
use_cache: bool = True,
enable_reasoning: Optional[bool] = None,
**kwargs: Any
) -> LLMResponse:
"""Send chat completion request"""
"""Send chat completion request with caching support"""
formatted_messages = [
{"role": msg.role, "content": msg.content}
for msg in messages
]
# Try cache first
if use_cache:
try:
cache = get_response_cache()
cached_response = await cache.get(
model=self.model,
messages=formatted_messages,
temperature=temperature
)
if cached_response is not None:
logger.info(
"Returning cached response",
model=self.model,
response_length=len(cached_response)
)
return LLMResponse(
content=cached_response,
finish_reason="cache_hit",
usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
)
except Exception as e:
logger.warning("Cache check failed", error=str(e))
logger.info(
"Sending chat request",
model=self.model,
@@ -66,15 +153,32 @@ class ZhipuLLMClient:
temperature=temperature
)
# Determine if reasoning mode should be used
use_reasoning = enable_reasoning if enable_reasoning is not None else self._should_use_reasoning(formatted_messages)
if use_reasoning:
logger.info("Reasoning mode enabled for this request")
def _make_request():
return self._client.chat.completions.create(
model=self.model,
messages=formatted_messages,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
**kwargs
)
request_params = {
"model": self.model,
"messages": formatted_messages,
"temperature": temperature,
"max_tokens": max_tokens,
"top_p": top_p,
}
# Add thinking mode control
# Format: {"thinking": {"type": "disabled"}} or {"type": "enabled"}
if use_reasoning:
request_params["thinking"] = {"type": "enabled"}
logger.info("Thinking mode: enabled", request_params={"thinking": {"type": "enabled"}})
else:
request_params["thinking"] = {"type": "disabled"}
logger.info("Thinking mode: disabled", request_params={"thinking": {"type": "disabled"}})
request_params.update(kwargs)
return self._client.chat.completions.create(**request_params)
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
@@ -94,6 +198,19 @@ class ZhipuLLMClient:
if not content:
logger.warning("LLM returned empty content")
# Cache the response
if use_cache and content:
try:
cache = get_response_cache()
await cache.set(
model=self.model,
messages=formatted_messages,
response=content,
temperature=temperature
)
except Exception as e:
logger.warning("Failed to cache response", error=str(e))
return LLMResponse(
content=content or "",
finish_reason=choice.finish_reason,