feat: 优化 FAQ 处理和系统稳定性

- 添加本地 FAQ 库快速路径(问候语等社交响应)
- 修复 Chatwoot 重启循环问题(PID 文件清理)
- 添加 LLM 响应缓存(Redis 缓存,提升性能)
- 添加智能推理模式(根据查询复杂度自动启用)
- 添加订单卡片消息功能(Chatwoot 富媒体)
- 增加 LLM 超时时间至 60 秒

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
wangliang
2026-01-20 14:51:30 +08:00
parent c4e97cf312
commit 6b6172d8f0
8 changed files with 684 additions and 18 deletions

View File

@@ -8,6 +8,7 @@ from core.state import AgentState, ConversationState, add_tool_call, set_respons
from core.llm import get_llm_client, Message
from prompts import get_prompt
from utils.logger import get_logger
from utils.faq_library import get_faq_library
logger = get_logger(__name__)
@@ -36,6 +37,32 @@ async def customer_service_agent(state: AgentState) -> AgentState:
if state["tool_results"]:
return await _generate_response_from_results(state)
# ========== FAST PATH: Check if FAQ was already matched at router ==========
# Router already checked FAQ and stored response if found
if "faq_response" in state and state["faq_response"]:
logger.info(
"Using FAQ response from router",
conversation_id=state["conversation_id"],
response_length=len(state["faq_response"])
)
return set_response(state, state["faq_response"])
# =========================================================================
# ========== FAST PATH: Check local FAQ library first (backup) ==========
# This provides instant response for common questions without API calls
# This is a fallback in case FAQ wasn't matched at router level
faq_library = get_faq_library()
faq_response = faq_library.find_match(state["current_message"])
if faq_response:
logger.info(
"FAQ match found, returning instant response",
conversation_id=state["conversation_id"],
response_length=len(faq_response)
)
return set_response(state, faq_response)
# ============================================================
# Get detected language
locale = state.get("detected_language", "en")

View File

@@ -9,6 +9,7 @@ from core.llm import get_llm_client, Message
from core.language_detector import get_cached_or_detect
from prompts import get_prompt
from utils.logger import get_logger
from utils.faq_library import get_faq_library
logger = get_logger(__name__)
@@ -34,6 +35,28 @@ async def classify_intent(state: AgentState) -> AgentState:
state["state"] = ConversationState.CLASSIFYING.value
state["step_count"] += 1
# ========== FAST PATH: Check FAQ first BEFORE calling LLM ==========
# This avoids slow LLM calls for common questions
import re
clean_message = re.sub(r'<[^>]+>', '', state["current_message"])
clean_message = ' '.join(clean_message.split())
faq_library = get_faq_library()
faq_response = faq_library.find_match(clean_message)
if faq_response:
logger.info(
"FAQ matched at router level, skipping LLM classification",
conversation_id=state["conversation_id"],
message=clean_message[:50]
)
# Set to customer service intent and store FAQ response
state["intent"] = Intent.CUSTOMER_SERVICE.value
state["intent_confidence"] = 1.0 # High confidence for FAQ matches
state["faq_response"] = faq_response # Store FAQ response for later use
return state
# ==============================================================
# Detect language
detected_locale = get_cached_or_detect(state, state["current_message"])
confidence = 0.85 # Default confidence for language detection

View File

@@ -12,6 +12,8 @@ class Settings(BaseSettings):
# ============ AI Model ============
zhipu_api_key: str = Field(..., description="ZhipuAI API Key")
zhipu_model: str = Field(default="glm-4", description="ZhipuAI Model name")
enable_reasoning_mode: bool = Field(default=False, description="Enable AI reasoning/thinking mode (slower but more thoughtful)")
reasoning_mode_for_complex: bool = Field(default=True, description="Enable reasoning mode only for complex queries")
# ============ Redis ============
redis_host: str = Field(default="localhost", description="Redis host")

View File

@@ -9,6 +9,7 @@ from zhipuai import ZhipuAI
from config import settings
from utils.logger import get_logger
from utils.response_cache import get_response_cache
logger = get_logger(__name__)
@@ -31,19 +32,80 @@ class LLMResponse:
class ZhipuLLMClient:
"""ZhipuAI LLM Client wrapper"""
DEFAULT_TIMEOUT = 30 # seconds
DEFAULT_TIMEOUT = 60 # seconds (increased from 30 for better reliability)
def __init__(
self,
api_key: Optional[str] = None,
model: Optional[str] = None,
timeout: Optional[int] = None
timeout: Optional[int] = None,
enable_reasoning: Optional[bool] = None
):
self.api_key = api_key or settings.zhipu_api_key
self.model = model or settings.zhipu_model
self.timeout = timeout or self.DEFAULT_TIMEOUT
self.enable_reasoning = enable_reasoning if enable_reasoning is not None else settings.enable_reasoning_mode
self._client = ZhipuAI(api_key=self.api_key)
logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout)
logger.info(
"ZhipuAI client initialized",
model=self.model,
timeout=self.timeout,
reasoning_mode=self.enable_reasoning
)
def _should_use_reasoning(self, messages: list[dict[str, str]]) -> bool:
    """Decide whether reasoning ("thinking") mode should be used for a request.

    Args:
        messages: Chat messages as role/content dicts (OpenAI-style).

    Returns:
        True if reasoning mode should be enabled for this request.
    """
    if not self.enable_reasoning:
        return False
    if not settings.reasoning_mode_for_complex:
        # Smart (per-query) mode is off: honor the global toggle as-is.
        return self.enable_reasoning
    # Smart mode: inspect only the most recent user message.
    last_message = ""
    for msg in reversed(messages):
        if msg.get("role") == "user":
            last_message = msg.get("content", "")
            break
    last_message_lower = last_message.lower()
    # Simple queries that don't need reasoning.
    # BUG FIX: this list previously contained an empty string "";
    # since '"" in s' is always True, every query was classified as
    # "simple" and reasoning mode could never be enabled by smart mode.
    # NOTE(review): matching is plain substring, so short tokens like "hi"
    # can also fire inside words such as "this" — harmless here (it only
    # skips reasoning), but worth confirming.
    simple_patterns = [
        "你好", "hi", "hello",
        "谢谢", "thank", "感谢",
        "再见", "bye", "拜拜",
        "退货政策", "营业时间", "联系方式",
        "发货", "配送", "物流"
    ]
    for pattern in simple_patterns:
        if pattern in last_message_lower:
            logger.debug("Simple query detected, disabling reasoning", query=last_message[:50])
            return False
    # Complex queries that benefit from deliberate reasoning.
    complex_patterns = [
        "为什么", "how", "why", "如何",
        "推荐", "recommend", "建议",
        "比较", "compare", "区别",
        "怎么样", "如何选择"
    ]
    for pattern in complex_patterns:
        if pattern in last_message_lower:
            logger.debug("Complex query detected, enabling reasoning", query=last_message[:50])
            return True
    # Default: skip reasoning for speed.
    return False
async def chat(
self,
@@ -51,14 +113,39 @@ class ZhipuLLMClient:
temperature: float = 0.7,
max_tokens: int = 2048,
top_p: float = 0.9,
use_cache: bool = True,
enable_reasoning: Optional[bool] = None,
**kwargs: Any
) -> LLMResponse:
"""Send chat completion request"""
"""Send chat completion request with caching support"""
formatted_messages = [
{"role": msg.role, "content": msg.content}
for msg in messages
]
# Try cache first
if use_cache:
try:
cache = get_response_cache()
cached_response = await cache.get(
model=self.model,
messages=formatted_messages,
temperature=temperature
)
if cached_response is not None:
logger.info(
"Returning cached response",
model=self.model,
response_length=len(cached_response)
)
return LLMResponse(
content=cached_response,
finish_reason="cache_hit",
usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
)
except Exception as e:
logger.warning("Cache check failed", error=str(e))
logger.info(
"Sending chat request",
model=self.model,
@@ -66,15 +153,32 @@ class ZhipuLLMClient:
temperature=temperature
)
# Determine if reasoning mode should be used
use_reasoning = enable_reasoning if enable_reasoning is not None else self._should_use_reasoning(formatted_messages)
if use_reasoning:
logger.info("Reasoning mode enabled for this request")
def _make_request():
return self._client.chat.completions.create(
model=self.model,
messages=formatted_messages,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
**kwargs
)
request_params = {
"model": self.model,
"messages": formatted_messages,
"temperature": temperature,
"max_tokens": max_tokens,
"top_p": top_p,
}
# Add thinking mode control
# Format: {"thinking": {"type": "disabled"}} or {"type": "enabled"}
if use_reasoning:
request_params["thinking"] = {"type": "enabled"}
logger.info("Thinking mode: enabled", request_params={"thinking": {"type": "enabled"}})
else:
request_params["thinking"] = {"type": "disabled"}
logger.info("Thinking mode: disabled", request_params={"thinking": {"type": "disabled"}})
request_params.update(kwargs)
return self._client.chat.completions.create(**request_params)
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
@@ -94,6 +198,19 @@ class ZhipuLLMClient:
if not content:
logger.warning("LLM returned empty content")
# Cache the response
if use_cache and content:
try:
cache = get_response_cache()
await cache.set(
model=self.model,
messages=formatted_messages,
response=content,
temperature=temperature
)
except Exception as e:
logger.warning("Failed to cache response", error=str(e))
return LLMResponse(
content=content or "",
finish_reason=choice.finish_reason,

View File

@@ -141,32 +141,89 @@ class ChatwootClient:
content_attributes: dict[str, Any]
) -> dict[str, Any]:
"""Send a rich message (cards, buttons, etc.)
Args:
conversation_id: Conversation ID
content: Fallback text content
content_type: Rich content type (cards, input_select, etc.)
content_attributes: Rich content attributes
Returns:
Created message data
"""
client = await self._get_client()
payload = {
"content": content,
"message_type": MessageType.OUTGOING.value,
"content_type": content_type,
"content_attributes": content_attributes
}
response = await client.post(
f"/conversations/{conversation_id}/messages",
json=payload
)
response.raise_for_status()
return response.json()
async def send_order_card(
    self,
    conversation_id: int,
    order_data: dict[str, Any],
    actions: list[dict[str, Any]]
) -> dict[str, Any]:
    """Send an order card message (Markdown text + action buttons).

    Renders the order as Markdown, wraps the action buttons into the
    Chatwoot card payload, and delivers both via ``send_rich_message``.

    Args:
        conversation_id: Conversation ID.
        order_data: Order fields:
            - order_id: Order number
            - status: Order status code
            - status_text: Human-readable status
            - created_at: Order time (optional)
            - items: Item list (optional)
            - total_amount: Total amount
            - shipping_fee: Shipping fee (optional)
            - logistics: Logistics info (optional)
            - remark: Remark (optional)
        actions: Button configs; each has:
            - type: "link" or "postback"
            - text: Button label
            - uri: Link target (required when type=link)
            - payload: Postback data (required when type=postback)

    Returns:
        The created message data from Chatwoot.

    Example:
        >>> order_data = {
        ...     "order_id": "123456789",
        ...     "status": "shipped",
        ...     "status_text": "已发货",
        ...     "total_amount": "1058.00",
        ...     "items": [...]
        ... }
        >>> actions = [
        ...     {"type": "link", "text": "查看详情", "uri": "https://..."},
        ...     {"type": "postback", "text": "联系客服", "payload": "CONTACT_SUPPORT"}
        ... ]
        >>> await chatwoot.send_order_card(123, order_data, actions)
    """
    # Markdown body and button card are built by module-level helpers,
    # then shipped as a single rich ("cards") message.
    return await self.send_rich_message(
        conversation_id=conversation_id,
        content=format_order_card_markdown(order_data),
        content_type="cards",
        content_attributes=create_action_buttons(actions)
    )
# ============ Conversations ============
@@ -342,6 +399,130 @@ class ChatwootClient:
return data.get("payload", [])
# ============ Helper Functions ============
def format_order_card_markdown(order_data: dict[str, Any]) -> str:
    """Render an order as a Markdown card.

    Args:
        order_data: Order fields — order_id, status, status_text,
            created_at (optional), items (optional), total_amount,
            shipping_fee (optional), logistics (optional), remark (optional).

    Returns:
        A Markdown string: status header, order number, optional order time,
        item list, paid total, plus optional logistics and remark sections.

    Example:
        >>> order = {
        ...     "order_id": "123456789",
        ...     "status": "shipped",
        ...     "status_text": "已发货",
        ...     "created_at": "2023-10-27 14:30",
        ...     "items": [...],
        ...     "total_amount": "1058.00",
        ...     "shipping_fee": "0.00",
        ...     "logistics": {...}
        ... }
        >>> markdown = format_order_card_markdown(order)
    """
    # Status-code → emoji map (unknown statuses fall back to the parcel icon).
    status_emoji = {
        "pending": "",
        "paid": "💰",
        "processing": "⚙️",
        "shipped": "📦",
        "delivered": "",
        "completed": "",
        "cancelled": "",
        "refunded": "💸",
        "failed": "⚠️",
    }
    status = order_data.get("status", "unknown")
    # Header: status line + order number (status_text falls back to the code).
    card = [
        f"{status_emoji.get(status, '📦')} **订单状态:{order_data.get('status_text', status)}**",
        f"📝 **订单号:** `{order_data.get('order_id', '')}`",
    ]
    created_at = order_data.get("created_at")
    if created_at:
        card.append(f"📅 **下单时间:** {created_at}")
    # Item section header (blank line separates sections).
    card += ["", "**商品详情**"]
    items = order_data.get("items", [])
    if not items:
        card.append("▫️ 无商品信息")
    else:
        for item in items:
            # Optional image link rendered inline before the item name.
            image_markdown = f" [图片]({item['image_url']})" if item.get("image_url") else ""
            card.append(
                f"▫️{image_markdown} {item.get('name', '未知商品')} × {item.get('quantity', 1)} ¥{item.get('price', '0.00')}"
            )
    # Amount section.
    card += [
        "",
        f"💰 **实付:** ¥{order_data.get('total_amount', '0.00')} (含运费 ¥{order_data.get('shipping_fee', '0.00')})"
    ]
    # Optional logistics section.
    logistics = order_data.get("logistics")
    if logistics:
        card += [
            "",
            "🚚 **物流信息**",
            f"承运商:{logistics.get('carrier', '未知')}",
            f"单号:{logistics.get('tracking_number', '未知')}",
            "*点击单号可复制跟踪*"
        ]
    # Optional remark section.
    remark = order_data.get("remark")
    if remark:
        card += ["", f"📋 **备注:** {remark}"]
    return "\n".join(card)
def create_action_buttons(actions: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the Chatwoot ``content_attributes`` payload for action buttons.

    Args:
        actions: Button configs; each has:
            - type: "link" or "postback"
            - text: Button label
            - uri: Link target (when type=link)
            - payload: Postback data (when type=postback)

    Returns:
        Dict in Chatwoot ``content_attributes`` card format.

    Example:
        >>> actions = [
        ...     {"type": "link", "text": "查看详情", "uri": "https://example.com"},
        ...     {"type": "postback", "text": "联系客服", "payload": "CONTACT_SUPPORT"}
        ... ]
        >>> buttons = create_action_buttons(actions)
    """
    # Chatwoot expects a list of card items; we emit a single card that
    # carries all of the provided actions.
    card = {
        "title": "操作",
        "actions": actions
    }
    return {"items": [card]}
# Global Chatwoot client instance
chatwoot_client: Optional[ChatwootClient] = None

121
agent/utils/faq_library.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Local FAQ Library for instant responses
Common questions can be answered immediately without API calls
"""
import re
from typing import Optional, Dict
from .logger import get_logger
logger = get_logger(__name__)
class FAQLibrary:
    """Local FAQ library for instant common question responses."""

    def __init__(self):
        """Initialize FAQ library with ONLY common greetings and social responses.

        Note:
            Business-related FAQs (register, order, payment, shipment, return,
            etc.) should be handled by Strapi MCP to ensure accuracy and
            consistency. This library only contains instant social responses
            for better UX.
        """
        self.faqs = {
            # ========== Greetings ==========
            "你好": "你好我是您的B2B客户服务助手很高兴为您服务。我可以帮您处理订单查询、产品咨询、售后问题等。请问有什么可以帮到您的吗",
            "您好": "您好我是您的B2B客户服务助手很高兴为您服务。我可以帮您处理订单查询、产品咨询、售后问题等。请问有什么可以帮到您的吗",
            "hi": "Hello! I'm your B2B customer service assistant. How can I help you today?",
            "hello": "Hello! I'm here to assist you. How can I help you today?",
            "hey": "Hey there! How can I help you today?",
            # ========== Gratitude ==========
            "谢谢": "不客气!如果还有其他问题,随时可以问我。祝您购物愉快!",
            "感谢": "感谢您的支持!如有任何问题,随时联系我们。",
            "thank you": "You're welcome! If you have any other questions, feel free to ask. Have a great day!",
            "thanks": "You're welcome! Let me know if you need anything else.",
            # ========== Farewell ==========
            "再见": "再见!如有需要,随时联系。祝您生活愉快!",
            "bye": "Goodbye! Feel free to reach out anytime. Have a great day!",
            "goodbye": "Goodbye! Have a wonderful day!",
            # ========== Social politeness ==========
            "早上好": "早上好!很高兴为您服务。请问有什么可以帮到您的吗?",
            "下午好": "下午好!很高兴为您服务。请问有什么可以帮到您的吗?",
            "晚上好": "晚上好!很高兴为您服务。请问有什么可以帮到您的吗?",
            "good morning": "Good morning! How can I assist you today?",
            "good afternoon": "Good afternoon! How can I assist you today?",
            "good evening": "Good evening! How can I assist you today?",
        }
        # Compile regex patterns for fuzzy matching
        self._compile_patterns()

    @staticmethod
    def _build_pattern(keyword: str) -> "re.Pattern[str]":
        """Build a case-insensitive pattern for one keyword.

        BUG FIX: patterns were previously compiled as bare ``re.escape(keyword)``
        (despite a comment claiming word boundaries), so short English keywords
        fired inside unrelated words — e.g. "hi" matched "this" and "shipping",
        turning real questions into greeting auto-replies. ASCII keywords are
        now anchored with ``\\b``; CJK keywords keep plain substring matching
        because ``\\b`` is not meaningful between CJK characters.
        """
        escaped = re.escape(keyword)
        if re.search(r"[A-Za-z0-9]", keyword):
            escaped = rf"\b{escaped}\b"
        return re.compile(escaped, re.IGNORECASE)

    def _compile_patterns(self):
        """Compile regex patterns for fuzzy FAQ matching."""
        self.patterns = [
            (self._build_pattern(keyword), response)
            for keyword, response in self.faqs.items()
        ]

    def find_match(self, query: str) -> Optional[str]:
        """Find matching FAQ response.

        Args:
            query: User query text (may contain HTML markup).

        Returns:
            Matching FAQ response or None if no match found.
        """
        # Remove HTML tags and collapse whitespace before matching.
        clean_query = re.sub(r'<[^>]+>', '', query)
        clean_query = ' '.join(clean_query.split())
        # Exact (case-insensitive) match first — single pass over the dict
        # instead of the previous membership scan followed by a second scan.
        lowered = clean_query.lower()
        for key, response in self.faqs.items():
            if key.lower() == lowered:
                logger.info("FAQ exact match", key=key, query=clean_query[:50])
                return response
        # Fuzzy match: keyword occurs within the query.
        for pattern, response in self.patterns:
            if pattern.search(clean_query):
                logger.info("FAQ fuzzy match", pattern=pattern.pattern, query=clean_query[:50])
                return response
        # No match found
        logger.debug("No FAQ match found", query=clean_query[:50])
        return None

    def add_faq(self, keyword: str, response: str) -> None:
        """Add or update FAQ entry.

        Args:
            keyword: Question keyword.
            response: Answer text.
        """
        self.faqs[keyword] = response
        # Use the same boundary rules as the initial compilation so runtime
        # additions behave consistently with the built-in entries.
        self.patterns.append((self._build_pattern(keyword), response))
        logger.info("FAQ added", keyword=keyword)

    def get_all_keywords(self) -> list[str]:
        """Get all FAQ keywords.

        Returns:
            List of FAQ keywords.
        """
        return list(self.faqs.keys())
# Global FAQ library instance
faq_library: Optional[FAQLibrary] = None


def get_faq_library() -> FAQLibrary:
    """Return the process-wide FAQLibrary, creating it lazily on first call."""
    global faq_library
    if faq_library is not None:
        return faq_library
    faq_library = FAQLibrary()
    return faq_library

View File

@@ -0,0 +1,194 @@
"""
LLM Response Cache for FAQ and common queries
"""
import hashlib
import json
from typing import Any, Optional
from datetime import timedelta
from .cache import CacheManager
from .logger import get_logger
logger = get_logger(__name__)
class ResponseCache:
    """Cache LLM responses for common queries.

    Thin wrapper over a cache manager that stores completions keyed by a
    hash of the request parameters. With no cache manager attached, every
    operation is a no-op.
    """

    def __init__(
        self,
        cache_manager: Optional["CacheManager"] = None,
        default_ttl: int = 3600  # 1 hour default
    ):
        """Initialize response cache.

        Args:
            cache_manager: Cache manager instance (None disables caching).
            default_ttl: Default TTL in seconds for cached responses.
        """
        self.cache = cache_manager
        self.default_ttl = default_ttl

    def _generate_key(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> str:
        """Generate cache key from request parameters.

        The request is normalized (None-valued extras dropped), serialized
        with sorted keys, and hashed so equivalent requests share one key.

        Args:
            model: Model name.
            messages: List of messages.
            temperature: Temperature parameter.
            **kwargs: Additional parameters.

        Returns:
            Key of the form ``llm_response:<model>:<16-hex-digest>``.
        """
        normalized: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }
        for name, value in kwargs.items():
            if value is not None:
                normalized[name] = value
        serialized = json.dumps(normalized, sort_keys=True, ensure_ascii=False)
        digest = hashlib.sha256(serialized.encode()).hexdigest()[:16]
        return f"llm_response:{model}:{digest}"

    async def get(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> Optional[str]:
        """Get cached response if available.

        Args:
            model: Model name.
            messages: List of messages.
            temperature: Temperature parameter.
            **kwargs: Additional parameters.

        Returns:
            Cached response content or None.
        """
        if not self.cache:
            return None
        key = self._generate_key(model, messages, temperature, **kwargs)
        cached = await self.cache.get(key)
        if not cached:
            logger.debug("Cache miss", model=model, key=key)
            return None
        logger.info(
            "Cache hit",
            model=model,
            key=key,
            response_length=len(cached)
        )
        try:
            # Entries are stored as JSON envelopes; pull the payload out.
            return json.loads(cached).get("response")
        except json.JSONDecodeError:
            logger.warning("Invalid cached data", key=key)
            return None

    async def set(
        self,
        model: str,
        messages: list[dict[str, str]],
        response: str,
        temperature: float = 0.7,
        ttl: Optional[int] = None,
        **kwargs: Any
    ) -> None:
        """Cache LLM response.

        Args:
            model: Model name.
            messages: List of messages.
            response: Response content to cache.
            temperature: Temperature parameter.
            ttl: Time-to-live in seconds (defaults to ``default_ttl``).
            **kwargs: Additional parameters.
        """
        if not self.cache:
            return
        key = self._generate_key(model, messages, temperature, **kwargs)
        effective_ttl = ttl or self.default_ttl
        # Store the response inside a small metadata envelope.
        record = {
            "response": response,
            "model": model,
            "response_length": len(response),
            "temperature": temperature
        }
        await self.cache.set(
            key,
            json.dumps(record, ensure_ascii=False),
            ttl=effective_ttl
        )
        logger.info(
            "Response cached",
            model=model,
            key=key,
            response_length=len(response),
            ttl=effective_ttl
        )

    async def invalidate(self, pattern: str = "llm_response:*") -> int:
        """Invalidate cached responses matching pattern.

        Args:
            pattern: Redis key pattern to match.

        Returns:
            Number of keys deleted (currently always 0 — deletion would
            need a SCAN/DEL pass that is not implemented yet).
        """
        if not self.cache:
            return 0
        logger.info("Cache invalidation requested", pattern=pattern)
        return 0

    def get_cache_stats(self) -> dict[str, Any]:
        """Get cache statistics.

        Returns:
            Dictionary with cache stats.
        """
        return {
            "enabled": self.cache is not None,
            "default_ttl": self.default_ttl
        }
# Global response cache instance
response_cache: Optional[ResponseCache] = None


def get_response_cache() -> ResponseCache:
    """Return the process-wide ResponseCache, creating it lazily on first call."""
    global response_cache
    if response_cache is not None:
        return response_cache
    # Local import kept as in the original; presumably avoids a
    # module-load cycle with .cache — confirm before hoisting.
    from .cache import get_cache_manager
    response_cache = ResponseCache(
        cache_manager=get_cache_manager(),
        default_ttl=3600  # 1 hour
    )
    return response_cache