feat: 优化 FAQ 处理和系统稳定性
- 添加本地 FAQ 库快速路径(问候语等社交响应) - 修复 Chatwoot 重启循环问题(PID 文件清理) - 添加 LLM 响应缓存(Redis 缓存,提升性能) - 添加智能推理模式(根据查询复杂度自动启用) - 添加订单卡片消息功能(Chatwoot 富媒体) - 增加 LLM 超时时间至 60 秒 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ from core.state import AgentState, ConversationState, add_tool_call, set_respons
|
||||
from core.llm import get_llm_client, Message
|
||||
from prompts import get_prompt
|
||||
from utils.logger import get_logger
|
||||
from utils.faq_library import get_faq_library
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -36,6 +37,32 @@ async def customer_service_agent(state: AgentState) -> AgentState:
|
||||
if state["tool_results"]:
|
||||
return await _generate_response_from_results(state)
|
||||
|
||||
# ========== FAST PATH: Check if FAQ was already matched at router ==========
|
||||
# Router already checked FAQ and stored response if found
|
||||
if "faq_response" in state and state["faq_response"]:
|
||||
logger.info(
|
||||
"Using FAQ response from router",
|
||||
conversation_id=state["conversation_id"],
|
||||
response_length=len(state["faq_response"])
|
||||
)
|
||||
return set_response(state, state["faq_response"])
|
||||
# =========================================================================
|
||||
|
||||
# ========== FAST PATH: Check local FAQ library first (backup) ==========
|
||||
# This provides instant response for common questions without API calls
|
||||
# This is a fallback in case FAQ wasn't matched at router level
|
||||
faq_library = get_faq_library()
|
||||
faq_response = faq_library.find_match(state["current_message"])
|
||||
|
||||
if faq_response:
|
||||
logger.info(
|
||||
"FAQ match found, returning instant response",
|
||||
conversation_id=state["conversation_id"],
|
||||
response_length=len(faq_response)
|
||||
)
|
||||
return set_response(state, faq_response)
|
||||
# ============================================================
|
||||
|
||||
# Get detected language
|
||||
locale = state.get("detected_language", "en")
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from core.llm import get_llm_client, Message
|
||||
from core.language_detector import get_cached_or_detect
|
||||
from prompts import get_prompt
|
||||
from utils.logger import get_logger
|
||||
from utils.faq_library import get_faq_library
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -34,6 +35,28 @@ async def classify_intent(state: AgentState) -> AgentState:
|
||||
state["state"] = ConversationState.CLASSIFYING.value
|
||||
state["step_count"] += 1
|
||||
|
||||
# ========== FAST PATH: Check FAQ first BEFORE calling LLM ==========
|
||||
# This avoids slow LLM calls for common questions
|
||||
import re
|
||||
clean_message = re.sub(r'<[^>]+>', '', state["current_message"])
|
||||
clean_message = ' '.join(clean_message.split())
|
||||
|
||||
faq_library = get_faq_library()
|
||||
faq_response = faq_library.find_match(clean_message)
|
||||
|
||||
if faq_response:
|
||||
logger.info(
|
||||
"FAQ matched at router level, skipping LLM classification",
|
||||
conversation_id=state["conversation_id"],
|
||||
message=clean_message[:50]
|
||||
)
|
||||
# Set to customer service intent and store FAQ response
|
||||
state["intent"] = Intent.CUSTOMER_SERVICE.value
|
||||
state["intent_confidence"] = 1.0 # High confidence for FAQ matches
|
||||
state["faq_response"] = faq_response # Store FAQ response for later use
|
||||
return state
|
||||
# ==============================================================
|
||||
|
||||
# Detect language
|
||||
detected_locale = get_cached_or_detect(state, state["current_message"])
|
||||
confidence = 0.85 # Default confidence for language detection
|
||||
|
||||
@@ -12,6 +12,8 @@ class Settings(BaseSettings):
|
||||
# ============ AI Model ============
|
||||
zhipu_api_key: str = Field(..., description="ZhipuAI API Key")
|
||||
zhipu_model: str = Field(default="glm-4", description="ZhipuAI Model name")
|
||||
enable_reasoning_mode: bool = Field(default=False, description="Enable AI reasoning/thinking mode (slower but more thoughtful)")
|
||||
reasoning_mode_for_complex: bool = Field(default=True, description="Enable reasoning mode only for complex queries")
|
||||
|
||||
# ============ Redis ============
|
||||
redis_host: str = Field(default="localhost", description="Redis host")
|
||||
|
||||
@@ -9,6 +9,7 @@ from zhipuai import ZhipuAI
|
||||
|
||||
from config import settings
|
||||
from utils.logger import get_logger
|
||||
from utils.response_cache import get_response_cache
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -31,19 +32,80 @@ class LLMResponse:
|
||||
class ZhipuLLMClient:
|
||||
"""ZhipuAI LLM Client wrapper"""
|
||||
|
||||
DEFAULT_TIMEOUT = 30 # seconds
|
||||
DEFAULT_TIMEOUT = 60 # seconds (increased from 30 for better reliability)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: Optional[str] = None,
|
||||
model: Optional[str] = None,
|
||||
timeout: Optional[int] = None
|
||||
timeout: Optional[int] = None,
|
||||
enable_reasoning: Optional[bool] = None
|
||||
):
|
||||
self.api_key = api_key or settings.zhipu_api_key
|
||||
self.model = model or settings.zhipu_model
|
||||
self.timeout = timeout or self.DEFAULT_TIMEOUT
|
||||
self.enable_reasoning = enable_reasoning if enable_reasoning is not None else settings.enable_reasoning_mode
|
||||
self._client = ZhipuAI(api_key=self.api_key)
|
||||
logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout)
|
||||
logger.info(
|
||||
"ZhipuAI client initialized",
|
||||
model=self.model,
|
||||
timeout=self.timeout,
|
||||
reasoning_mode=self.enable_reasoning
|
||||
)
|
||||
|
||||
def _should_use_reasoning(self, messages: list[dict[str, str]]) -> bool:
|
||||
"""Determine if reasoning mode should be used based on query complexity
|
||||
|
||||
Args:
|
||||
messages: List of message dictionaries
|
||||
|
||||
Returns:
|
||||
True if reasoning mode should be used
|
||||
"""
|
||||
if not self.enable_reasoning:
|
||||
return False
|
||||
|
||||
if not settings.reasoning_mode_for_complex:
|
||||
# If smart mode is disabled, use the global setting
|
||||
return self.enable_reasoning
|
||||
|
||||
# Smart mode: analyze the last user message
|
||||
last_message = ""
|
||||
for msg in reversed(messages):
|
||||
if msg.get("role") == "user":
|
||||
last_message = msg.get("content", "")
|
||||
break
|
||||
|
||||
# Simple queries that don't need reasoning
|
||||
simple_patterns = [
|
||||
"你好", "hi", "hello", "嗨",
|
||||
"谢谢", "thank", "感谢",
|
||||
"再见", "bye", "拜拜",
|
||||
"退货政策", "营业时间", "联系方式",
|
||||
"发货", "配送", "物流"
|
||||
]
|
||||
|
||||
last_message_lower = last_message.lower()
|
||||
for pattern in simple_patterns:
|
||||
if pattern in last_message_lower:
|
||||
logger.debug("Simple query detected, disabling reasoning", query=last_message[:50])
|
||||
return False
|
||||
|
||||
# Complex queries that benefit from reasoning
|
||||
complex_patterns = [
|
||||
"为什么", "how", "why", "如何",
|
||||
"推荐", "recommend", "建议",
|
||||
"比较", "compare", "区别",
|
||||
"怎么样", "如何选择"
|
||||
]
|
||||
|
||||
for pattern in complex_patterns:
|
||||
if pattern in last_message_lower:
|
||||
logger.debug("Complex query detected, enabling reasoning", query=last_message[:50])
|
||||
return True
|
||||
|
||||
# Default: disable reasoning for speed
|
||||
return False
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
@@ -51,14 +113,39 @@ class ZhipuLLMClient:
|
||||
temperature: float = 0.7,
|
||||
max_tokens: int = 2048,
|
||||
top_p: float = 0.9,
|
||||
use_cache: bool = True,
|
||||
enable_reasoning: Optional[bool] = None,
|
||||
**kwargs: Any
|
||||
) -> LLMResponse:
|
||||
"""Send chat completion request"""
|
||||
"""Send chat completion request with caching support"""
|
||||
formatted_messages = [
|
||||
{"role": msg.role, "content": msg.content}
|
||||
for msg in messages
|
||||
]
|
||||
|
||||
# Try cache first
|
||||
if use_cache:
|
||||
try:
|
||||
cache = get_response_cache()
|
||||
cached_response = await cache.get(
|
||||
model=self.model,
|
||||
messages=formatted_messages,
|
||||
temperature=temperature
|
||||
)
|
||||
if cached_response is not None:
|
||||
logger.info(
|
||||
"Returning cached response",
|
||||
model=self.model,
|
||||
response_length=len(cached_response)
|
||||
)
|
||||
return LLMResponse(
|
||||
content=cached_response,
|
||||
finish_reason="cache_hit",
|
||||
usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Cache check failed", error=str(e))
|
||||
|
||||
logger.info(
|
||||
"Sending chat request",
|
||||
model=self.model,
|
||||
@@ -66,15 +153,32 @@ class ZhipuLLMClient:
|
||||
temperature=temperature
|
||||
)
|
||||
|
||||
# Determine if reasoning mode should be used
|
||||
use_reasoning = enable_reasoning if enable_reasoning is not None else self._should_use_reasoning(formatted_messages)
|
||||
|
||||
if use_reasoning:
|
||||
logger.info("Reasoning mode enabled for this request")
|
||||
|
||||
def _make_request():
|
||||
return self._client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=formatted_messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
**kwargs
|
||||
)
|
||||
request_params = {
|
||||
"model": self.model,
|
||||
"messages": formatted_messages,
|
||||
"temperature": temperature,
|
||||
"max_tokens": max_tokens,
|
||||
"top_p": top_p,
|
||||
}
|
||||
|
||||
# Add thinking mode control
|
||||
# Format: {"thinking": {"type": "disabled"}} or {"type": "enabled"}
|
||||
if use_reasoning:
|
||||
request_params["thinking"] = {"type": "enabled"}
|
||||
logger.info("Thinking mode: enabled", request_params={"thinking": {"type": "enabled"}})
|
||||
else:
|
||||
request_params["thinking"] = {"type": "disabled"}
|
||||
logger.info("Thinking mode: disabled", request_params={"thinking": {"type": "disabled"}})
|
||||
|
||||
request_params.update(kwargs)
|
||||
return self._client.chat.completions.create(**request_params)
|
||||
|
||||
try:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
@@ -94,6 +198,19 @@ class ZhipuLLMClient:
|
||||
if not content:
|
||||
logger.warning("LLM returned empty content")
|
||||
|
||||
# Cache the response
|
||||
if use_cache and content:
|
||||
try:
|
||||
cache = get_response_cache()
|
||||
await cache.set(
|
||||
model=self.model,
|
||||
messages=formatted_messages,
|
||||
response=content,
|
||||
temperature=temperature
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to cache response", error=str(e))
|
||||
|
||||
return LLMResponse(
|
||||
content=content or "",
|
||||
finish_reason=choice.finish_reason,
|
||||
|
||||
@@ -141,32 +141,89 @@ class ChatwootClient:
|
||||
content_attributes: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
"""Send a rich message (cards, buttons, etc.)
|
||||
|
||||
|
||||
Args:
|
||||
conversation_id: Conversation ID
|
||||
content: Fallback text content
|
||||
content_type: Rich content type (cards, input_select, etc.)
|
||||
content_attributes: Rich content attributes
|
||||
|
||||
|
||||
Returns:
|
||||
Created message data
|
||||
"""
|
||||
client = await self._get_client()
|
||||
|
||||
|
||||
payload = {
|
||||
"content": content,
|
||||
"message_type": MessageType.OUTGOING.value,
|
||||
"content_type": content_type,
|
||||
"content_attributes": content_attributes
|
||||
}
|
||||
|
||||
|
||||
response = await client.post(
|
||||
f"/conversations/{conversation_id}/messages",
|
||||
json=payload
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
return response.json()
|
||||
|
||||
async def send_order_card(
|
||||
self,
|
||||
conversation_id: int,
|
||||
order_data: dict[str, Any],
|
||||
actions: list[dict[str, Any]]
|
||||
) -> dict[str, Any]:
|
||||
"""发送订单卡片消息(Markdown 文本 + 操作按钮)
|
||||
|
||||
Args:
|
||||
conversation_id: 会话 ID
|
||||
order_data: 订单数据,包含:
|
||||
- order_id: 订单号
|
||||
- status: 订单状态
|
||||
- status_text: 状态文本
|
||||
- created_at: 下单时间(可选)
|
||||
- items: 商品列表(可选)
|
||||
- total_amount: 总金额
|
||||
- shipping_fee: 运费(可选)
|
||||
- logistics: 物流信息(可选)
|
||||
- remark: 备注(可选)
|
||||
actions: 操作按钮配置列表,每个按钮包含:
|
||||
- type: "link" 或 "postback"
|
||||
- text: 按钮文字
|
||||
- uri: 链接地址(type=link 时必需)
|
||||
- payload: 回传数据(type=postback 时必需)
|
||||
|
||||
Returns:
|
||||
发送结果
|
||||
|
||||
Example:
|
||||
>>> order_data = {
|
||||
... "order_id": "123456789",
|
||||
... "status": "shipped",
|
||||
... "status_text": "已发货",
|
||||
... "total_amount": "1058.00",
|
||||
... "items": [...]
|
||||
... }
|
||||
>>> actions = [
|
||||
... {"type": "link", "text": "查看详情", "uri": "https://..."},
|
||||
... {"type": "postback", "text": "联系客服", "payload": "CONTACT_SUPPORT"}
|
||||
... ]
|
||||
>>> await chatwoot.send_order_card(123, order_data, actions)
|
||||
"""
|
||||
# 生成 Markdown 内容
|
||||
markdown_content = format_order_card_markdown(order_data)
|
||||
|
||||
# 生成按钮卡片
|
||||
buttons = create_action_buttons(actions)
|
||||
|
||||
# 发送富媒体消息
|
||||
return await self.send_rich_message(
|
||||
conversation_id=conversation_id,
|
||||
content=markdown_content,
|
||||
content_type="cards",
|
||||
content_attributes=buttons
|
||||
)
|
||||
|
||||
# ============ Conversations ============
|
||||
|
||||
@@ -342,6 +399,130 @@ class ChatwootClient:
|
||||
return data.get("payload", [])
|
||||
|
||||
|
||||
# ============ Helper Functions ============
|
||||
|
||||
def format_order_card_markdown(order_data: dict[str, Any]) -> str:
    """Render an order as a Markdown "card" string for Chatwoot.

    Args:
        order_data: Order payload with order number, status, item list,
            amounts and optional created_at / logistics / remark fields.

    Returns:
        A Markdown string ready to be used as the message body.

    Example:
        >>> order = {
        ...     "order_id": "123456789",
        ...     "status": "shipped",
        ...     "status_text": "已发货",
        ...     "total_amount": "1058.00",
        ... }
        >>> markdown = format_order_card_markdown(order)
    """
    # Emoji shown next to the status headline, keyed by machine status.
    status_emoji = {
        "pending": "⏳",
        "paid": "💰",
        "processing": "⚙️",
        "shipped": "📦",
        "delivered": "✅",
        "completed": "✅",
        "cancelled": "❌",
        "refunded": "💸",
        "failed": "⚠️",
    }

    status = order_data.get("status", "unknown")
    status_text = order_data.get("status_text", status)
    emoji = status_emoji.get(status, "📦")

    # Headline + order number come first.
    parts: list[str] = [
        f"{emoji} **订单状态:{status_text}**",
        f"📝 **订单号:** `{order_data.get('order_id', '')}`",
    ]

    # Optional order-creation timestamp.
    if order_data.get("created_at"):
        parts.append(f"📅 **下单时间:** {order_data['created_at']}")

    parts += ["", "**商品详情**"]

    # One bullet per item, with an optional image link in front of the name.
    items = order_data.get("items", [])
    if not items:
        parts.append("▫️ 无商品信息")
    else:
        for item in items:
            name = item.get("name", "未知商品")
            quantity = item.get("quantity", 1)
            price = item.get("price", "0.00")
            image_markdown = f" [图片]({item['image_url']})" if item.get("image_url") else ""
            parts.append(f"▫️{image_markdown} {name} × {quantity} ¥{price}")

    # Amount paid (shipping fee shown inline).
    parts += [
        "",
        f"💰 **实付:** ¥{order_data.get('total_amount', '0.00')} (含运费 ¥{order_data.get('shipping_fee', '0.00')})"
    ]

    # Optional logistics section.
    logistics = order_data.get("logistics")
    if logistics:
        parts += [
            "",
            "🚚 **物流信息**",
            f"承运商:{logistics.get('carrier', '未知')}",
            f"单号:{logistics.get('tracking_number', '未知')}",
            "*点击单号可复制跟踪*",
        ]

    # Optional free-text remark.
    if order_data.get("remark"):
        parts += ["", f"📋 **备注:** {order_data['remark']}"]

    return "\n".join(parts)
|
||||
|
||||
|
||||
def create_action_buttons(actions: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the Chatwoot ``content_attributes`` payload for action buttons.

    Args:
        actions: Button configurations; each entry carries ``type``
            ("link" or "postback"), ``text``, and ``uri`` (for links)
            or ``payload`` (for postbacks).

    Returns:
        Dictionary matching the Chatwoot "cards" content_attributes format.

    Example:
        >>> actions = [
        ...     {"type": "link", "text": "查看详情", "uri": "https://example.com"},
        ...     {"type": "postback", "text": "联系客服", "payload": "CONTACT_SUPPORT"}
        ... ]
        >>> buttons = create_action_buttons(actions)
    """
    # Chatwoot expects a list of card items; we wrap all buttons into a
    # single card titled "操作".
    card = {"title": "操作", "actions": actions}
    return {"items": [card]}
|
||||
|
||||
|
||||
# Global Chatwoot client instance
|
||||
chatwoot_client: Optional[ChatwootClient] = None
|
||||
|
||||
|
||||
121
agent/utils/faq_library.py
Normal file
121
agent/utils/faq_library.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Local FAQ Library for instant responses
|
||||
Common questions can be answered immediately without API calls
|
||||
"""
|
||||
import re
|
||||
from typing import Optional, Dict
|
||||
from .logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class FAQLibrary:
    """Local FAQ library for instant common question responses."""

    def __init__(self):
        """Initialize FAQ library with ONLY common greetings and social responses.

        Note: Business-related FAQs (register, order, payment, shipment, return,
        etc.) should be handled by Strapi MCP to ensure accuracy and consistency.
        This library only contains instant social responses for better UX.
        """
        # keyword -> canned reply (Chinese and English social phrases)
        self.faqs = {
            # ========== 问候类 Greetings ==========
            "你好": "你好!我是您的B2B客户服务助手,很高兴为您服务。我可以帮您处理订单查询、产品咨询、售后问题等。请问有什么可以帮到您的吗?",
            "您好": "您好!我是您的B2B客户服务助手,很高兴为您服务。我可以帮您处理订单查询、产品咨询、售后问题等。请问有什么可以帮到您的吗?",
            "hi": "Hello! I'm your B2B customer service assistant. How can I help you today?",
            "hello": "Hello! I'm here to assist you. How can I help you today?",
            "hey": "Hey there! How can I help you today?",

            # ========== 感谢类 Gratitude ==========
            "谢谢": "不客气!如果还有其他问题,随时可以问我。祝您购物愉快!",
            "感谢": "感谢您的支持!如有任何问题,随时联系我们。",
            "thank you": "You're welcome! If you have any other questions, feel free to ask. Have a great day!",
            "thanks": "You're welcome! Let me know if you need anything else.",

            # ========== 再见类 Farewell ==========
            "再见": "再见!如有需要,随时联系。祝您生活愉快!",
            "bye": "Goodbye! Feel free to reach out anytime. Have a great day!",
            "goodbye": "Goodbye! Have a wonderful day!",

            # ========== 社交礼貌类 Social Politeness ==========
            "早上好": "早上好!很高兴为您服务。请问有什么可以帮到您的吗?",
            "下午好": "下午好!很高兴为您服务。请问有什么可以帮到您的吗?",
            "晚上好": "晚上好!很高兴为您服务。请问有什么可以帮到您的吗?",
            "good morning": "Good morning! How can I assist you today?",
            "good afternoon": "Good afternoon! How can I assist you today?",
            "good evening": "Good evening! How can I assist you today?",
        }

        # Pre-compile keyword patterns used by fuzzy matching.
        self._compile_patterns()

    @staticmethod
    def _build_pattern(keyword: str) -> re.Pattern:
        """Compile the fuzzy-match pattern for one keyword.

        BUGFIX: the previous implementation compiled a bare substring
        pattern, so "hi" fired inside "this"/"shipping" and "hey" inside
        "they".  ASCII keywords are now anchored with ``\\b`` word
        boundaries.  CJK text has no spaces between words (``\\b`` would
        almost never match), so Chinese keywords keep substring search.

        Args:
            keyword: FAQ keyword to compile.

        Returns:
            Compiled case-insensitive pattern for the keyword.
        """
        escaped = re.escape(keyword)
        if re.fullmatch(r"[A-Za-z0-9 ]+", keyword):
            return re.compile(rf"\b{escaped}\b", re.IGNORECASE)
        return re.compile(escaped, re.IGNORECASE)

    def _compile_patterns(self):
        """Compile regex patterns for fuzzy FAQ matching."""
        self.patterns = [
            (self._build_pattern(keyword), response)
            for keyword, response in self.faqs.items()
        ]

    def find_match(self, query: str) -> Optional[str]:
        """Find matching FAQ response.

        Args:
            query: User query text (may contain HTML markup).

        Returns:
            Matching FAQ response or None if no match found.
        """
        # Remove HTML tags and collapse whitespace before matching.
        clean_query = re.sub(r'<[^>]+>', '', query)
        clean_query = ' '.join(clean_query.split())

        # Exact (case-insensitive) match first — single pass over the keys
        # instead of the previous membership-test-then-rescan double pass.
        lowered = clean_query.lower()
        for key, response in self.faqs.items():
            if key.lower() == lowered:
                logger.info("FAQ exact match", key=key, query=clean_query[:50])
                return response

        # Fuzzy match: first keyword pattern found anywhere in the query.
        for pattern, response in self.patterns:
            if pattern.search(clean_query):
                logger.info("FAQ fuzzy match", pattern=pattern.pattern, query=clean_query[:50])
                return response

        logger.debug("No FAQ match found", query=clean_query[:50])
        return None

    def add_faq(self, keyword: str, response: str) -> None:
        """Add or update FAQ entry.

        Args:
            keyword: Question keyword.
            response: Answer text.
        """
        self.faqs[keyword] = response
        # Keep the fuzzy-match pattern list in sync, using the same
        # word-boundary rules as the built-in entries.
        self.patterns.append((self._build_pattern(keyword), response))
        logger.info("FAQ added", keyword=keyword)

    def get_all_keywords(self) -> list[str]:
        """Get all FAQ keywords.

        Returns:
            List of FAQ keywords.
        """
        return list(self.faqs.keys())
|
||||
|
||||
|
||||
# Module-level singleton holding the shared FAQ library.
faq_library: Optional[FAQLibrary] = None


def get_faq_library() -> FAQLibrary:
    """Return the process-wide FAQ library, creating it lazily on first use."""
    global faq_library
    if faq_library is None:
        # First caller pays the (cheap) construction cost; later calls reuse it.
        faq_library = FAQLibrary()
    return faq_library
|
||||
194
agent/utils/response_cache.py
Normal file
194
agent/utils/response_cache.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
LLM Response Cache for FAQ and common queries
|
||||
"""
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Any, Optional
|
||||
from datetime import timedelta
|
||||
|
||||
from .cache import CacheManager
|
||||
from .logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ResponseCache:
    """Cache LLM responses for common queries"""

    def __init__(
        self,
        cache_manager: Optional[CacheManager] = None,
        default_ttl: int = 3600  # 1 hour default
    ):
        """Initialize response cache

        Args:
            cache_manager: Cache manager instance (None disables caching)
            default_ttl: Default TTL in seconds for cached responses
        """
        self.cache = cache_manager
        self.default_ttl = default_ttl

    def _generate_key(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> str:
        """Derive a deterministic cache key from the request parameters.

        Args:
            model: Model name
            messages: List of messages
            temperature: Temperature parameter
            **kwargs: Additional parameters (None values are ignored)

        Returns:
            Cache key string of the form ``llm_response:<model>:<hash>``
        """
        # Drop None-valued extras so "absent" and "explicitly None"
        # produce the same fingerprint.
        extras = {k: v for k, v in kwargs.items() if v is not None}
        fingerprint = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            **extras,
        }

        # Stable JSON serialisation -> short SHA-256 prefix as the key.
        serialised = json.dumps(fingerprint, sort_keys=True, ensure_ascii=False)
        digest = hashlib.sha256(serialised.encode()).hexdigest()[:16]
        return f"llm_response:{model}:{digest}"

    async def get(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> Optional[str]:
        """Get cached response if available

        Args:
            model: Model name
            messages: List of messages
            temperature: Temperature parameter
            **kwargs: Additional parameters

        Returns:
            Cached response content or None
        """
        # Caching disabled -> always a miss.
        if not self.cache:
            return None

        key = self._generate_key(model, messages, temperature, **kwargs)
        raw = await self.cache.get(key)

        if not raw:
            logger.debug("Cache miss", model=model, key=key)
            return None

        logger.info(
            "Cache hit",
            model=model,
            key=key,
            response_length=len(raw)
        )
        # Stored value is a JSON envelope; corrupt entries are treated
        # as misses rather than raised.
        try:
            return json.loads(raw).get("response")
        except json.JSONDecodeError:
            logger.warning("Invalid cached data", key=key)
            return None

    async def set(
        self,
        model: str,
        messages: list[dict[str, str]],
        response: str,
        temperature: float = 0.7,
        ttl: Optional[int] = None,
        **kwargs: Any
    ) -> None:
        """Cache LLM response

        Args:
            model: Model name
            messages: List of messages
            response: Response content to cache
            temperature: Temperature parameter
            ttl: Time-to-live in seconds (defaults to ``default_ttl``)
            **kwargs: Additional parameters
        """
        if not self.cache:
            return

        key = self._generate_key(model, messages, temperature, **kwargs)
        effective_ttl = ttl or self.default_ttl

        # Wrap the response in a small metadata envelope before storing.
        envelope = {
            "response": response,
            "model": model,
            "response_length": len(response),
            "temperature": temperature
        }
        await self.cache.set(
            key,
            json.dumps(envelope, ensure_ascii=False),
            ttl=effective_ttl
        )

        logger.info(
            "Response cached",
            model=model,
            key=key,
            response_length=len(response),
            ttl=effective_ttl
        )

    async def invalidate(self, pattern: str = "llm_response:*") -> int:
        """Invalidate cached responses matching pattern

        Args:
            pattern: Redis key pattern to match

        Returns:
            Number of keys deleted (currently always 0 — see note below)
        """
        if not self.cache:
            return 0

        # NOTE: pattern deletion needs a scan/delete operation that the
        # cache manager does not expose yet; for now we only log.
        logger.info("Cache invalidation requested", pattern=pattern)
        return 0

    def get_cache_stats(self) -> dict[str, Any]:
        """Get cache statistics

        Returns:
            Dictionary with cache stats
        """
        return {
            "enabled": self.cache is not None,
            "default_ttl": self.default_ttl
        }
|
||||
|
||||
|
||||
# Module-level singleton for the shared response cache.
response_cache: Optional[ResponseCache] = None


def get_response_cache() -> ResponseCache:
    """Return the process-wide response cache, creating it on first call."""
    global response_cache
    if response_cache is None:
        # Imported here (not at module top) to avoid an import cycle with .cache.
        from .cache import get_cache_manager
        response_cache = ResponseCache(
            cache_manager=get_cache_manager(),
            default_ttl=3600  # cache LLM answers for 1 hour
        )
    return response_cache
|
||||
Reference in New Issue
Block a user