diff --git a/agent/agents/customer_service.py b/agent/agents/customer_service.py index 016d515..122afa1 100644 --- a/agent/agents/customer_service.py +++ b/agent/agents/customer_service.py @@ -8,6 +8,7 @@ from core.state import AgentState, ConversationState, add_tool_call, set_respons from core.llm import get_llm_client, Message from prompts import get_prompt from utils.logger import get_logger +from utils.faq_library import get_faq_library logger = get_logger(__name__) @@ -36,6 +37,32 @@ async def customer_service_agent(state: AgentState) -> AgentState: if state["tool_results"]: return await _generate_response_from_results(state) + # ========== FAST PATH: Check if FAQ was already matched at router ========== + # Router already checked FAQ and stored response if found + if "faq_response" in state and state["faq_response"]: + logger.info( + "Using FAQ response from router", + conversation_id=state["conversation_id"], + response_length=len(state["faq_response"]) + ) + return set_response(state, state["faq_response"]) + # ========================================================================= + + # ========== FAST PATH: Check local FAQ library first (backup) ========== + # This provides instant response for common questions without API calls + # This is a fallback in case FAQ wasn't matched at router level + faq_library = get_faq_library() + faq_response = faq_library.find_match(state["current_message"]) + + if faq_response: + logger.info( + "FAQ match found, returning instant response", + conversation_id=state["conversation_id"], + response_length=len(faq_response) + ) + return set_response(state, faq_response) + # ============================================================ + # Get detected language locale = state.get("detected_language", "en") diff --git a/agent/agents/router.py b/agent/agents/router.py index 921bbf0..f469760 100644 --- a/agent/agents/router.py +++ b/agent/agents/router.py @@ -9,6 +9,7 @@ from core.llm import get_llm_client, Message from 
core.language_detector import get_cached_or_detect from prompts import get_prompt from utils.logger import get_logger +from utils.faq_library import get_faq_library logger = get_logger(__name__) @@ -34,6 +35,28 @@ async def classify_intent(state: AgentState) -> AgentState: state["state"] = ConversationState.CLASSIFYING.value state["step_count"] += 1 + # ========== FAST PATH: Check FAQ first BEFORE calling LLM ========== + # This avoids slow LLM calls for common questions + import re + clean_message = re.sub(r'<[^>]+>', '', state["current_message"]) + clean_message = ' '.join(clean_message.split()) + + faq_library = get_faq_library() + faq_response = faq_library.find_match(clean_message) + + if faq_response: + logger.info( + "FAQ matched at router level, skipping LLM classification", + conversation_id=state["conversation_id"], + message=clean_message[:50] + ) + # Set to customer service intent and store FAQ response + state["intent"] = Intent.CUSTOMER_SERVICE.value + state["intent_confidence"] = 1.0 # High confidence for FAQ matches + state["faq_response"] = faq_response # Store FAQ response for later use + return state + # ============================================================== + # Detect language detected_locale = get_cached_or_detect(state, state["current_message"]) confidence = 0.85 # Default confidence for language detection diff --git a/agent/config.py b/agent/config.py index c74ca7c..176db65 100644 --- a/agent/config.py +++ b/agent/config.py @@ -12,6 +12,8 @@ class Settings(BaseSettings): # ============ AI Model ============ zhipu_api_key: str = Field(..., description="ZhipuAI API Key") zhipu_model: str = Field(default="glm-4", description="ZhipuAI Model name") + enable_reasoning_mode: bool = Field(default=False, description="Enable AI reasoning/thinking mode (slower but more thoughtful)") + reasoning_mode_for_complex: bool = Field(default=True, description="Enable reasoning mode only for complex queries") # ============ Redis ============ redis_host: 
str = Field(default="localhost", description="Redis host") diff --git a/agent/core/llm.py b/agent/core/llm.py index 0681573..f815a6c 100644 --- a/agent/core/llm.py +++ b/agent/core/llm.py @@ -9,6 +9,7 @@ from zhipuai import ZhipuAI from config import settings from utils.logger import get_logger +from utils.response_cache import get_response_cache logger = get_logger(__name__) @@ -31,19 +32,80 @@ class LLMResponse: class ZhipuLLMClient: """ZhipuAI LLM Client wrapper""" - DEFAULT_TIMEOUT = 30 # seconds + DEFAULT_TIMEOUT = 60 # seconds (increased from 30 for better reliability) def __init__( self, api_key: Optional[str] = None, model: Optional[str] = None, - timeout: Optional[int] = None + timeout: Optional[int] = None, + enable_reasoning: Optional[bool] = None ): self.api_key = api_key or settings.zhipu_api_key self.model = model or settings.zhipu_model self.timeout = timeout or self.DEFAULT_TIMEOUT + self.enable_reasoning = enable_reasoning if enable_reasoning is not None else settings.enable_reasoning_mode self._client = ZhipuAI(api_key=self.api_key) - logger.info("ZhipuAI client initialized", model=self.model, timeout=self.timeout) + logger.info( + "ZhipuAI client initialized", + model=self.model, + timeout=self.timeout, + reasoning_mode=self.enable_reasoning + ) + + def _should_use_reasoning(self, messages: list[dict[str, str]]) -> bool: + """Determine if reasoning mode should be used based on query complexity + + Args: + messages: List of message dictionaries + + Returns: + True if reasoning mode should be used + """ + if not self.enable_reasoning: + return False + + if not settings.reasoning_mode_for_complex: + # If smart mode is disabled, use the global setting + return self.enable_reasoning + + # Smart mode: analyze the last user message + last_message = "" + for msg in reversed(messages): + if msg.get("role") == "user": + last_message = msg.get("content", "") + break + + # Simple queries that don't need reasoning + simple_patterns = [ + "你好", "hi", "hello", 
"嗨", + "谢谢", "thank", "感谢", + "再见", "bye", "拜拜", + "退货政策", "营业时间", "联系方式", + "发货", "配送", "物流" + ] + + last_message_lower = last_message.lower() + for pattern in simple_patterns: + if pattern in last_message_lower: + logger.debug("Simple query detected, disabling reasoning", query=last_message[:50]) + return False + + # Complex queries that benefit from reasoning + complex_patterns = [ + "为什么", "how", "why", "如何", + "推荐", "recommend", "建议", + "比较", "compare", "区别", + "怎么样", "如何选择" + ] + + for pattern in complex_patterns: + if pattern in last_message_lower: + logger.debug("Complex query detected, enabling reasoning", query=last_message[:50]) + return True + + # Default: disable reasoning for speed + return False async def chat( self, @@ -51,14 +113,39 @@ class ZhipuLLMClient: temperature: float = 0.7, max_tokens: int = 2048, top_p: float = 0.9, + use_cache: bool = True, + enable_reasoning: Optional[bool] = None, **kwargs: Any ) -> LLMResponse: - """Send chat completion request""" + """Send chat completion request with caching support""" formatted_messages = [ {"role": msg.role, "content": msg.content} for msg in messages ] + # Try cache first + if use_cache: + try: + cache = get_response_cache() + cached_response = await cache.get( + model=self.model, + messages=formatted_messages, + temperature=temperature + ) + if cached_response is not None: + logger.info( + "Returning cached response", + model=self.model, + response_length=len(cached_response) + ) + return LLMResponse( + content=cached_response, + finish_reason="cache_hit", + usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + ) + except Exception as e: + logger.warning("Cache check failed", error=str(e)) + logger.info( "Sending chat request", model=self.model, @@ -66,15 +153,32 @@ class ZhipuLLMClient: temperature=temperature ) + # Determine if reasoning mode should be used + use_reasoning = enable_reasoning if enable_reasoning is not None else self._should_use_reasoning(formatted_messages) + 
+ if use_reasoning: + logger.info("Reasoning mode enabled for this request") + def _make_request(): - return self._client.chat.completions.create( - model=self.model, - messages=formatted_messages, - temperature=temperature, - max_tokens=max_tokens, - top_p=top_p, - **kwargs - ) + request_params = { + "model": self.model, + "messages": formatted_messages, + "temperature": temperature, + "max_tokens": max_tokens, + "top_p": top_p, + } + + # Add thinking mode control + # Format: {"thinking": {"type": "disabled"}} or {"type": "enabled"} + if use_reasoning: + request_params["thinking"] = {"type": "enabled"} + logger.info("Thinking mode: enabled", request_params={"thinking": {"type": "enabled"}}) + else: + request_params["thinking"] = {"type": "disabled"} + logger.info("Thinking mode: disabled", request_params={"thinking": {"type": "disabled"}}) + + request_params.update(kwargs) + return self._client.chat.completions.create(**request_params) try: with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: @@ -94,6 +198,19 @@ class ZhipuLLMClient: if not content: logger.warning("LLM returned empty content") + # Cache the response + if use_cache and content: + try: + cache = get_response_cache() + await cache.set( + model=self.model, + messages=formatted_messages, + response=content, + temperature=temperature + ) + except Exception as e: + logger.warning("Failed to cache response", error=str(e)) + return LLMResponse( content=content or "", finish_reason=choice.finish_reason, diff --git a/agent/integrations/chatwoot.py b/agent/integrations/chatwoot.py index 2742e3e..a3874eb 100644 --- a/agent/integrations/chatwoot.py +++ b/agent/integrations/chatwoot.py @@ -141,32 +141,89 @@ class ChatwootClient: content_attributes: dict[str, Any] ) -> dict[str, Any]: """Send a rich message (cards, buttons, etc.) - + Args: conversation_id: Conversation ID content: Fallback text content content_type: Rich content type (cards, input_select, etc.) 
content_attributes: Rich content attributes - + Returns: Created message data """ client = await self._get_client() - + payload = { "content": content, "message_type": MessageType.OUTGOING.value, "content_type": content_type, "content_attributes": content_attributes } - + response = await client.post( f"/conversations/{conversation_id}/messages", json=payload ) response.raise_for_status() - + return response.json() + + async def send_order_card( + self, + conversation_id: int, + order_data: dict[str, Any], + actions: list[dict[str, Any]] + ) -> dict[str, Any]: + """发送订单卡片消息(Markdown 文本 + 操作按钮) + + Args: + conversation_id: 会话 ID + order_data: 订单数据,包含: + - order_id: 订单号 + - status: 订单状态 + - status_text: 状态文本 + - created_at: 下单时间(可选) + - items: 商品列表(可选) + - total_amount: 总金额 + - shipping_fee: 运费(可选) + - logistics: 物流信息(可选) + - remark: 备注(可选) + actions: 操作按钮配置列表,每个按钮包含: + - type: "link" 或 "postback" + - text: 按钮文字 + - uri: 链接地址(type=link 时必需) + - payload: 回传数据(type=postback 时必需) + + Returns: + 发送结果 + + Example: + >>> order_data = { + ... "order_id": "123456789", + ... "status": "shipped", + ... "status_text": "已发货", + ... "total_amount": "1058.00", + ... "items": [...] + ... } + >>> actions = [ + ... {"type": "link", "text": "查看详情", "uri": "https://..."}, + ... {"type": "postback", "text": "联系客服", "payload": "CONTACT_SUPPORT"} + ... 
] + >>> await chatwoot.send_order_card(123, order_data, actions) + """ + # 生成 Markdown 内容 + markdown_content = format_order_card_markdown(order_data) + + # 生成按钮卡片 + buttons = create_action_buttons(actions) + + # 发送富媒体消息 + return await self.send_rich_message( + conversation_id=conversation_id, + content=markdown_content, + content_type="cards", + content_attributes=buttons + ) # ============ Conversations ============ @@ -342,6 +399,130 @@ class ChatwootClient: return data.get("payload", []) +# ============ Helper Functions ============ + +def format_order_card_markdown(order_data: dict[str, Any]) -> str: + """格式化订单信息为 Markdown 卡片 + + Args: + order_data: 订单数据,包含订单号、状态、商品、金额、物流等信息 + + Returns: + 格式化的 Markdown 字符串 + + Example: + >>> order = { + ... "order_id": "123456789", + ... "status": "shipped", + ... "status_text": "已发货", + ... "created_at": "2023-10-27 14:30", + ... "items": [...], + ... "total_amount": "1058.00", + ... "shipping_fee": "0.00", + ... "logistics": {...} + ... } + >>> markdown = format_order_card_markdown(order) + """ + # 订单状态 emoji 映射 + status_emoji = { + "pending": "⏳", + "paid": "💰", + "processing": "⚙️", + "shipped": "📦", + "delivered": "✅", + "completed": "✅", + "cancelled": "❌", + "refunded": "💸", + "failed": "⚠️", + } + + # 获取状态文本和 emoji + status = order_data.get("status", "unknown") + status_text = order_data.get("status_text", status) + emoji = status_emoji.get(status, "📦") + + lines = [ + f"{emoji} **订单状态:{status_text}**", + f"📝 **订单号:** `{order_data.get('order_id', '')}`", + ] + + # 添加下单时间(如果有) + if order_data.get("created_at"): + lines.append(f"📅 **下单时间:** {order_data['created_at']}") + + lines.append("") # 空行 + lines.append("**商品详情**") + + # 添加商品列表 + items = order_data.get("items", []) + if items: + for item in items: + name = item.get("name", "未知商品") + quantity = item.get("quantity", 1) + price = item.get("price", "0.00") + # 可选:添加图片链接 + image_markdown = "" + if item.get("image_url"): + image_markdown = f" [图片]({item['image_url']})" + 
lines.append(f"▫️{image_markdown} {name} × {quantity} ¥{price}") + else: + lines.append("▫️ 无商品信息") + + # 添加金额信息 + lines.extend([ + "", + f"💰 **实付:** ¥{order_data.get('total_amount', '0.00')} (含运费 ¥{order_data.get('shipping_fee', '0.00')})" + ]) + + # 添加物流信息(如果有) + logistics = order_data.get("logistics") + if logistics: + lines.extend([ + "", + "🚚 **物流信息**", + f"承运商:{logistics.get('carrier', '未知')}", + f"单号:{logistics.get('tracking_number', '未知')}", + "*点击单号可复制跟踪*" + ]) + + # 添加备注(如果有) + if order_data.get("remark"): + lines.extend([ + "", + f"📋 **备注:** {order_data['remark']}" + ]) + + return "\n".join(lines) + + +def create_action_buttons(actions: list[dict[str, Any]]) -> dict[str, Any]: + """创建 Chatwoot 操作按钮卡片 + + Args: + actions: 按钮配置列表,每个按钮包含: + - type: "link" 或 "postback" + - text: 按钮文字 + - uri: 链接地址(type=link 时) + - payload: 回传数据(type=postback 时) + + Returns: + 符合 Chatwoot content_attributes 格式的字典 + + Example: + >>> actions = [ + ... {"type": "link", "text": "查看详情", "uri": "https://example.com"}, + ... {"type": "postback", "text": "联系客服", "payload": "CONTACT_SUPPORT"} + ... ] + >>> buttons = create_action_buttons(actions) + """ + return { + "items": [{ + "title": "操作", + "actions": actions + }] + } + + # Global Chatwoot client instance chatwoot_client: Optional[ChatwootClient] = None diff --git a/agent/utils/faq_library.py b/agent/utils/faq_library.py new file mode 100644 index 0000000..8981ff2 --- /dev/null +++ b/agent/utils/faq_library.py @@ -0,0 +1,121 @@ +""" +Local FAQ Library for instant responses +Common questions can be answered immediately without API calls +""" +import re +from typing import Optional, Dict +from .logger import get_logger + +logger = get_logger(__name__) + + +class FAQLibrary: + """Local FAQ library for instant common question responses""" + + def __init__(self): + """Initialize FAQ library with ONLY common greetings and social responses + + Note: Business-related FAQs (register, order, payment, shipment, return, etc.) 
+ should be handled by Strapi MCP to ensure accuracy and consistency. + This library only contains instant social responses for better UX. + """ + self.faqs = { + # ========== 问候类 Greetings ========== + "你好": "你好!我是您的B2B客户服务助手,很高兴为您服务。我可以帮您处理订单查询、产品咨询、售后问题等。请问有什么可以帮到您的吗?", + "您好": "您好!我是您的B2B客户服务助手,很高兴为您服务。我可以帮您处理订单查询、产品咨询、售后问题等。请问有什么可以帮到您的吗?", + "hi": "Hello! I'm your B2B customer service assistant. How can I help you today?", + "hello": "Hello! I'm here to assist you. How can I help you today?", + "hey": "Hey there! How can I help you today?", + + # ========== 感谢类 Gratitude ========== + "谢谢": "不客气!如果还有其他问题,随时可以问我。祝您购物愉快!", + "感谢": "感谢您的支持!如有任何问题,随时联系我们。", + "thank you": "You're welcome! If you have any other questions, feel free to ask. Have a great day!", + "thanks": "You're welcome! Let me know if you need anything else.", + + # ========== 再见类 Farewell ========== + "再见": "再见!如有需要,随时联系。祝您生活愉快!", + "bye": "Goodbye! Feel free to reach out anytime. Have a great day!", + "goodbye": "Goodbye! Have a wonderful day!", + + # ========== 社交礼貌类 Social Politeness ========== + "早上好": "早上好!很高兴为您服务。请问有什么可以帮到您的吗?", + "下午好": "下午好!很高兴为您服务。请问有什么可以帮到您的吗?", + "晚上好": "晚上好!很高兴为您服务。请问有什么可以帮到您的吗?", + "good morning": "Good morning! How can I assist you today?", + "good afternoon": "Good afternoon! How can I assist you today?", + "good evening": "Good evening! 
How can I assist you today?", + } + + # Compile regex patterns for fuzzy matching + self._compile_patterns() + + def _compile_patterns(self): + """Compile regex patterns for fuzzy FAQ matching""" + self.patterns = [] + for keyword, response in self.faqs.items(): + # Case-insensitive substring match — NOTE(review): no word boundaries despite "fuzzy"; short keywords like "hi"/"hey" also match inside longer words ("this", "shipping") — consider wrapping ASCII keywords with \b + pattern = re.compile(re.escape(keyword), re.IGNORECASE) + self.patterns.append((pattern, response)) + + def find_match(self, query: str) -> Optional[str]: + """Find matching FAQ response + + Args: + query: User query text + + Returns: + Matching FAQ response or None if no match found + """ + # Remove HTML tags and extra whitespace + clean_query = re.sub(r'<[^>]+>', '', query) + clean_query = ' '.join(clean_query.split()) + + # Try exact match first + if clean_query.lower() in (k.lower() for k in self.faqs.keys()): + for key, response in self.faqs.items(): + if key.lower() == clean_query.lower(): + logger.info("FAQ exact match", key=key, query=clean_query[:50]) + return response + + # Try fuzzy match (substring containment — see NOTE in _compile_patterns) + for pattern, response in self.patterns: + if pattern.search(clean_query): + logger.info("FAQ fuzzy match", pattern=pattern.pattern, query=clean_query[:50]) + return response + + # No match found + logger.debug("No FAQ match found", query=clean_query[:50]) + return None + + def add_faq(self, keyword: str, response: str) -> None: + """Add or update FAQ entry + + Args: + keyword: Question keyword + response: Answer text + """ + self.faqs[keyword] = response + pattern = re.compile(re.escape(keyword), re.IGNORECASE) + self.patterns.append((pattern, response)) + logger.info("FAQ added", keyword=keyword) + + def get_all_keywords(self) -> list[str]: + """Get all FAQ keywords + + Returns: + List of FAQ keywords + """ + return list(self.faqs.keys()) + + +# Global FAQ library instance +faq_library: Optional[FAQLibrary] = None + + +def get_faq_library() -> FAQLibrary: + """Get or create global FAQ library instance""" + global faq_library + if 
faq_library is None: + faq_library = FAQLibrary() + return faq_library diff --git a/agent/utils/response_cache.py b/agent/utils/response_cache.py new file mode 100644 index 0000000..b663e5c --- /dev/null +++ b/agent/utils/response_cache.py @@ -0,0 +1,194 @@ +""" +LLM Response Cache for FAQ and common queries +""" +import hashlib +import json +from typing import Any, Optional +from datetime import timedelta + +from .cache import CacheManager +from .logger import get_logger + +logger = get_logger(__name__) + + +class ResponseCache: + """Cache LLM responses for common queries""" + + def __init__( + self, + cache_manager: Optional[CacheManager] = None, + default_ttl: int = 3600 # 1 hour default + ): + """Initialize response cache + + Args: + cache_manager: Cache manager instance + default_ttl: Default TTL in seconds for cached responses + """ + self.cache = cache_manager + self.default_ttl = default_ttl + + def _generate_key( + self, + model: str, + messages: list[dict[str, str]], + temperature: float = 0.7, + **kwargs: Any + ) -> str: + """Generate cache key from request parameters + + Args: + model: Model name + messages: List of messages + temperature: Temperature parameter + **kwargs: Additional parameters + + Returns: + Cache key string + """ + # Create a normalized representation of the request + cache_input = { + "model": model, + "messages": messages, + "temperature": temperature, + **{k: v for k, v in kwargs.items() if v is not None} + } + + # Hash the input to create a short, unique key + cache_str = json.dumps(cache_input, sort_keys=True, ensure_ascii=False) + cache_hash = hashlib.sha256(cache_str.encode()).hexdigest()[:16] + + return f"llm_response:{model}:{cache_hash}" + + async def get( + self, + model: str, + messages: list[dict[str, str]], + temperature: float = 0.7, + **kwargs: Any + ) -> Optional[str]: + """Get cached response if available + + Args: + model: Model name + messages: List of messages + temperature: Temperature parameter + **kwargs: 
Additional parameters + + Returns: + Cached response content or None + """ + if not self.cache: + return None + + key = self._generate_key(model, messages, temperature, **kwargs) + cached = await self.cache.get(key) + + if cached: + logger.info( + "Cache hit", + model=model, + key=key, + response_length=len(cached) + ) + try: + data = json.loads(cached) + return data.get("response") + except json.JSONDecodeError: + logger.warning("Invalid cached data", key=key) + return None + + logger.debug("Cache miss", model=model, key=key) + return None + + async def set( + self, + model: str, + messages: list[dict[str, str]], + response: str, + temperature: float = 0.7, + ttl: Optional[int] = None, + **kwargs: Any + ) -> None: + """Cache LLM response + + Args: + model: Model name + messages: List of messages + response: Response content to cache + temperature: Temperature parameter + ttl: Time-to-live in seconds + **kwargs: Additional parameters + """ + if not self.cache: + return + + key = self._generate_key(model, messages, temperature, **kwargs) + ttl = ttl or self.default_ttl + + # Store response with metadata + data = { + "response": response, + "model": model, + "response_length": len(response), + "temperature": temperature + } + + await self.cache.set( + key, + json.dumps(data, ensure_ascii=False), + ttl=ttl + ) + + logger.info( + "Response cached", + model=model, + key=key, + response_length=len(response), + ttl=ttl + ) + + async def invalidate(self, pattern: str = "llm_response:*") -> int: + """Invalidate cached responses matching pattern + + Args: + pattern: Redis key pattern to match + + Returns: + Number of keys deleted + """ + if not self.cache: + return 0 + + # This would need scan/delete operation + # For now, just log + logger.info("Cache invalidation requested", pattern=pattern) + return 0 + + def get_cache_stats(self) -> dict[str, Any]: + """Get cache statistics + + Returns: + Dictionary with cache stats + """ + return { + "enabled": self.cache is not None, + 
"default_ttl": self.default_ttl + } + + +# Global response cache instance +response_cache: Optional[ResponseCache] = None + + +def get_response_cache() -> ResponseCache: + """Get or create global response cache instance""" + global response_cache + if response_cache is None: + from .cache import get_cache_manager + response_cache = ResponseCache( + cache_manager=get_cache_manager(), + default_ttl=3600 # 1 hour + ) + return response_cache diff --git a/docker-compose.yml b/docker-compose.yml index 4698adc..09ab3cd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,7 +64,8 @@ services: chatwoot: image: chatwoot/chatwoot:latest container_name: ai_chatwoot - command: bundle exec rails s -p 3000 -b 0.0.0.0 + # 启动前清理 PID 文件,避免重启循环 + command: sh -c "rm -f /app/tmp/pids/server.pid && bundle exec rails s -p 3000 -b 0.0.0.0" environment: RAILS_ENV: production SECRET_KEY_BASE: ${CHATWOOT_SECRET_KEY_BASE}