Files
assistant/agent/core/llm.py
wangliang 0f13102a02 fix: 改进错误处理和清理测试代码
## 主要修复

### 1. JSON 解析错误处理
- 修复所有 Agent 的 LLM 响应解析失败时返回原始内容的问题
- 当 JSON 解析失败时,返回友好的兜底消息而不是原始文本
- 影响文件: customer_service.py, order.py, product.py, aftersale.py

### 2. FAQ 快速路径修复
- 修复 customer_service.py 中变量定义顺序问题
- has_faq_query 在使用前未定义导致 NameError
- 添加详细的错误日志记录

### 3. Chatwoot 集成改进
- 添加响应内容调试日志
- 改进错误处理和日志记录

### 4. 订单查询优化
- 将订单列表默认返回数量从 10 条改为 5 条
- 统一 MCP 工具层和 Mall Client 层的默认值

### 5. 代码清理
- 删除所有测试代码和示例文件
- 测试文件包括: test_*.py, test_*.html, test_*.sh
- 删除测试目录: tests/, agent/tests/, agent/examples/

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-27 13:15:58 +08:00

288 lines
9.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
ZhipuAI LLM Client for B2B Shopping AI Assistant
"""
import asyncio
import concurrent.futures
from dataclasses import dataclass
from typing import Any, Optional

from zhipuai import ZhipuAI

from config import settings
from utils.logger import get_logger
from utils.response_cache import get_response_cache
logger = get_logger(__name__)
@dataclass
class Message:
    """A single chat message sent to or received from the LLM."""
    role: str  # one of: "system", "user", "assistant"
    content: str  # plain-text body of the message
@dataclass
class LLMResponse:
    """Normalized result of a chat completion call."""
    content: str  # assistant reply text ("" when the model returned no content)
    finish_reason: str  # SDK finish reason, or "cache_hit" when served from the cache
    usage: dict[str, int]  # token counts: prompt_tokens / completion_tokens / total_tokens
class ZhipuLLMClient:
    """Wrapper around the ZhipuAI chat-completion SDK.

    Adds on top of the raw client:
      * best-effort response caching (via ``get_response_cache``),
      * explicit per-request "thinking" (reasoning) mode control,
      * a hard per-request timeout in :meth:`chat`,
      * structured logging of requests and responses.
    """

    DEFAULT_TIMEOUT = 60  # seconds (increased from 30 for better reliability)

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[int] = None,
        enable_reasoning: Optional[bool] = None
    ):
        """Initialize the client, falling back to ``settings`` for defaults.

        Args:
            api_key: ZhipuAI API key; defaults to ``settings.zhipu_api_key``.
            model: Model identifier; defaults to ``settings.zhipu_model``.
            timeout: Per-request timeout in seconds; defaults to
                ``DEFAULT_TIMEOUT``. NOTE(review): an explicit 0 also falls
                back to the default because of the ``or`` — confirm intended.
            enable_reasoning: Overrides ``settings.enable_reasoning_mode``
                when not ``None``.
        """
        self.api_key = api_key or settings.zhipu_api_key
        self.model = model or settings.zhipu_model
        self.timeout = timeout or self.DEFAULT_TIMEOUT
        self.enable_reasoning = (
            enable_reasoning if enable_reasoning is not None
            else settings.enable_reasoning_mode
        )
        self._client = ZhipuAI(api_key=self.api_key)
        logger.info(
            "ZhipuAI client initialized",
            model=self.model,
            timeout=self.timeout,
            reasoning_mode=self.enable_reasoning
        )

    def _should_use_reasoning(self, messages: list[dict[str, str]]) -> bool:
        """Determine if reasoning mode should be used based on query complexity.

        Args:
            messages: List of message dictionaries (``role``/``content`` keys).

        Returns:
            True if reasoning mode should be used.
        """
        if not self.enable_reasoning:
            return False
        if not settings.reasoning_mode_for_complex:
            # Smart mode disabled: follow the global setting unconditionally.
            return self.enable_reasoning
        # Smart mode: analyze only the most recent user message.
        last_message = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                last_message = msg.get("content", "")
                break
        # Greeting / FAQ-style keywords that never need reasoning.
        # BUG FIX: this list previously contained an empty string "";
        # since `"" in s` is True for every string, *every* query matched a
        # "simple" pattern and reasoning could never be enabled.
        # NOTE(review): plain substring matching is loose (e.g. "hi" matches
        # inside longer words) — confirm that is acceptable.
        simple_patterns = [
            "你好", "hi", "hello",
            "谢谢", "thank", "感谢",
            "再见", "bye", "拜拜",
            "退货政策", "营业时间", "联系方式",
            "发货", "配送", "物流"
        ]
        last_message_lower = last_message.lower()
        for pattern in simple_patterns:
            if pattern in last_message_lower:
                logger.debug("Simple query detected, disabling reasoning", query=last_message[:50])
                return False
        # Open-ended / comparative queries that benefit from reasoning.
        complex_patterns = [
            "为什么", "how", "why", "如何",
            "推荐", "recommend", "建议",
            "比较", "compare", "区别",
            "怎么样", "如何选择"
        ]
        for pattern in complex_patterns:
            if pattern in last_message_lower:
                logger.debug("Complex query detected, enabling reasoning", query=last_message[:50])
                return True
        # Default: disable reasoning for speed.
        return False

    async def chat(
        self,
        messages: list[Message],
        temperature: float = 0.7,
        max_tokens: int = 2048,
        top_p: float = 0.9,
        use_cache: bool = True,
        enable_reasoning: Optional[bool] = None,
        **kwargs: Any
    ) -> LLMResponse:
        """Send a chat completion request with caching support.

        Args:
            messages: Conversation history as ``Message`` objects.
            temperature: Sampling temperature (also part of the cache key).
            max_tokens: Maximum completion tokens.
            top_p: Nucleus-sampling parameter.
            use_cache: Look up / store the response in the response cache.
            enable_reasoning: Currently ignored — thinking mode is
                force-disabled below; kept for interface compatibility.
            **kwargs: Extra parameters forwarded verbatim to the SDK.

        Returns:
            An ``LLMResponse``; ``finish_reason`` is ``"cache_hit"`` when the
            answer was served from cache (usage counters are then all zero).

        Raises:
            TimeoutError: If the request exceeds ``self.timeout`` seconds.
        """
        formatted_messages = [
            {"role": msg.role, "content": msg.content}
            for msg in messages
        ]
        # Try the cache first; cache failures are non-fatal (best effort).
        if use_cache:
            try:
                cache = get_response_cache()
                cached_response = await cache.get(
                    model=self.model,
                    messages=formatted_messages,
                    temperature=temperature
                )
                if cached_response is not None:
                    logger.info(
                        "Returning cached response",
                        model=self.model,
                        response_length=len(cached_response)
                    )
                    return LLMResponse(
                        content=cached_response,
                        finish_reason="cache_hit",
                        usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
                    )
            except Exception as e:
                logger.warning("Cache check failed", error=str(e))
        logger.info(
            "Sending chat request",
            model=self.model,
            message_count=len(messages),
            temperature=temperature
        )
        # Force-disable deep-thinking mode to improve response latency (2026-01-26).
        use_reasoning = False  # Override all settings to disable thinking mode
        if use_reasoning:
            logger.info("Reasoning mode enabled for this request")

        def _make_request():
            # Runs in a worker thread: the ZhipuAI SDK call is blocking.
            request_params = {
                "model": self.model,
                "messages": formatted_messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
                "top_p": top_p,
            }
            # Thinking-mode control.
            # Format: {"thinking": {"type": "disabled"}} or {"type": "enabled"}
            if use_reasoning:
                request_params["thinking"] = {"type": "enabled"}
                logger.info("Thinking mode: enabled", request_params={"thinking": {"type": "enabled"}})
            else:
                request_params["thinking"] = {"type": "disabled"}
                logger.info("Thinking mode: disabled", request_params={"thinking": {"type": "disabled"}})
            request_params.update(kwargs)
            return self._client.chat.completions.create(**request_params)

        try:
            # BUG FIX: previously this async method called
            # ``future.result(timeout=...)``, which blocks the event loop for
            # up to ``timeout`` seconds; worse, exiting the ThreadPoolExecutor
            # ``with`` block then joined the worker thread, so a "timed-out"
            # request still stalled until the SDK call finished. Awaiting the
            # call through the default executor keeps the loop responsive and
            # makes the timeout effective. (The worker thread itself still
            # cannot be cancelled — same as before.)
            loop = asyncio.get_running_loop()
            response = await asyncio.wait_for(
                loop.run_in_executor(None, _make_request),
                timeout=self.timeout
            )
            choice = response.choices[0]
            content = choice.message.content
            logger.info(
                "Chat response received",
                finish_reason=choice.finish_reason,
                content_length=len(content) if content else 0,
                usage=response.usage.__dict__ if hasattr(response, 'usage') else {}
            )
            if not content:
                logger.warning("LLM returned empty content")
            # Cache the response (best effort; never fail the request on it).
            if use_cache and content:
                try:
                    cache = get_response_cache()
                    await cache.set(
                        model=self.model,
                        messages=formatted_messages,
                        response=content,
                        temperature=temperature
                    )
                except Exception as e:
                    logger.warning("Failed to cache response", error=str(e))
            return LLMResponse(
                content=content or "",
                finish_reason=choice.finish_reason,
                usage={
                    "prompt_tokens": response.usage.prompt_tokens,
                    "completion_tokens": response.usage.completion_tokens,
                    "total_tokens": response.usage.total_tokens
                }
            )
        except asyncio.TimeoutError:
            # Re-raise as the builtin TimeoutError to preserve the original
            # exception contract for callers.
            logger.error("Chat request timed out", timeout=self.timeout)
            raise TimeoutError(f"Request timed out after {self.timeout} seconds")
        except Exception as e:
            logger.error("Chat request failed", error=str(e))
            raise

    async def chat_with_tools(
        self,
        messages: list[Message],
        tools: list[dict[str, Any]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> tuple[LLMResponse, None]:
        """Send a chat completion request with tool definitions attached.

        Args:
            messages: Conversation history as ``Message`` objects.
            tools: Tool/function schemas in the SDK's expected format.
            temperature: Sampling temperature.
            **kwargs: Extra parameters forwarded verbatim to the SDK.

        Returns:
            ``(LLMResponse, None)``. NOTE(review): the second element is
            always ``None`` — ``choice.message.tool_calls`` is never
            extracted, so callers cannot observe tool invocations. Preserved
            as-is for backward compatibility; confirm whether tool-call
            extraction was intended.
        """
        formatted_messages = [
            {"role": msg.role, "content": msg.content}
            for msg in messages
        ]
        logger.info(
            "Sending chat request with tools",
            model=self.model,
            tool_count=len(tools)
        )

        def _make_request():
            # Runs in a worker thread: the SDK call is blocking.
            return self._client.chat.completions.create(
                model=self.model,
                messages=formatted_messages,
                tools=tools,
                temperature=temperature,
                **kwargs
            )

        try:
            # BUG FIX: the SDK call is synchronous; calling it directly in an
            # async method blocked the event loop for the whole request.
            # Run it in the default executor instead (consistent with chat()).
            # Unlike chat(), no timeout was applied here originally; that
            # behavior is kept.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, _make_request)
            choice = response.choices[0]
            content = choice.message.content or ""
            return LLMResponse(
                content=content,
                finish_reason=choice.finish_reason,
                usage={
                    "prompt_tokens": response.usage.prompt_tokens,
                    "completion_tokens": response.usage.completion_tokens,
                    "total_tokens": response.usage.total_tokens
                }
            ), None
        except Exception as e:
            logger.error("Chat with tools request failed", error=str(e))
            raise
# Process-wide singleton instance, created lazily on first use.
llm_client: Optional[ZhipuLLMClient] = None


def get_llm_client() -> ZhipuLLMClient:
    """Return the shared :class:`ZhipuLLMClient`, constructing it lazily.

    All callers share one client so configuration and the underlying SDK
    connection are created exactly once per process.
    """
    global llm_client
    if llm_client is not None:
        return llm_client
    llm_client = ZhipuLLMClient()
    return llm_client