195 lines
5.1 KiB
Python
195 lines
5.1 KiB
Python
|
|
"""
|
||
|
|
LLM Response Cache for FAQ and common queries
|
||
|
|
"""
|
||
|
|
import hashlib
|
||
|
|
import json
|
||
|
|
from typing import Any, Optional
|
||
|
|
from datetime import timedelta
|
||
|
|
|
||
|
|
from .cache import CacheManager
|
||
|
|
from .logger import get_logger
|
||
|
|
|
||
|
|
# Module-level logger for this module. Call sites below pass keyword event
# fields (model=..., key=...), so get_logger presumably returns a
# structlog-style logger — TODO confirm against .logger.
logger = get_logger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class ResponseCache:
    """Cache LLM responses for common queries.

    Degrades gracefully: when no cache manager is configured, get() always
    misses, set() is a no-op, and invalidate() deletes nothing.
    """

    def __init__(
        self,
        cache_manager: Optional[CacheManager] = None,
        default_ttl: int = 3600  # 1 hour default
    ):
        """Initialize response cache

        Args:
            cache_manager: Cache manager instance; None disables caching
            default_ttl: Default TTL in seconds for cached responses
        """
        self.cache = cache_manager
        self.default_ttl = default_ttl

    def _generate_key(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> str:
        """Generate cache key from request parameters

        Args:
            model: Model name
            messages: List of messages
            temperature: Temperature parameter
            **kwargs: Additional parameters; None values are dropped so that
                an omitted parameter and an explicit None hash identically

        Returns:
            Cache key string of the form "llm_response:<model>:<hash>"
        """
        # Create a normalized representation of the request
        cache_input = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            **{k: v for k, v in kwargs.items() if v is not None}
        }

        # Hash the input to create a short, unique key. sort_keys makes the
        # serialization deterministic regardless of kwargs ordering; the first
        # 16 hex chars (64 bits) keep keys compact with negligible collision
        # risk at FAQ-cache scale.
        cache_str = json.dumps(cache_input, sort_keys=True, ensure_ascii=False)
        cache_hash = hashlib.sha256(cache_str.encode()).hexdigest()[:16]

        return f"llm_response:{model}:{cache_hash}"

    async def get(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        **kwargs: Any
    ) -> Optional[str]:
        """Get cached response if available

        Args:
            model: Model name
            messages: List of messages
            temperature: Temperature parameter
            **kwargs: Additional parameters

        Returns:
            Cached response content or None (on miss, disabled cache, or
            corrupt cached data)
        """
        if not self.cache:
            return None

        key = self._generate_key(model, messages, temperature, **kwargs)
        cached = await self.cache.get(key)

        if cached:
            logger.info(
                "Cache hit",
                model=model,
                key=key,
                response_length=len(cached)
            )
            try:
                # Entries are stored as JSON envelopes (see set()); unwrap
                # the actual response text.
                data = json.loads(cached)
                return data.get("response")
            except json.JSONDecodeError:
                # Corrupt/legacy entry: treat as a miss rather than raising.
                logger.warning("Invalid cached data", key=key)
                return None

        logger.debug("Cache miss", model=model, key=key)
        return None

    async def set(
        self,
        model: str,
        messages: list[dict[str, str]],
        response: str,
        temperature: float = 0.7,
        ttl: Optional[int] = None,
        **kwargs: Any
    ) -> None:
        """Cache LLM response

        Args:
            model: Model name
            messages: List of messages
            response: Response content to cache
            temperature: Temperature parameter
            ttl: Time-to-live in seconds; None uses the instance default
            **kwargs: Additional parameters
        """
        if not self.cache:
            return

        key = self._generate_key(model, messages, temperature, **kwargs)
        # BUG FIX: the previous `ttl or self.default_ttl` treated an explicit
        # ttl=0 as falsy and silently replaced it with the default; only
        # substitute the default when the caller passed no ttl at all.
        ttl = self.default_ttl if ttl is None else ttl

        # Store response with metadata
        data = {
            "response": response,
            "model": model,
            "response_length": len(response),
            "temperature": temperature
        }

        await self.cache.set(
            key,
            json.dumps(data, ensure_ascii=False),
            ttl=ttl
        )

        logger.info(
            "Response cached",
            model=model,
            key=key,
            response_length=len(response),
            ttl=ttl
        )

    async def invalidate(self, pattern: str = "llm_response:*") -> int:
        """Invalidate cached responses matching pattern

        Args:
            pattern: Redis key pattern to match

        Returns:
            Number of keys deleted (currently always 0 — see note below)
        """
        if not self.cache:
            return 0

        # This would need scan/delete operation on the underlying cache
        # backend. For now, just log the request.
        logger.info("Cache invalidation requested", pattern=pattern)
        return 0

    def get_cache_stats(self) -> dict[str, Any]:
        """Get cache statistics

        Returns:
            Dictionary with cache stats: whether caching is enabled and the
            default TTL in seconds
        """
        return {
            "enabled": self.cache is not None,
            "default_ttl": self.default_ttl
        }
|
||
|
|
|
||
|
|
|
||
|
|
# Global response cache instance, lazily created by get_response_cache().
response_cache: Optional[ResponseCache] = None
|
||
|
|
|
||
|
|
|
||
|
|
def get_response_cache() -> ResponseCache:
    """Get or create global response cache instance"""
    global response_cache
    # Fast path: already initialized.
    if response_cache is not None:
        return response_cache

    # Import locally to avoid a circular import at module load time.
    from .cache import get_cache_manager

    response_cache = ResponseCache(
        cache_manager=get_cache_manager(),
        default_ttl=3600  # 1 hour
    )
    return response_cache
|