""" Language Detection Module Automatically detects user message language and maps to Strapi-supported locales. """ from typing import Optional from langdetect import detect, LangDetectException from utils.logger import get_logger logger = get_logger(__name__) # Strapi-supported locales SUPPORTED_LOCALES = ["en", "nl", "de", "es", "fr", "it", "tr"] # Language code to locale mapping LOCALE_MAP = { "en": "en", # English "nl": "nl", # Dutch "de": "de", # German "es": "es", # Spanish "fr": "fr", # French "it": "it", # Italian "tr": "tr", # Turkish # Fallback mappings for unsupported languages "af": "en", # Afrikaans -> English "no": "en", # Norwegian -> English "sv": "en", # Swedish -> English "da": "en", # Danish -> English "pl": "en", # Polish -> English "pt": "en", # Portuguese -> English "ru": "en", # Russian -> English "zh": "en", # Chinese -> English "ja": "en", # Japanese -> English "ko": "en", # Korean -> English "ar": "en", # Arabic -> English "hi": "en", # Hindi -> English } # Minimum confidence threshold MIN_CONFIDENCE = 0.7 # Minimum message length for reliable detection MIN_LENGTH = 10 def detect_language(text: str) -> tuple[str, float]: """Detect language from text Args: text: Input text to detect language from Returns: Tuple of (locale_code, confidence_score) locale_code: Strapi locale (en, nl, de, etc.) confidence_score: Detection confidence (0-1), 0.0 if detection failed """ # Check minimum length if len(text.strip()) < MIN_LENGTH: logger.debug("Message too short for reliable detection", length=len(text)) return "en", 0.0 try: # Detect language using langdetect detected = detect(text) logger.debug("Language detected", language=detected, text_length=len(text)) # Map to Strapi locale locale = map_to_locale(detected) return locale, 0.85 # langdetect doesn't provide confidence, use default except LangDetectException as e: logger.warning("Language detection failed", error=str(e)) return "en", 0.0 def map_to_locale(lang_code: str) -> str: """Map detected language code to Strapi locale Args: lang_code: ISO 639-1 language code (e.g., "en", "nl", "de") Returns: Strapi locale code, or "en" as default if not supported """ # Direct mapping if lang_code in SUPPORTED_LOCALES: return lang_code # Use locale map locale = LOCALE_MAP.get(lang_code, "en") if locale != lang_code and locale == "en": logger.info( "Unsupported language mapped to default", detected_language=lang_code, mapped_locale=locale ) return locale def get_cached_or_detect(state, text: str) -> str: """Get language from cache or detect from text Priority: 1. Use state.detected_language if available 2. Use state.context["language"] if available 3. Detect from text Args: state: Agent state text: Input text to detect language from Returns: Detected locale code """ # Check state first if state.get("detected_language"): logger.debug("Using cached language from state", language=state["detected_language"]) return state["detected_language"] # Check context cache if state.get("context", {}).get("language"): logger.debug("Using cached language from context", language=state["context"]["language"]) return state["context"]["language"] # Detect from text locale, confidence = detect_language(text) if confidence < MIN_CONFIDENCE and confidence > 0: logger.warning( "Low detection confidence, using default", locale=locale, confidence=confidence ) return locale def is_supported_locale(locale: str) -> bool: """Check if locale is supported Args: locale: Locale code to check Returns: True if locale is in supported list """ return locale in SUPPORTED_LOCALES