diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 50d4d86afb0..a051d58411e 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -129,6 +129,7 @@ def _fixed_temperature_for_model(
         return OMIT_TEMPERATURE
     return None
 
+
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "gemini": "gemini-3-flash-preview",
@@ -265,7 +266,9 @@ def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]:
     try:
         return True, pool.select()
     except Exception as exc:
-        logger.debug("Auxiliary client: could not select pool entry for %s: %s", provider, exc)
+        logger.debug(
+            "Auxiliary client: could not select pool entry for %s: %s", provider, exc
+        )
         return True, None
 
 
@@ -327,7 +330,11 @@ def _convert_content_for_responses(content: Any) -> Any:
         elif ptype == "image_url":
             # chat.completions nests the URL: {"image_url": {"url": "..."}}
             image_data = part.get("image_url", {})
-            url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
+            url = (
+                image_data.get("url", "")
+                if isinstance(image_data, dict)
+                else str(image_data)
+            )
             entry: Dict[str, Any] = {"type": "input_image", "image_url": url}
             # Preserve detail if specified
             detail = image_data.get("detail") if isinstance(image_data, dict) else None
@@ -369,10 +376,12 @@ def create(self, **kwargs) -> Any:
             if role == "system":
                 instructions = content if isinstance(content, str) else str(content)
             else:
-                input_msgs.append({
-                    "role": role,
-                    "content": _convert_content_for_responses(content),
-                })
+                input_msgs.append(
+                    {
+                        "role": role,
+                        "content": _convert_content_for_responses(content),
+                    }
+                )
 
         resp_kwargs: Dict[str, Any] = {
             "model": model,
@@ -393,12 +402,14 @@ def create(self, **kwargs) -> Any:
                 name = fn.get("name")
                 if not name:
                     continue
-                converted.append({
-                    "type": "function",
-                    "name": name,
-                    "description": fn.get("description", ""),
-                    "parameters": fn.get("parameters", {}),
-                })
+                converted.append(
+                    {
+                        "type": "function",
+                        "name": name,
+                        "description": fn.get("description", ""),
+                        "parameters": fn.get("parameters", {}),
+                    }
+                )
             if converted:
                 resp_kwargs["tools"] = converted
 
@@ -443,13 +454,20 @@ def create(self, **kwargs) -> Any:
                     # a function_call response with incidental text should not
                     # be collapsed into a plain-text message.
                     assembled = "".join(collected_text_deltas)
-                    final.output = [SimpleNamespace(
-                        type="message", role="assistant", status="completed",
-                        content=[SimpleNamespace(type="output_text", text=assembled)],
-                    )]
+                    final.output = [
+                        SimpleNamespace(
+                            type="message",
+                            role="assistant",
+                            status="completed",
+                            content=[
+                                SimpleNamespace(type="output_text", text=assembled)
+                            ],
+                        )
+                    ]
                     logger.debug(
                         "Codex auxiliary: synthesized from %d deltas (%d chars)",
-                        len(collected_text_deltas), len(assembled),
+                        len(collected_text_deltas),
+                        len(assembled),
                     )
 
             # Extract text and tool calls from the Responses output.
@@ -464,19 +482,21 @@ def _item_get(obj: Any, key: str, default: Any = None) -> Any:
             for item in getattr(final, "output", []):
                 item_type = _item_get(item, "type")
                 if item_type == "message":
-                    for part in (_item_get(item, "content") or []):
+                    for part in _item_get(item, "content") or []:
                         ptype = _item_get(part, "type")
                         if ptype in ("output_text", "text"):
                             text_parts.append(_item_get(part, "text", ""))
                 elif item_type == "function_call":
-                    tool_calls_raw.append(SimpleNamespace(
-                        id=_item_get(item, "call_id", ""),
-                        type="function",
-                        function=SimpleNamespace(
-                            name=_item_get(item, "name", ""),
-                            arguments=_item_get(item, "arguments", "{}"),
-                        ),
-                    ))
+                    tool_calls_raw.append(
+                        SimpleNamespace(
+                            id=_item_get(item, "call_id", ""),
+                            type="function",
+                            function=SimpleNamespace(
+                                name=_item_get(item, "name", ""),
+                                arguments=_item_get(item, "arguments", "{}"),
+                            ),
+                        )
+                    )
 
             resp_usage = getattr(final, "usage", None)
             if resp_usage:
@@ -546,6 +566,7 @@ def __init__(self, sync_adapter: _CodexCompletionsAdapter):
 
     async def create(self, **kwargs) -> Any:
         import asyncio
+
         return await asyncio.to_thread(self._sync.create, **kwargs)
 
 
@@ -574,13 +595,18 @@ def __init__(self, real_client: Any, model: str, is_oauth: bool = False):
         self._is_oauth = is_oauth
 
     def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+        from agent.anthropic_adapter import (
+            build_anthropic_kwargs,
+            normalize_anthropic_response,
+        )
 
         messages = kwargs.get("messages", [])
         model = kwargs.get("model", self._model)
         tools = kwargs.get("tools")
         tool_choice = kwargs.get("tool_choice")
-        max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
+        max_tokens = (
+            kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
+        )
         temperature = kwargs.get("temperature")
 
         normalized_tool_choice = None
@@ -607,6 +633,7 @@ def create(self, **kwargs) -> Any:
         # additionally strips these keys as a safety net — keep both layers.
         if temperature is not None:
             from agent.anthropic_adapter import _forbids_sampling_params
+
             if not _forbids_sampling_params(model):
                 anthropic_kwargs["temperature"] = temperature
 
@@ -617,7 +644,9 @@ def create(self, **kwargs) -> Any:
         if hasattr(response, "usage") and response.usage:
             prompt_tokens = getattr(response.usage, "input_tokens", 0) or 0
             completion_tokens = getattr(response.usage, "output_tokens", 0) or 0
-            total_tokens = getattr(response.usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
+            total_tokens = getattr(response.usage, "total_tokens", 0) or (
+                prompt_tokens + completion_tokens
+            )
             usage = SimpleNamespace(
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
@@ -644,7 +673,14 @@ def __init__(self, adapter: _AnthropicCompletionsAdapter):
 class AnthropicAuxiliaryClient:
     """OpenAI-client-compatible wrapper over a native Anthropic client."""
 
-    def __init__(self, real_client: Any, model: str, api_key: str, base_url: str, is_oauth: bool = False):
+    def __init__(
+        self,
+        real_client: Any,
+        model: str,
+        api_key: str,
+        base_url: str,
+        is_oauth: bool = False,
+    ):
         self._real_client = real_client
         adapter = _AnthropicCompletionsAdapter(real_client, model, is_oauth=is_oauth)
         self.chat = _AnthropicChatShim(adapter)
@@ -663,6 +699,7 @@ def __init__(self, sync_adapter: _AnthropicCompletionsAdapter):
 
     async def create(self, **kwargs) -> Any:
         import asyncio
+
         return await asyncio.to_thread(self._sync.create, **kwargs)
 
 
@@ -745,6 +782,7 @@ def _read_codex_access_token() -> Optional[str]:
 
     try:
         from hermes_cli.auth import _read_codex_tokens
+
         data = _read_codex_tokens()
         tokens = data.get("tokens", {})
         access_token = tokens.get("access_token")
@@ -755,6 +793,7 @@ def _read_codex_access_token() -> Optional[str]:
         # prevent fallback to working providers (e.g. Anthropic).
         try:
             import base64
+
             payload = access_token.split(".")[1]
             payload += "=" * (-len(payload) % 4)
             claims = json.loads(base64.urlsafe_b64decode(payload))
@@ -778,7 +817,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
     credentials, or (None, None) if none are configured.
     """
     try:
-        from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
+        from hermes_cli.auth import (
+            PROVIDER_REGISTRY,
+            resolve_api_key_provider_credentials,
+        )
     except ImportError:
         logger.debug("Could not import PROVIDER_REGISTRY for API-key fallback")
         return None, None
@@ -792,6 +834,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
             # as auxiliary fallback when the user's primary provider fails.
             try:
                 from hermes_cli.auth import is_provider_explicitly_configured
+
                 if not is_provider_explicitly_configured("anthropic"):
                     continue
             except ImportError:
@@ -805,7 +848,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                 continue
 
             base_url = _to_openai_base_url(
-                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
+                _pool_runtime_base_url(entry, pconfig.inference_base_url)
+                or pconfig.inference_base_url
             )
             model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
             if model is None:
@@ -831,7 +875,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
             continue
 
         base_url = _to_openai_base_url(
-            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+            str(creds.get("base_url", "")).strip().rstrip("/")
+            or pconfig.inference_base_url
         )
         model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
         if model is None:
@@ -857,24 +902,27 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
 # ── Provider resolution helpers ─────────────────────────────────────────────
 
 
-
 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
     pool_present, entry = _select_pool_entry("openrouter")
     if pool_present:
         or_key = _pool_runtime_api_key(entry)
         if not or_key:
             return None, None
-        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
+        base_url = (
+            _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
+        )
         logger.debug("Auxiliary client: OpenRouter via pool")
-        return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+        return OpenAI(
+            api_key=or_key, base_url=base_url, default_headers=_OR_HEADERS
+        ), _OPENROUTER_MODEL
 
     or_key = os.getenv("OPENROUTER_API_KEY")
     if not or_key:
         return None, None
     logger.debug("Auxiliary client: OpenRouter")
-    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+    return OpenAI(
+        api_key=or_key, base_url=OPENROUTER_BASE_URL, default_headers=_OR_HEADERS
+    ), _OPENROUTER_MODEL
 
 
 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -883,6 +931,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
     # to avoid piling more requests onto the tapped RPH bucket.
     try:
         from agent.nous_rate_guard import nous_rate_limit_remaining
+
         _remaining = nous_rate_limit_remaining()
         if _remaining is not None and _remaining > 0:
             logger.debug(
@@ -907,16 +956,24 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
     # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
     try:
         from hermes_cli.models import check_nous_free_tier
+
         if check_nous_free_tier():
-            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
-            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
-                         model, "vision" if vision else "text")
+            model = (
+                _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
+            )
+            logger.debug(
+                "Free-tier Nous account — using %s for auxiliary/%s",
+                model,
+                "vision" if vision else "text",
+            )
     except Exception:
         pass
     return (
         OpenAI(
             api_key=_nous_api_key(nous),
-            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
+            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip(
+                "/"
+            ),
         ),
         model,
     )
@@ -930,6 +987,7 @@ def _read_main_model() -> str:
     """
     try:
         from hermes_cli.config import load_config
+
         cfg = load_config()
         model_cfg = cfg.get("model", {})
         if isinstance(model_cfg, str) and model_cfg.strip():
@@ -951,6 +1009,7 @@ def _read_main_provider() -> str:
     """
     try:
         from hermes_cli.config import load_config
+
         cfg = load_config()
         model_cfg = cfg.get("model", {})
         if isinstance(model_cfg, dict):
@@ -1028,15 +1087,21 @@ def _validate_proxy_env_urls() -> None:
     """
     from urllib.parse import urlparse
 
-    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
-                "https_proxy", "http_proxy", "all_proxy"):
+    for key in (
+        "HTTPS_PROXY",
+        "HTTP_PROXY",
+        "ALL_PROXY",
+        "https_proxy",
+        "http_proxy",
+        "all_proxy",
+    ):
         value = str(os.environ.get(key) or "").strip()
         if not value:
             continue
         try:
             parsed = urlparse(value)
             if parsed.scheme:
-                _ = parsed.port          # raises ValueError for e.g. '6153export'
+                _ = parsed.port  # raises ValueError for e.g. '6153export'
         except ValueError as exc:
             raise RuntimeError(
                 f"Malformed proxy environment variable {key}={value!r}. "
@@ -1054,7 +1119,7 @@ def _validate_base_url(base_url: str) -> None:
     try:
         parsed = urlparse(candidate)
         if parsed.scheme in {"http", "https"}:
-            _ = parsed.port              # raises ValueError for malformed ports
+            _ = parsed.port  # raises ValueError for malformed ports
     except ValueError as exc:
         raise RuntimeError(
             f"Malformed custom endpoint URL: {candidate!r}. "
@@ -1074,7 +1139,11 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
     if custom_base.lower().startswith(_CODEX_AUX_BASE_URL.lower()):
         return None, None
     model = _read_main_model() or "gpt-4o-mini"
-    logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
+    logger.debug(
+        "Auxiliary client: custom endpoint (%s, api_mode=%s)",
+        model,
+        custom_mode or "chat_completions",
+    )
     if custom_mode == "codex_responses":
         real_client = OpenAI(api_key=custom_key, base_url=custom_base)
         return CodexAuxiliaryClient(real_client, model), model
@@ -1103,7 +1172,10 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
     if pool_present:
         codex_token = _pool_runtime_api_key(entry)
         if codex_token:
-            base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL
+            base_url = (
+                _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL)
+                or _CODEX_AUX_BASE_URL
+            )
         else:
             codex_token = _read_codex_access_token()
             if not codex_token:
@@ -1125,7 +1197,10 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
 
 def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
     try:
-        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+        from agent.anthropic_adapter import (
+            build_anthropic_client,
+            resolve_anthropic_token,
+        )
     except ImportError:
         return None, None
 
@@ -1143,9 +1218,14 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
     # Allow base URL override from config.yaml model.base_url, but only
     # when the configured provider is anthropic — otherwise a non-Anthropic
     # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
+    base_url = (
+        _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL)
+        if pool_present
+        else _ANTHROPIC_DEFAULT_BASE_URL
+    )
     try:
         from hermes_cli.config import load_config
+
         cfg = load_config()
         model_cfg = cfg.get("model")
         if isinstance(model_cfg, dict):
@@ -1158,9 +1238,15 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
         pass
 
     from agent.anthropic_adapter import _is_oauth_token
+
     is_oauth = _is_oauth_token(token)
     model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
-    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
+    logger.debug(
+        "Auxiliary client: Anthropic native (%s) at %s (oauth=%s)",
+        model,
+        base_url,
+        is_oauth,
+    )
     try:
         real_client = build_anthropic_client(token, base_url)
     except ImportError:
@@ -1168,7 +1254,9 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
         # missing — build_anthropic_client raises ImportError at call time
         # when _anthropic_sdk is None.  Treat as unavailable.
         return None, None
-    return AnthropicAuxiliaryClient(real_client, model, token, base_url, is_oauth=is_oauth), model
+    return AnthropicAuxiliaryClient(
+        real_client, model, token, base_url, is_oauth=is_oauth
+    ), model
 
 
 _AUTO_PROVIDER_LABELS = {
@@ -1225,9 +1313,16 @@ def _is_payment_error(exc: Exception) -> bool:
     # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
     # but sometimes wrap them in 429 or other codes.
     if status in (402, 429, None):
-        if any(kw in err_lower for kw in ("credits", "insufficient funds",
-                                           "can only afford", "billing",
-                                           "payment required")):
+        if any(
+            kw in err_lower
+            for kw in (
+                "credits",
+                "insufficient funds",
+                "can only afford",
+                "billing",
+                "payment required",
+            )
+        ):
             return True
     return False
 
@@ -1249,11 +1344,17 @@ def _is_connection_error(exc: Exception) -> bool:
     if any(kw in err_type for kw in ("Connection", "Timeout", "DNS", "SSL")):
         return True
     err_lower = str(exc).lower()
-    if any(kw in err_lower for kw in (
-        "connection refused", "name or service not known",
-        "no route to host", "network is unreachable",
-        "timed out", "connection reset",
-    )):
+    if any(
+        kw in err_lower
+        for kw in (
+            "connection refused",
+            "name or service not known",
+            "no route to host",
+            "network is unreachable",
+            "timed out",
+            "connection reset",
+        )
+    ):
         return True
     return False
 
@@ -1280,9 +1381,14 @@ def _try_payment_fallback(
     if main_provider and main_provider.lower() in skip:
         skip_labels.add(main_provider.lower())
     # Map common resolved_provider values back to chain labels.
-    _alias_to_label = {"openrouter": "openrouter", "nous": "nous",
-                       "openai-codex": "openai-codex", "codex": "openai-codex",
-                       "custom": "local/custom", "local/custom": "local/custom"}
+    _alias_to_label = {
+        "openrouter": "openrouter",
+        "nous": "nous",
+        "openai-codex": "openai-codex",
+        "codex": "openai-codex",
+        "custom": "local/custom",
+        "local/custom": "local/custom",
+    }
     skip_chain_labels = {_alias_to_label.get(s, s) for s in skip_labels}
 
     tried = []
@@ -1293,19 +1399,28 @@ def _try_payment_fallback(
         if client is not None:
             logger.info(
                 "Auxiliary %s: %s on %s — falling back to %s (%s)",
-                task or "call", reason, failed_provider, label, model or "default",
+                task or "call",
+                reason,
+                failed_provider,
+                label,
+                model or "default",
             )
             return client, model, label
         tried.append(label)
 
     logger.warning(
         "Auxiliary %s: %s on %s and no fallback available (tried: %s)",
-        task or "call", reason, failed_provider, ", ".join(tried),
+        task or "call",
+        reason,
+        failed_provider,
+        ", ".join(tried),
     )
     return None, None, ""
 
 
-def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> Tuple[Optional[OpenAI], Optional[str]]:
     """Full auto-detection chain.
 
     Priority:
@@ -1335,15 +1450,19 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
     if not _stale_base_url_warned:
         _env_base = os.getenv("OPENAI_BASE_URL", "").strip()
         _cfg_provider = runtime_provider or _read_main_provider()
-        if (_env_base and _cfg_provider
-                and _cfg_provider != "custom"
-                and not _cfg_provider.startswith("custom:")):
+        if (
+            _env_base
+            and _cfg_provider
+            and _cfg_provider != "custom"
+            and not _cfg_provider.startswith("custom:")
+        ):
             logger.warning(
                 "OPENAI_BASE_URL is set (%s) but model.provider is '%s'. "
                 "Auxiliary clients may route to the wrong endpoint. "
                 "Run: hermes model to reconfigure, or remove "
                 "OPENAI_BASE_URL from ~/.hermes/.env",
-                _env_base, _cfg_provider,
+                _env_base,
+                _cfg_provider,
             )
             _stale_base_url_warned = True
 
@@ -1356,12 +1475,13 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
     # config.yaml (auxiliary.<task>.provider) still win over this.
     main_provider = runtime_provider or _read_main_provider()
     main_model = runtime_model or _read_main_model()
-    if (main_provider and main_model
-            and main_provider not in ("auto", "")):
+    if main_provider and main_model and main_provider not in ("auto", ""):
         resolved_provider = main_provider
         explicit_base_url = None
         explicit_api_key = None
-        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
+        if runtime_base_url and (
+            main_provider == "custom" or main_provider.startswith("custom:")
+        ):
             resolved_provider = "custom"
             explicit_base_url = runtime_base_url
             explicit_api_key = runtime_api_key or None
@@ -1373,8 +1493,11 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
             api_mode=runtime_api_mode or None,
         )
         if client is not None:
-            logger.info("Auxiliary auto-detect: using main provider %s (%s)",
-                        main_provider, resolved or main_model)
+            logger.info(
+                "Auxiliary auto-detect: using main provider %s (%s)",
+                main_provider,
+                resolved or main_model,
+            )
             return client, resolved or main_model
 
     # ── Step 2: aggregator / fallback chain ──────────────────────────────
@@ -1383,16 +1506,24 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
         client, model = try_fn()
         if client is not None:
             if tried:
-                logger.info("Auxiliary auto-detect: using %s (%s) — skipped: %s",
-                            label, model or "default", ", ".join(tried))
+                logger.info(
+                    "Auxiliary auto-detect: using %s (%s) — skipped: %s",
+                    label,
+                    model or "default",
+                    ", ".join(tried),
+                )
             else:
-                logger.info("Auxiliary auto-detect: using %s (%s)", label, model or "default")
+                logger.info(
+                    "Auxiliary auto-detect: using %s (%s)", label, model or "default"
+                )
             return client, model
         tried.append(label)
-    logger.warning("Auxiliary auto-detect: no provider available (tried: %s). "
-                   "Compression, summarization, and memory flush will not work. "
-                   "Set OPENROUTER_API_KEY or configure a local model in config.yaml.",
-                   ", ".join(tried))
+    logger.warning(
+        "Auxiliary auto-detect: no provider available (tried: %s). "
+        "Compression, summarization, and memory flush will not work. "
+        "Set OPENROUTER_API_KEY or configure a local model in config.yaml.",
+        ", ".join(tried),
+    )
     return None, None
 
 
@@ -1424,6 +1555,7 @@ def _to_async_client(sync_client, model: str):
         pass
     try:
         from agent.copilot_acp_client import CopilotACPClient
+
         if isinstance(sync_client, CopilotACPClient):
             return sync_client, model
     except ImportError:
@@ -1445,7 +1577,9 @@ def _to_async_client(sync_client, model: str):
     return AsyncOpenAI(**async_kwargs), model
 
 
-def _normalize_resolved_model(model_name: Optional[str], provider: str) -> Optional[str]:
+def _normalize_resolved_model(
+    model_name: Optional[str], provider: str
+) -> Optional[str]:
     """Normalize a resolved model for the provider that will receive it."""
     if not model_name:
         return model_name
@@ -1529,8 +1663,10 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
             logger.debug(
                 "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
                 "(api_mode=%s, model=%s, base_url=%s)",
-                api_mode or "auto-detected", final_model_str,
-                base_url_str[:60] if base_url_str else "")
+                api_mode or "auto-detected",
+                final_model_str,
+                base_url_str[:60] if base_url_str else "",
+            )
             return CodexAuxiliaryClient(client_obj, final_model_str)
         return client_obj
 
@@ -1546,33 +1682,49 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
         if model and "/" in model and resolved and "/" not in resolved:
             logger.debug(
                 "Dropping OpenRouter-format model %r for non-OpenRouter "
-                "auxiliary provider (using %r instead)", model, resolved)
+                "auxiliary provider (using %r instead)",
+                model,
+                resolved,
+            )
             model = None
         final_model = model or resolved
-        return (_to_async_client(client, final_model) if async_mode
-                else (client, final_model))
+        return (
+            _to_async_client(client, final_model)
+            if async_mode
+            else (client, final_model)
+        )
 
     # ── OpenRouter ───────────────────────────────────────────────────
     if provider == "openrouter":
         client, default = _try_openrouter()
         if client is None:
-            logger.warning("resolve_provider_client: openrouter requested "
-                           "but OPENROUTER_API_KEY not set")
+            logger.warning(
+                "resolve_provider_client: openrouter requested "
+                "but OPENROUTER_API_KEY not set"
+            )
             return None, None
         final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
-                else (client, final_model))
+        return (
+            _to_async_client(client, final_model)
+            if async_mode
+            else (client, final_model)
+        )
 
     # ── Nous Portal (OAuth) ──────────────────────────────────────────
     if provider == "nous":
         client, default = _try_nous()
         if client is None:
-            logger.warning("resolve_provider_client: nous requested "
-                           "but Nous Portal not configured (run: hermes auth)")
+            logger.warning(
+                "resolve_provider_client: nous requested "
+                "but Nous Portal not configured (run: hermes auth)"
+            )
             return None, None
         final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
-                else (client, final_model))
+        return (
+            _to_async_client(client, final_model)
+            if async_mode
+            else (client, final_model)
+        )
 
     # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
     if provider == "openai-codex":
@@ -1581,8 +1733,10 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
             # access to responses.stream() (e.g., the main agent loop).
             codex_token = _read_codex_access_token()
             if not codex_token:
-                logger.warning("resolve_provider_client: openai-codex requested "
-                               "but no Codex OAuth token found (run: hermes model)")
+                logger.warning(
+                    "resolve_provider_client: openai-codex requested "
+                    "but no Codex OAuth token found (run: hermes model)"
+                )
                 return None, None
             final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
             raw_client = OpenAI(
@@ -1594,12 +1748,17 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
         # Standard path: wrap in CodexAuxiliaryClient adapter
         client, default = _try_codex()
         if client is None:
-            logger.warning("resolve_provider_client: openai-codex requested "
-                           "but no Codex OAuth token found (run: hermes model)")
+            logger.warning(
+                "resolve_provider_client: openai-codex requested "
+                "but no Codex OAuth token found (run: hermes model)"
+            )
             return None, None
         final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
-                else (client, final_model))
+        return (
+            _to_async_client(client, final_model)
+            if async_mode
+            else (client, final_model)
+        )
 
     # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
     if provider == "custom":
@@ -1625,28 +1784,37 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
                 extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
             elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
                 from hermes_cli.models import copilot_default_headers
+
                 extra["default_headers"] = copilot_default_headers()
             client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
             client = _wrap_if_needed(client, final_model, custom_base)
-            return (_to_async_client(client, final_model) if async_mode
-                    else (client, final_model))
+            return (
+                _to_async_client(client, final_model)
+                if async_mode
+                else (client, final_model)
+            )
         # Try custom first, then codex, then API-key providers
-        for try_fn in (_try_custom_endpoint, _try_codex,
-                       _resolve_api_key_provider):
+        for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider):
             client, default = try_fn()
             if client is not None:
                 final_model = _normalize_resolved_model(model or default, provider)
                 _cbase = str(getattr(client, "base_url", "") or "")
                 client = _wrap_if_needed(client, final_model, _cbase)
-                return (_to_async_client(client, final_model) if async_mode
-                        else (client, final_model))
-        logger.warning("resolve_provider_client: custom/main requested "
-                       "but no endpoint credentials found")
+                return (
+                    _to_async_client(client, final_model)
+                    if async_mode
+                    else (client, final_model)
+                )
+        logger.warning(
+            "resolve_provider_client: custom/main requested "
+            "but no endpoint credentials found"
+        )
         return None, None
 
     # ── Named custom providers (config.yaml custom_providers list) ───
     try:
         from hermes_cli.runtime_provider import _get_named_custom_provider
+
         custom_entry = _get_named_custom_provider(provider)
         if custom_entry:
             custom_base = custom_entry.get("base_url", "").strip()
@@ -1657,19 +1825,28 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
             custom_key = custom_key or "no-key-required"
             if custom_base:
                 final_model = _normalize_resolved_model(
-                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
+                    model
+                    or custom_entry.get("model")
+                    or _read_main_model()
+                    or "gpt-4o-mini",
                     provider,
                 )
                 client = OpenAI(api_key=custom_key, base_url=custom_base)
                 client = _wrap_if_needed(client, final_model, custom_base)
                 logger.debug(
                     "resolve_provider_client: named custom provider %r (%s)",
-                    provider, final_model)
-                return (_to_async_client(client, final_model) if async_mode
-                        else (client, final_model))
+                    provider,
+                    final_model,
+                )
+                return (
+                    _to_async_client(client, final_model)
+                    if async_mode
+                    else (client, final_model)
+                )
             logger.warning(
                 "resolve_provider_client: named custom provider %r has no base_url",
-                provider)
+                provider,
+            )
             return None, None
     except ImportError:
         pass
@@ -1694,10 +1871,16 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
         if provider == "anthropic":
             client, default_model = _try_anthropic()
             if client is None:
-                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
+                logger.warning(
+                    "resolve_provider_client: anthropic requested but no Anthropic credentials found"
+                )
                 return None, None
             final_model = _normalize_resolved_model(model or default_model, provider)
-            return (_to_async_client(client, final_model) if async_mode else (client, final_model))
+            return (
+                _to_async_client(client, final_model)
+                if async_mode
+                else (client, final_model)
+            )
 
         creds = resolve_api_key_provider_credentials(provider)
         api_key = str(creds.get("api_key", "")).strip()
@@ -1705,13 +1888,17 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
             tried_sources = list(pconfig.api_key_env_vars)
             if provider == "copilot":
                 tried_sources.append("gh auth token")
-            logger.debug("resolve_provider_client: provider %s has no API "
-                         "key configured (tried: %s)",
-                         provider, ", ".join(tried_sources))
+            logger.debug(
+                "resolve_provider_client: provider %s has no API "
+                "key configured (tried: %s)",
+                provider,
+                ", ".join(tried_sources),
+            )
             return None, None
 
         base_url = _to_openai_base_url(
-            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+            str(creds.get("base_url", "")).strip().rstrip("/")
+            or pconfig.inference_base_url
         )
 
         default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
@@ -1734,8 +1921,12 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
             from hermes_cli.models import copilot_default_headers
 
             headers.update(copilot_default_headers())
-        client = OpenAI(api_key=api_key, base_url=base_url,
-                        **({"default_headers": headers} if headers else {}))
+
+        client = OpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            **({"default_headers": headers} if headers else {})
+        )
 
         # Copilot GPT-5+ models (except gpt-5-mini) require the Responses
         # API — they are not accessible via /chat/completions.  Wrap the
@@ -1744,11 +1935,13 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
         if provider == "copilot" and final_model and not raw_codex:
             try:
                 from hermes_cli.models import _should_use_copilot_responses_api
+
                 if _should_use_copilot_responses_api(final_model):
                     logger.debug(
                         "resolve_provider_client: copilot model %s needs "
                         "Responses API — wrapping with CodexAuxiliaryClient",
-                        final_model)
+                        final_model,
+                    )
                     client = CodexAuxiliaryClient(client, final_model)
             except ImportError:
                 pass
@@ -1759,8 +1952,11 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
         client = _wrap_if_needed(client, final_model, base_url)
 
         logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-        return (_to_async_client(client, final_model) if async_mode
-                else (client, final_model))
+        return (
+            _to_async_client(client, final_model)
+            if async_mode
+            else (client, final_model)
+        )
 
     if pconfig.auth_type == "external_process":
         creds = resolve_external_process_provider_credentials(provider)
@@ -1791,10 +1987,16 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
                 args=args,
             )
             logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-            return (_to_async_client(client, final_model) if async_mode
-                    else (client, final_model))
-        logger.warning("resolve_provider_client: external-process provider %s not "
-                       "directly supported", provider)
+            return (
+                _to_async_client(client, final_model)
+                if async_mode
+                else (client, final_model)
+            )
+        logger.warning(
+            "resolve_provider_client: external-process provider %s not "
+            "directly supported",
+            provider,
+        )
         return None, None
 
     elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -1804,17 +2006,24 @@ def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
         if provider == "openai-codex":
             return resolve_provider_client("openai-codex", model, async_mode)
         # Other OAuth providers not directly supported
-        logger.warning("resolve_provider_client: OAuth provider %s not "
-                       "directly supported, try 'auto'", provider)
+        logger.warning(
+            "resolve_provider_client: OAuth provider %s not "
+            "directly supported, try 'auto'",
+            provider,
+        )
         return None, None
 
-    logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
-                   pconfig.auth_type, provider)
+    logger.warning(
+        "resolve_provider_client: unhandled auth_type %s for %s",
+        pconfig.auth_type,
+        provider,
+    )
     return None, None
 
 
 # ── Public API ──────────────────────────────────────────────────────────────
 
+
 def get_text_auxiliary_client(
     task: str = "",
     *,
@@ -1829,7 +2038,9 @@ def get_text_auxiliary_client(
     Callers may override the returned model via config.yaml
     (e.g. auxiliary.compression.model, auxiliary.web_extract.model).
     """
-    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
+    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
+        task or None
+    )
     return resolve_provider_client(
         provider,
         model=model,
@@ -1840,14 +2051,18 @@ def get_text_auxiliary_client(
     )
 
 
-def get_async_text_auxiliary_client(task: str = "", *, main_runtime: Optional[Dict[str, Any]] = None):
+def get_async_text_auxiliary_client(
+    task: str = "", *, main_runtime: Optional[Dict[str, Any]] = None
+):
     """Return (async_client, model_slug) for async consumers.
 
     For standard providers returns (AsyncOpenAI, model). For Codex returns
     (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
     Returns (None, None) when no provider is available.
     """
-    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
+    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
+        task or None
+    )
     return resolve_provider_client(
         provider,
         model=model,
@@ -1869,7 +2084,9 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
     return _normalize_aux_provider(provider)
 
 
-def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
+def _resolve_strict_vision_backend(
+    provider: str,
+) -> Tuple[Optional[Any], Optional[str]]:
     provider = _normalize_vision_provider(provider)
     if provider == "openrouter":
         return _try_openrouter()
@@ -1928,12 +2145,18 @@ def resolve_vision_provider_client(
     backends, so users can intentionally force experimental providers. Auto mode
     stays conservative and only tries vision backends known to work today.
     """
-    requested, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
-        "vision", provider, model, base_url, api_key
-    )
+    (
+        requested,
+        resolved_model,
+        resolved_base_url,
+        resolved_api_key,
+        resolved_api_mode,
+    ) = _resolve_task_provider_model("vision", provider, model, base_url, api_key)
     requested = _normalize_vision_provider(requested)
 
-    def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
+    def _finalize(
+        resolved_provider: str, sync_client: Any, default_model: Optional[str]
+    ):
         if sync_client is None:
             return resolved_provider, None, None
         final_model = resolved_model or default_model
@@ -1970,15 +2193,15 @@ def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[
         if main_provider and main_provider not in ("auto", ""):
             vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
             rpc_client, rpc_model = resolve_provider_client(
-                main_provider, vision_model,
-                api_mode=resolved_api_mode)
+                main_provider, vision_model, api_mode=resolved_api_mode
+            )
             if rpc_client is not None:
                 logger.info(
                     "Vision auto-detect: using main provider %s (%s)",
-                    main_provider, rpc_model or vision_model,
+                    main_provider,
+                    rpc_model or vision_model,
                 )
-                return _finalize(
-                    main_provider, rpc_client, rpc_model or vision_model)
+                return _finalize(main_provider, rpc_client, rpc_model or vision_model)
 
         # Fall back through aggregators (uses their dedicated vision model,
         # not the user's main model) when main provider has no client.
@@ -1996,8 +2219,9 @@ def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[
         sync_client, default_model = _resolve_strict_vision_backend(requested)
         return _finalize(requested, sync_client, default_model)
 
-    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
-                                             api_mode=resolved_api_mode)
+    client, final_model = _get_cached_client(
+        requested, resolved_model, async_mode, api_mode=resolved_api_mode
+    )
     if client is None:
         return requested, None, None
     return requested, client, final_model
@@ -2005,7 +2229,7 @@ def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[
 
 def get_auxiliary_extra_body() -> dict:
     """Return extra_body kwargs for auxiliary API calls.
-    
+
     Includes Nous Portal product tags when the auxiliary client is backed
     by Nous Portal. Returns empty dict otherwise.
     """
@@ -2014,7 +2238,7 @@ def get_auxiliary_extra_body() -> dict:
 
 def auxiliary_max_tokens_param(value: int) -> dict:
     """Return the correct max tokens kwarg for the auxiliary client's provider.
-    
+
     OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
     models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
     The Codex adapter translates max_tokens internally, so we use max_tokens
@@ -2081,6 +2305,7 @@ def neuter_async_httpx_del() -> None:
     """
     try:
         from openai._base_client import AsyncHttpxClientWrapper
+
         AsyncHttpxClientWrapper.__del__ = lambda self: None  # type: ignore[assignment]
     except (ImportError, AttributeError):
         pass  # Graceful degradation if the SDK changes its internals
@@ -2099,6 +2324,7 @@ def _force_close_async_httpx(client: Any) -> None:
     """
     try:
         from httpx._client import ClientState
+
         inner = getattr(client, "_client", None)
         if inner is not None and not getattr(inner, "is_closed", True):
             inner._state = ClientState.CLOSED
@@ -2159,7 +2385,9 @@ def _is_openrouter_client(client: Any) -> bool:
     return False
 
 
-def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
+def _compat_model(
+    client: Any, model: Optional[str], cached_default: Optional[str]
+) -> Optional[str]:
     """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
 
     Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
@@ -2202,12 +2430,24 @@ def _get_cached_client(
     if async_mode:
         try:
             import asyncio as _aio
+
             current_loop = _aio.get_event_loop()
         except RuntimeError:
             pass
     runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    runtime_key = (
+        tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS)
+        if provider == "auto"
+        else ()
+    )
+    cache_key = (
+        provider,
+        async_mode,
+        base_url or "",
+        api_key or "",
+        api_mode or "",
+        runtime_key,
+    )
     with _client_cache_lock:
         if cache_key in _client_cache:
             cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2301,7 +2541,13 @@ def _resolve_task_provider_model(
     if task:
         # Config.yaml is the primary source for per-task overrides.
         if cfg_base_url:
-            return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
+            return (
+                "custom",
+                resolved_model,
+                cfg_base_url,
+                cfg_api_key,
+                resolved_api_mode,
+            )
         if cfg_provider and cfg_provider != "auto":
             return cfg_provider, resolved_model, None, None, resolved_api_mode
 
@@ -2319,6 +2565,7 @@ def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
         return {}
     try:
         from hermes_cli.config import load_config
+
         config = load_config()
     except ImportError:
         return {}
@@ -2394,23 +2641,27 @@ def _convert_openai_images_to_anthropic(messages: list) -> list:
                     media_type = "image/png"
                     if ":" in header and ";" in header:
                         media_type = header.split(":", 1)[1].split(";", 1)[0]
-                    new_content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": media_type,
-                            "data": b64data,
-                        },
-                    })
+                    new_content.append(
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": media_type,
+                                "data": b64data,
+                            },
+                        }
+                    )
                 else:
                     # URL-based image
-                    new_content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "url",
-                            "url": image_url_val,
-                        },
-                    })
+                    new_content.append(
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "url",
+                                "url": image_url_val,
+                            },
+                        }
+                    )
                 changed = True
             else:
                 new_content.append(block)
@@ -2418,6 +2669,33 @@ def _convert_openai_images_to_anthropic(messages: list) -> list:
     return converted
 
 
+def _sanitize_message_content(content: Any) -> str:
+    """Sanitize message content for strict API endpoints (StepFun, etc.).
+
+    Replaces problematic characters that can cause JSON serialization errors
+    like "Unterminated string" on strict API backends.
+    """
+    if not isinstance(content, str):
+        content = str(content) if content else ""
+    sanitized = content
+    for old, new in (
+            ("\\", "\\\\"),
+            ('"', '\\"'),
+            ("\n", "\\n"),
+            ("\r", "\\r"),
+            ("\t", "\\t"),
+        ):
+        sanitized = sanitized.replace(old, new)
+    return sanitized
+
+
+def _validate_messages_json(messages: list) -> list:
+    """Validate and sanitize messages for strict API endpoints.
+    
+    Returns messages as-is - JSON truncation is now handled in context_compressor.py.
+    """
+    return messages
+
 
 def _build_call_kwargs(
     provider: str,
@@ -2431,9 +2709,10 @@ def _build_call_kwargs(
     base_url: Optional[str] = None,
 ) -> dict:
     """Build kwargs for .chat.completions.create() with model/provider adjustments."""
+    sanitized_messages = _validate_messages_json(messages)
     kwargs: Dict[str, Any] = {
         "model": model,
-        "messages": messages,
+        "messages": sanitized_messages,
         "timeout": timeout,
     }
 
@@ -2449,6 +2728,7 @@ def _build_call_kwargs(
     # the aux model is flipped to 4.7.
     if temperature is not None:
         from agent.anthropic_adapter import _forbids_sampling_params
+
         if _forbids_sampling_params(model):
             temperature = None
 
@@ -2490,9 +2770,7 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
     See #7264.
     """
     if response is None:
-        raise RuntimeError(
-            f"Auxiliary {task or 'call'}: LLM returned None response"
-        )
+        raise RuntimeError(f"Auxiliary {task or 'call'}: LLM returned None response")
     # Allow SimpleNamespace responses from adapters (CodexAuxiliaryClient,
     # AnthropicAuxiliaryClient) — they have .choices[0].message.
     try:
@@ -2605,22 +2883,34 @@ def call_llm(
             # resolved_model may be an OpenRouter-format slug that doesn't
             # work on other providers.
             if not resolved_base_url:
-                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
-                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
+                logger.info(
+                    "Auxiliary %s: provider %s unavailable, trying auto-detection chain",
+                    task or "call",
+                    resolved_provider,
+                )
+                client, final_model = _get_cached_client(
+                    "auto", main_runtime=main_runtime
+                )
         if client is None:
             raise RuntimeError(
                 f"No LLM provider configured for task={task} provider={resolved_provider}. "
-                f"Run: hermes setup")
+                f"Run: hermes setup"
+            )
 
     effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
 
     # Log what we're about to do — makes auxiliary operations visible
     _base_info = str(getattr(client, "base_url", resolved_base_url) or "")
     if task:
-        logger.info("Auxiliary %s: using %s (%s)%s",
-                     task, resolved_provider or "auto", final_model or "default",
-                     f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "")
+        logger.info(
+            "Auxiliary %s: using %s (%s)%s",
+            task,
+            resolved_provider or "auto",
+            final_model or "default",
+            f" at {_base_info}"
+            if _base_info and "openrouter" not in _base_info
+            else "",
+        )
 
     # Pass the client's actual base_url (not just resolved_base_url) so
     # endpoint-specific temperature overrides can distinguish
@@ -2638,8 +2928,7 @@ def call_llm(
 
     # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
     try:
-        return _validate_llm_response(
-            client.chat.completions.create(**kwargs), task)
+        return _validate_llm_response(client.chat.completions.create(**kwargs), task)
     except Exception as first_err:
         err_str = str(first_err)
         if "max_tokens" in err_str or "unsupported_parameter" in err_str:
@@ -2647,11 +2936,14 @@ def call_llm(
             kwargs["max_completion_tokens"] = max_tokens
             try:
                 return _validate_llm_response(
-                    client.chat.completions.create(**kwargs), task)
+                    client.chat.completions.create(**kwargs), task
+                )
             except Exception as retry_err:
                 # If the max_tokens retry also hits a payment or connection
                 # error, fall through to the fallback chain below.
-                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+                if not (
+                    _is_payment_error(retry_err) or _is_connection_error(retry_err)
+                ):
                     raise
                 first_err = retry_err
 
@@ -2667,17 +2959,27 @@ def call_llm(
         # Codex/OAuth tokens that authenticate but whose endpoint is down,
         # and providers the user never configured that got picked up by
         # the auto-detection chain.
-        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        should_fallback = _is_payment_error(first_err) or _is_connection_error(
+            first_err
+        )
         # Only try alternative providers when the user didn't explicitly
         # configure this task's provider.  Explicit provider = hard constraint;
         # auto (the default) = best-effort fallback chain.  (#7559)
         is_auto = resolved_provider in ("auto", "", None)
         if should_fallback and is_auto:
-            reason = "payment error" if _is_payment_error(first_err) else "connection error"
-            logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
-                        task or "call", reason, resolved_provider, first_err)
+            reason = (
+                "payment error" if _is_payment_error(first_err) else "connection error"
+            )
+            logger.info(
+                "Auxiliary %s: %s on %s (%s), trying fallback",
+                task or "call",
+                reason,
+                resolved_provider,
+                first_err,
+            )
             fb_client, fb_model, fb_label = _try_payment_fallback(
-                resolved_provider, task, reason=reason)
+                resolved_provider, task, reason=reason
+            )
             if fb_client is not None:
                 fb_kwargs = _build_call_kwargs(
                     fb_label, fb_model, messages,
@@ -2686,7 +2988,8 @@ def call_llm(
                     extra_body=effective_extra_body,
                     base_url=str(getattr(fb_client, "base_url", "") or ""))
                 return _validate_llm_response(
-                    fb_client.chat.completions.create(**fb_kwargs), task)
+                    fb_client.chat.completions.create(**fb_kwargs), task
+                )
         raise
 
 
@@ -2716,7 +3019,9 @@ def extract_content_or_reasoning(response) -> str:
             r"<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>"
             r".*?"
             r"</(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>",
-            "", content, flags=re.DOTALL | re.IGNORECASE,
+            "",
+            content,
+            flags=re.DOTALL | re.IGNORECASE,
         ).strip()
         if cleaned:
             return cleaned
@@ -2733,12 +3038,12 @@ def extract_content_or_reasoning(response) -> str:
         for detail in details:
             if isinstance(detail, dict):
                 summary = (
-                    detail.get("summary")
-                    or detail.get("content")
-                    or detail.get("text")
+                    detail.get("summary") or detail.get("content") or detail.get("text")
                 )
                 if summary and summary not in reasoning_parts:
-                    reasoning_parts.append(summary.strip() if isinstance(summary, str) else str(summary))
+                    reasoning_parts.append(
+                        summary.strip() if isinstance(summary, str) else str(summary)
+                    )
 
     if reasoning_parts:
         return "\n\n".join(reasoning_parts)
@@ -2811,13 +3116,17 @@ async def async_call_llm(
                     f"variable, or switch to a different provider with `hermes model`."
                 )
             if not resolved_base_url:
-                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
-                            task or "call", resolved_provider)
+                logger.info(
+                    "Auxiliary %s: provider %s unavailable, trying auto-detection chain",
+                    task or "call",
+                    resolved_provider,
+                )
                 client, final_model = _get_cached_client("auto", async_mode=True)
         if client is None:
             raise RuntimeError(
                 f"No LLM provider configured for task={task} provider={resolved_provider}. "
-                f"Run: hermes setup")
+                f"Run: hermes setup"
+            )
 
     effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
 
@@ -2837,7 +3146,8 @@ async def async_call_llm(
 
     try:
         return _validate_llm_response(
-            await client.chat.completions.create(**kwargs), task)
+            await client.chat.completions.create(**kwargs), task
+        )
     except Exception as first_err:
         err_str = str(first_err)
         if "max_tokens" in err_str or "unsupported_parameter" in err_str:
@@ -2845,23 +3155,36 @@ async def async_call_llm(
             kwargs["max_completion_tokens"] = max_tokens
             try:
                 return _validate_llm_response(
-                    await client.chat.completions.create(**kwargs), task)
+                    await client.chat.completions.create(**kwargs), task
+                )
             except Exception as retry_err:
                 # If the max_tokens retry also hits a payment or connection
                 # error, fall through to the fallback chain below.
-                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+                if not (
+                    _is_payment_error(retry_err) or _is_connection_error(retry_err)
+                ):
                     raise
                 first_err = retry_err
 
         # ── Payment / connection fallback (mirrors sync call_llm) ─────
-        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        should_fallback = _is_payment_error(first_err) or _is_connection_error(
+            first_err
+        )
         is_auto = resolved_provider in ("auto", "", None)
         if should_fallback and is_auto:
-            reason = "payment error" if _is_payment_error(first_err) else "connection error"
-            logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
-                        task or "call", reason, resolved_provider, first_err)
+            reason = (
+                "payment error" if _is_payment_error(first_err) else "connection error"
+            )
+            logger.info(
+                "Auxiliary %s (async): %s on %s (%s), trying fallback",
+                task or "call",
+                reason,
+                resolved_provider,
+                first_err,
+            )
             fb_client, fb_model, fb_label = _try_payment_fallback(
-                resolved_provider, task, reason=reason)
+                resolved_provider, task, reason=reason
+            )
             if fb_client is not None:
                 fb_kwargs = _build_call_kwargs(
                     fb_label, fb_model, messages,
@@ -2874,5 +3197,6 @@ async def async_call_llm(
                 if async_fb_model and async_fb_model != fb_kwargs.get("model"):
                     fb_kwargs["model"] = async_fb_model
                 return _validate_llm_response(
-                    await async_fb.chat.completions.create(**fb_kwargs), task)
+                    await async_fb.chat.completions.create(**fb_kwargs), task
+                )
         raise
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 254ac0ac5eb..848f43d16b6 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -147,7 +147,9 @@ def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) ->
 
     if tool_name == "write_file":
         path = args.get("path", "?")
-        written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
+        written_lines = (
+            args.get("content", "").count("\n") + 1 if args.get("content") else "?"
+        )
         return f"[write_file] wrote to {path} ({written_lines} lines)"
 
     if tool_name == "search_files":
@@ -163,8 +165,14 @@ def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) ->
         mode = args.get("mode", "replace")
         return f"[patch] {mode} in {path} ({content_len:,} chars result)"
 
-    if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
-                     "browser_type", "browser_scroll", "browser_vision"):
+    if tool_name in (
+        "browser_navigate",
+        "browser_click",
+        "browser_snapshot",
+        "browser_type",
+        "browser_scroll",
+        "browser_vision",
+    ):
         url = args.get("url", "")
         ref = args.get("ref", "")
         detail = f" {url}" if url else (f" ref={ref}" if ref else "")
@@ -304,7 +312,9 @@ def __init__(
         self.quiet_mode = quiet_mode
 
         self.context_length = get_model_context_length(
-            model, base_url=base_url, api_key=api_key,
+            model,
+            base_url=base_url,
+            api_key=api_key,
             config_context_length=config_context_length,
             provider=provider,
         )
@@ -322,7 +332,8 @@ def __init__(
         target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
         self.tail_token_budget = target_tokens
         self.max_summary_tokens = min(
-            int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+            int(self.context_length * 0.05),
+            _SUMMARY_TOKENS_CEILING,
         )
 
         if not quiet_mode:
@@ -330,10 +341,14 @@ def __init__(
                 "Context compressor initialized: model=%s context_length=%d "
                 "threshold=%d (%.0f%%) target_ratio=%.0f%% tail_budget=%d "
                 "provider=%s base_url=%s",
-                model, self.context_length, self.threshold_tokens,
-                threshold_percent * 100, self.summary_target_ratio * 100,
+                model,
+                self.context_length,
+                self.threshold_tokens,
+                threshold_percent * 100,
+                self.summary_target_ratio * 100,
                 self.tail_token_budget,
-                provider or "none", base_url or "none",
+                provider or "none",
+                base_url or "none",
             )
         self._context_probed = False  # True after a step-down from context error
 
@@ -381,7 +396,9 @@ def should_compress(self, prompt_tokens: int = None) -> bool:
     # ------------------------------------------------------------------
 
     def _prune_old_tool_results(
-        self, messages: List[Dict[str, Any]], protect_tail_count: int,
+        self,
+        messages: List[Dict[str, Any]],
+        protect_tail_count: int,
         protect_tail_tokens: int | None = None,
     ) -> tuple[List[Dict[str, Any]], int]:
         """Replace old tool result contents with informative 1-line summaries.
@@ -417,7 +434,10 @@ def _prune_old_tool_results(
                     if isinstance(tc, dict):
                         cid = tc.get("id", "")
                         fn = tc.get("function", {})
-                        call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
+                        call_id_to_tool[cid] = (
+                            fn.get("name", "unknown"),
+                            fn.get("arguments", ""),
+                        )
                     else:
                         cid = getattr(tc, "id", "") or ""
                         fn = getattr(tc, "function", None)
@@ -434,13 +454,20 @@ def _prune_old_tool_results(
             for i in range(len(result) - 1, -1, -1):
                 msg = result[i]
                 raw_content = msg.get("content") or ""
-                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
+                content_len = (
+                    sum(len(p.get("text", "")) for p in raw_content)
+                    if isinstance(raw_content, list)
+                    else len(raw_content)
+                )
                 msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                 for tc in msg.get("tool_calls") or []:
                     if isinstance(tc, dict):
                         args = tc.get("function", {}).get("arguments", "")
                         msg_tokens += len(args) // _CHARS_PER_TOKEN
-                if accumulated + msg_tokens > protect_tail_tokens and (len(result) - i) >= min_protect:
+                if (
+                    accumulated + msg_tokens > protect_tail_tokens
+                    and (len(result) - i) >= min_protect
+                ):
                     boundary = i
                     break
                 accumulated += msg_tokens
@@ -466,7 +493,10 @@ def _prune_old_tool_results(
             h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
             if h in content_hashes:
                 # This is an older duplicate — replace with back-reference
-                result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
+                result[i] = {
+                    **msg,
+                    "content": "[Duplicate tool output — same content as a more recent call]",
+                }
                 pruned += 1
             else:
                 content_hashes[h] = (i, msg.get("tool_call_id", "?"))
@@ -493,6 +523,7 @@ def _prune_old_tool_results(
                 result[i] = {**msg, "content": summary}
                 pruned += 1
 
+        # Pass 3: Truncate large tool_call arguments while keeping valid JSON
         # Pass 3: Truncate large tool_call arguments in assistant messages
         # outside the protected tail. write_file with 50KB content, for
         # example, survives pruning entirely without this.
@@ -505,8 +536,41 @@ def _prune_old_tool_results(
             msg = result[i]
             if msg.get("role") != "assistant" or not msg.get("tool_calls"):
                 continue
+            
             new_tcs = []
             modified = False
+            for tc in msg.get("tool_calls", []):
+                if not isinstance(tc, dict):
+                    new_tcs.append(tc)
+                    continue
+                
+                func = tc.get("function", {})
+                args_str = func.get("arguments", "")
+                
+                if len(args_str) > 500:
+                    # Try to parse and truncate while keeping valid JSON
+                    try:
+                        import json
+                        args_obj = json.loads(args_str)
+                        
+                        # Truncate string values
+                        for key, value in args_obj.items():
+                            if isinstance(value, str) and len(value) > 200:
+                                args_obj[key] = value[:200] + "...[truncated]"
+                        
+                        # Re-serialize to valid JSON
+                        func["arguments"] = json.dumps(args_obj, ensure_ascii=False)
+                        modified = True
+                    except json.JSONDecodeError:
+                        # If can't parse, drop this tool_call but keep message
+                        pruned.append(msg)
+                        modified = True
+                        break
+                else:
+                    new_tcs.append(tc)
+            
+            if modified and new_tcs:
+                msg["tool_calls"] = new_tcs
             for tc in msg["tool_calls"]:
                 if isinstance(tc, dict):
                     args = tc.get("function", {}).get("arguments", "")
@@ -539,11 +603,11 @@ def _compute_summary_budget(self, turns_to_summarize: List[Dict[str, Any]]) -> i
     # Truncation limits for the summarizer input.  These bound how much of
     # each message the summary model sees — the budget is the *summary*
     # model's context window, not the main model's.
-    _CONTENT_MAX = 6000       # total chars per message body
-    _CONTENT_HEAD = 4000      # chars kept from the start
-    _CONTENT_TAIL = 1500      # chars kept from the end
-    _TOOL_ARGS_MAX = 1500     # tool call argument chars
-    _TOOL_ARGS_HEAD = 1200    # kept from the start of tool args
+    _CONTENT_MAX = 6000  # total chars per message body
+    _CONTENT_HEAD = 4000  # chars kept from the start
+    _CONTENT_TAIL = 1500  # chars kept from the end
+    _TOOL_ARGS_MAX = 1500  # tool call argument chars
+    _TOOL_ARGS_HEAD = 1200  # kept from the start of tool args
 
     def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
         """Serialize conversation turns into labeled text for the summarizer.
@@ -565,14 +629,22 @@ def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
             if role == "tool":
                 tool_id = msg.get("tool_call_id", "")
                 if len(content) > self._CONTENT_MAX:
-                    content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
+                    content = (
+                        content[: self._CONTENT_HEAD]
+                        + "\n...[truncated]...\n"
+                        + content[-self._CONTENT_TAIL :]
+                    )
                 parts.append(f"[TOOL RESULT {tool_id}]: {content}")
                 continue
 
             # Assistant messages: include tool call names AND arguments
             if role == "assistant":
                 if len(content) > self._CONTENT_MAX:
-                    content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
+                    content = (
+                        content[: self._CONTENT_HEAD]
+                        + "\n...[truncated]...\n"
+                        + content[-self._CONTENT_TAIL :]
+                    )
                 tool_calls = msg.get("tool_calls", [])
                 if tool_calls:
                     tc_parts = []
@@ -583,7 +655,7 @@ def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
                             args = redact_sensitive_text(fn.get("arguments", ""))
                             # Truncate long arguments but keep enough for context
                             if len(args) > self._TOOL_ARGS_MAX:
-                                args = args[:self._TOOL_ARGS_HEAD] + "..."
+                                args = args[: self._TOOL_ARGS_HEAD] + "..."
                             tc_parts.append(f"  {name}({args})")
                         else:
                             fn = getattr(tc, "function", None)
@@ -595,12 +667,18 @@ def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
 
             # User and other roles
             if len(content) > self._CONTENT_MAX:
-                content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
+                content = (
+                    content[: self._CONTENT_HEAD]
+                    + "\n...[truncated]...\n"
+                    + content[-self._CONTENT_TAIL :]
+                )
             parts.append(f"[{role.upper()}]: {content}")
 
         return "\n\n".join(parts)
 
-    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]:
+    def _generate_summary(
+        self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None
+    ) -> Optional[str]:
         """Generate a structured summary of conversation turns.
 
         Uses a structured template (Goal, Progress, Decisions, Resolved/Pending
@@ -744,6 +822,9 @@ def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topi
 The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
 
         try:
+            from agent.auxiliary_client import _validate_messages_json
+
+            messages = _validate_messages_json([{"role": "user", "content": prompt}])
             call_kwargs = {
                 "task": "compression",
                 "main_runtime": {
@@ -753,7 +834,7 @@ def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topi
                     "api_key": self.api_key,
                     "api_mode": self.api_mode,
                 },
-                "messages": [{"role": "user", "content": prompt}],
+                "messages": messages,
                 "max_tokens": int(summary_budget * 1.3),
                 # timeout resolved from auxiliary.compression.timeout config by call_llm
             }
@@ -774,18 +855,24 @@ def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topi
             return self._with_summary_prefix(summary)
         except RuntimeError:
             # No provider configured — long cooldown, unlikely to self-resolve
-            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
-            logging.warning("Context compression: no provider available for "
-                            "summary. Middle turns will be dropped without summary "
-                            "for %d seconds.",
-                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
+            self._summary_failure_cooldown_until = (
+                time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            )
+            logging.warning(
+                "Context compression: no provider available for "
+                "summary. Middle turns will be dropped without summary "
+                "for %d seconds.",
+                _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+            )
             return None
         except Exception as e:
             # If the summary model is different from the main model and the
             # error looks permanent (model not found, 503, 404), fall back to
             # using the main model instead of entering cooldown that leaves
             # context growing unbounded.  (#8620 sub-issue 4)
-            _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
+            _status = getattr(e, "status_code", None) or getattr(
+                getattr(e, "response", None), "status_code", None
+            )
             _err_str = str(e).lower()
             _is_model_not_found = (
                 _status in (404, 503)
@@ -803,7 +890,9 @@ def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topi
                 logging.warning(
                     "Summary model '%s' not available (%s). "
                     "Falling back to main model '%s' for compression.",
-                    self.summary_model, e, self.model,
+                    self.summary_model,
+                    e,
+                    self.model,
                 )
                 self.summary_model = ""  # empty = use main model
                 self._summary_failure_cooldown_until = 0.0  # no cooldown
@@ -811,7 +900,9 @@ def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topi
 
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
-            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            self._summary_failure_cooldown_until = (
+                time.monotonic() + _transient_cooldown
+            )
             logging.warning(
                 "Failed to generate context summary: %s. "
                 "Further summary attempts paused for %d seconds.",
@@ -826,7 +917,7 @@ def _with_summary_prefix(summary: str) -> str:
         text = (summary or "").strip()
         for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
             if text.startswith(prefix):
-                text = text[len(prefix):].lstrip()
+                text = text[len(prefix) :].lstrip()
                 break
         return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
 
@@ -841,7 +932,9 @@ def _get_tool_call_id(tc) -> str:
             return tc.get("id", "")
         return getattr(tc, "id", "") or ""
 
-    def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def _sanitize_tool_pairs(
+        self, messages: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
         """Fix orphaned tool_call / tool_result pairs after compression.
 
         Two failure modes:
@@ -874,11 +967,18 @@ def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str,
         orphaned_results = result_call_ids - surviving_call_ids
         if orphaned_results:
             messages = [
-                m for m in messages
-                if not (m.get("role") == "tool" and m.get("tool_call_id") in orphaned_results)
+                m
+                for m in messages
+                if not (
+                    m.get("role") == "tool"
+                    and m.get("tool_call_id") in orphaned_results
+                )
             ]
             if not self.quiet_mode:
-                logger.info("Compression sanitizer: removed %d orphaned tool result(s)", len(orphaned_results))
+                logger.info(
+                    "Compression sanitizer: removed %d orphaned tool result(s)",
+                    len(orphaned_results),
+                )
 
         # 2. Add stub results for assistant tool_calls whose results were dropped
         missing_results = surviving_call_ids - result_call_ids
@@ -890,14 +990,19 @@ def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str,
                     for tc in msg.get("tool_calls") or []:
                         cid = self._get_tool_call_id(tc)
                         if cid in missing_results:
-                            patched.append({
-                                "role": "tool",
-                                "content": "[Result from earlier conversation — see context summary above]",
-                                "tool_call_id": cid,
-                            })
+                            patched.append(
+                                {
+                                    "role": "tool",
+                                    "content": "[Result from earlier conversation — see context summary above]",
+                                    "tool_call_id": cid,
+                                }
+                            )
             messages = patched
             if not self.quiet_mode:
-                logger.info("Compression sanitizer: added %d stub tool result(s)", len(missing_results))
+                logger.info(
+                    "Compression sanitizer: added %d stub tool result(s)",
+                    len(missing_results),
+                )
 
         return messages
 
@@ -931,7 +1036,11 @@ def _align_boundary_backward(self, messages: List[Dict[str, Any]], idx: int) ->
             check -= 1
         # If we landed on the parent assistant with tool_calls, pull the
         # boundary before it so the whole group gets summarised together.
-        if check >= 0 and messages[check].get("role") == "assistant" and messages[check].get("tool_calls"):
+        if (
+            check >= 0
+            and messages[check].get("role") == "assistant"
+            and messages[check].get("tool_calls")
+        ):
             idx = check
         return idx
 
@@ -996,7 +1105,9 @@ def _ensure_last_user_message_in_tail(
         return max(last_user_idx, head_end + 1)
 
     def _find_tail_cut_by_tokens(
-        self, messages: List[Dict[str, Any]], head_end: int,
+        self,
+        messages: List[Dict[str, Any]],
+        head_end: int,
         token_budget: int | None = None,
     ) -> int:
         """Walk backward from the end of messages, accumulating tokens until
@@ -1062,7 +1173,12 @@ def _find_tail_cut_by_tokens(
     # Main compression entry point
     # ------------------------------------------------------------------
 
-    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None) -> List[Dict[str, Any]]:
+    def compress(
+        self,
+        messages: List[Dict[str, Any]],
+        current_tokens: int = None,
+        focus_topic: str = None,
+    ) -> List[Dict[str, Any]]:
         """Compress conversation messages by summarizing middle turns.
 
         Algorithm:
@@ -1088,15 +1204,21 @@ def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, f
             if not self.quiet_mode:
                 logger.warning(
                     "Cannot compress: only %d messages (need > %d)",
-                    n_messages, _min_for_compress,
+                    n_messages,
+                    _min_for_compress,
                 )
             return messages
 
-        display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
+        display_tokens = (
+            current_tokens
+            if current_tokens
+            else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
+        )
 
         # Phase 1: Prune old tool results (cheap, no LLM call)
         messages, pruned_count = self._prune_old_tool_results(
-            messages, protect_tail_count=self.protect_last_n,
+            messages,
+            protect_tail_count=self.protect_last_n,
             protect_tail_tokens=self.tail_token_budget,
         )
         if pruned_count and not self.quiet_mode:
@@ -1154,7 +1276,9 @@ def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, f
         # knows context was lost rather than silently dropping everything.
         if not summary:
             if not self.quiet_mode:
-                logger.warning("Summary generation failed — inserting static fallback context marker")
+                logger.warning(
+                    "Summary generation failed — inserting static fallback context marker"
+                )
             n_dropped = compress_end - compress_start
             summary = (
                 f"{SUMMARY_PREFIX}\n"
@@ -1165,8 +1289,16 @@ def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, f
             )
 
         _merge_summary_into_tail = False
-        last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
-        first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
+        last_head_role = (
+            messages[compress_start - 1].get("role", "user")
+            if compress_start > 0
+            else "user"
+        )
+        first_tail_role = (
+            messages[compress_end].get("role", "user")
+            if compress_end < n_messages
+            else "user"
+        )
         # Pick a role that avoids consecutive same-role with both neighbors.
         # Priority: avoid colliding with head (already committed), then tail.
         if last_head_role in ("assistant", "tool"):
@@ -1193,8 +1325,7 @@ def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, f
             if _merge_summary_into_tail and i == compress_end:
                 original = msg.get("content") or ""
                 msg["content"] = (
-                    summary
-                    + "\n\n--- END OF CONTEXT SUMMARY — "
+                    summary + "\n\n--- END OF CONTEXT SUMMARY — "
                     "respond to the message below, not the summary above ---\n\n"
                     + original
                 )
@@ -1209,7 +1340,9 @@ def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, f
         saved_estimate = display_tokens - new_estimate
 
         # Anti-thrashing: track compression effectiveness
-        savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
+        savings_pct = (
+            (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
+        )
         self._last_compression_savings_pct = savings_pct
         if savings_pct < 10:
             self._ineffective_compression_count += 1
diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py
index df3987f2ebd..93284645841 100644
--- a/gateway/platforms/qqbot/adapter.py
+++ b/gateway/platforms/qqbot/adapter.py
@@ -535,6 +535,9 @@ async def _listen_loop(self) -> None:
                     quick_disconnect_count = 0
                 else:
                     backoff_idx += 1
+                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
+                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
+                        return
 
             except Exception as exc:
                 if not self._running: