diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 3ccfc5cdfe..e31ca07ce0 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -1686,6 +1686,13 @@ def supports_tools(self) -> bool:
             return False
         return self._supports_tools
 
+    @property
+    def supports_tool_passthrough(self) -> bool:
+        # Unlike supports_tools, which is forced off for DiffusionGemma (its agentic loop
+        # drops the per-step canvas frames), this returns the raw _supports_tools flag:
+        # client tool loops skip that agentic loop, so the forced-off value does not apply.
+        return self._supports_tools
+
     @property
     def cache_type_kv(self) -> Optional[str]:
         return self._cache_type_kv
diff --git a/studio/backend/routes/inference.py b/studio/backend/routes/inference.py
index 17be222d93..7831dfec42 100644
--- a/studio/backend/routes/inference.py
+++ b/studio/backend/routes/inference.py
@@ -5705,9 +5705,9 @@ def _reject_unsupported_n(path_label: str) -> "HTTPException":
     # free-form sampling. Guided decoding does not require ``supports_tools`` --
     # the grammar machinery is independent of tool-call parsing.
     _has_response_format = _extract_response_format(payload) is not None
-    _tools_passthrough = llama_backend.supports_tools and (
-        (payload.tools and len(payload.tools) > 0) or _has_tool_messages
-    )
+    _tools_passthrough = getattr(
+        llama_backend, "supports_tool_passthrough", llama_backend.supports_tools
+    ) and ((payload.tools and len(payload.tools) > 0) or _has_tool_messages)
     if (
         using_gguf
         and not _effective_enable_tools(payload)
@@ -9564,7 +9564,9 @@ async def anthropic_messages(
         and not _has_image
     )
     client_tools = (
-        not server_tools and len(openai_client_tools) > 0 and llama_backend.supports_tools
+        not server_tools
+        and len(openai_client_tools) > 0
+        and getattr(llama_backend, "supports_tool_passthrough", llama_backend.supports_tools)
     )
 
     # Anthropic tool_choice.disable_parallel_tool_use caps the response to a