From 38f85ec3fb1790fa608e23083c9ca033a5897748 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 3 Jul 2026 14:53:03 -0300
Subject: [PATCH 1/2] Studio: heal DiffusionGemma tool calls into structured
 tool_calls

---
 studio/backend/core/inference/llama_cpp.py | 9 +++++++++
 studio/backend/routes/inference.py         | 6 ++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 3ccfc5cdfe..864f5461d1 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -1686,6 +1686,15 @@ def supports_tools(self) -> bool:
             return False
         return self._supports_tools
 
+    @property
+    def supports_tool_passthrough(self) -> bool:
+        # Client-side tool loops (the caller declares tools, executes them, and
+        # sends results back) only need the response healer to promote the model's
+        # text-form <|tool_call> blocks into structured tool_calls -- they never run
+        # the agentic loop, so DiffusionGemma's per-step canvas frames survive.
+        # So this stays on for diffusion even though supports_tools (agentic) is off.
+        return self._supports_tools
+
     @property
     def cache_type_kv(self) -> Optional[str]:
         return self._cache_type_kv
diff --git a/studio/backend/routes/inference.py b/studio/backend/routes/inference.py
index 17be222d93..3b77d567a4 100644
--- a/studio/backend/routes/inference.py
+++ b/studio/backend/routes/inference.py
@@ -5705,7 +5705,7 @@ def _reject_unsupported_n(path_label: str) -> "HTTPException":
     # free-form sampling. Guided decoding does not require ``supports_tools`` --
     # the grammar machinery is independent of tool-call parsing.
     _has_response_format = _extract_response_format(payload) is not None
-    _tools_passthrough = llama_backend.supports_tools and (
+    _tools_passthrough = llama_backend.supports_tool_passthrough and (
         (payload.tools and len(payload.tools) > 0) or _has_tool_messages
     )
     if (
@@ -9564,7 +9564,9 @@ async def anthropic_messages(
         and not _has_image
     )
     client_tools = (
-        not server_tools and len(openai_client_tools) > 0 and llama_backend.supports_tools
+        not server_tools
+        and len(openai_client_tools) > 0
+        and llama_backend.supports_tool_passthrough
     )
 
     # Anthropic tool_choice.disable_parallel_tool_use caps the response to a

From 85f9d37afa8ab56a4894a9b5657fd82372b333e6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 3 Jul 2026 15:11:51 -0300
Subject: [PATCH 2/2] Fall back to supports_tools for backends without the
 passthrough capability

---
 studio/backend/core/inference/llama_cpp.py | 8 +++-----
 studio/backend/routes/inference.py         | 8 ++++----
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 864f5461d1..e31ca07ce0 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -1688,11 +1688,9 @@ def supports_tools(self) -> bool:
 
     @property
     def supports_tool_passthrough(self) -> bool:
-        # Client-side tool loops (the caller declares tools, executes them, and
-        # sends results back) only need the response healer to promote the model's
-        # text-form <|tool_call> blocks into structured tool_calls -- they never run
-        # the agentic loop, so DiffusionGemma's per-step canvas frames survive.
-        # So this stays on for diffusion even though supports_tools (agentic) is off.
+        # The supports_tools property is forced off for DiffusionGemma because the
+        # agentic loop drops its per-step canvas frames. Client-side tool loops never
+        # enter that loop, so passthrough reports the raw _supports_tools flag instead.
         return self._supports_tools
 
     @property
diff --git a/studio/backend/routes/inference.py b/studio/backend/routes/inference.py
index 3b77d567a4..7831dfec42 100644
--- a/studio/backend/routes/inference.py
+++ b/studio/backend/routes/inference.py
@@ -5705,9 +5705,9 @@ def _reject_unsupported_n(path_label: str) -> "HTTPException":
     # free-form sampling. Guided decoding does not require ``supports_tools`` --
     # the grammar machinery is independent of tool-call parsing.
     _has_response_format = _extract_response_format(payload) is not None
-    _tools_passthrough = llama_backend.supports_tool_passthrough and (
-        (payload.tools and len(payload.tools) > 0) or _has_tool_messages
-    )
+    _tools_passthrough = getattr(
+        llama_backend, "supports_tool_passthrough", llama_backend.supports_tools
+    ) and ((payload.tools and len(payload.tools) > 0) or _has_tool_messages)
     if (
         using_gguf
         and not _effective_enable_tools(payload)
@@ -9566,7 +9566,7 @@ async def anthropic_messages(
     client_tools = (
         not server_tools
         and len(openai_client_tools) > 0
-        and llama_backend.supports_tool_passthrough
+        and getattr(llama_backend, "supports_tool_passthrough", llama_backend.supports_tools)
     )
 
     # Anthropic tool_choice.disable_parallel_tool_use caps the response to a