From 38f85ec3fb1790fa608e23083c9ca033a5897748 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 3 Jul 2026 14:53:03 -0300 Subject: [PATCH 1/2] Studio: heal DiffusionGemma tool calls into structured tool_calls --- studio/backend/core/inference/llama_cpp.py | 9 +++++++++ studio/backend/routes/inference.py | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py index 3ccfc5cdfe..864f5461d1 100644 --- a/studio/backend/core/inference/llama_cpp.py +++ b/studio/backend/core/inference/llama_cpp.py @@ -1686,6 +1686,15 @@ def supports_tools(self) -> bool: return False return self._supports_tools + @property + def supports_tool_passthrough(self) -> bool: + # Client-side tool loops (the caller declares tools, executes them, and + # sends results back) only need the response healer to promote the model's + # text-form <|tool_call> blocks into structured tool_calls -- they never run + # the agentic loop, so DiffusionGemma's per-step canvas frames survive. + # So this stays on for diffusion even though supports_tools (agentic) is off. + return self._supports_tools + @property def cache_type_kv(self) -> Optional[str]: return self._cache_type_kv diff --git a/studio/backend/routes/inference.py b/studio/backend/routes/inference.py index 17be222d93..3b77d567a4 100644 --- a/studio/backend/routes/inference.py +++ b/studio/backend/routes/inference.py @@ -5705,7 +5705,7 @@ def _reject_unsupported_n(path_label: str) -> "HTTPException": # free-form sampling. Guided decoding does not require ``supports_tools`` -- # the grammar machinery is independent of tool-call parsing. _has_response_format = _extract_response_format(payload) is not None - _tools_passthrough = llama_backend.supports_tools and ( + _tools_passthrough = llama_backend.supports_tool_passthrough and ( (payload.tools and len(payload.tools) > 0) or _has_tool_messages ) if ( @@ -9564,7 +9564,9 @@ async def anthropic_messages( and not _has_image ) client_tools = ( - not server_tools and len(openai_client_tools) > 0 and llama_backend.supports_tools + not server_tools + and len(openai_client_tools) > 0 + and llama_backend.supports_tool_passthrough ) # Anthropic tool_choice.disable_parallel_tool_use caps the response to a From 85f9d37afa8ab56a4894a9b5657fd82372b333e6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 3 Jul 2026 15:11:51 -0300 Subject: [PATCH 2/2] Fall back to supports_tools for backends without the passthrough capability --- studio/backend/core/inference/llama_cpp.py | 8 +++----- studio/backend/routes/inference.py | 8 ++++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py index 864f5461d1..e31ca07ce0 100644 --- a/studio/backend/core/inference/llama_cpp.py +++ b/studio/backend/core/inference/llama_cpp.py @@ -1688,11 +1688,9 @@ def supports_tools(self) -> bool: @property def supports_tool_passthrough(self) -> bool: - # Client-side tool loops (the caller declares tools, executes them, and - # sends results back) only need the response healer to promote the model's - # text-form <|tool_call> blocks into structured tool_calls -- they never run - # the agentic loop, so DiffusionGemma's per-step canvas frames survive. - # So this stays on for diffusion even though supports_tools (agentic) is off. + # supports_tools is forced off for DiffusionGemma (its agentic loop drops the + # per-step canvas frames); client tool loops skip that loop, so the passthrough + # follows the real _supports_tools instead of the forced-off value. return self._supports_tools @property diff --git a/studio/backend/routes/inference.py b/studio/backend/routes/inference.py index 3b77d567a4..7831dfec42 100644 --- a/studio/backend/routes/inference.py +++ b/studio/backend/routes/inference.py @@ -5705,9 +5705,9 @@ def _reject_unsupported_n(path_label: str) -> "HTTPException": # free-form sampling. Guided decoding does not require ``supports_tools`` -- # the grammar machinery is independent of tool-call parsing. _has_response_format = _extract_response_format(payload) is not None - _tools_passthrough = llama_backend.supports_tool_passthrough and ( - (payload.tools and len(payload.tools) > 0) or _has_tool_messages - ) + _tools_passthrough = getattr( + llama_backend, "supports_tool_passthrough", llama_backend.supports_tools + ) and ((payload.tools and len(payload.tools) > 0) or _has_tool_messages) if ( using_gguf and not _effective_enable_tools(payload) @@ -9566,7 +9566,7 @@ async def anthropic_messages( client_tools = ( not server_tools and len(openai_client_tools) > 0 - and llama_backend.supports_tool_passthrough + and getattr(llama_backend, "supports_tool_passthrough", llama_backend.supports_tools) ) # Anthropic tool_choice.disable_parallel_tool_use caps the response to a