Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions studio/backend/core/inference/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1686,6 +1686,13 @@ def supports_tools(self) -> bool:
return False
return self._supports_tools

@property
def supports_tool_passthrough(self) -> bool:
    """Report tool support for client-side tool loops (passthrough).

    ``supports_tools`` is forced off for DiffusionGemma, because its agentic
    loop drops the per-step canvas frames. Client tool loops skip that loop,
    so the passthrough follows the real ``_supports_tools`` value instead of
    the forced-off one.
    """
    real_tool_support = self._supports_tools
    return real_tool_support

@property
def cache_type_kv(self) -> Optional[str]:
    """Return the value held in ``_cache_type_kv``.

    The annotation allows ``None``; this accessor returns the stored value
    unchanged.
    """
    stored_cache_type = self._cache_type_kv
    return stored_cache_type
Expand Down
10 changes: 6 additions & 4 deletions studio/backend/routes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5705,9 +5705,9 @@ def _reject_unsupported_n(path_label: str) -> "HTTPException":
# free-form sampling. Guided decoding does not require ``supports_tools`` --
# the grammar machinery is independent of tool-call parsing.
_has_response_format = _extract_response_format(payload) is not None
_tools_passthrough = llama_backend.supports_tools and (
(payload.tools and len(payload.tools) > 0) or _has_tool_messages
)
_tools_passthrough = getattr(
llama_backend, "supports_tool_passthrough", llama_backend.supports_tools
) and ((payload.tools and len(payload.tools) > 0) or _has_tool_messages)
if (
using_gguf
and not _effective_enable_tools(payload)
Expand Down Expand Up @@ -9564,7 +9564,9 @@ async def anthropic_messages(
and not _has_image
)
client_tools = (
not server_tools and len(openai_client_tools) > 0 and llama_backend.supports_tools
not server_tools
and len(openai_client_tools) > 0
and getattr(llama_backend, "supports_tool_passthrough", llama_backend.supports_tools)
)

# Anthropic tool_choice.disable_parallel_tool_use caps the response to a
Expand Down
Loading