Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions unsloth_zoo/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,12 +323,21 @@ def train_on_responses_only(
return_function = False, # Useful for iterating over lists
num_proc = None,
last_response_only = False, # Train only on the last assistant turn
mask_out_tokens = None, # e.g. ["</think>"] - also mask these inside kept responses

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Propagate mask_out_tokens to vision collators

When callers use this new kwarg with a vision trainer, train_on_responses_only takes the later _is_vision_collator branch and assigns data_collator.train_on_responses_only from a recursive call at lines 695-702, but that call omits mask_out_tokens, so the inner closure builds an empty mask_out_sequences. This means VLM response-only training still includes </think> or any requested token in the loss despite accepting mask_out_tokens=[...]; pass the kwarg through the collator masking setup as well.

Useful? React with 👍 / 👎.

@pjordanandrsn pjordanandrsn Jul 3, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 76587db — the vision-collator recursive call now passes mask_out_tokens through, so the VLM path applies the same re-masking instead of silently ignoring the kwarg. Text-path label output is regression-identical.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Forward mask_out_tokens through the MLX wrapper

When using the MLX API (unsloth_zoo.mlx.trainer.train_on_responses_only), this newly added parameter cannot be used because the wrapper signature at unsloth_zoo/mlx/trainer.py:2877-2886 and its call into this helper at 2933-2941 were not updated. Passing mask_out_tokens=[...] through the documented MLX mirror of the HF/Unsloth API raises TypeError before it reaches this implementation, so MLX training cannot apply the new masking behavior.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 76587db — the MLX wrapper signature now accepts mask_out_tokens and forwards it to the HF implementation, mirroring the rest of the API surface.

):
"""Train only on responses by masking instruction labels to -100.

With last_response_only=True, only the final assistant turn is unmasked;
earlier assistant turns stay at -100 (never written, never copied from
old_labels).

mask_out_tokens re-masks the given token strings to -100 even inside kept
response spans - e.g. mask_out_tokens=["</think>"] reproduces the Nemotron
Ultra recipe of never training on the thinking closer. Each entry is matched
as its tokenized id sequence (a leading-space variant is matched too, for
SentencePiece-style tokenizers). Atomic added tokens such as "</think>"
always match exactly; multi-token strings match only where the in-context
tokenization equals the standalone one.
"""
# All Unsloth Zoo code licensed under LGPLv3
if tokenizer is None and trainer is not None:
Expand Down Expand Up @@ -375,6 +384,18 @@ def train_on_responses_only(
torch_Tensor = torch.Tensor
torch_int64 = torch.int64

# Precompute id sequences for mask_out_tokens (see docstring). Done once here so
# the per-example closure below only does integer comparisons.
mask_out_sequences = []
if mask_out_tokens:
if isinstance(mask_out_tokens, str): mask_out_tokens = [mask_out_tokens]
for token_string in mask_out_tokens:
for candidate in dict.fromkeys((token_string, " " + token_string)):
ids = tokenizer(candidate, add_special_tokens = False).input_ids
if ids and ids not in mask_out_sequences:
mask_out_sequences.append(ids)
pass
Comment on lines +389 to +397

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To fail fast and avoid cryptic runtime errors (such as a TypeError during string concatenation or iteration), we should explicitly validate that mask_out_tokens is either a single string or a list/tuple of strings, and raise a clear TypeError otherwise.

    mask_out_sequences = []
    if mask_out_tokens:
        if isinstance(mask_out_tokens, str):
            mask_out_tokens = [mask_out_tokens]
        elif not isinstance(mask_out_tokens, (list, tuple)):
            raise TypeError("Unsloth: mask_out_tokens must be a string, list, or tuple of strings.")
        for token_string in mask_out_tokens:
            if not isinstance(token_string, str):
                raise TypeError(f"Unsloth: mask_out_tokens elements must be strings, but got {type(token_string).__name__}")
            for candidate in dict.fromkeys((token_string, " " + token_string)):
                ids = tokenizer(candidate, add_special_tokens = False).input_ids
                if ids and ids not in mask_out_sequences:
                    mask_out_sequences.append(ids)
    pass


def _train_on_responses_only(examples):
input_ids_ = examples["input_ids"]
use_tensors = False
Expand Down Expand Up @@ -468,6 +489,19 @@ def _train_on_responses_only(examples):
else:
labels[assistant_k : user_j] = old_labels[assistant_k : user_j]

# Re-mask requested token sequences (e.g. "</think>") wherever they occur;
# positions outside kept spans are already -100, so re-masking is harmless.
for seq in mask_out_sequences:
seq_len, first = len(seq), seq[0]
i, limit = 0, n - len(seq)
while i <= limit:
if input_ids[i] == first and input_ids[i : i + seq_len] == seq:
labels[i : i + seq_len] = [-100] * seq_len
i += seq_len
else:
i += 1
pass

all_labels.append(labels)
pass
return { "labels" : torch.tensor(all_labels, dtype = torch.int64) if use_tensors else all_labels }
Expand Down Expand Up @@ -664,6 +698,7 @@ def _is_vision_collator(collator):
tokenizer = coll_proc,
return_function = True,
last_response_only = last_response_only,
mask_out_tokens = mask_out_tokens,
**parts,
)
print(f"Unsloth: Enabled response-only masking on your {type(data_collator).__name__} (image handling kept intact).")
Expand Down
2 changes: 2 additions & 0 deletions unsloth_zoo/mlx/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2883,6 +2883,7 @@ def train_on_responses_only(
return_function=False,
num_proc=None,
last_response_only=False,
mask_out_tokens=None,
):
"""Mask instruction tokens from loss — train only on assistant responses.

Expand Down Expand Up @@ -2938,6 +2939,7 @@ def train_on_responses_only(
tokenizer=_detect_source,
return_function=True,
last_response_only=last_response_only,
mask_out_tokens=mask_out_tokens,
)

if return_function:
Expand Down