Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/transformers/loss/loss_deformable_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ def forward(self, outputs, targets):
# Final cost matrix
cost_matrix = self.bbox_cost * bbox_cost + self.class_cost * class_cost + self.giou_cost * giou_cost
cost_matrix = cost_matrix.view(batch_size, num_queries, -1).cpu()
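# Guard against non-finite costs (e.g. inf/NaN from fp16 overflow under AMP), which would otherwise
# make scipy's linear_sum_assignment raise a ValueError ("cost matrix is infeasible" when +inf entries
# block a full assignment, "matrix contains invalid numeric entries" for NaN/-inf entries).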
max_value = torch.finfo(cost_matrix.dtype).max
cost_matrix = torch.nan_to_num(cost_matrix, nan=max_value, posinf=max_value, neginf=max_value)

sizes = [len(v["boxes"]) for v in targets]
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(cost_matrix.split(sizes, -1))]
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/loss/loss_lw_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def forward(self, outputs, targets, group_detr):
# Final cost matrix
cost_matrix = self.bbox_cost * bbox_cost + self.class_cost * class_cost + self.giou_cost * giou_cost
cost_matrix = cost_matrix.view(batch_size, num_queries, -1).cpu()
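# Guard against non-finite costs (e.g. inf/NaN from fp16 overflow under AMP), which would otherwise
# make scipy's linear_sum_assignment raise a ValueError ("cost matrix is infeasible" when +inf entries
# block a full assignment, "matrix contains invalid numeric entries" for NaN/-inf entries).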
max_value = torch.finfo(cost_matrix.dtype).max
cost_matrix = torch.nan_to_num(cost_matrix, nan=max_value, posinf=max_value, neginf=max_value)

sizes = [len(v["boxes"]) for v in targets]
indices = []
Expand Down
52 changes: 52 additions & 0 deletions tests/models/deformable_detr/test_modeling_deformable_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from transformers import DeformableDetrConfig, ResNetConfig, is_torch_available, is_vision_available
from transformers.testing_utils import (
require_scipy,
require_timm,
require_torch,
require_torch_accelerator,
Expand Down Expand Up @@ -741,3 +742,54 @@ def test_inference_object_detection_head_equivalence_cpu_accelerator(self):
[[-9.9160, -4.2876, -6.4985], [-9.6945, -4.0855, -6.8031], [-10.0665, -5.8471, -7.7001]]
)
assert torch.allclose(cpu_outputs.logits[0, :3, :3], expected_logits, atol=2e-4)


@require_torch
@require_scipy
class DeformableDetrHungarianMatcherInfeasibleCostTest(unittest.TestCase):
    """Regression tests for non-finite entries in the Deformable DETR Hungarian matcher's cost matrix."""

    def test_non_finite_costs_do_not_crash_matcher(self):
        """
        Regression test for the Deformable DETR Hungarian matcher. Its sigmoid focal classification
        cost can overflow to inf/NaN (e.g. fp16 saturation under AMP, see #47000), which used to make
        ``scipy.optimize.linear_sum_assignment`` raise a ``ValueError``. The matcher now replaces
        non-finite costs with a large finite value before matching, so it must return a valid
        one-to-one assignment for every poisoned input.
        """
        from transformers.loss.loss_deformable_detr import DeformableDetrHungarianMatcher

        torch.manual_seed(0)
        matcher = DeformableDetrHungarianMatcher(class_cost=1, bbox_cost=5, giou_cost=2)

        num_queries, num_classes, num_targets = 5, 4, 3
        # Boxes in normalized center format (cx, cy, w, h) with strictly positive width/height.
        targets = [
            {
                "class_labels": torch.arange(num_targets),
                "boxes": torch.rand(num_targets, 4) * 0.5 + 0.25,
            }
        ]

        def base_inputs():
            """Return fresh, fully finite ``(logits, pred_boxes)`` for a batch of one image."""
            return (
                torch.randn(1, num_queries, num_classes),
                torch.rand(1, num_queries, 4) * 0.5 + 0.25,
            )

        # NaN in the logits poisons the classification cost; +inf on a box width poisons the L1/GIoU
        # cost. Either leaves a non-finite cost matrix that used to make scipy raise.
        nan_logits, finite_boxes = base_inputs()
        nan_logits[0, 0, 0] = float("nan")
        finite_logits, inf_boxes = base_inputs()
        inf_boxes[0, 0, 2] = float("inf")
        cases = {
            "nan_logit": (nan_logits, finite_boxes),
            "inf_box_width": (finite_logits, inf_boxes),
        }

        for label, (logits, pred_boxes) in cases.items():
            # subTest keeps the cases independent: one failing input neither hides the other nor
            # leaves the report ambiguous about which input broke the matcher.
            with self.subTest(case=label):
                outputs = {"logits": logits, "pred_boxes": pred_boxes}

                indices = matcher(outputs, targets)

                self.assertEqual(len(indices), 1)
                row_indices, col_indices = indices[0]
                self.assertEqual(len(row_indices), num_targets)
                self.assertEqual(len(col_indices), num_targets)

                # The result must be a genuine one-to-one assignment: distinct, in-range queries
                # and every target matched exactly once.
                matched_queries = [int(q) for q in row_indices]
                matched_targets = sorted(int(t) for t in col_indices)
                self.assertEqual(len(set(matched_queries)), num_targets)
                self.assertTrue(all(0 <= q < num_queries for q in matched_queries))
                self.assertEqual(matched_targets, list(range(num_targets)))
53 changes: 53 additions & 0 deletions tests/models/lw_detr/test_modeling_lw_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
)
from transformers.testing_utils import (
Expectations,
require_scipy,
require_torch,
require_vision,
slow,
Expand Down Expand Up @@ -813,3 +814,55 @@ def test_inference_object_detection_head_xlarge(self):
torch.testing.assert_close(results["scores"][:4], expected_scores, atol=1e-3, rtol=2e-4)
self.assertSequenceEqual(results["labels"][:4].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][:4], expected_slice_boxes, atol=1e-3, rtol=2e-4)


@require_torch
@require_scipy
@require_vision
class LwDetrHungarianMatcherInfeasibleCostTest(unittest.TestCase):
    """Regression tests for non-finite entries in the LW-DETR Hungarian matcher's cost matrix."""

    def test_non_finite_costs_do_not_crash_matcher(self):
        """
        Regression test for the LW-DETR Hungarian matcher. Its sigmoid focal classification cost can
        overflow to inf/NaN (e.g. fp16 saturation under AMP, see #47000), which used to make
        ``scipy.optimize.linear_sum_assignment`` raise a ``ValueError``. The matcher now replaces
        non-finite costs with a large finite value before matching, so it must return a valid
        one-to-one assignment for every poisoned input.
        """
        from transformers.loss.loss_lw_detr import LwDetrHungarianMatcher

        torch.manual_seed(0)
        matcher = LwDetrHungarianMatcher(class_cost=1, bbox_cost=5, giou_cost=2)

        num_queries, num_classes, num_targets = 5, 4, 3
        # Boxes in normalized center format (cx, cy, w, h) with strictly positive width/height.
        targets = [
            {
                "class_labels": torch.arange(num_targets),
                "boxes": torch.rand(num_targets, 4) * 0.5 + 0.25,
            }
        ]

        def base_inputs():
            """Return fresh, fully finite ``(logits, pred_boxes)`` for a batch of one image."""
            return (
                torch.randn(1, num_queries, num_classes),
                torch.rand(1, num_queries, 4) * 0.5 + 0.25,
            )

        # NaN in the logits poisons the classification cost; +inf on a box width poisons the L1/GIoU
        # cost. Either leaves a non-finite cost matrix that used to make scipy raise.
        nan_logits, finite_boxes = base_inputs()
        nan_logits[0, 0, 0] = float("nan")
        finite_logits, inf_boxes = base_inputs()
        inf_boxes[0, 0, 2] = float("inf")
        cases = {
            "nan_logit": (nan_logits, finite_boxes),
            "inf_box_width": (finite_logits, inf_boxes),
        }

        for label, (logits, pred_boxes) in cases.items():
            # subTest keeps the cases independent: one failing input neither hides the other nor
            # leaves the report ambiguous about which input broke the matcher.
            with self.subTest(case=label):
                outputs = {"logits": logits, "pred_boxes": pred_boxes}

                # A single query group, so all queries compete in one assignment problem.
                indices = matcher(outputs, targets, group_detr=1)

                self.assertEqual(len(indices), 1)
                row_indices, col_indices = indices[0]
                self.assertEqual(len(row_indices), num_targets)
                self.assertEqual(len(col_indices), num_targets)

                # The result must be a genuine one-to-one assignment: distinct, in-range queries
                # and every target matched exactly once.
                matched_queries = [int(q) for q in row_indices]
                matched_targets = sorted(int(t) for t in col_indices)
                self.assertEqual(len(set(matched_queries)), num_targets)
                self.assertTrue(all(0 <= q < num_queries for q in matched_queries))
                self.assertEqual(matched_targets, list(range(num_targets)))
Loading