From 968b11ce356e0125d0d7b4067e3b46e87bfd0912 Mon Sep 17 00:00:00 2001 From: MutugiD Date: Sat, 4 Jul 2026 14:21:42 +0300 Subject: [PATCH] Guard Deformable DETR and LW-DETR Hungarian matchers against infinite cost matrices Under AMP/fp16, the sigmoid focal classification cost can overflow to inf/NaN, which makes scipy's linear_sum_assignment raise a ValueError such as "cost matrix is infeasible" (NaN/-inf entries are reported as "matrix contains invalid numeric entries"). Replace non-finite costs with torch.finfo(dtype).max before running the assignment, mirroring the RT-DETR fix in #47016. Add regression tests for both matchers. Fixes #47065. --- src/transformers/loss/loss_deformable_detr.py | 4 ++ src/transformers/loss/loss_lw_detr.py | 4 ++ .../test_modeling_deformable_detr.py | 52 ++++++++++++++++++ tests/models/lw_detr/test_modeling_lw_detr.py | 53 +++++++++++++++++++ 4 files changed, 113 insertions(+) diff --git a/src/transformers/loss/loss_deformable_detr.py b/src/transformers/loss/loss_deformable_detr.py index 7619bbf93aea..a5e999d1a35d 100644 --- a/src/transformers/loss/loss_deformable_detr.py +++ b/src/transformers/loss/loss_deformable_detr.py @@ -50,6 +50,10 @@ def forward(self, outputs, targets): # Final cost matrix cost_matrix = self.bbox_cost * bbox_cost + self.class_cost * class_cost + self.giou_cost * giou_cost cost_matrix = cost_matrix.view(batch_size, num_queries, -1).cpu() + # Guard against non-finite costs (e.g. inf/NaN from fp16 overflow under AMP), which would + # otherwise make scipy's linear_sum_assignment raise "ValueError: cost matrix is infeasible". 
+ max_value = torch.finfo(cost_matrix.dtype).max + cost_matrix = torch.nan_to_num(cost_matrix, nan=max_value, posinf=max_value, neginf=max_value) sizes = [len(v["boxes"]) for v in targets] indices = [linear_sum_assignment(c[i]) for i, c in enumerate(cost_matrix.split(sizes, -1))] diff --git a/src/transformers/loss/loss_lw_detr.py b/src/transformers/loss/loss_lw_detr.py index 13d310d17b31..c1e91140e611 100644 --- a/src/transformers/loss/loss_lw_detr.py +++ b/src/transformers/loss/loss_lw_detr.py @@ -74,6 +74,10 @@ def forward(self, outputs, targets, group_detr): # Final cost matrix cost_matrix = self.bbox_cost * bbox_cost + self.class_cost * class_cost + self.giou_cost * giou_cost cost_matrix = cost_matrix.view(batch_size, num_queries, -1).cpu() + # Guard against non-finite costs (e.g. inf/NaN from fp16 overflow under AMP), which would + # otherwise make scipy's linear_sum_assignment raise "ValueError: cost matrix is infeasible". + max_value = torch.finfo(cost_matrix.dtype).max + cost_matrix = torch.nan_to_num(cost_matrix, nan=max_value, posinf=max_value, neginf=max_value) sizes = [len(v["boxes"]) for v in targets] indices = [] diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index cc1c03a4049e..255e0347c8bd 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -21,6 +21,7 @@ from transformers import DeformableDetrConfig, ResNetConfig, is_torch_available, is_vision_available from transformers.testing_utils import ( + require_scipy, require_timm, require_torch, require_torch_accelerator, @@ -741,3 +742,54 @@ def test_inference_object_detection_head_equivalence_cpu_accelerator(self): [[-9.9160, -4.2876, -6.4985], [-9.6945, -4.0855, -6.8031], [-10.0665, -5.8471, -7.7001]] ) assert torch.allclose(cpu_outputs.logits[0, :3, :3], expected_logits, atol=2e-4) + + +@require_torch +@require_scipy 
+class DeformableDetrHungarianMatcherInfeasibleCostTest(unittest.TestCase): + def test_non_finite_costs_do_not_crash_matcher(self): + """ + Regression test for the Deformable DETR Hungarian matcher. Its sigmoid focal classification + cost can overflow to inf/NaN (e.g. fp16 saturation under AMP, see #47000), which used to make + ``scipy.optimize.linear_sum_assignment`` raise ``ValueError: cost matrix is infeasible``. The + matcher now replaces non-finite costs with a large finite value before matching. + """ + from transformers.loss.loss_deformable_detr import DeformableDetrHungarianMatcher + + torch.manual_seed(0) + matcher = DeformableDetrHungarianMatcher(class_cost=1, bbox_cost=5, giou_cost=2) + + num_queries, num_classes, num_targets = 5, 4, 3 + # Boxes in normalized center format (cx, cy, w, h) with strictly positive width/height. + targets = [ + { + "class_labels": torch.arange(num_targets), + "boxes": torch.rand(num_targets, 4) * 0.5 + 0.25, + } + ] + + def base_inputs(): + return ( + torch.randn(1, num_queries, num_classes), + torch.rand(1, num_queries, 4) * 0.5 + 0.25, + ) + + # NaN in the logits poisons the classification cost; +inf on a box width poisons the L1/GIoU + # cost. Either leaves a non-finite cost matrix that used to make scipy raise. 
+ cases = [] + logits, pred_boxes = base_inputs() + logits[0, 0, 0] = float("nan") + cases.append((logits, pred_boxes)) + logits, pred_boxes = base_inputs() + pred_boxes[0, 0, 2] = float("inf") + cases.append((logits, pred_boxes)) + + for logits, pred_boxes in cases: + outputs = {"logits": logits, "pred_boxes": pred_boxes} + + indices = matcher(outputs, targets) + + self.assertEqual(len(indices), 1) + row_indices, col_indices = indices[0] + self.assertEqual(len(row_indices), num_targets) + self.assertEqual(len(col_indices), num_targets) diff --git a/tests/models/lw_detr/test_modeling_lw_detr.py b/tests/models/lw_detr/test_modeling_lw_detr.py index e30f9be2726e..1bafaff8d5fa 100644 --- a/tests/models/lw_detr/test_modeling_lw_detr.py +++ b/tests/models/lw_detr/test_modeling_lw_detr.py @@ -23,6 +23,7 @@ ) from transformers.testing_utils import ( Expectations, + require_scipy, require_torch, require_vision, slow, @@ -813,3 +814,55 @@ def test_inference_object_detection_head_xlarge(self): torch.testing.assert_close(results["scores"][:4], expected_scores, atol=1e-3, rtol=2e-4) self.assertSequenceEqual(results["labels"][:4].tolist(), expected_labels) torch.testing.assert_close(results["boxes"][:4], expected_slice_boxes, atol=1e-3, rtol=2e-4) + + +@require_torch +@require_scipy +@require_vision +class LwDetrHungarianMatcherInfeasibleCostTest(unittest.TestCase): + def test_non_finite_costs_do_not_crash_matcher(self): + """ + Regression test for the LW-DETR Hungarian matcher. Its sigmoid focal classification cost can + overflow to inf/NaN (e.g. fp16 saturation under AMP, see #47000), which used to make + ``scipy.optimize.linear_sum_assignment`` raise ``ValueError: cost matrix is infeasible``. The + matcher now replaces non-finite costs with a large finite value before matching. 
+ """ + from transformers.loss.loss_lw_detr import LwDetrHungarianMatcher + + torch.manual_seed(0) + matcher = LwDetrHungarianMatcher(class_cost=1, bbox_cost=5, giou_cost=2) + + num_queries, num_classes, num_targets = 5, 4, 3 + # Boxes in normalized center format (cx, cy, w, h) with strictly positive width/height. + targets = [ + { + "class_labels": torch.arange(num_targets), + "boxes": torch.rand(num_targets, 4) * 0.5 + 0.25, + } + ] + + def base_inputs(): + return ( + torch.randn(1, num_queries, num_classes), + torch.rand(1, num_queries, 4) * 0.5 + 0.25, + ) + + # NaN in the logits poisons the classification cost; +inf on a box width poisons the L1/GIoU + # cost. Either leaves a non-finite cost matrix that used to make scipy raise. + cases = [] + logits, pred_boxes = base_inputs() + logits[0, 0, 0] = float("nan") + cases.append((logits, pred_boxes)) + logits, pred_boxes = base_inputs() + pred_boxes[0, 0, 2] = float("inf") + cases.append((logits, pred_boxes)) + + for logits, pred_boxes in cases: + outputs = {"logits": logits, "pred_boxes": pred_boxes} + + indices = matcher(outputs, targets, group_detr=1) + + self.assertEqual(len(indices), 1) + row_indices, col_indices = indices[0] + self.assertEqual(len(row_indices), num_targets) + self.assertEqual(len(col_indices), num_targets)