Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 73 additions & 2 deletions pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,15 +886,86 @@ def get_user_id(self):

def publish_labels(self, pr_types):
try:
self.mr.labels = list(set(pr_types))
self.mr.save()
# Race-safe label update.
#
# Previous behavior: ``self.mr.labels = list(set(pr_types))`` followed
# by ``self.mr.save()`` issued a full PUT on the labels array using
# whatever snapshot of ``self.mr`` was cached at provider-construction
# time. Any label a user added between webhook delivery and this save
# was silently dropped.
#
# New behavior: re-fetch the MR immediately before computing the
# delta, then use python-gitlab's ``add_labels`` / ``remove_labels``
# attributes on save. python-gitlab forwards those attributes to
# the matching ``add_labels`` / ``remove_labels`` parameters on
# ``PUT /projects/:id/merge_requests/:merge_request_iid`` so the
# server applies an incremental set-diff and concurrent additions
# outside the diff are preserved.
desired = set(pr_types)
try:
self.mr = self._get_merge_request()
except Exception as refresh_err:
# Strict policy: a stale snapshot can produce an incorrect
# add/remove diff that re-introduces the bug this method is
# meant to fix. Abort the publish, log, and leave server
# state untouched rather than risk clobbering user labels.
#
# Log only the exception type — the message from HTTP/SDK
# client errors may include credentialed URLs or headers, so
# we avoid emitting the raw object.
get_logger().warning(
"publish_labels: aborting, failed to refresh MR before save "
f"({type(refresh_err).__name__})"
)
return
current = set(self.mr.labels or [])
to_add = sorted(desired - current)
to_remove = sorted(current - desired)
if not to_add and not to_remove:
return
# GitLab accepts comma-separated strings for these attributes.
# Wrap the save in try/finally so the transient add_labels /
# remove_labels attributes are always cleared from ``self.mr``,
# even on save() failure. Otherwise a later self.mr.save() call
# in an unrelated tool (e.g. publish_description) would resend
# the prior label diff and cause unexpected churn.
try:
if to_add:
self.mr.add_labels = ",".join(to_add)
if to_remove:
self.mr.remove_labels = ",".join(to_remove)
self.mr.save()
finally:
for attr in ("add_labels", "remove_labels"):
try:
delattr(self.mr, attr)
except (AttributeError, KeyError):
# Already absent (e.g. only one side of the diff was
# set, or python-gitlab cleared it on save). Safe to
# ignore.
pass
except Exception as e:
get_logger().warning(f"Failed to publish labels, error: {e}")

def publish_inline_comments(self, comments: list[dict]):
pass

def get_pr_labels(self, update=False):
# The previous implementation ignored the ``update`` flag entirely and
# always returned the snapshot cached at provider construction. Callers
# such as ``PRReviewer.set_review_labels`` rely on a fresh read to
# preserve user-added labels in their read-modify-write cycle; without
# the refresh, any label added after the webhook fired would be missing
# from ``current_labels`` and dropped by the subsequent publish_labels.
#
# Strict policy on refresh failure: a stale snapshot will produce an
# incorrect diff in the caller's filter-and-republish cycle, so we
# surface the failure to the caller instead of silently returning
# cached data. ``set_review_labels`` already wraps this in a broad
# try/except, so the failure degrades into "skip the label update for
# this run" rather than breaking the whole review.
if update:
self.mr = self._get_merge_request()
return self.mr.labels

def get_repo_labels(self):
Expand Down
33 changes: 23 additions & 10 deletions pr_agent/tools/pr_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,29 @@ async def run(self):

# publish labels
if get_settings().pr_description.publish_labels and pr_labels and self.git_provider.is_supported("get_labels"):
original_labels = self.git_provider.get_pr_labels(update=True)
get_logger().debug(f"original labels", artifact=original_labels)
user_labels = get_user_labels(original_labels)
new_labels = pr_labels + user_labels
get_logger().debug(f"published labels", artifact=new_labels)
if set(new_labels) != set(original_labels):
get_logger().info(f"Setting describe labels:\n{new_labels}")
self.git_provider.publish_labels(new_labels)
else:
get_logger().debug(f"Labels are the same, not updating")
# Isolate label refresh/publish failures from the description
# publish: GitLabProvider.get_pr_labels(update=True) now raises
# on a stale-snapshot refresh failure (see PR #2484), and the
# outer try/except in this method is wide enough that an
# unhandled exception here would skip the description publish
# too. Label updates are best-effort for /describe.
try:
original_labels = self.git_provider.get_pr_labels(update=True)
get_logger().debug("original labels", artifact=original_labels)
user_labels = get_user_labels(original_labels)
new_labels = pr_labels + user_labels
get_logger().debug("published labels", artifact=new_labels)
if set(new_labels) != set(original_labels):
get_logger().info(f"Setting describe labels:\n{new_labels}")
self.git_provider.publish_labels(new_labels)
else:
get_logger().debug("Labels are the same, not updating")
except Exception as label_err:
get_logger().warning(
"Failed to update labels during PR description; "
"continuing with description publish "
f"({type(label_err).__name__})"
)

# publish description
if get_settings().pr_description.publish_description_as_comment:
Expand Down
181 changes: 181 additions & 0 deletions tests/unittest/test_gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,184 @@ def test_publish_description_with_title_updates_both(self, gitlab_provider):
assert gitlab_provider.mr.title == "AI title"
assert gitlab_provider.mr.description == "Updated description"
gitlab_provider.mr.save.assert_called_once()
# ---- publish_labels / get_pr_labels tests ----

def _prime_mr_for_labels(self, gitlab_provider, server_labels):
"""Install a mock MR with server_labels for label-publishing tests.

_get_merge_request is patched to return a fresh MagicMock with the
same label set, simulating a successful server refresh. spec=[...]
keeps MagicMock from silently auto-creating add_labels /
remove_labels attrs so tests can assert which side(s) of the diff
were written (or were cleared) after publish_labels returns.
"""
mr = MagicMock(spec=["labels", "save"])
mr.labels = list(server_labels)
gitlab_provider.mr = mr
gitlab_provider._get_merge_request = MagicMock(return_value=mr)
return mr

def _capture_wire_payload_on_save(self, mr):
"""Capture add_labels / remove_labels at the moment save() is called.

publish_labels deletes the transient diff attributes in a ``finally``
block, so asserting on them *after* the call returns is meaningless
(they will always be absent). This helper installs a save() side_effect
that records the diff payload that was actually written to the wire,
which is what we need to validate.
"""
captured = {}

def _record_then_succeed(*_a, **_kw):
captured["add_labels"] = getattr(mr, "add_labels", None)
captured["remove_labels"] = getattr(mr, "remove_labels", None)

mr.save.side_effect = _record_then_succeed
return captured

def test_publish_labels_noop_when_sets_equal(self, gitlab_provider):
mr = self._prime_mr_for_labels(gitlab_provider, ["bug", "review effort 3/5"])

gitlab_provider.publish_labels(["bug", "review effort 3/5"])

# No diff -> no save, no transient attributes touched.
mr.save.assert_not_called()
assert not hasattr(mr, "add_labels")
assert not hasattr(mr, "remove_labels")

def test_publish_labels_adds_only_missing(self, gitlab_provider):
mr = self._prime_mr_for_labels(gitlab_provider, ["bug"])
captured = self._capture_wire_payload_on_save(mr)

gitlab_provider.publish_labels(["bug", "review effort 3/5"])

assert mr.save.call_count == 1
# Only the missing label is in the add diff; nothing is being
# removed because every server label is still desired.
assert captured["add_labels"] == "review effort 3/5"
assert captured["remove_labels"] is None
# Diff attrs are cleared on the way out.
assert not hasattr(mr, "add_labels")
assert not hasattr(mr, "remove_labels")

def test_publish_labels_removes_stale_managed_labels(self, gitlab_provider):
mr = self._prime_mr_for_labels(
gitlab_provider, ["review effort 5/5", "Possible security concern"]
)
captured = self._capture_wire_payload_on_save(mr)

# Caller wants to switch the managed labels to a fresh set.
gitlab_provider.publish_labels(["review effort 2/5"])

assert mr.save.call_count == 1
# "review effort 2/5" is added; both prior managed labels are removed.
# sorted() determinism is part of the contract so we can assert the
# exact comma-separated payload sent on the wire.
assert captured["add_labels"] == "review effort 2/5"
assert captured["remove_labels"] == "Possible security concern,review effort 5/5"
assert not hasattr(mr, "add_labels")
assert not hasattr(mr, "remove_labels")

def test_publish_labels_preserves_user_labels_outside_diff(self, gitlab_provider):
# The bug this PR fixes: a user-added label outside the diff must
# not be touched. With spec on the mock, ``mr.labels`` should remain
# the exact list we primed it with (no full-array overwrite).
mr = self._prime_mr_for_labels(
gitlab_provider, ["area/backend", "review effort 3/5"]
)
captured = self._capture_wire_payload_on_save(mr)

# Caller flipped the managed label only; ``area/backend`` stays.
gitlab_provider.publish_labels(["area/backend", "review effort 4/5"])

# Wire-level diff: only the managed label is updated.
assert captured["add_labels"] == "review effort 4/5"
assert captured["remove_labels"] == "review effort 3/5"
# We wrote exactly one save and never reassigned ``mr.labels`` (the
# pre-fix bug).
assert mr.save.call_count == 1
assert mr.labels == ["area/backend", "review effort 3/5"]
# Diff attrs cleared on the way out.
assert not hasattr(mr, "add_labels")
assert not hasattr(mr, "remove_labels")

def test_publish_labels_aborts_when_refresh_fails(self, gitlab_provider):
# Pre-fix behavior would have proceeded against the cached snapshot,
# potentially clobbering user labels. New strict behavior: abort the
# publish and leave server state untouched.
cached_mr = MagicMock(spec=["labels", "save"])
cached_mr.labels = ["stale label that no longer reflects server"]
gitlab_provider.mr = cached_mr

class _SecretError(RuntimeError):
"""Sentinel whose repr would carry a credentialed URL if logged raw."""

# Use an obviously-fake credential placeholder rather than anything
# that resembles a real token; secret scanners flag the latter.
secret_marker = "REDACTED-TOKEN-PLACEHOLDER"
gitlab_provider._get_merge_request = MagicMock(
side_effect=_SecretError(f"https://oauth2:{secret_marker}@gitlab.example.com/api/v4/...")
)
Comment thread
ashearin marked this conversation as resolved.

with patch("pr_agent.git_providers.gitlab_provider.get_logger") as mock_logger:
gitlab_provider.publish_labels(["review effort 3/5"])

cached_mr.save.assert_not_called()
# Log must name the exception type but never the raw message: the
# message body can carry credentials embedded in client-side URLs.
warning_messages = [
call.args[0] for call in mock_logger.return_value.warning.call_args_list
]
assert any("publish_labels: aborting" in m for m in warning_messages)
assert any("_SecretError" in m for m in warning_messages)
assert not any(secret_marker in m for m in warning_messages)
assert not any("oauth2" in m for m in warning_messages)

def test_publish_labels_clears_diff_attrs_on_save_failure(self, gitlab_provider):
# If ``self.mr.save()`` raises, the transient diff fields must still
# be cleared so a later, unrelated save() (e.g. publish_description)
# does not resend them.
mr = self._prime_mr_for_labels(gitlab_provider, ["bug"])
mr.save.side_effect = RuntimeError("network blip")

gitlab_provider.publish_labels(["review effort 3/5"]) # adds + removes

# publish_labels swallows the outer Exception by design; what matters
# is that the transient attrs do not leak into the next save().
assert not hasattr(mr, "add_labels")
assert not hasattr(mr, "remove_labels")

def test_get_pr_labels_no_update_returns_cached(self, gitlab_provider):
cached_mr = MagicMock()
cached_mr.labels = ["cached"]
gitlab_provider.mr = cached_mr
gitlab_provider._get_merge_request = MagicMock()

result = gitlab_provider.get_pr_labels(update=False)

assert result == ["cached"]
gitlab_provider._get_merge_request.assert_not_called()

def test_get_pr_labels_with_update_refreshes(self, gitlab_provider):
cached_mr = MagicMock()
cached_mr.labels = ["cached-stale"]
fresh_mr = MagicMock()
fresh_mr.labels = ["fresh-from-server"]
gitlab_provider.mr = cached_mr
gitlab_provider._get_merge_request = MagicMock(return_value=fresh_mr)

result = gitlab_provider.get_pr_labels(update=True)

assert result == ["fresh-from-server"]
assert gitlab_provider.mr is fresh_mr

def test_get_pr_labels_with_update_propagates_refresh_failure(self, gitlab_provider):
# Strict policy: surface the refresh failure to the caller (which
# wraps the call in a broader try/except), rather than silently
# returning stale data that would corrupt the read-modify-write cycle.
gitlab_provider.mr = MagicMock()
gitlab_provider._get_merge_request = MagicMock(side_effect=RuntimeError("boom"))

with pytest.raises(RuntimeError):
gitlab_provider.get_pr_labels(update=True)

Loading
Loading