diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 000000000..5d3ae04c7
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,84 @@
+# headroom AGENTS.md
+
+> headroomlabs-ai/headroom — AI Proxy 的 OSS 贡献项目
+> Fork: lennney/headroom → upstream: headroomlabs-ai/headroom
+
+## 技术栈
+
+- Python 3.12+, uv 包管理
+- CLI: `headroom` 命令行工具
+- 项目结构: `headroom/` 核心库, `tests/` pytest, `docs/` Nextra MDX
+
+## 常用命令
+
+```bash
+# 同步上游
+git fetch upstream main && git checkout main && git merge upstream/main
+
+# 开新分支（必须从最新 main 开）
+git checkout main && git pull upstream main && git checkout -b fix/xxx
+
+# 本地测试
+uv run pytest tests/ -x -q
+uv run ruff check headroom/
+
+# 检查 PR 分支干净度
+git diff upstream/main...HEAD --name-only | grep -v 'uv\.lock$'
+
+# PR governance 检查（此处没有可直接运行的本地命令，需人工对照模板逐项核对）
+# PR body 必须有 6 个 section，见下方 PR Governance
+```
+
+## 当前 PR 状态
+
+| PR | 分支 | 标题 | 状态 |
+|---|---|---|---|
+| #1406 | `fix/wrap-port-fallback` | fix(wrap): port fallback | APPROVED |
+| #1708 | `docs/pipeline-extension-recipe` | docs: pipeline extension recipe | ready for review |
+| #1707 | `docs/fix-lean-ctx-reversible` | docs(readme): fix lean-ctx column | ready for review |
+| #1666 | `fix/transformers-5.3.0-cve-2026-4372` | fix(deps): bump transformers | ready for review |
+
+## PR Governance（红线）
+
+headroom 用 bot 自动检查 PR body，6 个 section 一个不能少：
+
+```
+## Description          ← ≥10 chars, 包含 Closes #
+## Type of Change        ← 至少一个 - [x] checkbox
+## Changes Made          ← 非空 bullets
+## Testing               ← checkboxes + 代码块（真实输出）
+## Real Behavior Proof   ← 4 个子字段（精确标签）：
+  - Environment:
+  - Exact command / steps:
+  - Observed result:
+  - Not tested:
+## Review Readiness      ← 2 个 checkbox（精确标签）：
+  - [x] I have performed a self-review
+  - [x] This PR is ready for human review
+```
+
+**关键坑：**
+- Bot 做**精确字符串匹配**，`"Verification:"` ≠ `"Exact command / steps:"`
+- 修改 PR body **不会**触发 re-check，必须 push commit
+- Review Readiness 的 checkbox 标签必须一字不差
+
+## 分支干净度
+
+每个 PR push 前必须检查：
+
+```bash
+git diff upstream/main...HEAD --name-only | grep -E '(AGENTS\.md|HANDOVER\.md|\.hermes/|docs/plans/|solutions/|\.cursorrules|CLAUDE\.md)'
+# 有输出 = 脏分支，必须清理
+```
+
+## 边界
+
+- ✅ **Always**: 跑测试、修 lint、更新 HANDOVER.md（仅限本地，不要提交进 PR 分支——见「分支干净度」）、PR 前查分支干净度
+- ⚠️ **Ask first**: 改公共接口、加新依赖、关闭安全机制
+- 🚫 **Never**: 硬编码密钥、改系统级配置（/etc/）、跳过 PR governance 模板
+
+## 已知陷阱
+
+1. **#1406 的 7 轮 review** — 端口 fallback 后配置消费者不同步（TOCTOU race）、Codex prepare-only 路径误启动 proxy、bypass pattern 绕过 `_ensure_proxy`。详见 code-review skill 的 pitfall #15-22。
+2. **3 个 docs PR 全被 governance bot 挡** — 没填模板就直接提 PR。教训：先读 `.github/PULL_REQUEST_TEMPLATE.md`。
+3. **#1666 脏分支** — 该分支是从包含 #1406 改动的分支切出来的，diff 里混入了 6 个无关文件。教训：永远从 `upstream/main` 开分支。
diff --git a/headroom/cli/wrap.py b/headroom/cli/wrap.py
index 07b42e6ed..75afe48a0 100644
--- a/headroom/cli/wrap.py
+++ b/headroom/cli/wrap.py
@@ -15,6 +15,7 @@
 
 from __future__ import annotations
 
+import errno
 import importlib.util
 import io
 import json
@@ -346,20 +347,27 @@ def _port_bind_error(port: int) -> OSError | None:
             s.bind(("127.0.0.1", port))
     except OSError as exc:
         return exc
+    except OverflowError:
+        return OSError(errno.EADDRNOTAVAIL, f"Port {port} out of range (0-65535)")
     return None
 
 
-def _format_unbindable_port_error(port: int, error: OSError, agent_type: str) -> str:
-    """Build an actionable message for ports that fail before uvicorn can bind."""
-    command = "headroom proxy"
-    if agent_type != "unknown":
-        command = f"headroom wrap {agent_type}"
-    suggested_port = port + 1
-    return (
-        f"Port {port} is unavailable on 127.0.0.1 before the proxy can start: {error}. "
-        "On Windows this can happen when the port is in an excluded or reserved range. "
-        f"Rerun with a different port, for example `{command} --port {suggested_port}`."
-    )
+def _find_available_port(start_port: int, max_attempts: int = 100) -> int:
+    """Find first available port >= start_port via socket.bind probe.
+
+    Skips ports with EADDRINUSE (busy) and EACCES (reserved on Windows,
+    privileged on Linux) — both indicate the port can't be bound here.
+    Other OS errors (e.g. EADDRNOTAVAIL) propagate immediately. Probes at most
+    max_attempts ports, never past 65535; raises RuntimeError if none can be bound.
+    """
+    end_port = min(start_port + max_attempts, 65536)
+    for port in range(start_port, end_port):
+        error = _port_bind_error(port)
+        if error is None:
+            return port
+        if error.errno not in (errno.EADDRINUSE, errno.EACCES):
+            raise error
+    raise RuntimeError(f"No available port found in range {start_port}-{end_port - 1}")
 
 
 def _get_log_path() -> Path:
@@ -397,7 +405,11 @@ def _start_proxy(
     Stdout and stderr are written to a dedicated sibling file, usually
     `~/.headroom/logs/proxy-stdio.log`, to avoid pipe deadlock risk without
     competing with the rotating `proxy.log` runtime log.
+
+    The caller is responsible for ensuring *port* is available
+    (see ``_find_available_port``).
     """
+
     cmd = [sys.executable, "-m", "headroom.cli", "proxy", "--port", str(port)]
 
     # Forward HEADROOM_MODE env var so the proxy respects the user's mode choice
@@ -1923,7 +1935,7 @@ def _run_proxy_only_watcher(
     learn: bool,
     memory: bool,
     agent_type: str,
-    print_setup_lines: Callable[[], None],
+    print_setup_lines: Callable[[int], None],
 ) -> None:
     """Shared scaffolding for proxy-only wrap subcommands (no child binary launch).
 
@@ -1937,19 +1949,24 @@ def _run_proxy_only_watcher(
     through here. ``_launch_tool`` owns the proxy lifecycle on that path.
     """
     proxy_holder: list[subprocess.Popen | None] = [None]
-    cleanup = _make_cleanup(proxy_holder, port)
-    _register_proxy_client(port)
+    port_holder: list[int] = [port]
+    cleanup = _make_cleanup(proxy_holder, port_holder)
     signal.signal(signal.SIGINT, cleanup)
     signal.signal(signal.SIGTERM, cleanup)
 
     try:
         _print_wrap_banner(agent_label)
-        proxy_holder[0] = _ensure_proxy(
+        _register_proxy_client(port)
+        proxy_holder[0], actual_port = _ensure_proxy(
             port, no_proxy, learn=learn, memory=memory, agent_type=agent_type
         )
-        _push_runtime_env(port, no_proxy)
+        if actual_port != port:
+            _unregister_proxy_client(port)
+            _register_proxy_client(actual_port)
+        port_holder[0] = actual_port
+        _push_runtime_env(actual_port, no_proxy)
         click.echo()
-        print_setup_lines()
+        print_setup_lines(actual_port)
         click.echo()
         click.echo("  Press Ctrl+C to stop the proxy.")
         click.echo()
@@ -2670,8 +2687,8 @@ def _ensure_proxy(
     vertex_api_url: str | None = None,
     clear_vertex_api_url: bool = False,
     copilot_api_token: str | None = None,
-) -> subprocess.Popen | None:
-    """Start or verify proxy. Returns process handle if we started it."""
+) -> tuple[subprocess.Popen | None, int]:
+    """Start or verify proxy. Returns (process_handle, actual_port)."""
     helpers = _live_wrap_module()
     # --no-proxy reuses an already-running proxy, so backend/region/provider
     # flags (which only apply when we start one) would be silently dropped.
@@ -2705,9 +2722,9 @@ def _ensure_proxy(
                             f"  Leaving it running because {detail} "
                             "are still attached; it will be restarted when idle."
                         )
-                        return None
+                        return None, port
                     if helpers._restart_persistent_proxy(manifest, port):
-                        return None
+                        return None, port
                     raise click.ClickException(
                         f"Persistent deployment '{manifest.profile}' on port {port} "
                         f"is running stale Headroom {running_version} and could not be restarted."
@@ -2737,7 +2754,7 @@ def _ensure_proxy(
                     if not missing:
                         click.echo(f"  Proxy already running on port {port}")
                         click.echo(f"  Dashboard:    http://127.0.0.1:{port}/dashboard")
-                        return None
+                        return None, port
                 # Features mismatch or config unavailable — fall through to
                 # the non-persistent path which handles proxy restart.
             else:
@@ -2748,9 +2765,9 @@ def _ensure_proxy(
                     # restart logic can run. For plain recover-only calls,
                     # preserve the historical fast return.
                     if not any((memory, learn, code_graph, openai_api_url)):
-                        return None
+                        return None, port
                     if not helpers._check_proxy(port):
-                        return None
+                        return None, port
 
                     # A freshly recovered persistent proxy may not expose
                     # a full config payload yet. In feature-sensitive flows
@@ -2768,7 +2785,7 @@ def _ensure_proxy(
                             "did not expose config; restarting with requested features..."
                         )
                         if helpers._restart_persistent_proxy(manifest, port):
-                            return None
+                            return None, port
                         raise click.ClickException(
                             f"Persistent deployment '{manifest.profile}' on port {port} "
                             "could not be restarted after recovery."
@@ -2796,12 +2813,12 @@ def _ensure_proxy(
                             f"{flags_str}; restarting..."
                         )
                         if helpers._restart_persistent_proxy(manifest, port):
-                            return None
+                            return None, port
                         raise click.ClickException(
                             f"Persistent deployment '{manifest.profile}' on port {port} "
                             "could not be restarted with requested features."
                         )
-                    return None
+                    return None, port
                 elif helpers._check_proxy(port):
                     raise click.ClickException(
                         f"Persistent deployment '{manifest.profile}' on port {port} is not healthy."
@@ -2841,7 +2858,7 @@ def _ensure_proxy(
                         f"  Leaving it running because {detail} "
                         "are still attached; it will be restarted when idle."
                     )
-                    return None
+                    return None, port
 
                 click.echo(
                     f"  Proxy on port {port} is running Headroom {running_version}; "
@@ -2932,26 +2949,31 @@ def _ensure_proxy(
                                 f"  Please stop the proxy on port {port} manually "
                                 f"and rerun with {flags_str}."
                             )
-                            return None
+                            return None, port
 
             if not needs_restart:
                 click.echo(f"  Proxy already running on port {port}")
                 click.echo(f"  Dashboard:    http://127.0.0.1:{port}/dashboard")
-                return None
+                return None, port
 
         # Start (or restart) the proxy with the requested flags
-        bind_error = helpers._port_bind_error(port)
-        if bind_error is not None:
-            raise click.ClickException(
-                helpers._format_unbindable_port_error(port, bind_error, agent_type)
-            )
+        # Find an available port (port may be busy from a stale proxy).
+        try:
+            actual_port = helpers._find_available_port(port)
+        except OSError as e:
+            raise click.ClickException(f"Port {port} is unavailable: {e}") from e
+        except RuntimeError as e:
+            raise click.ClickException(str(e)) from e
 
-        click.echo(f"  Starting Headroom proxy on port {port}...")
+        if actual_port != port:
+            click.echo(f"  Port {port} is in use, using port {actual_port} instead.")
+
+        click.echo(f"  Starting Headroom proxy on port {actual_port}...")
         try:
             proc = cast(
                 subprocess.Popen[Any],
                 _live_wrap_module()._start_proxy(
-                    port,
+                    actual_port,
                     learn=learn,
                     memory=memory,
                     agent_type=agent_type,
@@ -2966,9 +2988,9 @@ def _ensure_proxy(
                     copilot_api_token=copilot_api_token,
                 ),
             )
-            click.echo(f"  Proxy ready on http://127.0.0.1:{port}")
-            click.echo(f"  Dashboard:    http://127.0.0.1:{port}/dashboard")
-            return proc
+            click.echo(f"  Proxy ready on http://127.0.0.1:{actual_port}")
+            click.echo(f"  Dashboard:    http://127.0.0.1:{actual_port}/dashboard")
+            return proc, actual_port
         except RuntimeError as e:
             click.echo(f"  Error: {e}")
             raise SystemExit(1) from e
@@ -2992,7 +3014,7 @@ def _ensure_proxy(
                     "advertise the requested Vertex target. Requests may still go "
                     "to the proxy's existing Vertex upstream."
                 )
-        return None
+        return None, port
 
 
 def _client_marker_path(port: int) -> Path:
@@ -3128,24 +3150,24 @@ def _live_proxy_clients(port: int, *, exclude_self: bool = True) -> list[int]:
     return live
 
 
-def _make_cleanup(proxy_proc_holder: list, port: int = 8787) -> Any:
+def _make_cleanup(proxy_proc_holder: list, port: int | list[int] = 8787) -> Any:
     """Create a cleanup function that terminates the proxy on exit.
 
     Only kills the proxy when no other live headroom-wrapped clients remain,
     tracked via per-PID marker files in ``paths.proxy_clients_dir(port)``.
+
+    ``port`` may be an ``int`` or a ``list[int]`` holder.  When ``_ensure_proxy``
+    falls back to another port because the requested one could not be bound, the
+    caller updates ``port[0]`` in place and the closures read the new value.
     """
 
     def _other_clients_exist() -> bool:
-        # Reference-count from marker files, not argv scans. Wrapped clients
-        # carry the proxy URL in ANTHROPIC_BASE_URL/OPENAI_BASE_URL (env, not
-        # argv), so `pgrep -f` could never see them — and it matched unrelated
-        # processes by substring. Markers are exact and OS-portable.
-        return len(_live_proxy_clients(port, exclude_self=True)) > 0
+        p = port[0] if isinstance(port, list) else port
+        return len(_live_proxy_clients(p, exclude_self=True)) > 0
 
     def cleanup(signum: int | None = None, frame: Any = None) -> None:
-        # Drop our own marker first so the count reflects the post-exit state;
-        # also covers the signal path, where the `finally` block may not run.
-        _unregister_proxy_client(port)
+        p = port[0] if isinstance(port, list) else port
+        _unregister_proxy_client(p)
         proc = proxy_proc_holder[0] if proxy_proc_holder else None
         if proc and proc.poll() is None:
             if _other_clients_exist():
@@ -3187,8 +3209,8 @@ def _launch_tool(
 ) -> None:
     """Common logic: start proxy, launch tool, clean up."""
     proxy_holder: list[subprocess.Popen | None] = [None]
-    cleanup = _make_cleanup(proxy_holder, port)
-    _register_proxy_client(port)
+    port_holder: list[int] = [port]
+    cleanup = _make_cleanup(proxy_holder, port_holder)
     signal.signal(signal.SIGINT, _ignore_child_sigint)
     signal.signal(signal.SIGTERM, cleanup)
 
@@ -3200,7 +3222,8 @@ def _launch_tool(
         click.echo("  ╚═══════════════════════════════════════════════╝")
         click.echo()
 
-        proxy_holder[0] = _ensure_proxy(
+        _register_proxy_client(port)
+        proxy_holder[0], actual_port = _ensure_proxy(
             port,
             no_proxy,
             learn=learn,
@@ -3213,7 +3236,16 @@ def _launch_tool(
             openai_api_url=openai_api_url,
             copilot_api_token=copilot_api_token,
         )
-        _push_runtime_env(port, no_proxy)
+        if actual_port != port:
+            _unregister_proxy_client(port)
+            _register_proxy_client(actual_port)
+        port_holder[0] = actual_port
+        _push_runtime_env(actual_port, no_proxy)
+
+        # If port fell back, update env URLs to point at the actual port
+        if actual_port != port:
+            for k, v in dict(env).items():
+                env[k] = v.replace(f"127.0.0.1:{port}", f"127.0.0.1:{actual_port}")
 
         if code_graph:
             _setup_code_graph(verbose=False)
@@ -3603,9 +3635,9 @@ def claude(
     proxy_holder: list[subprocess.Popen | None] = [None]
     _saved_base_url: list[str | None] = [None]  # previous settings.json value for restore
     _settings_foundry: list[bool] = [False]
+    port_holder: list[int] = [port]
     _settings_vertex: list[bool] = [False]
-    cleanup = _make_cleanup(proxy_holder, port)
-    _register_proxy_client(port)
+    cleanup = _make_cleanup(proxy_holder, port_holder)
     signal.signal(signal.SIGINT, _ignore_child_sigint)
     signal.signal(signal.SIGTERM, cleanup)
 
@@ -3681,7 +3713,8 @@ def claude(
         proxy_url = _claude_proxy_base_url(port)
         vertex_upstream = _vertex_target_api_url_from_claude_env(proxy_url) if use_vertex else None
 
-        proxy_holder[0] = _ensure_proxy(
+        _register_proxy_client(port)
+        proxy_holder[0], actual_port = _ensure_proxy(
             port,
             no_proxy,
             learn=learn,
@@ -3694,7 +3727,11 @@ def claude(
             vertex_api_url=vertex_upstream,
             clear_vertex_api_url=use_vertex and vertex_upstream is None,
         )
-        _push_runtime_env(port, no_proxy)
+        if actual_port != port:
+            _unregister_proxy_client(port)
+            _register_proxy_client(actual_port)
+        port_holder[0] = actual_port
+        _push_runtime_env(actual_port, no_proxy)
 
         if not no_rtk:
             if _selected_context_tool() == _CONTEXT_TOOL_LEAN_CTX:
@@ -3709,7 +3746,7 @@ def claude(
         if not no_mcp:
             from headroom.mcp_registry import ClaudeRegistrar
 
-            _setup_headroom_mcp(ClaudeRegistrar(), port, verbose=verbose)
+            _setup_headroom_mcp(ClaudeRegistrar(), actual_port, verbose=verbose)
         elif verbose:
             click.echo("  Skipping MCP retrieve tool (--no-mcp)")
 
@@ -3728,6 +3765,7 @@ def claude(
         if code_graph:
             _setup_code_graph(verbose=verbose)
 
+        proxy_url = _claude_proxy_base_url(actual_port)
         click.echo()
         click.echo("  Launching Claude Code (API routed through Headroom)...")
         if use_vertex:
@@ -4314,7 +4352,7 @@ def codex(
     _codex_config_file, _codex_backup_file = _codex_config_paths()
     _snapshot_codex_config_if_unwrapped(_codex_config_file, _codex_backup_file)
 
-    # Setup CLI context tool for Codex.
+    # Non-port-dependent setup first (RTK, etc.).
     if not no_rtk:
         if _selected_context_tool() == _CONTEXT_TOOL_LEAN_CTX:
             click.echo("  Setting up lean-ctx for Codex...")
@@ -4327,8 +4365,74 @@ def codex(
                 global_agents = _codex_home_dir() / "AGENTS.md"
                 _inject_rtk_instructions(global_agents, verbose=verbose)
 
+    # --prepare-only: only update Codex config, do NOT start proxy.
+    # MCP/memory/provider config are all config-file writes — they don't
+    # need a running proxy.  Use the raw requested port (no health check,
+    # no port fallback) since the user will run the full command later.
+    if prepare_only:
+        if not no_mcp:
+            from headroom.mcp_registry import CodexRegistrar
+
+            _setup_headroom_mcp(CodexRegistrar(), port, verbose=verbose, force=True)
+        elif verbose:
+            click.echo("  Skipping MCP retrieve tool (--no-mcp)")
+
+        from headroom.mcp_registry import CodexRegistrar
+
+        _setup_coding_compressor(
+            CodexRegistrar(),
+            serena_context="codex",
+            serena=serena,
+            no_serena=no_serena,
+            no_tokensave=no_tokensave,
+            verbose=verbose,
+            force=True,
+        )
+
+        if memory:
+            click.echo("  Setting up memory for Codex...")
+            mem_dir = Path.cwd() / ".headroom"
+            mem_dir.mkdir(parents=True, exist_ok=True)
+            db_path = str(mem_dir / "memory.db")
+            mem_user = os.environ.get("USER", os.environ.get("USERNAME", "default"))
+            _inject_memory_mcp_config(mem_user)
+            agents_md = Path.cwd() / "AGENTS.md"
+            _inject_memory_agents_md(agents_md)
+
+            # Sync Claude's memories → DB so MCP search finds them
+            try:
+                import asyncio
+
+                from headroom.memory.sync import _build_sync_backend, sync_import
+                from headroom.memory.sync_adapters.claude_code import (
+                    ClaudeCodeAdapter,
+                    get_claude_memory_dir,
+                )
+
+                claude_memory_dir = get_claude_memory_dir()
+
+                async def _import_claude_memories() -> int:
+                    backend = _build_sync_backend(db_path)
+                    await backend._ensure_initialized()
+                    adapter = ClaudeCodeAdapter(claude_memory_dir)
+                    count = await sync_import(backend, adapter, mem_user)
+                    await backend.close()
+                    return count
+
+                imported = asyncio.run(_import_claude_memories())
+                if imported:
+                    click.echo(f"  Memory: imported {imported} memories from Claude")
+            except Exception as e:
+                click.echo(f"  Warning: Claude memory import failed: {e}")
+
+        _inject_codex_provider_config(port)
+        return
+
     # Register headroom MCP server in Codex config.toml so Codex can
     # call headroom_retrieve on compression markers from the proxy.
+    # These config writes do not need a running proxy — they run before
+    # _ensure_proxy so unwrap has config to clean up even when proxy
+    # startup or binary lookup fails.
     if not no_mcp:
         from headroom.mcp_registry import CodexRegistrar
 
@@ -4394,17 +4498,46 @@ async def _import_claude_memories() -> int:
         except Exception as e:
             click.echo(f"  Warning: Claude memory import failed: {e}")
 
-    if prepare_only:
-        _inject_codex_provider_config(port)
-        return
-
     codex_bin = shutil.which("codex")
     if not codex_bin:
         click.echo("Error: 'codex' not found in PATH.")
         click.echo("Install Codex CLI: npm install -g @openai/codex")
         raise SystemExit(1)
 
-    env, env_vars_display = _build_codex_launch_env(port, os.environ)
+    # Register our proxy client marker BEFORE _ensure_proxy so that another
+    # wrapper's cleanup sees us as an active client and doesn't terminate a
+    # shared proxy during the startup gap.
+    _register_proxy_client(port)
+
+    # Let _ensure_proxy decide the port (same contract as other wrappers).
+    # Called after config writes so unwrap has config to restore even when
+    # proxy startup fails.
+    _codex_proxy, actual_port = _ensure_proxy(
+        port,
+        no_proxy,
+        learn=learn,
+        memory=memory,
+        agent_type="codex",
+        code_graph=code_graph,
+        backend=backend,
+        anyllm_provider=anyllm_provider,
+        region=region,
+    )
+
+    # If the proxy fell back to a different port, move our marker to the
+    # actual port so cleanup tracking stays accurate.
+    if actual_port != port:
+        _unregister_proxy_client(port)
+        _register_proxy_client(actual_port)
+
+    # If the proxy fell back to a different port, update the MCP config so
+    # the retrieval tool URL points at the port the proxy is actually on.
+    if actual_port != port and not no_mcp:
+        from headroom.mcp_registry import CodexRegistrar
+
+        _setup_headroom_mcp(CodexRegistrar(), actual_port, verbose=verbose, force=True)
+
+    env, env_vars_display = _build_codex_launch_env(actual_port, os.environ)
 
     # Per-project savings attribution: the injected provider config maps the
     # X-Headroom-Project header to HEADROOM_PROJECT via env_http_headers, so
@@ -4418,26 +4551,42 @@ async def _import_claude_memories() -> int:
     # transport unless a custom provider declares supports_websockets = true.
     # NOTE: this must run BEFORE _inject_memory_mcp_config because it rewrites
     # the config file.  Re-inject MCP config after if memory is enabled.
-    _inject_codex_provider_config(port)
+    _inject_codex_provider_config(actual_port)
     if memory:
         _inject_memory_mcp_config(os.environ.get("USER", os.environ.get("USERNAME", "default")))
 
-    _launch_tool(
-        binary=codex_bin,
-        args=codex_args,
-        env=env,
-        port=port,
-        no_proxy=no_proxy,
-        tool_label="CODEX",
-        env_vars_display=env_vars_display,
-        learn=learn,
-        memory=memory,
-        agent_type="codex",
-        code_graph=code_graph,
-        backend=backend,
-        anyllm_provider=anyllm_provider,
-        region=region,
-    )
+    # Proxy already started by _ensure_proxy above; tell _launch_tool to
+    # skip duplicate startup.  Cleanup of _codex_proxy happens on exit
+    # via the finally block below.
+    try:
+        _launch_tool(
+            binary=codex_bin,
+            args=codex_args,
+            env=env,
+            port=actual_port,
+            no_proxy=True,
+            tool_label="CODEX",
+            env_vars_display=env_vars_display,
+            learn=learn,
+            memory=memory,
+            agent_type="codex",
+            code_graph=code_graph,
+            backend=backend,
+            anyllm_provider=anyllm_provider,
+            region=region,
+        )
+    finally:
+        # _launch_tool's internal cleanup unregisters this client marker,
+        # but doesn't know about the proxy we started.  Terminate it when
+        # no other clients remain.
+        if _codex_proxy and _codex_proxy.poll() is None:
+            _other = _live_proxy_clients(actual_port, exclude_self=True)
+            if not _other:
+                _codex_proxy.terminate()
+                try:
+                    _codex_proxy.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    _codex_proxy.kill()
 
 
 # =============================================================================
@@ -4687,8 +4836,8 @@ def cursor(
     if prepare_only:
         return
 
-    def _print_cursor_setup() -> None:
-        for line in _render_cursor_setup_lines(port, project=_project_name_from_cwd()):
+    def _print_cursor_setup(actual_port: int) -> None:
+        for line in _render_cursor_setup_lines(actual_port, project=_project_name_from_cwd()):
             click.echo(line)
         if not no_rtk:
             click.echo()
@@ -4785,9 +4934,9 @@ def cline(
     if prepare_only:
         return
 
-    def _print_cline_setup() -> None:
-        anthropic_base = _claude_proxy_base_url(port)
-        openai_base = f"http://127.0.0.1:{port}/v1"
+    def _print_cline_setup(actual_port: int) -> None:
+        anthropic_base = _claude_proxy_base_url(actual_port)
+        openai_base = f"http://127.0.0.1:{actual_port}/v1"
         click.echo("  Configure Cline in VS Code:")
         click.echo("    Settings > Cline > API Provider")
         click.echo(f"    Anthropic Base URL: {anthropic_base}")
@@ -4909,9 +5058,9 @@ def continue_dev(
     if prepare_only:
         return
 
-    def _print_continue_setup() -> None:
-        anthropic_base = _claude_proxy_base_url(port)
-        openai_base = f"http://127.0.0.1:{port}/v1"
+    def _print_continue_setup(actual_port: int) -> None:
+        anthropic_base = _claude_proxy_base_url(actual_port)
+        openai_base = f"http://127.0.0.1:{actual_port}/v1"
         click.echo("  Configure Continue in your IDE:")
         click.echo(f"    Edit {config_file} and set, per model:")
         click.echo(f'      "apiBase": "{openai_base}"          # OpenAI-compatible models')
@@ -5570,34 +5719,75 @@ def opencode(
         click.echo("Install OpenCode: https://opencode.ai")
         raise SystemExit(1)
 
+    # Register our proxy client marker BEFORE _ensure_proxy so that another
+    # wrapper's cleanup sees us as an active client and doesn't terminate a
+    # shared proxy during the startup gap.
+    _register_proxy_client(port)
+
+    # Resolve port before config injection so the provider block and MCP
+    # URL both point at the port the proxy will actually be on.
+    _opencode_proxy, actual_port = _ensure_proxy(
+        port,
+        no_proxy,
+        learn=learn,
+        memory=memory,
+        agent_type="opencode",
+        code_graph=code_graph,
+        backend=backend,
+        anyllm_provider=anyllm_provider,
+        region=region,
+    )
+
+    # If the proxy fell back to a different port, move our marker so
+    # cleanup tracking stays accurate and update MCP config.
+    if actual_port != port:
+        _unregister_proxy_client(port)
+        _register_proxy_client(actual_port)
+        if not no_mcp:
+            from headroom.mcp_registry import OpencodeRegistrar
+
+            _setup_headroom_mcp(OpencodeRegistrar(), actual_port, verbose=verbose, force=True)
+
     env, env_vars_display = _build_opencode_launch_env(
-        port, os.environ, project=_project_name_from_cwd(), include_mcp=not no_mcp
+        actual_port, os.environ, project=_project_name_from_cwd(), include_mcp=not no_mcp
     )
 
     # Inject Headroom provider into OpenCode config so traffic routes through proxy.
-    inject_opencode_provider_config(port)
+    inject_opencode_provider_config(actual_port)
     if memory:
         mem_dir = Path.cwd() / ".headroom"
         _inject_memory_mcp_config(
             os.environ.get("USER", os.environ.get("USERNAME", "default")),
         )
 
-    _launch_tool(
-        binary=opencode_bin,
-        args=opencode_args,
-        env=env,
-        port=port,
-        no_proxy=no_proxy,
-        tool_label="OPENCODE",
-        env_vars_display=env_vars_display,
-        learn=learn,
-        memory=memory,
-        agent_type="opencode",
-        code_graph=code_graph,
-        backend=backend,
-        anyllm_provider=anyllm_provider,
-        region=region,
-    )
+    # Proxy already started by _ensure_proxy above; tell _launch_tool to
+    # skip duplicate startup.
+    try:
+        _launch_tool(
+            binary=opencode_bin,
+            args=opencode_args,
+            env=env,
+            port=actual_port,
+            no_proxy=True,
+            tool_label="OPENCODE",
+            env_vars_display=env_vars_display,
+            learn=learn,
+            memory=memory,
+            agent_type="opencode",
+            code_graph=code_graph,
+            backend=backend,
+            anyllm_provider=anyllm_provider,
+            region=region,
+        )
+    finally:
+        if _opencode_proxy and _opencode_proxy.poll() is None:
+            _other = _live_proxy_clients(actual_port, exclude_self=True)
+            if not _other:
+                _opencode_proxy.terminate()
+                try:
+                    _opencode_proxy.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    _opencode_proxy.kill()
 
 
 def _opencode_home_dir() -> Path:
diff --git a/tests/test_cli/test_main_help_version.py b/tests/test_cli/test_main_help_version.py
index 19d355f19..51a3c8771 100644
--- a/tests/test_cli/test_main_help_version.py
+++ b/tests/test_cli/test_main_help_version.py
@@ -52,7 +52,7 @@ def test_subcommand_verbose_flag_still_works() -> None:
     completed = SimpleNamespace(returncode=0)
 
     with patch("headroom.cli.wrap.shutil.which", return_value="claude"):
-        with patch("headroom.cli.wrap._ensure_proxy", return_value=None):
+        with patch("headroom.cli.wrap._ensure_proxy", return_value=(None, 8787)):
             with patch("headroom.cli.wrap._setup_rtk", return_value=None):
                 with patch("headroom.cli.wrap.subprocess.run", return_value=completed):
                     result = runner.invoke(main, ["wrap", "claude", "-v"])
diff --git a/tests/test_cli/test_wrap_claude_vertex_proxy_env.py b/tests/test_cli/test_wrap_claude_vertex_proxy_env.py
index 33f2eb239..2b6ed6cce 100644
--- a/tests/test_cli/test_wrap_claude_vertex_proxy_env.py
+++ b/tests/test_cli/test_wrap_claude_vertex_proxy_env.py
@@ -68,9 +68,10 @@ def fake_write_base_url(*args: object, **kwargs: object) -> None:
     monkeypatch.setattr(wrap_mod, "_restore_claude_wrap_base_url", lambda *_args, **_kwargs: None)
     monkeypatch.setattr(wrap_mod, "_print_telemetry_notice", lambda: None)
 
-    def fake_ensure_proxy(*args: object, **kwargs: object) -> None:
+    def fake_ensure_proxy(*args: object, **kwargs: object) -> tuple[None, int]:
         captured["ensure_args"] = args
         captured["ensure_kwargs"] = kwargs
+        return None, args[0] if args else 8787
 
     def fake_run(cmd: list[str], *, env: dict[str, str]) -> _Completed:
         captured["child_cmd"] = cmd
@@ -320,13 +321,14 @@ def test_ensure_proxy_restarts_idle_proxy_for_vertex_api_url_mismatch(
         lambda *args, **kwargs: calls.append(("start", args, kwargs)),
     )
 
-    result = wrap_mod._ensure_proxy(
+    proc, actual_port = wrap_mod._ensure_proxy(
         8787,
         False,
         vertex_api_url="https://new-gateway.example.com/vertex/v1",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
     assert calls[1][2]["vertex_api_url"] == "https://new-gateway.example.com/vertex/v1"
@@ -364,9 +366,10 @@ def test_ensure_proxy_restarts_idle_proxy_to_clear_vertex_api_url(
         lambda *args, **kwargs: calls.append(("start", args, kwargs)),
     )
 
-    result = wrap_mod._ensure_proxy(8787, False, clear_vertex_api_url=True)
+    proc, actual_port = wrap_mod._ensure_proxy(8787, False, clear_vertex_api_url=True)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
     assert calls[1][2]["vertex_api_url"] is None
diff --git a/tests/test_cli/test_wrap_codex.py b/tests/test_cli/test_wrap_codex.py
index 7af6597f4..6d7e96813 100644
--- a/tests/test_cli/test_wrap_codex.py
+++ b/tests/test_cli/test_wrap_codex.py
@@ -1023,7 +1023,7 @@ def test_launch_tool_ignores_sigint_in_wrapper(
     class FakeCompleted:
         returncode = 0
 
-    monkeypatch.setattr(wrap_mod, "_ensure_proxy", lambda *args, **kwargs: None)
+    monkeypatch.setattr(wrap_mod, "_ensure_proxy", lambda *args, **kwargs: (None, 8787))
     monkeypatch.setattr(
         wrap_mod.signal, "signal", lambda sig, fn: signal_handlers.setdefault(sig, fn)
     )
@@ -1480,3 +1480,73 @@ def test_strip_removes_block_with_env_http_headers(
         assert "X-Headroom-Project" not in cleaned
         assert "[model_providers.headroom]" not in cleaned
         assert 'model = "gpt-4o"' in cleaned
+
+
+# ---------------------------------------------------------------------------
+# Regression: codex delegates port resolution to _ensure_proxy
+# ---------------------------------------------------------------------------
+
+
+class TestCodexPortResolution:
+    """codex() uses _ensure_proxy() to resolve ports (not early _find_available_port).
+
+    Regression for headroom#1406 round 2 review: codex() must follow
+    the same selected-port contract as other wrappers (aider, copilot, etc.)
+    so that a healthy existing proxy on the requested port is reused instead
+    of being skipped by a blind socket probe.
+    """
+
+    def test_delegates_to_ensure_proxy(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """codex() passes --port to _ensure_proxy and launches on the port it returns."""
+        _set_test_home(monkeypatch, Path("/tmp/test_headroom_codex"))
+
+        captured_port: list[int] = []
+
+        # Stub _ensure_proxy: record the requested port and return a different one
+        def mock_ensure_proxy(port: int, no_proxy: bool, **kwargs: object) -> tuple[None, int]:
+            captured_port.append(port)
+            # Simulate port fallback: 8787 requested, 8788 actually selected
+            return None, 8788
+
+        monkeypatch.setattr(wrap_mod, "_ensure_proxy", mock_ensure_proxy)
+
+        # Replace the remaining wrap helpers with inert stubs
+        monkeypatch.setattr(
+            wrap_mod, "_codex_config_paths", lambda: (Path("/dev/null"), Path("/dev/null"))
+        )
+        monkeypatch.setattr(wrap_mod, "_snapshot_codex_config_if_unwrapped", lambda *a, **kw: None)
+        monkeypatch.setattr(wrap_mod, "_ensure_rtk_binary", lambda *a, **kw: None)
+        monkeypatch.setattr(wrap_mod, "_setup_lean_ctx_agent", lambda *a, **kw: None)
+        monkeypatch.setattr(wrap_mod, "_inject_rtk_instructions", lambda *a, **kw: None)
+        monkeypatch.setattr(wrap_mod, "_codex_home_dir", lambda: Path("/tmp"))
+        monkeypatch.setattr(wrap_mod, "_setup_headroom_mcp", lambda *a, **kw: None)
+        monkeypatch.setattr(wrap_mod, "_setup_serena_mcp", lambda *a, **kw: None)
+        monkeypatch.setattr(wrap_mod, "_disable_serena_mcp", lambda *a, **kw: None)
+        monkeypatch.setattr("shutil.which", lambda x: "/usr/bin/codex" if x == "codex" else None)
+        monkeypatch.setattr(wrap_mod, "_build_codex_launch_env", lambda port, env: ({}, []))
+        monkeypatch.setattr(wrap_mod, "_inject_codex_provider_config", lambda port: None)
+        monkeypatch.setattr(wrap_mod, "_project_name_from_cwd", lambda: None)
+        monkeypatch.setattr(wrap_mod, "_live_proxy_clients", lambda *a, **kw: [])
+
+        # Capture the kwargs passed to _launch_tool to verify port propagation
+        launch_kw: dict = {}
+
+        def mock_launch_tool(**kwargs: object) -> None:
+            launch_kw.update(kwargs)
+
+        monkeypatch.setattr(wrap_mod, "_launch_tool", mock_launch_tool)
+
+        runner = CliRunner()
+        result = runner.invoke(
+            main,
+            ["wrap", "codex", "--port", "8787", "--no-rtk", "--no-mcp", "--no-serena"],
+        )
+
+        assert result.exit_code == 0, f"CLI failed: {result.output}"
+        assert captured_port == [8787], (
+            f"_ensure_proxy called with {captured_port}, expected [8787]"
+        )
+        assert launch_kw.get("port") == 8788, (
+            f"_launch_tool port={launch_kw.get('port')}, expected 8788 "
+            "(the actual_port from _ensure_proxy fallback)"
+        )
diff --git a/tests/test_cli/test_wrap_helpers.py b/tests/test_cli/test_wrap_helpers.py
index 87806e5e5..2173d9d42 100644
--- a/tests/test_cli/test_wrap_helpers.py
+++ b/tests/test_cli/test_wrap_helpers.py
@@ -12,6 +12,7 @@
 
 from __future__ import annotations
 
+import errno
 import json
 import os
 import subprocess
@@ -270,10 +271,10 @@ def poll(self) -> int | None:
 
     callback_calls: list[None] = []
 
-    def fake_setup() -> None:
+    def fake_setup(_port: int) -> None:
         callback_calls.append(None)
 
-    monkeypatch.setattr(wrap_mod, "_ensure_proxy", lambda *a, **kw: fake_proc)
+    monkeypatch.setattr(wrap_mod, "_ensure_proxy", lambda *a, **kw: (fake_proc, 8787))
     # Replace time.sleep with a no-op so the loop spins quickly.
     monkeypatch.setattr(wrap_mod.time, "sleep", lambda _s: None)
     # Replace _make_cleanup to avoid side-effects on real ports/files.
@@ -321,7 +322,7 @@ def raising_sleep(_s: float) -> None:
         if sleep_calls["n"] >= 1:
             raise KeyboardInterrupt
 
-    monkeypatch.setattr(wrap_mod, "_ensure_proxy", lambda *a, **kw: _FakeProc())
+    monkeypatch.setattr(wrap_mod, "_ensure_proxy", lambda *a, **kw: (_FakeProc(), 8787))
     monkeypatch.setattr(wrap_mod.time, "sleep", raising_sleep)
     monkeypatch.setattr(wrap_mod, "_make_cleanup", lambda holder, port: lambda *a, **kw: None)
     monkeypatch.setattr(wrap_mod.signal, "signal", lambda *a, **kw: None)
@@ -337,7 +338,7 @@ def _cmd() -> None:
             learn=False,
             memory=False,
             agent_type="cursor",
-            print_setup_lines=lambda: None,
+            print_setup_lines=lambda _port: None,
         )
 
     inv = runner.invoke(_cmd)
@@ -368,7 +369,7 @@ def _cmd() -> None:
             learn=False,
             memory=False,
             agent_type="cline",
-            print_setup_lines=lambda: None,
+            print_setup_lines=lambda _port: None,
         )
 
     inv = runner.invoke(_cmd)
@@ -404,7 +405,7 @@ def _cmd() -> None:
             learn=False,
             memory=False,
             agent_type="cline",
-            print_setup_lines=lambda: None,
+            print_setup_lines=lambda _port: None,
         )
 
     inv = runner.invoke(_cmd)
@@ -789,3 +790,65 @@ def test_resolve_1m_model_falls_back_to_default_when_unset() -> None:
     """With no model selected, fall back to the default Opus carrying [1m]."""
     assert wrap_mod._resolve_1m_model(None) == "claude-opus-4-8[1m]"
     assert wrap_mod._resolve_1m_model("  ") == "claude-opus-4-8[1m]"
+
+
+class TestFindAvailablePort:
+    """Tests for _find_available_port (Vite-style port fallback)."""
+
+    def test_port_free_returns_same(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """When the requested port is free, it is returned unchanged."""
+        monkeypatch.setattr(wrap_mod, "_port_bind_error", lambda port: None)
+        assert wrap_mod._find_available_port(8787) == 8787
+
+    def test_port_busy_finds_next(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """When the requested port is busy, the next free port is returned."""
+
+        def mock_bind(port: int) -> OSError | None:
+            if port == 8787:
+                return OSError(errno.EADDRINUSE, "Address in use")
+            return None
+
+        monkeypatch.setattr(wrap_mod, "_port_bind_error", mock_bind)
+        assert wrap_mod._find_available_port(8787) == 8788
+
+    def test_multiple_busy_ports(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """When multiple consecutive ports are busy, skips all of them."""
+
+        def mock_bind(port: int) -> OSError | None:
+            if port in (8787, 8788, 8789):
+                return OSError(errno.EADDRINUSE, "Address in use")
+            return None
+
+        monkeypatch.setattr(wrap_mod, "_port_bind_error", mock_bind)
+        assert wrap_mod._find_available_port(8787) == 8790
+
+    def test_propagates_unexpected_error(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Errors other than EADDRINUSE/EACCES (e.g. EADDRNOTAVAIL) propagate."""
+        monkeypatch.setattr(
+            wrap_mod,
+            "_port_bind_error",
+            lambda port: OSError(errno.EADDRNOTAVAIL, "Address not available"),
+        )
+        with pytest.raises(OSError, match="Address not available"):
+            wrap_mod._find_available_port(8787)
+
+    def test_propagates_eaddrinuse_with_eacces(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """EACCES on the requested port is skipped like EADDRINUSE, not propagated."""
+
+        def mock_bind(port: int) -> OSError | None:
+            if port == 8787:
+                return OSError(errno.EACCES, "Permission denied")
+            return None
+
+        monkeypatch.setattr(wrap_mod, "_port_bind_error", mock_bind)
+        assert wrap_mod._find_available_port(8787) == 8788
+
+    def test_exhausts_range(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """When every probed port (max_attempts=3) is busy, raises RuntimeError."""
+        monkeypatch.setattr(
+            wrap_mod,
+            "_port_bind_error",
+            lambda port: OSError(errno.EADDRINUSE, "Address in use"),
+        )
+        with pytest.raises(RuntimeError, match="No available port found"):
+            wrap_mod._find_available_port(8787, max_attempts=3)
diff --git a/tests/test_cli/test_wrap_persistent.py b/tests/test_cli/test_wrap_persistent.py
index de5a3951d..72919cafd 100644
--- a/tests/test_cli/test_wrap_persistent.py
+++ b/tests/test_cli/test_wrap_persistent.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import errno
+
 import click
 import pytest
 
@@ -45,9 +47,10 @@ def test_ensure_proxy_recovers_matching_persistent_deployment(monkeypatch) -> No
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == ["start:default"]
 
 
@@ -65,9 +68,10 @@ def test_ensure_proxy_recovers_persistent_deployment_when_socket_is_bound(monkey
         "headroom.install.runtime.wait_ready", lambda manifest, timeout_seconds=45: True
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == ["start:default"]
 
 
@@ -93,11 +97,13 @@ def test_ensure_proxy_falls_back_when_persistent_manifest_is_stale(monkeypatch)
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
     monkeypatch.setattr(wrap_cli, "_recover_persistent_proxy", lambda port: False)
     monkeypatch.setattr(wrap_cli, "_port_bind_error", lambda port: None)
+    monkeypatch.setattr(wrap_cli, "_find_available_port", lambda port, **kw: port)
     monkeypatch.setattr(wrap_cli, "_start_proxy", lambda *args, **kwargs: calls.append("start"))
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == ["start"]
 
 
@@ -108,8 +114,10 @@ def test_ensure_proxy_reports_unbindable_port_before_starting_subprocess(monkeyp
     monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: None)
     monkeypatch.setattr(
         wrap_cli,
-        "_port_bind_error",
-        lambda port: PermissionError(10013, "access denied by OS port reservation"),
+        "_find_available_port",
+        lambda port, **kw: (_ for _ in ()).throw(
+            OSError(errno.EADDRNOTAVAIL, "address not available")
+        ),
     )
     monkeypatch.setattr(wrap_cli, "_start_proxy", lambda *args, **kwargs: calls.append("start"))
 
@@ -121,8 +129,6 @@ def test_ensure_proxy_reports_unbindable_port_before_starting_subprocess(monkeyp
         raise AssertionError("expected unbindable port to raise before starting proxy")
 
     assert "Port 8787 is unavailable" in message
-    assert "Windows" in message
-    assert "headroom wrap cursor --port 8788" in message
     assert calls == []
 
 
@@ -150,9 +156,10 @@ def test_ensure_proxy_restarts_idle_stale_persistent_deployment(monkeypatch) ->
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == ["restart:default:8787"]
 
 
@@ -174,9 +181,10 @@ def test_ensure_proxy_leaves_active_stale_persistent_deployment_running(monkeypa
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_defers_persistent_restart_when_http_wrapper_attached(
@@ -209,9 +217,10 @@ def test_ensure_proxy_defers_persistent_restart_when_http_wrapper_attached(
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_find_persistent_manifest_prefers_default_profile(monkeypatch) -> None:
@@ -273,9 +282,10 @@ def test_ensure_proxy_restarts_idle_stale_ephemeral_proxy(monkeypatch) -> None:
         lambda *args, **kwargs: calls.append(("start", args, kwargs)),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
 
@@ -309,13 +319,14 @@ def test_ensure_proxy_restarts_ephemeral_proxy_for_openai_api_url_mismatch(monke
         lambda *args, **kwargs: calls.append(("start", args, kwargs)),
     )
 
-    result = wrap_cli._ensure_proxy(
+    proc, actual_port = wrap_cli._ensure_proxy(
         8787,
         False,
         openai_api_url="https://api.individual.githubcopilot.com",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
     assert calls[1][2]["openai_api_url"] == "https://api.individual.githubcopilot.com"
@@ -347,9 +358,10 @@ def test_ensure_proxy_reuses_agent_proxy_without_savings_profile(monkeypatch) ->
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False, agent_type="codex")
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False, agent_type="codex")
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_restarts_for_explicit_agent_savings_profile(monkeypatch) -> None:
@@ -376,9 +388,10 @@ def test_ensure_proxy_restarts_for_explicit_agent_savings_profile(monkeypatch) -
         lambda *args, **kwargs: calls.append(("start", args, kwargs)),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False, agent_type="codex")
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False, agent_type="codex")
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
 
@@ -424,9 +437,10 @@ def test_ensure_proxy_reuses_agent_proxy_with_savings_profile(monkeypatch) -> No
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False, agent_type="cursor")
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False, agent_type="cursor")
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_leaves_active_stale_ephemeral_proxy_running(monkeypatch) -> None:
@@ -454,9 +468,10 @@ def test_ensure_proxy_leaves_active_stale_ephemeral_proxy_running(monkeypatch) -
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_defers_version_restart_when_http_wrapper_attached(monkeypatch) -> None:
@@ -489,9 +504,10 @@ def test_ensure_proxy_defers_version_restart_when_http_wrapper_attached(monkeypa
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_defers_flag_restart_when_other_wrapper_attached(monkeypatch) -> None:
@@ -523,9 +539,10 @@ def test_ensure_proxy_defers_flag_restart_when_other_wrapper_attached(monkeypatc
         ),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False, memory=True)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False, memory=True)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_restarts_for_flags_when_no_other_wrapper(monkeypatch) -> None:
@@ -554,9 +571,10 @@ def test_ensure_proxy_restarts_for_flags_when_no_other_wrapper(monkeypatch) -> N
         lambda *args, **kwargs: calls.append(("start", args, kwargs)),
     )
 
-    result = wrap_cli._ensure_proxy(8787, False, memory=True)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False, memory=True)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
 
@@ -594,13 +612,14 @@ def test_ensure_proxy_restarts_persistent_deployment_for_feature_mismatch(monkey
     )
 
     # Request openai_api_url that differs from running config (None)
-    result = wrap_cli._ensure_proxy(
+    proc, actual_port = wrap_cli._ensure_proxy(
         8787,
         False,
         openai_api_url="https://api.githubcopilot.com",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     # Proxy should be killed and restarted due to openai_api_url mismatch
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
@@ -640,9 +659,10 @@ def test_ensure_proxy_restarts_persistent_deployment_for_memory_mismatch(monkeyp
     )
 
     # Request memory that differs from running config (False)
-    result = wrap_cli._ensure_proxy(8787, False, memory=True)
+    proc, actual_port = wrap_cli._ensure_proxy(8787, False, memory=True)
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     # Proxy should be killed and restarted due to memory mismatch
     assert calls[0] == ("kill", 12345, 8787)
     assert calls[1][0] == "start"
@@ -682,13 +702,14 @@ def test_ensure_proxy_restarts_recovered_persistent_for_openai_api_url_mismatch(
         ),
     )
 
-    result = wrap_cli._ensure_proxy(
+    proc, actual_port = wrap_cli._ensure_proxy(
         8787,
         False,
         openai_api_url="https://api.business.githubcopilot.com",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == [("restart", "default", 8787)]
 
 
@@ -714,13 +735,14 @@ def test_ensure_proxy_restarts_recovered_persistent_when_config_unavailable(monk
         ),
     )
 
-    result = wrap_cli._ensure_proxy(
+    proc, actual_port = wrap_cli._ensure_proxy(
         8787,
         False,
         openai_api_url="https://api.business.githubcopilot.com",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == [("restart", "default", 8787)]
 
 
@@ -757,14 +779,15 @@ def test_ensure_proxy_reuses_persistent_deployment_when_features_match(monkeypat
     )
 
     # Request same features as running config
-    result = wrap_cli._ensure_proxy(
+    proc, actual_port = wrap_cli._ensure_proxy(
         8787,
         False,
         memory=True,
         openai_api_url="https://api.githubcopilot.com",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
 
 
 def test_ensure_proxy_recovered_persistent_deployment_checks_feature_mismatch(monkeypatch) -> None:
@@ -806,11 +829,12 @@ def test_ensure_proxy_recovered_persistent_deployment_checks_feature_mismatch(mo
         ),
     )
 
-    result = wrap_cli._ensure_proxy(
+    proc, actual_port = wrap_cli._ensure_proxy(
         8787,
         False,
         openai_api_url="https://api.githubcopilot.com",
     )
 
-    assert result is None
+    assert proc is None
+    assert actual_port == 8787
     assert calls == [("restart", "default", 8787)]