headroomlabs-ai · chopratejas · Jul 3, 2026
@@ -76,6 +76,24 @@
 _OPENAI_BASE_URL_HEADER = "x-headroom-base-url"
 
 
+def _normalize_openai_max_tokens(body: dict[str, Any]) -> None:
+    """Move a legacy ``max_tokens`` value to ``max_completion_tokens`` in-place.
+
+    GPT-5 / o-series chat models refuse ``max_tokens`` and insist on
+    ``max_completion_tokens``, which gpt-4o/4.1 accept as well, so rewriting the
+    key lets openai-compatible clients that still send the old name keep working.
+
+    Non-dict input and bodies without ``max_tokens`` are left untouched. The
+    legacy key is always removed; its value is carried over only when it is not
+    ``None`` and no non-``None`` ``max_completion_tokens`` is already present.
+    """
+    if isinstance(body, dict) and "max_tokens" in body:
+        legacy_limit = body.pop("max_tokens")
+        # An explicit max_completion_tokens wins over the legacy value.
+        if legacy_limit is not None and body.get("max_completion_tokens") is None:
+            body["max_completion_tokens"] = legacy_limit
+
+
 def _header_get(headers: dict[str, str], name: str) -> str | None:
     """Case-insensitive header lookup for plain dicts."""
     lowered = name.lower()
@@ -2382,6 +2400,15 @@ async def handle_openai_chat(
         optimized_tokens = tokenizer.count_messages(body["messages"])
         tokens_saved = original_tokens - optimized_tokens
 
+        # Compatibility shim: GPT-5 / o-series chat models REJECT the legacy
+        # `max_tokens` ("Unsupported parameter … Use 'max_completion_tokens'
+        # instead"); gpt-4o/4.1 accept `max_completion_tokens` too. openai-
+        # compatible clients (opencode, older SDKs) still send `max_tokens`, so
+        # translate it here — the proxy already owns the outbound body — and
+        # those requests work unchanged. A caller-set `max_completion_tokens`
+        # is kept as-is; the legacy `max_tokens` key is dropped either way.
+        _normalize_openai_max_tokens(body)
+
         # Route through LiteLLM/any-llm backend if configured
         if self.anthropic_backend is not None:
             try:

@@ -0,0 +1,51 @@
+"""OpenAI chat-path compatibility shim: max_tokens -> max_completion_tokens.
+
+GPT-5 / o-series chat models reject the legacy ``max_tokens`` and require
+``max_completion_tokens`` ("Unsupported parameter: 'max_tokens' is not supported
+with this model. Use 'max_completion_tokens' instead."). openai-compatible
+clients (opencode, older SDKs) still send ``max_tokens``, so the proxy — which
+already owns the outbound request body — translates it.
+"""
+
+from __future__ import annotations
+
+from headroom.proxy.handlers.openai import _normalize_openai_max_tokens
+
+
+def test_renames_legacy_max_tokens():
+    request = {"model": "gpt-5.3-chat-latest", "max_tokens": 256, "messages": []}
+    _normalize_openai_max_tokens(request)
+    assert request["max_completion_tokens"] == 256  # value carried over
+    assert "max_tokens" not in request  # legacy key removed
+
+
+def test_preserves_existing_max_completion_tokens_and_drops_legacy():
+    request = {"max_tokens": 256, "max_completion_tokens": 100}
+    _normalize_openai_max_tokens(request)
+    assert request["max_completion_tokens"] == 100  # explicit value wins
+    assert "max_tokens" not in request  # legacy key is discarded
+
+
+def test_noop_when_only_max_completion_tokens():
+    request = {"max_completion_tokens": 128}
+    _normalize_openai_max_tokens(request)
+    assert request == {"max_completion_tokens": 128}  # nothing added or removed
+
+
+def test_noop_when_neither_present():
+    request = {"model": "gpt-4o", "messages": []}
+    _normalize_openai_max_tokens(request)
+    # Neither token-limit key may be introduced by the shim.
+    assert not {"max_tokens", "max_completion_tokens"} & request.keys()
+
+
+def test_null_max_tokens_is_dropped_without_setting_completion():
+    body = {"max_tokens": None}
+    _normalize_openai_max_tokens(body)
+    # Whole-dict check: `.get(...) is None` would also accept an explicit null.
+    assert body == {}
+
+
+def test_non_dict_is_safe():
+    for not_a_dict in (None, "nope"):  # must return quietly rather than raise
+        _normalize_openai_max_tokens(not_a_dict)  # type: ignore[arg-type]