Skip to content

Commit 41bce60

Browse files
committed
feat(proxy): opt-in compression for catch-all passthrough routes
Requests whose path doesn't match a built-in API route fall through to `handle_passthrough`, which forwarded the body verbatim — bypassing the whole compression pipeline. Wrapper-proxy setups that front Headroom on custom paths (e.g. `/api/codex-proxy/<key>/v1/responses`) therefore got zero compression on coding-agent traffic and hit context-limit 400s in long sessions (#1546). Add `--compress-passthrough` (env `HEADROOM_COMPRESS_PASSTHROUGH=1`), off by default. When enabled, POST passthrough requests whose path ends in `/responses` and whose body is OpenAI Responses-shaped are routed through the same `_compress_openai_responses_payload_in_executor` path the native `/v1/responses` handler uses, then forwarded to the unknown upstream. The stale Content-Length header is dropped so httpx recomputes it. Fail-open by construction: non-JSON bodies, non-Responses payloads, unmodified results, and any compressor error all forward the original body unchanged, so opting in can never drop a catch-all request. Passthrough compression for Anthropic `/messages` and `/chat/completions` is left as a follow-up. Closes #1546
1 parent 9fbd47b commit 41bce60

5 files changed

Lines changed: 187 additions & 0 deletions

File tree

docs/content/docs/proxy.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,13 @@ headroom proxy --mode cache
9292
| `--no-learn` | `false` | Explicitly disable traffic learning |
9393
| `--min-evidence` | `5` | Minimum observations before a learned pattern is persisted |
9494
| `--codex-wire-debug` | `false` | Write local Codex wire snapshots and matching proxy log traces |
95+
| `--compress-passthrough` | `false` | Also compress custom proxy paths that fall through to the catch-all handler (OpenAI Responses-shaped bodies whose path ends in `/responses`). Also settable via `HEADROOM_COMPRESS_PASSTHROUGH=1` |
9596

9697
```bash
9798
headroom proxy --memory
9899
headroom proxy --learn --min-evidence 3
99100
headroom proxy --codex-wire-debug
101+
headroom proxy --compress-passthrough
100102
```
101103

102104
<Callout type="info" title="LLMLingua removed from the proxy CLI">

headroom/proxy/handlers/openai.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6365,6 +6365,52 @@ async def handle_compress(self, request: Request) -> JSONResponse:
63656365
},
63666366
)
63676367

6368+
async def _maybe_compress_passthrough_responses(self, body: bytes) -> bytes:
    """Compress an OpenAI Responses-shaped passthrough body, fail-open.

    Reuses the native `/v1/responses` compression path
    (`_compress_openai_responses_payload_in_executor`) so custom
    wrapper-proxy routes get the same treatment as the built-in route.

    Args:
        body: Raw request body as received by the catch-all handler.

    Returns:
        Compact UTF-8 JSON of the compressed payload when the compressor
        reports a modification; otherwise ``body`` itself, unchanged.

    Fail-open contract: a body that cannot be parsed, is not a JSON object
    with an ``input`` key, cannot be compressed, or cannot be re-serialized
    is returned verbatim, so a catch-all request is never dropped by opting
    into compression. Task cancellation is not swallowed.
    """
    try:
        payload = json.loads(body)
    except (ValueError, TypeError, RecursionError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # RecursionError is raised by the decoder for pathologically deep
        # nesting and is *not* a ValueError, so it must be listed explicitly
        # or such a body would abort the request instead of passing through.
        return body
    if not isinstance(payload, dict) or "input" not in payload:
        # Not a Responses payload (no `input` key) — leave it alone.
        return body

    model = str(payload.get("model") or "passthrough")
    # Placeholder so the warning below can still be emitted if allocating
    # the real request id is the step that fails.
    request_id = "unknown"
    try:
        request_id = await self._next_request_id()
        # Only the first two fields of the result tuple are needed here;
        # the remainder (metrics etc.) is ignored.
        compressed_payload, modified, *_rest = (
            await self._compress_openai_responses_payload_in_executor(
                payload,
                model=model,
                request_id=request_id,
            )
        )
    except Exception as exc:  # noqa: BLE001 — fail-open on any compressor error
        logger.warning(
            "[%s] passthrough Responses compression failed, forwarding verbatim: %s",
            request_id,
            exc,
        )
        return body
    if not modified:
        # Return the original bytes rather than a re-serialization so the
        # caller's `compressed != body` check stays false.
        return body
    try:
        return json.dumps(
            compressed_payload,
            separators=(",", ":"),
            ensure_ascii=False,
        ).encode("utf-8")
    except (TypeError, ValueError, RecursionError):
        # TypeError: non-serializable value; ValueError: circular reference
        # or un-encodable text (UnicodeEncodeError); RecursionError: nesting
        # too deep for the encoder.
        return body
6413+
63686414
async def handle_passthrough(
63696415
self,
63706416
request: Request,
@@ -6423,6 +6469,25 @@ async def handle_passthrough(
64236469

64246470
body = await request.body()
64256471

6472+
# Opt-in: compress requests that fall through here because their path
6473+
# doesn't match a built-in API route (custom wrapper-proxy paths like
6474+
# `/api/codex-proxy/<key>/v1/responses`). Off by default; only touches
6475+
# OpenAI Responses-shaped bodies (path ends in `/responses`) so we reuse
6476+
# the exact same ContentRouter/Kompress path the native handler runs.
6477+
if (
6478+
getattr(self.config, "compress_passthrough", False)
6479+
and self.config.optimize
6480+
and request.method == "POST"
6481+
and path.rstrip("/").endswith("/responses")
6482+
and body
6483+
):
6484+
compressed = await self._maybe_compress_passthrough_responses(body)
6485+
if compressed != body:
6486+
body = compressed
6487+
# Body size changed — let httpx recompute Content-Length.
6488+
for _hk in [k for k in headers if k.lower() == "content-length"]:
6489+
headers.pop(_hk, None)
6490+
64266491
headers = await apply_copilot_api_auth(headers, url=url)
64276492
# Cloudflare bot-management challenges our HTTP/2 fingerprint on
64286493
# ChatGPT's sensitive account endpoints (/backend-api/me,

headroom/proxy/models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,16 @@ class ProxyConfig:
180180
# CLI: --force-kompress-all; env: HEADROOM_FORCE_KOMPRESS_ALL=1.
181181
force_kompress_all: bool = False
182182

183+
# Compress requests that fall through to the catch-all passthrough handler
184+
# (custom proxy paths that don't match a built-in API route, e.g.
185+
# `/api/codex-proxy/<key>/v1/responses` fronted by another proxy). Off by
186+
# default because passthrough targets are unknown upstreams; opt-in for
187+
# wrapper-proxy architectures that need coding-agent traffic compressed.
188+
# Currently applies to OpenAI Responses-shaped bodies (paths ending in
189+
# `/responses`). CLI: --compress-passthrough; env:
190+
# HEADROOM_COMPRESS_PASSTHROUGH=1.
191+
compress_passthrough: bool = False
192+
183193
# Code graph live watcher (triggers incremental reindex on file changes)
184194
code_graph_watcher: bool = False
185195

headroom/proxy/server.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4080,6 +4080,7 @@ def _proxy_config_from_env() -> ProxyConfig:
40804080
disable_kompress_anthropic=_get_env_optional_bool("HEADROOM_DISABLE_KOMPRESS_ANTHROPIC"),
40814081
disable_kompress_openai=_get_env_optional_bool("HEADROOM_DISABLE_KOMPRESS_OPENAI"),
40824082
force_kompress_all=_get_env_bool("HEADROOM_FORCE_KOMPRESS_ALL", False),
4083+
compress_passthrough=_get_env_bool("HEADROOM_COMPRESS_PASSTHROUGH", False),
40834084
max_connections=_get_env_int("HEADROOM_MAX_CONNECTIONS", 500),
40844085
max_keepalive_connections=_get_env_int("HEADROOM_MAX_KEEPALIVE", 100),
40854086
keepalive_expiry=_get_env_float("HEADROOM_KEEPALIVE_EXPIRY", 90.0),
@@ -4574,6 +4575,18 @@ def _parse_tool_profiles(cli_profiles: list[str]) -> dict[str, Any]:
45744575
"Also settable via HEADROOM_FORCE_KOMPRESS_ALL=1."
45754576
),
45764577
)
4578+
parser.add_argument(
4579+
"--compress-passthrough",
4580+
action="store_true",
4581+
help=(
4582+
"Also compress requests that fall through to the catch-all "
4583+
"passthrough handler (custom proxy paths not matched by a built-in "
4584+
"API route, e.g. `/api/codex-proxy/<key>/v1/responses` behind "
4585+
"another proxy). Applies to OpenAI Responses-shaped bodies (paths "
4586+
"ending in `/responses`). Off by default; also settable via "
4587+
"HEADROOM_COMPRESS_PASSTHROUGH=1."
4588+
),
4589+
)
45774590
parser.add_argument(
45784591
"--exclude-tools",
45794592
default=None,
@@ -4655,6 +4668,9 @@ def _parse_tool_profiles(cli_profiles: list[str]) -> dict[str, Any]:
46554668
force_kompress_all = args.force_kompress_all or _get_env_bool(
46564669
"HEADROOM_FORCE_KOMPRESS_ALL", False
46574670
)
4671+
compress_passthrough = args.compress_passthrough or _get_env_bool(
4672+
"HEADROOM_COMPRESS_PASSTHROUGH", False
4673+
)
46584674

46594675
# Set OpenRouter API key from CLI if provided
46604676
if hasattr(args, "openrouter_api_key") and args.openrouter_api_key:
@@ -4710,6 +4726,7 @@ def _parse_tool_profiles(cli_profiles: list[str]) -> dict[str, Any]:
47104726
disable_kompress_anthropic=disable_kompress_anthropic,
47114727
disable_kompress_openai=disable_kompress_openai,
47124728
force_kompress_all=force_kompress_all,
4729+
compress_passthrough=compress_passthrough,
47134730
# Connection pool settings
47144731
max_connections=_get_env_int("HEADROOM_MAX_CONNECTIONS", args.max_connections),
47154732
max_keepalive_connections=_get_env_int("HEADROOM_MAX_KEEPALIVE", args.max_keepalive),

tests/test_compress_passthrough.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Tests for opt-in passthrough compression (issue #1546).
2+
3+
Requests whose path doesn't match a built-in API route fall through to
4+
``handle_passthrough``, which historically forwarded the body verbatim — no
5+
compression. With ``compress_passthrough`` enabled, OpenAI Responses-shaped
6+
bodies (path ends in ``/responses``) are routed through the same
7+
ContentRouter/Kompress path the native ``/v1/responses`` handler uses.
8+
9+
``_maybe_compress_passthrough_responses`` is the fail-open core: any parse or
10+
compressor failure returns the original body so a catch-all request is never
11+
dropped by opting into compression.
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import json
17+
from types import SimpleNamespace
18+
19+
from headroom.proxy.handlers.openai import OpenAIHandlerMixin
20+
21+
22+
def _make_handler(compress_impl):
23+
"""Bare mixin instance with just the two collaborators the helper needs."""
24+
handler = OpenAIHandlerMixin.__new__(OpenAIHandlerMixin)
25+
handler.config = SimpleNamespace(optimize=True, compress_passthrough=True)
26+
27+
async def _next_request_id():
28+
return "req-test"
29+
30+
handler._next_request_id = _next_request_id
31+
handler._compress_openai_responses_payload_in_executor = compress_impl
32+
return handler
33+
34+
35+
def _shrinking_compressor(marker: str = "[C]"):
36+
async def _impl(payload, *, model, request_id):
37+
new = dict(payload)
38+
new["input"] = marker
39+
return (new, True, 5, ["kompress"], None, 100, 40, 5, {})
40+
41+
return _impl
42+
43+
44+
async def test_compresses_responses_shaped_body() -> None:
45+
handler = _make_handler(_shrinking_compressor())
46+
body = json.dumps({"model": "gpt-5.4", "input": [{"role": "user"}]}).encode()
47+
48+
out = await handler._maybe_compress_passthrough_responses(body)
49+
50+
assert out != body
51+
assert json.loads(out)["input"] == "[C]"
52+
53+
54+
async def test_non_json_body_passes_through() -> None:
55+
handler = _make_handler(_shrinking_compressor())
56+
body = b"not json at all"
57+
58+
assert await handler._maybe_compress_passthrough_responses(body) == body
59+
60+
61+
async def test_non_responses_payload_passes_through() -> None:
62+
# No `input` key → not a Responses payload; must not be touched.
63+
handler = _make_handler(_shrinking_compressor())
64+
body = json.dumps({"model": "gpt-5.4", "messages": []}).encode()
65+
66+
assert await handler._maybe_compress_passthrough_responses(body) == body
67+
68+
69+
async def test_unmodified_result_returns_original_bytes() -> None:
70+
async def _noop(payload, *, model, request_id):
71+
return (payload, False, 0, [], "no-op", 0, 0, 0, {})
72+
73+
handler = _make_handler(_noop)
74+
body = json.dumps({"input": [{"role": "user"}]}).encode()
75+
76+
assert await handler._maybe_compress_passthrough_responses(body) == body
77+
78+
79+
async def test_compressor_error_fails_open() -> None:
80+
async def _boom(payload, *, model, request_id):
81+
raise RuntimeError("kompress exploded")
82+
83+
handler = _make_handler(_boom)
84+
body = json.dumps({"input": [{"role": "user"}]}).encode()
85+
86+
# Fail-open: original body forwarded, exception swallowed.
87+
assert await handler._maybe_compress_passthrough_responses(body) == body
88+
89+
90+
def test_config_defaults_off() -> None:
91+
from headroom.proxy.models import ProxyConfig
92+
93+
assert ProxyConfig().compress_passthrough is False

0 commit comments

Comments
 (0)