From 45bab99f495c3749052724a60e66a6cf6cdbc1d0 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Jul 2026 19:23:05 +0530 Subject: [PATCH 1/3] fix contribution usd tracking --- headroom/proxy/handlers/anthropic.py | 8 ++++++ headroom/proxy/savings_tracker.py | 22 +++++++++++++++++ tests/test_proxy_cache_savings_usd.py | 35 +++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 tests/test_proxy_cache_savings_usd.py diff --git a/headroom/proxy/handlers/anthropic.py b/headroom/proxy/handlers/anthropic.py index 41233c817..00f7a26cc 100644 --- a/headroom/proxy/handlers/anthropic.py +++ b/headroom/proxy/handlers/anthropic.py @@ -2379,6 +2379,10 @@ async def api_call_fn( if _auth_header.startswith("Bearer ") and not _auth_header.startswith( "Bearer sk-ant-api" ): + from headroom.proxy.savings_tracker import ( + _estimate_cache_savings_usd, + _estimate_compression_savings_usd, + ) from headroom.subscription.tracker import ( get_subscription_tracker as _get_sub_tracker, ) @@ -2389,6 +2393,10 @@ async def api_call_fn( tokens_submitted=optimized_tokens, tokens_saved_compression=tokens_saved, tokens_saved_cache_reads=cr_tokens, + compression_savings_usd=_estimate_compression_savings_usd( + model, tokens_saved + ), + cache_savings_usd=_estimate_cache_savings_usd(model, cr_tokens), ) # The pre-refactor PERF emit (above) read raw usage diff --git a/headroom/proxy/savings_tracker.py b/headroom/proxy/savings_tracker.py index cbeab8fad..3ef6130c4 100644 --- a/headroom/proxy/savings_tracker.py +++ b/headroom/proxy/savings_tracker.py @@ -202,6 +202,28 @@ def _estimate_compression_savings_usd(model: str, tokens_saved: int) -> float: return 0.0 +def _estimate_cache_savings_usd(model: str, cache_read_tokens: int) -> float: + """Estimate cache-read savings in USD from discounted input tokens.""" + + litellm = _get_litellm_module() + if cache_read_tokens <= 0 or litellm is None: + return 0.0 + + try: + resolved = _resolve_litellm_model(model) + info = litellm.model_cost.get(resolved, {}) + input_cost_per_token = info.get("input_cost_per_token") + if not input_cost_per_token: + return 0.0 + cache_read_cost_per_token = info.get("cache_read_input_token_cost", input_cost_per_token) + savings_per_token = float(input_cost_per_token) - float(cache_read_cost_per_token) + if savings_per_token <= 0: + return 0.0 + return float(cache_read_tokens) * savings_per_token + except Exception: + return 0.0 + + def _estimate_input_cost_usd( model: str, input_tokens: int, diff --git a/tests/test_proxy_cache_savings_usd.py b/tests/test_proxy_cache_savings_usd.py new file mode 100644 index 000000000..8aa06e121 --- /dev/null +++ b/tests/test_proxy_cache_savings_usd.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from headroom.proxy import savings_tracker as savings_tracker_module + + +def test_estimate_cache_savings_usd_uses_discounted_cache_read_price(monkeypatch) -> None: + fake_litellm = SimpleNamespace( + model_cost={ + "gpt-4o": { + "input_cost_per_token": 0.002, + "cache_read_input_token_cost": 0.001, + } + } + ) + monkeypatch.setattr(savings_tracker_module, "LITELLM_AVAILABLE", True) + monkeypatch.setattr(savings_tracker_module, "litellm", fake_litellm) + + assert savings_tracker_module._estimate_cache_savings_usd("gpt-4o", 100) == pytest.approx( + 0.1 + ) + + +def test_estimate_cache_savings_usd_handles_missing_pricing(monkeypatch) -> None: + fake_litellm = SimpleNamespace(model_cost={}) + monkeypatch.setattr(savings_tracker_module, "LITELLM_AVAILABLE", True) + monkeypatch.setattr(savings_tracker_module, "litellm", fake_litellm) + + assert savings_tracker_module._estimate_cache_savings_usd("gpt-4o", 100) == 0.0 + + monkeypatch.setattr(savings_tracker_module, "LITELLM_AVAILABLE", False) + assert savings_tracker_module._estimate_cache_savings_usd("gpt-4o", 100) == 0.0 From 5785443b2b246a3c35b0dceae38c0231e84d7486 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Jul 2026 19:27:44 +0530 Subject: [PATCH 2/3] fix rtk path for wrapped tools --- headroom/cli/wrap.py | 13 +++++++++++++ tests/test_cli/test_wrap_helpers.py | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/headroom/cli/wrap.py b/headroom/cli/wrap.py index 17ee01d76..b32151fd7 100644 --- a/headroom/cli/wrap.py +++ b/headroom/cli/wrap.py @@ -263,6 +263,18 @@ def _print_telemetry_notice() -> None: click.echo(notice) +def _prepend_rtk_bin_to_path(env: dict[str, str]) -> None: + """Ensure wrapped shells can resolve Headroom-managed `rtk`.""" + + from headroom.rtk import RTK_BIN_DIR + + rtk_bin = str(RTK_BIN_DIR) + current_path = env.get("PATH", "") + path_entries = current_path.split(os.pathsep) if current_path else [] + if rtk_bin not in path_entries: + env["PATH"] = f"{rtk_bin}{os.pathsep}{current_path}" if current_path else rtk_bin + + # Proxy health check (reused from evals/suite_runner.py pattern) @@ -2498,6 +2510,7 @@ def _launch_tool( signal.signal(signal.SIGTERM, cleanup) try: + _prepend_rtk_bin_to_path(env) click.echo() padded = f"HEADROOM WRAP: {tool_label}".center(47) click.echo(" ╔═══════════════════════════════════════════════╗") diff --git a/tests/test_cli/test_wrap_helpers.py b/tests/test_cli/test_wrap_helpers.py index 81cf1cf50..644c204d5 100644 --- a/tests/test_cli/test_wrap_helpers.py +++ b/tests/test_cli/test_wrap_helpers.py @@ -93,6 +93,18 @@ def test_print_wrap_banner_title_is_centered_or_near_centered() -> None: ) +def test_prepend_rtk_bin_to_path_injects_managed_bin_dir(monkeypatch: pytest.MonkeyPatch) -> None: + managed_dir = Path("/tmp/headroom-bin") + monkeypatch.setattr("headroom.rtk.RTK_BIN_DIR", managed_dir) + + env = {"PATH": "/usr/bin:/bin"} + + wrap_mod._prepend_rtk_bin_to_path(env) + + assert env["PATH"].split(os.pathsep)[0] == str(managed_dir) + assert "/usr/bin" in env["PATH"] + + # --------------------------------------------------------------------------- # _setup_context_tool_for_agent — all five branches: # 1. lean-ctx mode → calls _setup_lean_ctx_agent, returns None From 90aa5435cd6d30a6e82e295bbf54702c68c4c677 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Jul 2026 19:28:35 +0530 Subject: [PATCH 3/3] Revert "fix rtk path for wrapped tools" This reverts commit 5785443b2b246a3c35b0dceae38c0231e84d7486. --- headroom/cli/wrap.py | 13 ------------- tests/test_cli/test_wrap_helpers.py | 12 ------------ 2 files changed, 25 deletions(-) diff --git a/headroom/cli/wrap.py b/headroom/cli/wrap.py index b32151fd7..17ee01d76 100644 --- a/headroom/cli/wrap.py +++ b/headroom/cli/wrap.py @@ -263,18 +263,6 @@ def _print_telemetry_notice() -> None: click.echo(notice) -def _prepend_rtk_bin_to_path(env: dict[str, str]) -> None: - """Ensure wrapped shells can resolve Headroom-managed `rtk`.""" - - from headroom.rtk import RTK_BIN_DIR - - rtk_bin = str(RTK_BIN_DIR) - current_path = env.get("PATH", "") - path_entries = current_path.split(os.pathsep) if current_path else [] - if rtk_bin not in path_entries: - env["PATH"] = f"{rtk_bin}{os.pathsep}{current_path}" if current_path else rtk_bin - - # Proxy health check (reused from evals/suite_runner.py pattern) @@ -2510,7 +2498,6 @@ def _launch_tool( signal.signal(signal.SIGTERM, cleanup) try: - _prepend_rtk_bin_to_path(env) click.echo() padded = f"HEADROOM WRAP: {tool_label}".center(47) click.echo(" ╔═══════════════════════════════════════════════╗") diff --git a/tests/test_cli/test_wrap_helpers.py b/tests/test_cli/test_wrap_helpers.py index 644c204d5..81cf1cf50 100644 --- a/tests/test_cli/test_wrap_helpers.py +++ b/tests/test_cli/test_wrap_helpers.py @@ -93,18 +93,6 @@ def test_print_wrap_banner_title_is_centered_or_near_centered() -> None: ) -def test_prepend_rtk_bin_to_path_injects_managed_bin_dir(monkeypatch: pytest.MonkeyPatch) -> None: - managed_dir = Path("/tmp/headroom-bin") - monkeypatch.setattr("headroom.rtk.RTK_BIN_DIR", managed_dir) - - env = {"PATH": "/usr/bin:/bin"} - - wrap_mod._prepend_rtk_bin_to_path(env) - - assert env["PATH"].split(os.pathsep)[0] == str(managed_dir) - assert "/usr/bin" in env["PATH"] - - # --------------------------------------------------------------------------- # _setup_context_tool_for_agent — all five branches: # 1. lean-ctx mode → calls _setup_lean_ctx_agent, returns None