Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased

### Fixed
- **proxy:** the savings store now fsyncs its parent directory after the
atomic rename, so the most recent `proxy_savings.json` write survives a
power loss or crash. `_save_locked` fsynced the temp file's contents but
never the directory entry the rename created, leaving the rename itself
non-durable on POSIX. Best-effort — a no-op on Windows and virtual
filesystems where directory fsync is unsupported.
- Non-finite values (`NaN`, `Infinity`) in `proxy_savings.json` or in upstream
cost/token metadata no longer crash the proxy or corrupt the savings
dashboard. `SavingsTracker`'s numeric coercion caught only `TypeError` and
Expand Down
16 changes: 16 additions & 0 deletions headroom/proxy/savings_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,22 @@ def _save_locked(self) -> None:
except OSError:
pass
raise

# Persist the rename itself — the fsync above flushed the file's
# bytes, but the directory entry the rename created isn't durable
# until the parent directory is fsynced too (POSIX). Best-effort —
# directory fsync is unsupported on Windows and some virtual
# filesystems; the file's contents are already fsynced and the rename
# is atomic, so a failure here only forgoes the last-save crash
# guarantee, never correctness. (FP4b)
try:
dir_fd = os.open(self._path.parent, os.O_RDONLY)
try:
os.fsync(dir_fd)
finally:
os.close(dir_fd)
except OSError:
pass
except OSError as e:
logger.warning("Failed to save savings history to %s: %s", self._path, e)

Expand Down
62 changes: 62 additions & 0 deletions tests/test_proxy_savings_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import asyncio
import json
import math
import os
import stat
from datetime import datetime, timedelta, timezone
from pathlib import Path
from types import SimpleNamespace
Expand Down Expand Up @@ -291,6 +293,66 @@ def flock(self, _fh, operation: int) -> None:
assert persisted["lifetime"]["tokens_saved"] == 15


def test_savings_tracker_save_fsyncs_parent_directory(tmp_path, monkeypatch):
    # Durability check for the atomic rename: flushing the file only persists
    # its contents, while the directory entry created by the rename needs a
    # directory fsync of its own. This test wraps os.fsync and requires that
    # at least one of the descriptors passed to it refers to a directory. (FP4b)
    path = tmp_path / "proxy_savings.json"
    tracker = SavingsTracker(path=str(path))

    original_fsync = os.fsync
    synced_directory_fds: list[int] = []

    def _recording_fsync(fd: int) -> None:
        # Classify the descriptor first; a descriptor that cannot be stat'ed
        # is simply not recorded. The real fsync always runs afterwards.
        try:
            mode = os.fstat(fd).st_mode
        except OSError:
            mode = None
        if mode is not None and stat.S_ISDIR(mode):
            synced_directory_fds.append(fd)
        original_fsync(fd)

    monkeypatch.setattr(savings_tracker_module.os, "fsync", _recording_fsync)

    tracker.record_request(
        model="gpt-4o",
        input_tokens=120,
        tokens_saved=10,
        timestamp="2026-03-27T09:00:00Z",
    )

    # A directory descriptor must have been fsynced, and the data written by
    # the save must still be readable and correct.
    assert synced_directory_fds, "parent directory was never fsynced after os.replace"
    lifetime = json.loads(path.read_text(encoding="utf-8"))["lifetime"]
    assert lifetime["tokens_saved"] == 10


def test_savings_tracker_save_survives_directory_fsync_failure(tmp_path, monkeypatch):
    # The directory fsync is best-effort: on Windows and some virtual
    # filesystems it cannot be performed. Simulate that by making os.open raise
    # OSError for the parent directory only, then verify the save still lands
    # with the expected contents. (FP4b)
    path = tmp_path / "proxy_savings.json"
    tracker = SavingsTracker(path=str(path))

    original_open = os.open
    parent_dir = str(path.parent)

    def _failing_open(target, *args, **kwargs):
        # Every path except the parent directory is delegated to the real
        # os.open unchanged.
        if str(target) != parent_dir:
            return original_open(target, *args, **kwargs)
        raise OSError("directory fsync unsupported")

    monkeypatch.setattr(savings_tracker_module.os, "open", _failing_open)

    tracker.record_request(
        model="gpt-4o",
        input_tokens=120,
        tokens_saved=10,
        timestamp="2026-03-27T09:00:00Z",
    )

    lifetime = json.loads(path.read_text(encoding="utf-8"))["lifetime"]
    assert lifetime["tokens_saved"] == 10


def test_litellm_resolution_and_savings_estimation_fallbacks(monkeypatch):
def fake_cost_per_token(*, model, prompt_tokens, completion_tokens):
if model in {"gpt-4o", "anthropic/claude-sonnet-4-6"}:
Expand Down
Loading