From eb2093bf8f98fe516044b2a23102eb20e3c69d98 Mon Sep 17 00:00:00 2001 From: Parideboy Date: Thu, 2 Jul 2026 18:34:38 +0200 Subject: [PATCH] fix(core): load ONNX Runtime dynamically so headroom._core imports on non-AVX2 x86-64 fastembed's ort-download-binaries-rustls-tls feature statically links Microsoft's prebuilt ONNX Runtime into the extension on non-Windows targets. That prebuilt x86_64 binary requires AVX2 and its code runs as soon as the module loads, so import headroom._core died with SIGILL on pre-AVX2 CPUs before the runtime AVX2 guard from #1162 could intervene. Build with ort-load-dynamic on every platform (Windows already did, for DirectML link-lib reasons), collapsing the two identical target blocks into one dependency. ORT is now only dlopen'd at first use, where the AVX2 guard falls back to the non-ONNX detection tiers. To keep Magika/fastembed working out of the box, extend the Windows-only ORT_DYLIB_PATH auto-pin in headroom/_ort.py to all platforms: it now resolves the pip onnxruntime package's shared library (.dll/.so/.dylib), whose CPU wheels use runtime dispatch and run on pre-AVX2 machines too. 
Fixes #1278 Co-Authored-By: Claude Fable 5 --- Cargo.lock | 18 ------- crates/headroom-core/Cargo.toml | 21 ++++---- docs/content/docs/configuration.mdx | 2 +- headroom/_ort.py | 65 ++++++++++++++++--------- tests/test_transforms/test_ort_dylib.py | 41 ++++++++++++---- 5 files changed, 85 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2123f6ea5..84cf9da4c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2021,12 +2021,6 @@ dependencies = [ "digest 0.11.3", ] -[[package]] -name = "hmac-sha256" -version = "1.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9d92d097f4749b64e8cc33d924d9f40a2d4eb91402b458014b781f5733d60f" - [[package]] name = "http" version = "0.2.12" @@ -2583,12 +2577,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" -[[package]] -name = "lzma-rust2" -version = "0.15.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e20f57f9918e5bd7bc58c22cdd70a6afc7375d4dd9683af5f2b34bd3d2bba619" - [[package]] name = "macro_rules_attribute" version = "0.2.2" @@ -2941,7 +2929,6 @@ dependencies = [ "ort-sys", "smallvec", "tracing", - "ureq 3.3.0", ] [[package]] @@ -2949,11 +2936,6 @@ name = "ort-sys" version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7b497d21a8b6fbb4b5a544f8fadb77e801a09ae0add9e411d31c6f89e3c1e90" -dependencies = [ - "hmac-sha256", - "lzma-rust2", - "ureq 3.3.0", -] [[package]] name = "outref" diff --git a/crates/headroom-core/Cargo.toml b/crates/headroom-core/Cargo.toml index a5705ba8a..fec94392c 100644 --- a/crates/headroom-core/Cargo.toml +++ b/crates/headroom-core/Cargo.toml @@ -122,17 +122,16 @@ redis = { version = "0.27", optional = true, default-features = false } # cycling through the proxy crate. Tiny crate (no I/O, just types). 
http = "1" -[target.'cfg(not(windows))'.dependencies] -fastembed = { version = "5", default-features = false, features = [ - "hf-hub-rustls-tls", - "ort-download-binaries-rustls-tls", - "image-models", -] } - -[target.'cfg(windows)'.dependencies] -# `ort-download-binaries-*` emits DirectML link libs on Windows (`DXCORE`, -# `DXGI`, `D3D12`, `DirectML`). Users installing `headroom-ai[all]` from -# sdist often do not have those SDK libs, so load ORT dynamically instead. +# Load ONNX Runtime dynamically on every platform. The alternative, +# `ort-download-binaries-*`, statically links Microsoft's prebuilt ORT: +# on Windows it emits DirectML link libs (`DXCORE`, `DXGI`, `D3D12`, +# `DirectML`) that sdist installs of `headroom-ai[all]` often lack, and +# on x86_64 Linux/macOS the prebuilt binary requires AVX2 — its code is +# mapped and initialized as soon as the `headroom._core` extension +# loads, so importing headroom SIGILLed on pre-AVX2 CPUs before the +# runtime AVX2 guard could run (#1278). With `ort-load-dynamic` the +# library is only dlopen'd at first use, where the AVX2 guard falls +# back to the non-ONNX detection tiers. fastembed = { version = "5", default-features = false, features = [ "hf-hub-rustls-tls", "ort-load-dynamic", diff --git a/docs/content/docs/configuration.mdx b/docs/content/docs/configuration.mdx index 2c53089fc..eaafcd62a 100644 --- a/docs/content/docs/configuration.mdx +++ b/docs/content/docs/configuration.mdx @@ -246,7 +246,7 @@ headroom proxy --learn --min-evidence 3 | `HEADROOM_STRIP_INTERNAL_HEADERS` | Python proxy: whether to strip internal `x-headroom-*` request headers (e.g. `x-headroom-bypass`, `x-headroom-mode`, `x-headroom-user-id`, `x-headroom-stack`, `x-headroom-base-url`) before every upstream forwarder call (PR-A5, fixes P5-49). `enabled` (default) stops fingerprinting / leakage. `disabled` is an explicit operator opt-in for diagnostic shadow tracing — NOT a fallback. 
Inbound reads of these headers (bypass gating, memory user-id resolution) are unaffected because they read `request.headers` directly. | `enabled` | | `HEADROOM_PROXY_STRIP_INTERNAL_HEADERS` | Rust proxy: same policy as `HEADROOM_STRIP_INTERNAL_HEADERS` but for the Rust transparent proxy. Stripping happens inside `build_forward_request_headers` so both HTTP and WebSocket upstream calls are gated by one flag. `enabled` default; `disabled` operator opt-in for diagnostic shadow tracing. Response-side `X-Headroom-*` injection (e.g. `x-headroom-tokens-saved`) is unrelated and stays. | `enabled` | | `HEADROOM_EMBEDDER_RUNTIME` | Set to `pytorch_mps` to run the memory embedder via the torch sentence-transformers backend on the Apple GPU (MPS). Only engages when Apple MPS is actually available; otherwise it logs a warning and uses the existing default embedder selection path. `pytorch_mps` is the only accepted value. Requires the `[pytorch-mps]` extra. See [Memory](/docs/memory#embedding-runtime--gpu-offload-apple-silicon). | default embedder selection | -| `ORT_DYLIB_PATH` | Windows: path to the `onnxruntime.dll` loaded by the Rust core (magika detection, fastembed embeddings). Auto-pinned at `import headroom` to the DLL inside the `onnxruntime` pip package; set it yourself to override. Without a pin the bare Windows DLL search resolves to the Windows ML System32 build (1.17.x on Win11 24H2+), which deadlocks ONNX session init — see [Troubleshooting](/docs/troubleshooting#windows-ml-content-detection-hangs-or-silently-falls-back). | auto-pinned on Windows | +| `ORT_DYLIB_PATH` | Path to the ONNX Runtime shared library loaded by the Rust core (magika detection, fastembed embeddings), which loads ORT dynamically on every platform. Auto-pinned at `import headroom` to the library inside the `onnxruntime` pip package (`onnxruntime.dll` / `libonnxruntime.so*` / `libonnxruntime*.dylib`); set it yourself to override. 
Without a pin, ML detection degrades to the non-ONNX tiers — and on Windows the bare DLL search can resolve to the Windows ML System32 build (1.17.x on Win11 24H2+), which deadlocks ONNX session init — see [Troubleshooting](/docs/troubleshooting#windows-ml-content-detection-hangs-or-silently-falls-back). | auto-pinned | | `HEADROOM_MAGIKA_INIT_TIMEOUT_SECS` | Upper bound (integer seconds, > 0) on magika's one-time ONNX session init in the Rust detection chain. On timeout the init error is cached and detection uses the non-ML fallback tiers for the rest of the process; a warning is logged. Safety net for environments where the dylib pin above does not apply. | `5` | | `HEADROOM_REQUEST_TIMEOUT` | Request timeout in seconds | `300` | | `HEADROOM_BETA_HEADER_STICKY` | Controls per-session `anthropic-beta` / `OpenAI-Beta` re-echo. `enabled` (default): the proxy unions beta tokens across turns within a session — if the client sends a token in turn N and omits it in turn N+1, the proxy re-injects it to preserve prefix-cache stability. `disabled`: the client's value is forwarded verbatim with no accumulation. Any other value raises at request time. See [Session Beta Header Tracking](/docs/configuration#session-beta-header-tracking). | `enabled` | diff --git a/headroom/_ort.py b/headroom/_ort.py index 00a55d83c..c7082a591 100644 --- a/headroom/_ort.py +++ b/headroom/_ort.py @@ -1,12 +1,16 @@ -"""Pin the ONNX Runtime dylib for the Rust core on Windows. +"""Pin the ONNX Runtime dylib for the Rust core. Why this module exists ---------------------- -On Windows, ``headroom._core`` consumers of the ``ort`` crate (magika -content detection, fastembed embeddings) are built with -``ort-load-dynamic``: the native ``onnxruntime.dll`` is resolved at -*runtime*. 
Unless ``ORT_DYLIB_PATH`` is set, ort falls back to a bare -``LoadLibrary("onnxruntime.dll")`` and the Windows DLL search order +``headroom._core`` consumers of the ``ort`` crate (magika content +detection, fastembed embeddings) are built with ``ort-load-dynamic`` on +every platform: the native ONNX Runtime library is resolved at +*runtime* rather than statically linked. (Static `ort-download-binaries` +linking was dropped on Linux/macOS too because Microsoft's prebuilt +x86_64 ORT requires AVX2 and executes at extension load, SIGILLing +`import headroom._core` on pre-AVX2 CPUs — #1278.) Unless +``ORT_DYLIB_PATH`` is set, ort falls back to a bare dlopen / +``LoadLibrary("onnxruntime.dll")``; on Windows the DLL search order applies — and ``C:\\Windows\\System32`` wins. Windows 11 24H2+ ships ``System32\\onnxruntime.dll`` as part of Windows @@ -19,13 +23,15 @@ package's DLL (which ``headroom-ai[proxy]`` already depends on). The fix: before anything can import ``headroom._core``, resolve the -pip-installed ``onnxruntime\\capi\\onnxruntime.dll`` and export it via -``ORT_DYLIB_PATH``. ``headroom/__init__.py`` calls this hook, which -guarantees ordering for every package-level consumer. +pip-installed ``onnxruntime`` package's shared library +(``capi/onnxruntime.dll`` / ``capi/libonnxruntime.so*`` / +``capi/libonnxruntime*.dylib``) and export it via ``ORT_DYLIB_PATH``. +``headroom/__init__.py`` calls this hook, which guarantees ordering for +every package-level consumer. Behavior contract ----------------- -- Windows-only; a no-op everywhere else. +- All platforms; pins only when the ``onnxruntime`` package is present. - Respects a pre-set ``ORT_DYLIB_PATH`` (user override wins). - Locates the ``onnxruntime`` package via ``find_spec`` WITHOUT importing it (importing would load its native code; this hook must @@ -54,11 +60,11 @@ def ensure_ort_dylib_pinned() -> str | None: - """Export ``ORT_DYLIB_PATH`` for the Rust core's ort runtime (Windows). 
+ """Export ``ORT_DYLIB_PATH`` for the Rust core's ort runtime. Returns the effective dylib path (pinned now or already present in - the environment), or ``None`` when no pin applies (non-Windows, or - no ``onnxruntime`` package to point at). Idempotent and exception-free. + the environment), or ``None`` when no pin applies (no ``onnxruntime`` + package to point at). Idempotent and exception-free. """ global _pinned if _pinned is not _UNSET: @@ -67,10 +73,21 @@ def ensure_ort_dylib_pinned() -> str | None: return _pinned # type: ignore[return-value] -def _resolve_and_pin() -> str | None: - if not sys.platform.startswith("win"): - return None +def _find_dylib(capi: Path) -> Path | None: + """Return the platform's ONNX Runtime shared library inside ``capi``.""" + if sys.platform.startswith("win"): + dll = capi / "onnxruntime.dll" + return dll if dll.is_file() else None + # Linux ships `libonnxruntime.so.X.Y.Z`, macOS `libonnxruntime.dylib` + # (sometimes versioned). Glob rather than hardcode the suffix. + for pattern in ("libonnxruntime.so*", "libonnxruntime*.dylib"): + for candidate in sorted(capi.glob(pattern)): + if candidate.is_file(): + return candidate + return None + +def _resolve_and_pin() -> str | None: try: existing = os.environ.get(_ENV_VAR) if existing: @@ -81,19 +98,21 @@ def _resolve_and_pin() -> str | None: if spec is None or not spec.origin: logger.debug( "onnxruntime package not found; %s left unset. The Rust ML detection " - "may pick up the Windows ML System32 onnxruntime.dll, which is known " - "to deadlock ort init on Windows 11 24H2+ (it then degrades to non-ML " - "tiers via HEADROOM_MAGIKA_INIT_TIMEOUT_SECS). Install onnxruntime or " - "set %s explicitly.", + "cannot load ONNX Runtime and degrades to non-ML tiers (on Windows it " + "may instead pick up the Windows ML System32 onnxruntime.dll, which is " + "known to deadlock ort init on Windows 11 24H2+ and then degrades via " + "HEADROOM_MAGIKA_INIT_TIMEOUT_SECS). 
Install onnxruntime or set %s " + "explicitly.", _ENV_VAR, _ENV_VAR, ) return None - dll = Path(spec.origin).parent / "capi" / "onnxruntime.dll" - if not dll.is_file(): + dll = _find_dylib(Path(spec.origin).parent / "capi") + if dll is None: logger.debug( - "onnxruntime package found but %s is missing; %s left unset", dll, _ENV_VAR + "onnxruntime package found but its shared library is missing; %s left unset", + _ENV_VAR, ) return None diff --git a/tests/test_transforms/test_ort_dylib.py b/tests/test_transforms/test_ort_dylib.py index 8930a07ab..5a79832c6 100644 --- a/tests/test_transforms/test_ort_dylib.py +++ b/tests/test_transforms/test_ort_dylib.py @@ -1,9 +1,12 @@ -"""Tests for headroom._ort — the Windows ORT_DYLIB_PATH auto-pin. - -The resolver guards the Rust core against the Windows DLL search picking -up the Windows ML System32 onnxruntime.dll (deadlocks ort session init on -Win11 24H2+, see headroom/_ort.py). The platform gate is monkeypatched so -the full logic runs on any CI OS. +"""Tests for headroom._ort — the ORT_DYLIB_PATH auto-pin. + +The resolver points the Rust core's ort-load-dynamic runtime at the pip +onnxruntime package's shared library on every platform: on Windows it +guards against the DLL search picking up the Windows ML System32 +onnxruntime.dll (deadlocks ort session init on Win11 24H2+), and on +Linux/macOS it restores ML detection after static ORT linking was +dropped for pre-AVX2 CPU compatibility (#1278). The platform is +monkeypatched so every branch runs on any CI OS. 
""" from __future__ import annotations @@ -37,10 +40,30 @@ def _fake_spec_for(monkeypatch, package_dir): ) -def test_noop_on_non_windows(monkeypatch): +def test_pins_versioned_so_on_linux(monkeypatch, tmp_path): monkeypatch.setattr(sys, "platform", "linux") - assert _ort.ensure_ort_dylib_pinned() is None - assert "ORT_DYLIB_PATH" not in _ort.os.environ + pkg = tmp_path / "onnxruntime" + capi = pkg / "capi" + capi.mkdir(parents=True) + so = capi / "libonnxruntime.so.1.22.0" + so.write_bytes(b"not really a shared object") + _fake_spec_for(monkeypatch, pkg) + + assert _ort.ensure_ort_dylib_pinned() == str(so) + assert _ort.os.environ["ORT_DYLIB_PATH"] == str(so) + + +def test_pins_dylib_on_macos(monkeypatch, tmp_path): + monkeypatch.setattr(sys, "platform", "darwin") + pkg = tmp_path / "onnxruntime" + capi = pkg / "capi" + capi.mkdir(parents=True) + dylib = capi / "libonnxruntime.dylib" + dylib.write_bytes(b"not really a dylib") + _fake_spec_for(monkeypatch, pkg) + + assert _ort.ensure_ort_dylib_pinned() == str(dylib) + assert _ort.os.environ["ORT_DYLIB_PATH"] == str(dylib) def test_respects_existing_env(monkeypatch):