headroomlabs-ai · Parideboy · Jul 2, 2026
@@ -122,17 +122,16 @@ redis = { version = "0.27", optional = true, default-features = false }
 # cycling through the proxy crate. Tiny crate (no I/O, just types).
 http = "1"
 
-[target.'cfg(not(windows))'.dependencies]
-fastembed = { version = "5", default-features = false, features = [
-    "hf-hub-rustls-tls",
-    "ort-download-binaries-rustls-tls",
-    "image-models",
-] }
-
-[target.'cfg(windows)'.dependencies]
-# `ort-download-binaries-*` emits DirectML link libs on Windows (`DXCORE`,
-# `DXGI`, `D3D12`, `DirectML`). Users installing `headroom-ai[all]` from
-# sdist often do not have those SDK libs, so load ORT dynamically instead.
+# Load ONNX Runtime dynamically on every platform. The alternative,
+# `ort-download-binaries-*`, statically links Microsoft's prebuilt ORT:
+# on Windows it emits DirectML link libs (`DXCORE`, `DXGI`, `D3D12`,
+# `DirectML`) that sdist installs of `headroom-ai[all]` often lack, and
+# on x86_64 Linux/macOS the prebuilt binary requires AVX2 — its code is
+# mapped and initialized as soon as the `headroom._core` extension
+# loads, so importing headroom SIGILLed on pre-AVX2 CPUs before the
+# runtime AVX2 guard could run (#1278). With `ort-load-dynamic` the
+# library is only dlopen'd at first use, where the AVX2 guard falls
+# back to the non-ONNX detection tiers.
 fastembed = { version = "5", default-features = false, features = [
     "hf-hub-rustls-tls",
     "ort-load-dynamic",

@@ -246,7 +246,7 @@ headroom proxy --learn --min-evidence 3
 | `HEADROOM_STRIP_INTERNAL_HEADERS` | Python proxy: whether to strip internal `x-headroom-*` request headers (e.g. `x-headroom-bypass`, `x-headroom-mode`, `x-headroom-user-id`, `x-headroom-stack`, `x-headroom-base-url`) before every upstream forwarder call (PR-A5, fixes P5-49). `enabled` (default) stops fingerprinting / leakage. `disabled` is an explicit operator opt-in for diagnostic shadow tracing — NOT a fallback. Inbound reads of these headers (bypass gating, memory user-id resolution) are unaffected because they read `request.headers` directly. | `enabled` |
 | `HEADROOM_PROXY_STRIP_INTERNAL_HEADERS` | Rust proxy: same policy as `HEADROOM_STRIP_INTERNAL_HEADERS` but for the Rust transparent proxy. Stripping happens inside `build_forward_request_headers` so both HTTP and WebSocket upstream calls are gated by one flag. `enabled` default; `disabled` operator opt-in for diagnostic shadow tracing. Response-side `X-Headroom-*` injection (e.g. `x-headroom-tokens-saved`) is unrelated and stays. | `enabled` |
 | `HEADROOM_EMBEDDER_RUNTIME` | Set to `pytorch_mps` to run the memory embedder via the torch sentence-transformers backend on the Apple GPU (MPS). Only engages when Apple MPS is actually available; otherwise it logs a warning and uses the existing default embedder selection path. `pytorch_mps` is the only accepted value. Requires the `[pytorch-mps]` extra. See [Memory](/docs/memory#embedding-runtime--gpu-offload-apple-silicon). | default embedder selection |
-| `ORT_DYLIB_PATH` | Windows: path to the `onnxruntime.dll` loaded by the Rust core (magika detection, fastembed embeddings). Auto-pinned at `import headroom` to the DLL inside the `onnxruntime` pip package; set it yourself to override. Without a pin the bare Windows DLL search resolves to the Windows ML System32 build (1.17.x on Win11 24H2+), which deadlocks ONNX session init — see [Troubleshooting](/docs/troubleshooting#windows-ml-content-detection-hangs-or-silently-falls-back). | auto-pinned on Windows |
+| `ORT_DYLIB_PATH` | Path to the ONNX Runtime shared library loaded by the Rust core (magika detection, fastembed embeddings), which loads ORT dynamically on every platform. Auto-pinned at `import headroom` to the library inside the `onnxruntime` pip package (`onnxruntime.dll` / `libonnxruntime.so*` / `libonnxruntime*.dylib`); set it yourself to override. Without a pin, ML detection degrades to the non-ONNX tiers; on Windows the bare DLL search can instead resolve to the Windows ML System32 build (1.17.x on Win11 24H2+), which deadlocks ONNX session init — see [Troubleshooting](/docs/troubleshooting#windows-ml-content-detection-hangs-or-silently-falls-back). | auto-pinned when `onnxruntime` is installed |
 | `HEADROOM_MAGIKA_INIT_TIMEOUT_SECS` | Upper bound (integer seconds, > 0) on magika's one-time ONNX session init in the Rust detection chain. On timeout the init error is cached and detection uses the non-ML fallback tiers for the rest of the process; a warning is logged. Safety net for environments where the dylib pin above does not apply. | `5` |
 | `HEADROOM_REQUEST_TIMEOUT` | Request timeout in seconds | `300` |
 | `HEADROOM_BETA_HEADER_STICKY` | Controls per-session `anthropic-beta` / `OpenAI-Beta` re-echo. `enabled` (default): the proxy unions beta tokens across turns within a session — if the client sends a token in turn N and omits it in turn N+1, the proxy re-injects it to preserve prefix-cache stability. `disabled`: the client's value is forwarded verbatim with no accumulation. Any other value raises at request time. See [Session Beta Header Tracking](/docs/configuration#session-beta-header-tracking). | `enabled` |

@@ -1,12 +1,16 @@
-"""Pin the ONNX Runtime dylib for the Rust core on Windows.
+"""Pin the ONNX Runtime dylib for the Rust core.
 
 Why this module exists
 ----------------------
-On Windows, ``headroom._core`` consumers of the ``ort`` crate (magika
-content detection, fastembed embeddings) are built with
-``ort-load-dynamic``: the native ``onnxruntime.dll`` is resolved at
-*runtime*. Unless ``ORT_DYLIB_PATH`` is set, ort falls back to a bare
-``LoadLibrary("onnxruntime.dll")`` and the Windows DLL search order
+``headroom._core`` consumers of the ``ort`` crate (magika content
+detection, fastembed embeddings) are built with ``ort-load-dynamic`` on
+every platform: the native ONNX Runtime library is resolved at
+*runtime* rather than statically linked. (Static ``ort-download-binaries``
+linking was dropped on Linux/macOS too because Microsoft's prebuilt
+x86_64 ORT requires AVX2 and executes at extension load, SIGILLing
+``import headroom._core`` on pre-AVX2 CPUs — #1278.) Unless
+``ORT_DYLIB_PATH`` is set, ort falls back to a bare dlopen /
+``LoadLibrary("onnxruntime.dll")``; on Windows the DLL search order
 applies — and ``C:\\Windows\\System32`` wins.
 
 Windows 11 24H2+ ships ``System32\\onnxruntime.dll`` as part of Windows
@@ -19,13 +23,15 @@
 package's DLL (which ``headroom-ai[proxy]`` already depends on).
 
 The fix: before anything can import ``headroom._core``, resolve the
-pip-installed ``onnxruntime\\capi\\onnxruntime.dll`` and export it via
-``ORT_DYLIB_PATH``. ``headroom/__init__.py`` calls this hook, which
-guarantees ordering for every package-level consumer.
+pip-installed ``onnxruntime`` package's shared library
+(``capi/onnxruntime.dll`` / ``capi/libonnxruntime.so*`` /
+``capi/libonnxruntime*.dylib``) and export it via ``ORT_DYLIB_PATH``.
+``headroom/__init__.py`` calls this hook, which guarantees ordering for
+every package-level consumer.
 
 Behavior contract
 -----------------
-- Windows-only; a no-op everywhere else.
+- All platforms; pins only when the ``onnxruntime`` package is present.
 - Respects a pre-set ``ORT_DYLIB_PATH`` (user override wins).
 - Locates the ``onnxruntime`` package via ``find_spec`` WITHOUT
   importing it (importing would load its native code; this hook must
@@ -54,11 +60,11 @@
 
 
 def ensure_ort_dylib_pinned() -> str | None:
-    """Export ``ORT_DYLIB_PATH`` for the Rust core's ort runtime (Windows).
+    """Export ``ORT_DYLIB_PATH`` for the Rust core's ort runtime.
 
     Returns the effective dylib path (pinned now or already present in
-    the environment), or ``None`` when no pin applies (non-Windows, or
-    no ``onnxruntime`` package to point at). Idempotent and exception-free.
+    the environment), or ``None`` when no pin applies (no ``onnxruntime``
+    package to point at). Idempotent and exception-free.
     """
     global _pinned
     if _pinned is not _UNSET:
@@ -67,10 +73,21 @@ def ensure_ort_dylib_pinned() -> str | None:
     return _pinned  # type: ignore[return-value]
 
 
-def _resolve_and_pin() -> str | None:
-    if not sys.platform.startswith("win"):
-        return None
+def _find_dylib(capi: Path) -> Path | None:
+    """Return the ONNX Runtime shared library in ``capi``, or ``None`` if none is found."""
+    if sys.platform.startswith("win"):
+        dll = capi / "onnxruntime.dll"
+        return dll if dll.is_file() else None
+    # Linux ships `libonnxruntime.so.<version>`, macOS `libonnxruntime.dylib` (sometimes
+    # versioned). Glob instead of hardcoding the suffix; sorted() makes the pick stable.
+    for pattern in ("libonnxruntime.so*", "libonnxruntime*.dylib"):
+        for candidate in sorted(capi.glob(pattern)):
+            if candidate.is_file():
+                return candidate
+    return None
+
 
+def _resolve_and_pin() -> str | None:
     try:
         existing = os.environ.get(_ENV_VAR)
         if existing:
@@ -81,19 +98,21 @@ def _resolve_and_pin() -> str | None:
         if spec is None or not spec.origin:
             logger.debug(
                 "onnxruntime package not found; %s left unset. The Rust ML detection "
-                "may pick up the Windows ML System32 onnxruntime.dll, which is known "
-                "to deadlock ort init on Windows 11 24H2+ (it then degrades to non-ML "
-                "tiers via HEADROOM_MAGIKA_INIT_TIMEOUT_SECS). Install onnxruntime or "
-                "set %s explicitly.",
+                "cannot load ONNX Runtime and degrades to non-ML tiers (on Windows it "
+                "may instead pick up the Windows ML System32 onnxruntime.dll, which is "
+                "known to deadlock ort init on Windows 11 24H2+ and then degrades via "
+                "HEADROOM_MAGIKA_INIT_TIMEOUT_SECS). Install onnxruntime or set %s "
+                "explicitly.",
                 _ENV_VAR,
                 _ENV_VAR,
             )
             return None
 
-        dll = Path(spec.origin).parent / "capi" / "onnxruntime.dll"
-        if not dll.is_file():
+        dll = _find_dylib(Path(spec.origin).parent / "capi")
+        if dll is None:
             logger.debug(
-                "onnxruntime package found but %s is missing; %s left unset", dll, _ENV_VAR
+                "onnxruntime package found but its shared library is missing; %s left unset",
+                _ENV_VAR,
             )
             return None
 

@@ -1,9 +1,12 @@
-"""Tests for headroom._ort — the Windows ORT_DYLIB_PATH auto-pin.
-
-The resolver guards the Rust core against the Windows DLL search picking
-up the Windows ML System32 onnxruntime.dll (deadlocks ort session init on
-Win11 24H2+, see headroom/_ort.py). The platform gate is monkeypatched so
-the full logic runs on any CI OS.
+"""Tests for headroom._ort — the ORT_DYLIB_PATH auto-pin.
+
+The resolver points the Rust core's ort-load-dynamic runtime at the pip
+onnxruntime package's shared library on every platform: on Windows it
+guards against the DLL search picking up the Windows ML System32
+onnxruntime.dll (deadlocks ort session init on Win11 24H2+), and on
+Linux/macOS it restores ML detection after static ORT linking was
+dropped for pre-AVX2 CPU compatibility (#1278). The platform is
+monkeypatched so every branch runs on any CI OS.
 """
 
 from __future__ import annotations
@@ -37,10 +40,30 @@ def _fake_spec_for(monkeypatch, package_dir):
     )
 
 
-def test_noop_on_non_windows(monkeypatch):
+def test_pins_versioned_so_on_linux(monkeypatch, tmp_path):
     monkeypatch.setattr(sys, "platform", "linux")
-    assert _ort.ensure_ort_dylib_pinned() is None
-    assert "ORT_DYLIB_PATH" not in _ort.os.environ
+    pkg = tmp_path / "onnxruntime"
+    capi = pkg / "capi"
+    capi.mkdir(parents=True)
+    so = capi / "libonnxruntime.so.1.22.0"
+    so.write_bytes(b"not really a shared object")
+    _fake_spec_for(monkeypatch, pkg)
+
+    assert _ort.ensure_ort_dylib_pinned() == str(so)
+    assert _ort.os.environ["ORT_DYLIB_PATH"] == str(so)
+
+
+def test_pins_dylib_on_macos(monkeypatch, tmp_path):
+    monkeypatch.setattr(sys, "platform", "darwin")  # simulate macOS on any CI host
+    pkg = tmp_path / "onnxruntime"
+    capi = pkg / "capi"
+    capi.mkdir(parents=True)
+    dylib = capi / "libonnxruntime.dylib"  # unversioned macOS library name
+    dylib.write_bytes(b"not really a dylib")  # placeholder; presumably only the path matters
+    _fake_spec_for(monkeypatch, pkg)
+
+    assert _ort.ensure_ort_dylib_pinned() == str(dylib)
+    assert _ort.os.environ["ORT_DYLIB_PATH"] == str(dylib)
 
 
 def test_respects_existing_env(monkeypatch):