headroomlabs-ai · NN--- · Jul 4, 2026
@@ -118,6 +118,32 @@ jobs:
           path: dist/*.whl
           retention-days: 1
 
+  py39-smoke:
+    # Oldest supported Python: install the abi3 wheel and smoke-test imports/CLI.
+    needs: [changes, build-wheel]
+    if: needs.changes.outputs.code == 'true'
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.9"
+      - name: Download prebuilt wheel
+        uses: actions/download-artifact@v8
+        with:
+          name: headroom-wheel
+          path: dist
+      - name: Install wheel on 3.9 and smoke-test
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install dist/*.whl
+          # `python -c` puts the current directory first on sys.path, and the
+          # checkout root holds a headroom/ source directory. -I (isolated
+          # mode) drops the cwd entry so the import resolves to the installed
+          # wheel rather than the working copy.
+          python -I -c "import headroom; print(headroom.__version__)"
+          headroom --help >/dev/null
+      - name: Bytecode-compile the whole package on 3.9
+        run: |
+          # -I for the same reason: locate the *installed* package directory.
+          python -m compileall -q "$(python -I -c 'import headroom, os; print(os.path.dirname(headroom.__file__))')"
+
   prefetch-model:
     needs: changes
     if: needs.changes.outputs.code == 'true'

@@ -253,3 +253,5 @@ uv.lock
 /headroom/_core.*.so
 /headroom/_core.so
 .tokensave
+.rtk/
+custom:/
@@ -122,7 +122,7 @@ Enable or disable automatic Copilot review in **Settings → Rules → Rulesets
 - [Ruff](https://github.com/astral-sh/ruff) for lint + format, line length 100, PEP 8.
 - Type hints on public functions; Google-style docstrings.
 - Cover new behavior + edge cases; aim >80% coverage on new code.
-- Python 3.10+. Optional features go behind extras.
+- Python 3.9+. Optional features go behind extras.
 
 ## Architecture principles
 

@@ -59,7 +59,7 @@ tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] }
 axum = "0.7"
 tower = "0.5"
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
-pyo3 = { version = "0.29", features = ["abi3-py310"] }
+pyo3 = { version = "0.29", features = ["abi3-py39"] }
 # Forwards Rust `log` records (incl. tracing events via the `log` compat
 # feature above) into Python's `logging` inside the _core extension module.
 pyo3-log = "0.13"

@@ -103,7 +103,7 @@ headroom dashboard                      # live savings dashboard (proxy must be
 
 The `headroom` CLI ships **only** via the PyPI package. The npm `headroom-ai` is the TypeScript SDK — a library you import (`import { compress } from 'headroom-ai'`), not a CLI, so it provides no `headroom` command.
 
-Granular extras: `[proxy]`, `[mcp]`, `[ml]`, `[code]`, `[memory]`, `[vector]` (optional HNSW backend — needs a C++ toolchain, not in `[all]`), `[relevance]`, `[image]`, `[agno]`, `[langchain]`, `[evals]`, `[pytorch-mps]` (Apple-GPU memory-embedder offload — set `HEADROOM_EMBEDDER_RUNTIME=pytorch_mps`). Requires **Python 3.10+**.
+Granular extras: `[proxy]`, `[mcp]`, `[ml]`, `[code]`, `[memory]`, `[vector]` (optional HNSW backend — needs a C++ toolchain, not in `[all]`), `[relevance]`, `[image]`, `[agno]`, `[langchain]`, `[evals]`, `[pytorch-mps]` (Apple-GPU memory-embedder offload — set `HEADROOM_EMBEDDER_RUNTIME=pytorch_mps`). Requires **Python 3.9+**.
 
 ## Proof
 
@@ -326,7 +326,7 @@ npm install headroom-ai                 # TypeScript SDK (library only — no `h
 docker pull ghcr.io/chopratejas/headroom:latest
 ```
 
-Granular extras: `[proxy]`, `[mcp]`, `[ml]` (Kompress-v2-base), `[code]`, `[memory]`, `[vector]` (optional HNSW backend — needs a C++ toolchain, not in `[all]`), `[relevance]`, `[image]`, `[agno]`, `[langchain]`, `[evals]`, `[pytorch-mps]` (Apple-GPU memory-embedder offload — set `HEADROOM_EMBEDDER_RUNTIME=pytorch_mps`). Requires **Python 3.10+**.
+Granular extras: `[proxy]`, `[mcp]`, `[ml]` (Kompress-v2-base), `[code]`, `[memory]`, `[vector]` (optional HNSW backend — needs a C++ toolchain, not in `[all]`), `[relevance]`, `[image]`, `[agno]`, `[langchain]`, `[evals]`, `[pytorch-mps]` (Apple-GPU memory-embedder offload — set `HEADROOM_EMBEDDER_RUNTIME=pytorch_mps`). Requires **Python 3.9+**.
 
 > **Note**: `[all]` covers the core stack but excludes framework adapters. Install them separately: `pip install "headroom-ai[langchain]"` (also `[agno]`, `[strands]`, `[anyllm]`, `[bedrock]`).
 

@@ -17,6 +17,8 @@
     python benchmarks/comprehensive_eval.py
 """
 
+from __future__ import annotations
+
 import json
 import os
 import time

@@ -160,7 +160,7 @@ def _build_bust_events(replay: SessionReplay) -> dict[str, list[dict[str, object
                         (
                             idx
                             for idx, (prev_msg, curr_msg) in enumerate(
-                                zip(previous_forwarded_request, forwarded, strict=False)
+                                zip(previous_forwarded_request, forwarded)
                             )
                             if prev_msg != curr_msg
                         ),

@@ -13,8 +13,8 @@ description: Install Headroom via pip, npm, or Docker. Includes all Python extra
 
 ## Python
 
-Headroom requires **Python 3.10+** and is published as `headroom-ai` on PyPI.
-Current release wheels are built for Python **3.10 through 3.13** on Linux
+Headroom requires **Python 3.9+** and is published as `headroom-ai` on PyPI.
+Current release wheels are built for Python **3.9 through 3.14** on Linux
 (manylinux_2_28 x86_64 / aarch64) and macOS (Apple Silicon). Other targets —
 **Windows** and **Intel macOS** — fall back to building the Rust extension
 from the sdist and need a working native toolchain. If your installer is
@@ -244,7 +244,7 @@ These are common issues faced during initial setup and how to resolve them.
 
 ### Python version error
 
-This project requires **Python 3.10+**.
+This project requires **Python 3.9+**.
 
 Check your version:
 

@@ -0,0 +1,136 @@
+"""Compatibility shims for Python versions older than 3.10."""
+
+from __future__ import annotations
+
+import sys
+from collections.abc import AsyncIterator
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from importlib.metadata import EntryPoint
+
+# `@dataclass(slots=True)` needs 3.10+.
+# Use `@dataclass(**DATACLASS_SLOTS)` to get slots where available
+# without breaking 3.9.
+DATACLASS_SLOTS: dict[str, bool] = {"slots": True} if sys.version_info >= (3, 10) else {}
+
+if sys.version_info >= (3, 10):
+    import asyncio
+    import importlib.metadata
+    from contextlib import aclosing
+
+    def entry_points_group(group: str) -> list[EntryPoint]:
+        """Return the entry points registered under ``group``.
+
+        Uses ``entry_points(group=...)``, which needs 3.10+.
+        """
+        # Resolved dynamically so tests can monkeypatch
+        # importlib.metadata.entry_points.
+        return list(importlib.metadata.entry_points(group=group))
+
+    AsyncLock = asyncio.Lock
+    AsyncEvent = asyncio.Event
+    AsyncSemaphore = asyncio.Semaphore
+    AsyncQueue = asyncio.Queue
+    AsyncCondition = asyncio.Condition
+
+else:
+    import asyncio
+    import collections
+    import importlib.metadata
+    from contextlib import asynccontextmanager
+
+    @asynccontextmanager
+    async def aclosing(thing: Any) -> AsyncIterator[Any]:
+        """Backport of ``contextlib.aclosing`` (3.10+).
+
+        Yields ``thing`` unchanged and awaits ``thing.aclose()`` on exit,
+        whether the ``async with`` body finished normally or raised.
+        """
+        try:
+            yield thing
+        finally:
+            await thing.aclose()
+
+    def entry_points_group(group: str) -> list[EntryPoint]:
+        """Entry points for ``group`` (`entry_points(group=...)`
+        needs 3.10+)."""
+        # Resolved dynamically so tests can monkeypatch
+        # importlib.metadata.entry_points.
+        eps = importlib.metadata.entry_points()
+        if isinstance(eps, dict):  # 3.9 returns {group: [EntryPoint, ...]}
+            return list(eps.get(group, []))
+        return list(eps)
+
+    class _LazyLoopMixin:
+        """Defer event-loop binding to first use inside a running loop.
+
+        Python 3.9's asyncio primitives call ``get_event_loop()`` eagerly in
+        ``__init__``, which (a) raises when constructed in a sync context with
+        no loop set, and (b) binds to a loop that may not be the one the
+        primitive is later awaited in. Python 3.10 made binding lazy; these
+        subclasses backport that behavior by skipping the eager binding and
+        resolving ``self._loop`` at first use via a property.
+        """
+
+        # Loop this primitive is bound to; stays None until the first
+        # ``_loop`` access.
+        _lazy_loop: Any = None
+
+        @property
+        def _loop(self) -> Any:
+            """Return the running loop, binding to it on first access.
+
+            Raises:
+                RuntimeError: If no event loop is running, or if the
+                    primitive is already bound to a different loop.
+            """
+            loop = asyncio.get_running_loop()
+            if self._lazy_loop is None:
+                # First use: remember the loop we were first awaited in.
+                self._lazy_loop = loop
+            if self._lazy_loop is not loop:
+                raise RuntimeError(f"{self!r} is bound to a different event loop")
+            return loop
+
+    class AsyncLock(_LazyLoopMixin, asyncio.Lock):
+        """``asyncio.Lock`` whose event-loop binding is deferred to first use."""
+
+        def __init__(self) -> None:
+            # State from 3.9 Lock.__init__, minus the eager loop binding.
+            self._waiters = None
+            self._locked = False
+
+    class AsyncEvent(_LazyLoopMixin, asyncio.Event):
+        """``asyncio.Event`` whose event-loop binding is deferred to first use."""
+
+        def __init__(self) -> None:
+            # State from 3.9 Event.__init__, minus the eager loop binding.
+            self._waiters = collections.deque()
+            self._value = False
+
+    class AsyncSemaphore(_LazyLoopMixin, asyncio.Semaphore):
+        """``asyncio.Semaphore`` whose event-loop binding is deferred to first use.
+
+        Args:
+            value: Initial counter value; must be >= 0.
+
+        Raises:
+            ValueError: If ``value`` is negative.
+        """
+
+        def __init__(self, value: int = 1) -> None:
+            # State from 3.9 Semaphore.__init__, minus the eager loop binding.
+            if value < 0:
+                raise ValueError("Semaphore initial value must be >= 0")
+            self._value = value
+            self._waiters = collections.deque()
+            self._wakeup_scheduled = False
+
+    class AsyncCondition(_LazyLoopMixin, asyncio.Condition):
+        """``asyncio.Condition`` whose event-loop binding is deferred to first use.
+
+        Args:
+            lock: Lock to wrap; a fresh ``AsyncLock`` is created when omitted.
+        """
+
+        def __init__(self, lock: Any = None) -> None:
+            # State from 3.9 Condition.__init__, minus the eager loop binding
+            # (and minus its lock._loop agreement check, which is enforced
+            # lazily at await time by _LazyLoopMixin instead).
+            if lock is None:
+                lock = AsyncLock()
+            self._lock = lock
+            # Same method re-exports CPython 3.9's Condition.__init__ does.
+            self.locked = lock.locked  # type: ignore[method-assign]
+            self.acquire = lock.acquire  # type: ignore[method-assign]
+            self.release = lock.release  # type: ignore[method-assign]
+            self._waiters = collections.deque()
+
+    class AsyncQueue(_LazyLoopMixin, asyncio.Queue):
+        """``asyncio.Queue`` whose event-loop binding is deferred to first use.
+
+        Args:
+            maxsize: Stored as ``_maxsize`` and forwarded to ``_init``.
+        """
+
+        def __init__(self, maxsize: int = 0) -> None:
+            # State from 3.9 Queue.__init__, minus the eager loop binding.
+            self._maxsize = maxsize
+            self._getters: collections.deque[Any] = collections.deque()
+            self._putters: collections.deque[Any] = collections.deque()
+            self._unfinished_tasks = 0
+            # Use the lazy-binding event so the queue itself stays loop-free
+            # at construction time.
+            self._finished = AsyncEvent()
+            self._finished.set()
+            self._init(maxsize)
+
+
+__all__ = [
+    "DATACLASS_SLOTS",
+    "AsyncCondition",
+    "AsyncEvent",
+    "AsyncLock",
+    "AsyncQueue",
+    "AsyncSemaphore",
+    "aclosing",
+    "entry_points_group",
+]
@@ -42,6 +42,8 @@
 from dataclasses import dataclass, field, replace
 from typing import TYPE_CHECKING, Any
 
+from headroom._compat import entry_points_group
+
 if TYPE_CHECKING:
     from ..memory.tracker import ComponentStats
     from .backends import CompressionStoreBackend
@@ -970,9 +972,7 @@ def _create_default_ccr_backend() -> CompressionStoreBackend | None:
             )
             return None
     try:
-        from importlib.metadata import entry_points
-
-        all_eps = entry_points(group="headroom.ccr_backend")
+        all_eps = entry_points_group("headroom.ccr_backend")
         ep = next((e for e in all_eps if e.name == backend_type), None)
         if ep is None:
             logger.warning(

@@ -263,7 +263,7 @@ def _pair_exchanges(
         direct_items = direct_by_key.get(key, [])
         headroom_items = headroom_by_key.get(key, [])
         shared = min(len(direct_items), len(headroom_items))
-        pairs.extend(zip(direct_items[:shared], headroom_items[:shared], strict=False))
+        pairs.extend(zip(direct_items[:shared], headroom_items[:shared]))
         only_direct.extend([item.route_key for item in direct_items[shared:]])
         only_headroom.extend([item.route_key for item in headroom_items[shared:]])
     return pairs, only_direct, only_headroom

@@ -17,6 +17,8 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any
 
+from headroom._compat import AsyncLock
+
 if TYPE_CHECKING:
     from ..memory.tracker import ComponentStats
 
@@ -125,7 +127,7 @@ def __init__(
         self._contexts: dict[str, BatchContext] = {}
         self._ttl = ttl
         self._max_contexts = max_contexts
-        self._lock = asyncio.Lock()
+        self._lock = AsyncLock()
         self._cleanup_task: asyncio.Task | None = None
 
     async def store(self, context: BatchContext) -> None:

@@ -1,5 +1,7 @@
 """Formatting utilities for CLI output using Rich."""
 
+from __future__ import annotations
+
 from datetime import datetime
 from typing import Any
 

@@ -1,5 +1,7 @@
 """Traffic audit CLI commands."""
 
+from __future__ import annotations
+
 from pathlib import Path
 
 import click

@@ -5,6 +5,8 @@
 needing API key access.
 """
 
+from __future__ import annotations
+
 import json
 import shutil
 import subprocess

@@ -1,5 +1,7 @@
 """Proxy server CLI commands."""
 
+from __future__ import annotations
+
 import logging
 import os
 import sys

@@ -3085,7 +3085,7 @@ def _marker_pid_reused(marker: Path, pid: int) -> bool:
         return False
     src = rec.get("start_src")
     recorded = rec.get("start_time")
-    if not isinstance(src, str) or not isinstance(recorded, int | float):
+    if not isinstance(src, str) or not isinstance(recorded, (int, float)):
         return False  # legacy / identity-less marker — can't tell
     ident = _proc_identity(pid)
     if ident is None or ident[0] != src:

@@ -19,6 +19,7 @@
 from urllib.parse import urlparse
 
 from headroom import paths
+from headroom._compat import AsyncLock
 from headroom._subprocess import run
 from headroom.copilot_linux_secret import read_copilot_oauth_token as read_linux_secret_token
 from headroom.copilot_macos_keychain import read_copilot_oauth_token as read_macos_keychain_token
@@ -367,7 +368,7 @@ def _parse_expiry(value: Any) -> float | None:
     if value in (None, ""):
         return None
 
-    if isinstance(value, int | float):
+    if isinstance(value, (int, float)):
         number = float(value)
         if number > 10_000_000_000:
             return number / 1000.0
@@ -985,7 +986,7 @@ class CopilotTokenProvider:
     """Resolve and cache short-lived Copilot API tokens."""
 
     def __init__(self) -> None:
-        self._lock = asyncio.Lock()
+        self._lock = AsyncLock()
         self._cached: CopilotAPIToken | None = None
 
     async def get_api_token(self) -> CopilotAPIToken:
@@ -1042,7 +1043,7 @@ async def _exchange_token(self, oauth_token: str) -> CopilotAPIToken:
             token=token,
             expires_at=expires_at,
             api_url=api_url,
-            refresh_in=int(refresh_in) if isinstance(refresh_in, int | float) else None,
+            refresh_in=int(refresh_in) if isinstance(refresh_in, (int, float)) else None,
             sku=str(sku) if isinstance(sku, str) and sku.strip() else None,
         )
 

@@ -22,6 +22,7 @@
 from pathlib import Path
 from typing import Any
 
+from headroom._compat import AsyncSemaphore
 from headroom.evals.memory.locomo import (
     LOCOMO_CATEGORIES,
     LoCoMoCase,
@@ -708,7 +709,7 @@ async def run(
             # Parallel evaluation using semaphore to limit concurrency
             import asyncio
 
-            semaphore = asyncio.Semaphore(self.config.parallel_workers)
+            semaphore = AsyncSemaphore(self.config.parallel_workers)
             completed = 0
 
             async def eval_with_semaphore(case: LoCoMoCase) -> MemoryEvalResult: