unslothai · danielhanchen · May 24, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
diff --git a/.gitignore b/.gitignore
@@ -235,5 +235,7 @@ package-lock.json
 !studio/backend/core/data_recipe/oxc-validator/package-lock.json
 !studio/package-lock.json
 llama.cpp/
+async_task_outputs/
+individual_reviews/
 # Stray "~" dir some tools create from a literal ~ TMPDIR; never part of the repo.
 /~/
diff --git a/docker/.dockerignore b/docker/.dockerignore
@@ -0,0 +1,16 @@
+**
+!Dockerfile
+!entrypoint.sh
+!smoke_test.py
+!fetch_llama_prebuilt.py
+!supervisord.conf
+!studio_launch.sh
+!unsloth_studio_update.sh
+!unsloth_llama_update.sh
+!unsloth_jupyter_tunnel.sh
+!unsloth_nb_compat.py
+!unsloth_pip_shim.py
+!unsloth_ipython_startup.py
+!unsloth_run.py
+!unsloth_sync_notebooks.sh
+!unsloth_nb_content_sig.py
diff --git a/docker/Dockerfile b/docker/Dockerfile
diff --git a/docker/Dockerfile.studio b/docker/Dockerfile.studio
@@ -0,0 +1,184 @@
+# Full Unsloth image: base training stack + Studio + JupyterLab + sshd.
+#
+# This is the image published as docker.io/unsloth/unsloth:latest. It layers
+# Unsloth Studio on top of the lean base image (Dockerfile, published under
+# the `base` tags) and runs the same service trio as the previous production
+# image: Studio on 8000, JupyterLab on 8888, key-only sshd on 22.
+#
+# Build (local):
+#   docker buildx build \
+#     --build-arg BASE_IMAGE=unsloth-blackwell:test \
+#     -f docker/Dockerfile.studio \
+#     -t unsloth-blackwell:studio docker/
+#
+# Run:
+#   docker run --rm --gpus all -p 8000:8000 -p 8888:8888 \
+#     -v $HOME/.cache/huggingface:/workspace/.cache/huggingface \
+#     unsloth-blackwell:studio
+#
+# Open http://localhost:8000 for Studio (first-boot admin password is printed
+# in the container logs and persisted under /opt/unsloth-studio/auth/) and
+# http://localhost:8888 for JupyterLab (password: JUPYTER_PASSWORD env; when
+# unset a random one is generated and printed in the container logs). On
+# hosts without GPU passthrough (Docker Desktop on macOS, Windows without
+# WSL2 GPU) add -e UNSLOTH_ALLOW_CPU=1: training is unavailable but Studio
+# chat / Data Recipes / GGUF tooling / Jupyter work.
+#
+# CI pins BASE_IMAGE to the just-published multi-arch base digest so the two
+# images always ship the same stack.
+
+ARG BASE_IMAGE=unsloth-blackwell:test
+FROM ${BASE_IMAGE}
+
+# Studio source ref to clone. Defaults to `main`, but a CI publish pipeline
+# that pins BASE_IMAGE to a digest should pin this too (same UNSLOTH_REF as
+# the base) so the published image is reproducible against a known ref.
+ARG UNSLOTH_STUDIO_REF=main
+ARG TARGETARCH
+
+# Services run as root in this revision (the base image is root-only by
+# design); the previous production image ran them as a dedicated uid-1001
+# user. Non-root parity is a tracked follow-up. sshd is key-only and stays
+# disabled unless a PUBLIC_KEY/SSH_KEY is provided, and no secrets are
+# persisted to disk (see studio_launch.sh).
+#
+# JUPYTER_PORT / UNSLOTH_ENABLE_SSHD defaults keep supervisord's %(ENV_*)s
+# expansions resolvable when it is run directly, bypassing the launcher.
+# NOTE(review): DEBIAN_FRONTEND is set via ENV, so it also persists at runtime.
+USER root
+ENV UNSLOTH_STUDIO_HOME=/opt/unsloth-studio \
+    JUPYTER_PORT=8888 \
+    UNSLOTH_ENABLE_SSHD=false \
+    DEBIAN_FRONTEND=noninteractive
+
+# OS packages: curl + git for install.sh, supervisor + openssh-server for the
+# service trio. python, uv and pip are already provided by the base image.
+RUN apt-get update && \
+    apt-get install --yes --no-install-recommends \
+        ca-certificates curl git openssh-server supervisor && \
+    rm -rf /var/lib/apt/lists/*
+
+# Clone + install Studio into a dedicated venv under $UNSLOTH_STUDIO_HOME.
+# --local makes install.sh use the just-cloned source tree (editable
+# install), so the source dir MUST persist for the venv's `unsloth_cli`
+# entrypoint to keep resolving. Move it under $UNSLOTH_STUDIO_HOME/src
+# (already inside the persistent layer) instead of deleting it. Strip
+# .git to save ~120MB.
+#
+# The llama.cpp symlink BEFORE install.sh points Studio's prebuilt dir at
+# the bundle already baked into the base image (validated, sha256-checked,
+# UNSLOTH_PREBUILT_INFO.json present), so the installer's prebuilt step
+# recognises it and skips a second ~400MB download. The
+# .unsloth-studio-owned marker satisfies setup.sh's ownership assertion for
+# custom STUDIO_HOMEs -- the dir IS provisioned exclusively for Studio.
+#
+# UNSLOTH_TORCH_INDEX_FAMILY pins the torch wheel index for the Studio
+# venv: at build time there is no GPU and no nvidia-smi, so install.sh's
+# probing would land on cpu or cu126 wheels depending on which host built
+# the image. cu128 on BOTH arches, mirroring the base venv: cu130 wheels
+# would silently lift the arm64 driver floor to 580+ while the base venv
+# keeps the documented 570+ floor. DGX Spark / GB10 (sm_121) support comes
+# from the same NVRTC cu13 swap the base image applies to its venv --
+# repeated below for the Studio venv's own bundled libnvrtc (the base's arm64
+# layer installs cuda-nvrtc-13-0; a dangling swap link now fails the build).
+#
+# UNSLOTH_PYTHON=3.12 pins the Studio venv to the SAME Python minor as the base
+# venv (install.sh defaults Linux to 3.13). Matching minors makes the two venvs'
+# nvidia-*-cu12 CUDA wheels byte-identical, so the dedup step ending this RUN can
+# swap the Studio venv's ~3.7GB of CUDA .so for symlinks into the base venv (plain
+# C libs, Python-minor independent). A failed relink aborts the build.
+#
+# fetch+checkout FETCH_HEAD instead of `clone --branch` because the CI
+# pipeline passes a commit SHA as the ref (clone --branch only accepts
+# branch/tag names).
+RUN set -eux \
+ && case "${TARGETARCH:-amd64}" in \
+        amd64|arm64) TORCH_FAMILY="cu128" ;; \
+        *) echo "ERROR: unsupported TARGETARCH=${TARGETARCH}" >&2; exit 1 ;; \
+    esac \
+ && mkdir -p "${UNSLOTH_STUDIO_HOME}" \
+ && ln -s /opt/unsloth/llama.cpp "${UNSLOTH_STUDIO_HOME}/llama.cpp" \
+ && touch /opt/unsloth/llama.cpp/.unsloth-studio-owned \
+ && git init -q "${UNSLOTH_STUDIO_HOME}/src" \
+ && cd "${UNSLOTH_STUDIO_HOME}/src" \
+ && git remote add origin https://github.com/unslothai/unsloth \
+ && git fetch -q --depth 1 origin "${UNSLOTH_STUDIO_REF}" \
+ && git checkout -q FETCH_HEAD \
+ && UNSLOTH_STUDIO_HOME="${UNSLOTH_STUDIO_HOME}" \
+    UNSLOTH_TORCH_INDEX_FAMILY="${TORCH_FAMILY}" \
+    UNSLOTH_PYTHON=3.12 \
+    bash install.sh --local \
+    # Fail loud if the Studio venv torch missed the pinned CUDA family (an
+    # install.sh that ignores UNSLOTH_TORCH_INDEX_FAMILY falls back to
+    # nvidia-smi probing, which cannot work at build time and lands on cu126
+    # wheels with no sm_100/sm_120 kernels). metadata check only: importing
+    # torch needs native libs, which QEMU arm64 builds cannot load.
+ && "${UNSLOTH_STUDIO_HOME}/unsloth_studio/bin/python" -c "import sys; from importlib.metadata import version; assert sys.version_info[:2] == (3, 12), 'Studio venv python %d.%d is not 3.12 (UNSLOTH_PYTHON pin ignored) -- CUDA dedup below depends on it' % sys.version_info[:2]; v = version('torch'); assert v.endswith('+${TORCH_FAMILY}'), 'Studio venv torch ' + v + ' does not match ${TORCH_FAMILY}'; print('Studio venv python %d.%d' % sys.version_info[:2], 'torch', v)" \
+    # setup.sh may relink the root llama-quantize into build/bin; prove the
+    # relinked quantizer still resolves its libraries, or GGUF export breaks
+    # at runtime with "No working quantizer found". Content check, not rc:
+    # llama-quantize exits nonzero on --help, while a loader failure prints
+    # "error while loading shared libraries" and no usage text.
+ && { "${UNSLOTH_STUDIO_HOME}/llama.cpp/llama-quantize" --help 2>&1 || true; } | grep -q "usage" \
+ && rm -rf "${UNSLOTH_STUDIO_HOME}/src/.git" \
+          "${UNSLOTH_STUDIO_HOME}/src/studio/frontend/node_modules" \
+          /root/.cache \
+ && if [ "${TARGETARCH:-amd64}" = "arm64" ]; then \
+        for NVRTC_DIR in "${UNSLOTH_STUDIO_HOME}"/unsloth_studio/lib/python*/site-packages/nvidia/cuda_nvrtc/lib; do \
+            if [ -f "${NVRTC_DIR}/libnvrtc.so.12" ]; then \
+                { mv "${NVRTC_DIR}/libnvrtc.so.12" "${NVRTC_DIR}/libnvrtc.so.12.cu128.orig" \
+                  && ln -s /usr/local/cuda-13.0/lib64/libnvrtc.so.13 "${NVRTC_DIR}/libnvrtc.so.12" && readlink -e "${NVRTC_DIR}/libnvrtc.so.12" >/dev/null; } || { echo "ERROR: NVRTC cu13 swap failed in ${NVRTC_DIR}" >&2; exit 1; }; \
+            fi; \
+        done; \
+    fi \
+ && BASE_NV=/opt/unsloth-venv/lib/python3.12/site-packages/nvidia \
+ && STU_NV="${UNSLOTH_STUDIO_HOME}/unsloth_studio/lib/python3.12/site-packages/nvidia" \
+ && if [ ! -d "${STU_NV}" ] || [ ! -d "${BASE_NV}" ]; then \
+        echo ">> nvidia dir missing (STU=${STU_NV} BASE=${BASE_NV}); skipping CUDA dedup"; \
+    else \
+        find "${UNSLOTH_STUDIO_HOME}/unsloth_studio" -name '*.a' -delete; \
+        rm -f "${STU_NV}/nvshmem/lib/libnvshmem_device.bc"; \
+        for c in cudnn cublas cusparselt nccl cusolver cusparse cufft curand nvjitlink cuda_cupti nvshmem npp; do \
+            b="${BASE_NV}/${c}/lib"; s="${STU_NV}/${c}/lib"; \
+            { [ -d "$b" ] && [ -d "$s" ]; } || { echo ">> skip ${c} (dir missing)"; continue; }; \
+            if [ "${c}" = "npp" ]; then \
+                { rm -rf "$s" && ln -s "$b" "$s" && readlink -e "$s" >/dev/null; } || { echo "ERROR: CUDA dedup relink failed for ${c}" >&2; exit 1; }; \
+                echo ">> deduped npp -> base (pruned)"; \
+            elif [ "$(cd "$s" && ls | sort | tr '\n' ' ')" = "$(cd "$b" && ls | sort | tr '\n' ' ')" ]; then \
+                { rm -rf "$s" && ln -s "$b" "$s" && readlink -e "$s" >/dev/null; } || { echo "ERROR: CUDA dedup relink failed for ${c}" >&2; exit 1; }; \
+                echo ">> deduped ${c} -> base"; \
+            else \
+                echo ">> skip ${c} (file set differs base vs studio)"; \
+            fi; \
+        done; \
+        echo "studio venv size after dedup:"; du -sh "${UNSLOTH_STUDIO_HOME}/unsloth_studio"; \
+    fi
+
+COPY supervisord.conf /etc/supervisor/supervisord.conf
+# Executables are copied with --chmod=0755 (needs BuildKit, which the
+# documented `docker buildx build` invocation uses) instead of a follow-up
+# `RUN chmod +x`: one layer fewer, and the scripts are not stored a second
+# time just to flip their mode bits.
+COPY --chmod=0755 studio_launch.sh /usr/local/bin/unsloth-studio-launch
+# In-place updaters (no image pull):
+#   unsloth-studio-update  refreshes the Studio packages (backend + baked
+#                          frontend) and restarts the service.
+#   unsloth-llama-update   swaps the baked llama.cpp prebuilt to the latest
+#                          release (the same swap the in-app banner performs).
+COPY --chmod=0755 unsloth_studio_update.sh /usr/local/bin/unsloth-studio-update
+COPY --chmod=0755 unsloth_llama_update.sh /usr/local/bin/unsloth-llama-update
+# unsloth-llama-update reuses the build-time fetcher (redirect-based, no GitHub
+# API, so it is not rate-limited; deterministic portable bundle that runs on CPU
+# and every supported GPU) rather than the host-probing installer.
+COPY fetch_llama_prebuilt.py /usr/local/lib/unsloth/fetch_llama_prebuilt.py
+# Optional public Cloudflare tunnel for JupyterLab (UNSLOTH_JUPYTER_CLOUDFLARE=1,
+# or `unsloth-jupyter-tunnel --force`); supervisord runs it as jupyter-cloudflare.
+COPY --chmod=0755 unsloth_jupyter_tunnel.sh /usr/local/bin/unsloth-jupyter-tunnel
+
+# Studio web UI, JupyterLab, sshd. All bind 0.0.0.0 inside the container's
+# network namespace; the operator publishes them explicitly with -p.
+EXPOSE 8000 8888 22
+
+# The base ENTRYPOINT (unsloth-entrypoint) still runs its GPU pre-flight
+# first, then hands off to the service launcher.
+CMD ["/usr/local/bin/unsloth-studio-launch"]
diff --git a/docker/build.sh b/docker/build.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# Build the unsloth-blackwell image on this B200 host (or any Linux host with Docker).
+# The build host's GPU is NOT used -- nvcc cross-compiles for sm_100 + sm_120.
+#
+# Usage:
+#   ./build.sh                 # builds unsloth-blackwell:latest pinned to unsloth main
+#   TAG=2026.05.1 ./build.sh   # custom tag
+#   UNSLOTH_REF=v2026.5.6 UNSLOTH_ZOO_REF=v2026.5.4 ./build.sh   # pin git refs
+set -euo pipefail
+
+cd "$(dirname "$0")"
+
+: "${IMAGE_NAME:=unsloth-blackwell}"   # each default applies only when the variable is unset or empty
+: "${TAG:=latest}"
+: "${CUDA_VERSION:=12.8.1}"
+: "${UBUNTU_VERSION:=24.04}"
+: "${PYTHON_VERSION:=3.12}"
+: "${UNSLOTH_REF:=main}"               # git refs forwarded to the build as build-args
+: "${UNSLOTH_ZOO_REF:=main}"
+
+# llama.cpp prebuilt: default to the newest unslothai/llama.cpp release, resolved
+# here to a concrete tag so the build-arg changes only when upstream publishes a
+# new release (correct Docker layer caching) and the build stays reproducible.
+# Pin it explicitly for a frozen build: LLAMA_PREBUILT_TAG=b9596-mix-e6f2453 ./build.sh
+resolve_latest_llama_tag() {  # prints the tag from the releases/latest redirect target, or nothing if the final URL is not a /releases/tag/ URL
+    curl -fsSL --connect-timeout 10 --max-time 30 -o /dev/null -w '%{url_effective}' \
+        "https://github.com/unslothai/llama.cpp/releases/latest" 2>/dev/null \
+        | sed -n 's#.*/releases/tag/##p'  # time-bounded above: a stalled network must not hang the build
+}
+if [[ -z "${LLAMA_PREBUILT_TAG:-}" ]]; then
+    # No explicit pin: try the lookup, and treat an empty answer as "unresolved".
+    LLAMA_PREBUILT_TAG="$(resolve_latest_llama_tag || true)"
+    case "$LLAMA_PREBUILT_TAG" in
+        "") LLAMA_PREBUILT_TAG="latest"
+            echo "Could not resolve latest llama.cpp tag here; passing 'latest' (resolved inside the build)" ;;
+        *)  echo "Resolved latest llama.cpp release: ${LLAMA_PREBUILT_TAG}" ;;
+    esac
+fi
+
+printf '%s\n' "Building ${IMAGE_NAME}:${TAG}" \
+    "  CUDA           ${CUDA_VERSION}  Ubuntu ${UBUNTU_VERSION}  Python ${PYTHON_VERSION}" \
+    "  unsloth        @${UNSLOTH_REF}" \
+    "  unsloth-zoo    @${UNSLOTH_ZOO_REF}" \
+    "  llama.cpp      ${LLAMA_PREBUILT_TAG}" \
+    "  arch list      8.0;8.6;8.9;9.0;10.0;12.0+PTX" \
+    ""  # trailing blank line separates the banner from the build output
+
+DOCKER_BUILDKIT=1 docker build \
+    --progress=plain \
+    --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
+    --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
+    --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
+    --build-arg "UNSLOTH_REF=${UNSLOTH_REF}" \
+    --build-arg "UNSLOTH_ZOO_REF=${UNSLOTH_ZOO_REF}" \
+    --build-arg "LLAMA_PREBUILT_TAG=${LLAMA_PREBUILT_TAG}" \
+    --tag "${IMAGE_NAME}:${TAG}" \
+    .
+
+printf '%s\n' "" \
+    "Built ${IMAGE_NAME}:${TAG}" \
+    "" \
+    "Smoke test on this host (B200, sm_100):" \
+    "  docker run --rm --gpus all ${IMAGE_NAME}:${TAG} python /workspace/smoke_test.py" \
+    "" \
+    "Smoke test on an RTX 5090 host (sm_120):" \
+    "  docker pull ${IMAGE_NAME}:${TAG}   # or load .tar" \
+    "  docker run --rm --gpus all ${IMAGE_NAME}:${TAG} python /workspace/smoke_test.py"