Skip to content

Fix Studio custom folders on Linux external drives (#6799) #9261

Fix Studio custom folders on Linux external drives (#6799)

Fix Studio custom folders on Linux external drives (#6799) #9261

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
# Three end-to-end smoke jobs that boot a freshly-installed Studio and
# exercise the surfaces real users hit through the OpenAI / Anthropic
# SDKs and curl, on the FREE windows-latest runner. Each job picks the
# smallest model that exercises the behaviour under test, primes
# HF_HOME via actions/cache, and shares the install.ps1 --local
# --no-torch bootstrap.
#
# 1. OpenAI, Anthropic API tests
# gemma-3-270m-it UD-Q4_K_XL (~254 MiB).
# 2. Tool calling Tests
# Qwen3.5-2B UD-Q4_K_XL (~1.28 GiB).
# 3. JSON, images
# Qwen3-VL-2B-Instruct UD-IQ2_XXS + mmproj-F16 (~1.4 GiB total).
# Within the 14 GB windows-latest SSD budget.
name: Windows Studio GGUF CI
on:
  pull_request:
    # Run on PRs that touch Studio, the installer, or any script the steps
    # in this workflow execute directly.
    paths:
      - 'studio/**'
      - 'unsloth/**'
      - 'unsloth_cli/**'
      - 'install.ps1'
      - 'pyproject.toml'
      - 'tests/studio_setup_ps1/**'
      # Invoked by the PowerShell unit-test, uninstall-test and HF download
      # steps; without these entries a PR that only edits them would skip
      # the workflow that actually runs them.
      - 'tests/studio/*.ps1'
      - 'scripts/uninstall.ps1'
      - '.github/scripts/hf-download-with-retry.sh'
      - '.github/workflows/studio-windows-inference-smoke.yml'
  push:
    branches: [main, pip]
  workflow_dispatch:
# One in-flight run per workflow + ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Least privilege: the jobs only need to read the repository.
permissions:
  contents: read
jobs:
# ─────────────────────────────────────────────────────────────────────
# Job 1: OpenAI, Anthropic API tests
# ─────────────────────────────────────────────────────────────────────
openai-anthropic:
name: OpenAI, Anthropic API tests
runs-on: windows-latest
timeout-minutes: 30
defaults:
run:
shell: bash
env:
GGUF_REPO: unsloth/gemma-3-270m-it-GGUF
GGUF_VARIANT: UD-Q4_K_XL
GGUF_FILE: gemma-3-270m-it-UD-Q4_K_XL.gguf
STUDIO_PORT: '18888'
HF_HOME: ${{ github.workspace }}/hf-cache
# Force UTF-8 for stdio (Windows defaults to cp1252; hf
# download / Studio CLI print "✓" checkmarks and crash
# otherwise).
PYTHONIOENCODING: utf-8
PYTHONUTF8: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
# Fast GPU-free gate: parse install.ps1 + setup.ps1 and run the PowerShell
# unit tests (CUDA-toolkit + torch-flavor helpers) before the heavy GGUF smoke.
- name: PowerShell installer unit tests
  shell: pwsh
  run: |
    # Syntax-check the installer scripts first; any parse error fails the step.
    foreach ($f in @('install.ps1', 'studio/setup.ps1')) {
      $errs = $null
      [void][System.Management.Automation.Language.Parser]::ParseFile(
        (Resolve-Path $f).Path, [ref]$null, [ref]$errs)
      if ($errs) { $errs | ForEach-Object { $_.ToString() }; exit 1 }
      Write-Host "$f parsed with no errors"
    }
    # The pwsh step wrapper only inspects $LASTEXITCODE once, after the final
    # command. Check every child process explicitly so an early failing test
    # is not masked by a later passing one.
    foreach ($t in @(
      'tests/studio/test_resolve_cuda_toolkit.ps1',
      'tests/studio/test_torch_flavor.ps1',
      'tests/studio/test_node_decision.ps1',
      'tests/studio/test_node_probe_guard.ps1'
    )) {
      pwsh -NoProfile -File $t
      if ($LASTEXITCODE -ne 0) {
        Write-Host "::error::$t exited with code $LASTEXITCODE"
        exit $LASTEXITCODE
      }
    }
# uninstall.ps1: native uninstall must keep the shared unsloth.ico while a
# WSL shortcut still references it (dual install), else that shortcut blanks.
- name: uninstall.ps1 unit test (dual-install icon preserve)
shell: pwsh
run: |
$errs = $null
[void][System.Management.Automation.Language.Parser]::ParseFile(
(Resolve-Path scripts/uninstall.ps1).Path, [ref]$null, [ref]$errs)
if ($errs) { $errs | ForEach-Object { $_.ToString() }; exit 1 }
Write-Host "uninstall.ps1 parsed with no errors"
pwsh -NoProfile -File tests/studio/test_uninstall_dual_install_icon.ps1
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: '22'
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
# Split restore + save (rather than the one-step actions/cache) so a
# transient restore-side failure does not kill the whole job. v5 has a
# known flake where it logs "Cache hit for: <key>" and then exits
# non-zero without actually extracting the archive (see
# actions/cache#1621 and github community discussion #163260).
# continue-on-error on restore masks that failure so the Prime step
# below can re-download from HF and the job keeps running. Save then
# populates the cache key on a real miss only; cache keys are
# immutable, so a corrupted cached entry persists until the version
# suffix (currently -v2) on the key below is bumped.
- name: Restore HF_HOME cache for ${{ env.GGUF_REPO }}
id: cache-hf
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
continue-on-error: true
with:
path: hf-cache
key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2
- name: Prime HF_HOME with the GGUF
id: prime-hf
# Run on a real cache miss AND on the silent-restore-failure mode
# described above (outcome != success).
if: steps.cache-hf.outputs.cache-hit != 'true' || steps.cache-hf.outcome != 'success'
env:
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
run: |
python -m pip install --upgrade huggingface_hub
mkdir -p hf-cache
bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE"
bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf
- name: Save HF_HOME cache for ${{ env.GGUF_REPO }}
# Only write a fresh cache entry when we actually rebuilt the
# directory (Prime ran and succeeded). Skipping when Prime is
# skipped avoids "already exists" save warnings on the happy path.
if: always() && steps.prime-hf.outcome == 'success'
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: hf-cache
key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2
- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
shell: pwsh
# See studio-windows-update-smoke.yml for the full rationale.
# tl;dr: setup.ps1 needs npm >=11 to skip a 35 s winget Node
# reinstall, and Defender's real-time scan dominates the
# frontend / uv-pip-extract steps.
run: |
$ProgressPreference = 'SilentlyContinue'
Write-Host "npm version before upgrade: $(npm -v)"
npm install -g 'npm@^11' 2>&1 | Out-Host
Write-Host "npm version after upgrade: $(npm -v)"
# NOTE: do NOT pre-create these directories. See
# studio-windows-update-smoke.yml for the full rationale --
# creating an empty studio/frontend/dist trips setup.ps1's
# mtime-based staleness check into "frontend up to date, skip
# rebuild" and Studio boots with an empty dist directory.
# Add-MpPreference accepts paths that do not yet exist.
foreach ($p in @(
"$env:USERPROFILE\.unsloth",
"$env:USERPROFILE\AppData\Local\uv",
"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
"$env:GITHUB_WORKSPACE\studio\frontend\dist"
)) {
try {
Add-MpPreference -ExclusionPath $p -ErrorAction Stop
Write-Host "Defender exclusion added: $p"
} catch {
Write-Host "Defender exclusion skipped ($($_.Exception.Message)): $p"
}
}
- name: Install Studio (--local, --no-torch)
shell: pwsh
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
run: |
New-Item -ItemType Directory -Force -Path logs | Out-Null
# *>&1 captures Write-Host (Information stream) output;
# plain 2>&1 does not. setup.ps1 emits "prebuilt installed
# and validated" via Write-Host, and we grep for that.
$ProgressPreference = 'SilentlyContinue'
& ./install.ps1 --local --no-torch *>&1 | Tee-Object -FilePath logs/install.log
- name: Assert install.ps1 used the Windows llama.cpp prebuilt
run: |
# Filesystem check; setup.ps1's stream output isn't captured.
LLAMA_DIR=~/.unsloth/llama.cpp
INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
if grep -q "falling back to source build" logs/install.log; then
echo "::error::install.ps1 fell back to source-build llama.cpp on Windows."
grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
exit 1
fi
if [ ! -f "$INFO" ]; then
echo "::error::no UNSLOTH_PREBUILT_INFO.json at $INFO."
ls -la "$LLAMA_DIR" || true
exit 1
fi
if [ ! -f "$BIN" ]; then
echo "::error::no llama-server.exe at $BIN."
ls -la "$LLAMA_DIR/build/bin" || true
exit 1
fi
echo "install.ps1 installed the Windows prebuilt llama.cpp:"
cat "$INFO"
- name: Add Studio shim to GITHUB_PATH
run: |
SHIM_DIR=~/.unsloth/studio/bin
if [ ! -f "$SHIM_DIR/unsloth.exe" ]; then
echo "::error::unsloth.exe shim not found at $SHIM_DIR"
ls -la ~/.unsloth/studio/ || true
exit 1
fi
cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"
- name: Install OpenAI + Anthropic Python SDKs
run: python -m pip install 'openai>=1.50' 'anthropic>=0.40'
- name: Reset auth + boot Studio (API-only)
run: |
unsloth studio reset-password
mkdir -p logs
UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
> logs/studio.log 2>&1 &
echo "STUDIO_PID=$!" >> "$GITHUB_ENV"
- name: Wait for /api/health
  run: |
    # Poll once per second for up to 180 s. The step shell runs with
    # `-e`, so the jq check lives in the `if` condition: an HTTP 200 whose
    # status is not yet "healthy" is retried instead of aborting the step.
    for i in $(seq 1 180); do
      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json \
          && jq -e '.status == "healthy"' /tmp/health.json; then
        exit 0
      fi
      sleep 1
    done
    # Timed out: surface the tail of the Studio log for diagnosis.
    echo "Studio did not become healthy in 180s"
    tail -200 logs/studio.log
    exit 1
- name: Password rotation (old must fail, new must work)
run: |
OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
NEW="CIRotated-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
echo "::add-mask::$OLD"
echo "::add-mask::$NEW"
OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
-H 'content-type: application/json' \
-d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
[ -n "$OLD_TOKEN" ] && [ "$OLD_TOKEN" != "null" ] || { echo "bootstrap login failed"; exit 1; }
curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
-H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
-d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
OLD_STATUS=$(curl -s -o /dev/null -w '%{http_code}' \
-X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
-H 'content-type: application/json' \
-d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}")
if [ "$OLD_STATUS" != "401" ]; then
echo "::error::Login with old password returned $OLD_STATUS, expected 401"
exit 1
fi
NEW_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
-H 'content-type: application/json' \
-d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
[ -n "$NEW_TOKEN" ] && [ "$NEW_TOKEN" != "null" ] || { echo "new login failed"; exit 1; }
echo "TOKEN=$NEW_TOKEN" >> "$GITHUB_ENV"
echo "password rotation OK (old=401, new=200)"
- name: Load the GGUF (HF repo + variant, served from HF_HOME cache)
run: |
# Retry the load step a few times so a transient TCP RST during
# llama-server warm-up (Windows runner image churn,
# windows-latest -> windows-2025-vs2026 rollout) doesn't fail
# the whole job. The Studio backend's _wait_for_health now
# catches httpx.ReadError too; this retry layer covers the
# cases the backend can't recover from on its own.
LOAD_OK=0
for attempt in 1 2 3; do
HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
-X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
-H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
--max-time 600 \
-d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}")
if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:"
cat /tmp/load.json || true
sleep 10
done
[ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
jq '{status, display_name, is_gguf, context_length}' /tmp/load.json
- name: Multi-turn determinism via OpenAI + Anthropic SDKs
  env:
    # Derived from the job-level STUDIO_PORT so the port is defined once.
    BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }}
  run: |
    python - <<'PY'
    import os
    from openai import OpenAI
    from anthropic import Anthropic

    BASE = os.environ["BASE_URL"]
    # TOKEN is exported to GITHUB_ENV by the password-rotation step.
    KEY = os.environ["TOKEN"]
    SEED = 3407
    PROMPTS = [
        "What is 1+1?",
        "What did I ask before?",
        "What is the capital of France?",
        "Repeat the city name",
    ]


    def run_openai():
        """Run the four-turn conversation through the OpenAI SDK.

        Returns the list of assistant replies, one per prompt. Each reply is
        appended to the history so later turns see the earlier ones.
        """
        client = OpenAI(base_url = f"{BASE}/v1", api_key = KEY)
        history, replies = [], []
        for prompt in PROMPTS:
            history.append({"role": "user", "content": prompt})
            resp = client.chat.completions.create(
                model = "default",
                messages = history,
                temperature = 0.0,
                max_tokens = 80,
                seed = SEED,
                extra_body = {"enable_thinking": False},
            )
            text = resp.choices[0].message.content or ""
            replies.append(text)
            history.append({"role": "assistant", "content": text})
        return replies


    def run_anthropic():
        """Run the same four-turn conversation through the Anthropic SDK.

        The Studio token is sent as a Bearer header; the SDK's own api_key
        is a placeholder. Returns the list of assistant replies.
        """
        client = Anthropic(
            base_url = BASE,
            api_key = "unused",
            default_headers = {"Authorization": f"Bearer {KEY}"},
        )
        history, replies = [], []
        for prompt in PROMPTS:
            history.append({"role": "user", "content": prompt})
            msg = client.messages.create(
                model = "default",
                max_tokens = 80,
                messages = history,
                temperature = 0.0,
                extra_body = {"seed": SEED, "enable_thinking": False},
            )
            # Concatenate only the text blocks of the response.
            text = "".join(b.text for b in msg.content if getattr(b, "type", None) == "text")
            replies.append(text)
            history.append({"role": "assistant", "content": text})
        return replies


    # Run each SDK twice and require identical (stripped) replies per turn.
    for label, runner in (("openai", run_openai), ("anthropic", run_anthropic)):
        first = runner()
        second = runner()
        for i, (a, b) in enumerate(zip(first, second), start = 1):
            print(f"[{label} turn {i}] {a!r}")
            assert a, f"{label}: empty turn {i} response"
            # Compare on stripped content: llama-server can vary
            # trailing whitespace (specifically a final '\n') between
            # otherwise-identical greedy runs depending on the
            # batch-flush boundary at which the stream is closed. The
            # generated tokens are identical; only the trailing
            # whitespace differs. Keep the raw repr in the failure
            # message so a real divergence is still legible.
            assert a.strip() == b.strip(), (
                f"{label} non-deterministic at turn {i} with temperature=0.0:\n"
                f" run1: {a!r}\n run2: {b!r}"
            )
        # Light grounding checks on the first run's transcript.
        joined = " ".join(first).lower()
        assert "1" in first[0], f"{label}: turn-1 answer should contain '1', got {first[0]!r}"
        assert "paris" in joined, f"{label}: expected 'paris' somewhere in the four-turn transcript: {first}"
        print(f"[{label}] OK -- 4 turns, run1 == run2, history grounded")
    PY
- name: Stop Studio
if: always()
# Run as cmd so we are not running through the Git Bash shell;
# Git Bash on windows-latest has been observed to exit 143
# (SIGTERM) from any inline kill/sleep block, masking a green
# test run. The runner reclaims the Studio child process at
# job end either way, so just emit a marker and exit 0.
shell: cmd
run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)
- name: Collect llama-server logs
if: always()
# A transient Windows DLL-init crash (0xC0000142) in this diagnostic
# copy must not fail an otherwise-green job.
continue-on-error: true
shell: bash
# Copy llama-server's own stdout/stderr (teed by Studio under
# ~/.unsloth/studio/logs/llama-server/) into the workspace so
# upload-artifact can pick it up. Crucial for diagnosing a
# subprocess crash where Studio's traceback only shows the
# symptom (httpx ReadError) but not the cause.
run: |
mkdir -p logs/llama-server
cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null || \
echo "no llama-server logs to collect"
- name: Upload logs
if: always()
# Diagnostic only: a transient artifact-service drop must not fail a green job.
continue-on-error: true
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: windows-openai-anthropic-log
path: |
logs/studio.log
logs/install.log
logs/llama-server/*.log
retention-days: 7
# ─────────────────────────────────────────────────────────────────────
# Job 2: Tool calling Tests
# ─────────────────────────────────────────────────────────────────────
tool-calling:
name: Tool calling Tests
runs-on: windows-latest
timeout-minutes: 30
defaults:
run:
shell: bash
env:
# Tool calling is the highest-volume GGUF in this workflow
# (Qwen3.5-2B at Q4_K_XL = ~1.28 GiB). The previous HF_HOME
# cache stored xet chunks + blobs + snapshots = ~4.7 GiB --
# 3.7x file-size inflation, dominating the post-step upload
# (211 s on first run; subsequent runs hit the cache, but the
# one-time cost recurs every time the cache key bumps). Use
# main's `--local-dir gguf-cache` pattern: cache the flat .gguf
# only, pass an absolute path to Studio's /api/inference/load.
# The OpenAI/Anth and JSON+images jobs still cover the
# gguf_variant resolution path.
GGUF_REPO: unsloth/Qwen3.5-2B-GGUF
GGUF_FILE: Qwen3.5-2B-UD-Q4_K_XL.gguf
STUDIO_PORT: '18898'
# Force UTF-8 for stdio (Windows defaults to cp1252; hf
# download / Studio CLI print "✓" checkmarks and crash
# otherwise).
PYTHONIOENCODING: utf-8
PYTHONUTF8: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: '22'
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
# Split restore + save so a transient restore-side failure does not
# kill the whole job. See the matching block in the openai-anthropic
# job above for the full rationale (actions/cache#1621).
- name: Restore GGUF model cache
id: cache-gguf
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
continue-on-error: true
with:
path: gguf-cache
key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1
- name: Download GGUF if cache miss
id: download-gguf
if: steps.cache-gguf.outputs.cache-hit != 'true' || steps.cache-gguf.outcome != 'success'
env:
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
run: |
python -m pip install --upgrade huggingface_hub
mkdir -p gguf-cache
bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE" gguf-cache
- name: Save GGUF model cache
if: always() && steps.download-gguf.outcome == 'success'
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: gguf-cache
key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1
- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
shell: pwsh
# See studio-windows-update-smoke.yml for the full rationale.
# tl;dr: setup.ps1 needs npm >=11 to skip a 35 s winget Node
# reinstall, and Defender's real-time scan dominates the
# frontend / uv-pip-extract steps.
run: |
$ProgressPreference = 'SilentlyContinue'
Write-Host "npm version before upgrade: $(npm -v)"
npm install -g 'npm@^11' 2>&1 | Out-Host
Write-Host "npm version after upgrade: $(npm -v)"
# NOTE: do NOT pre-create these directories. See
# studio-windows-update-smoke.yml for the full rationale --
# creating an empty studio/frontend/dist trips setup.ps1's
# mtime-based staleness check into "frontend up to date, skip
# rebuild" and Studio boots with an empty dist directory.
# Add-MpPreference accepts paths that do not yet exist.
foreach ($p in @(
"$env:USERPROFILE\.unsloth",
"$env:USERPROFILE\AppData\Local\uv",
"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
"$env:GITHUB_WORKSPACE\studio\frontend\dist"
)) {
try {
Add-MpPreference -ExclusionPath $p -ErrorAction Stop
Write-Host "Defender exclusion added: $p"
} catch {
Write-Host "Defender exclusion skipped ($($_.Exception.Message)): $p"
}
}
- name: Install Studio (--local, --no-torch)
shell: pwsh
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
run: |
New-Item -ItemType Directory -Force -Path logs | Out-Null
# *>&1 captures Write-Host (Information stream) output;
# plain 2>&1 does not. setup.ps1 emits "prebuilt installed
# and validated" via Write-Host, and we grep for that.
$ProgressPreference = 'SilentlyContinue'
& ./install.ps1 --local --no-torch *>&1 | Tee-Object -FilePath logs/install.log
- name: Assert install.ps1 used the Windows llama.cpp prebuilt
run: |
# Filesystem check; setup.ps1's stream output isn't captured.
LLAMA_DIR=~/.unsloth/llama.cpp
INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
if grep -q "falling back to source build" logs/install.log; then
echo "::error::install.ps1 fell back to source-build llama.cpp on Windows."
grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
exit 1
fi
if [ ! -f "$INFO" ]; then
echo "::error::no UNSLOTH_PREBUILT_INFO.json at $INFO."
ls -la "$LLAMA_DIR" || true
exit 1
fi
if [ ! -f "$BIN" ]; then
echo "::error::no llama-server.exe at $BIN."
ls -la "$LLAMA_DIR/build/bin" || true
exit 1
fi
echo "install.ps1 installed the Windows prebuilt llama.cpp:"
cat "$INFO"
- name: Add Studio shim to GITHUB_PATH
run: |
SHIM_DIR=~/.unsloth/studio/bin
if [ ! -f "$SHIM_DIR/unsloth.exe" ]; then
echo "::error::unsloth.exe shim not found at $SHIM_DIR"
ls -la ~/.unsloth/studio/ || true
exit 1
fi
cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"
- name: Reset auth + boot Studio (API-only, default tool policy)
run: |
unsloth studio reset-password
mkdir -p logs
UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
> logs/studio.log 2>&1 &
echo "STUDIO_PID=$!" >> "$GITHUB_ENV"
- name: Wait for /api/health, log in, change password, load model
  run: |
    # Poll once per second for up to 180 s until Studio reports healthy.
    HEALTHY=0
    for i in $(seq 1 180); do
      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
        if jq -e '.status == "healthy"' /tmp/health.json; then HEALTHY=1; break; fi
      fi
      sleep 1
    done
    if [ "$HEALTHY" != "1" ]; then
      # Timed out: surface the tail of the Studio log for diagnosis.
      echo "::error::Studio did not become healthy in 180s"
      tail -200 logs/studio.log || true
      exit 1
    fi
    # Rotate the bootstrap password, then log in with the new one.
    OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
    NEW="CITool-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
    echo "::add-mask::$OLD"
    echo "::add-mask::$NEW"
    OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
      -H 'content-type: application/json' \
      -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
    # jq prints "null" when the field is missing; fail here rather than
    # carrying a bogus token into later requests.
    [ -n "$OLD_TOKEN" ] && [ "$OLD_TOKEN" != "null" ] || { echo "::error::bootstrap login failed"; exit 1; }
    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
      -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
      -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
    TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
      -H 'content-type: application/json' \
      -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
    [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ] || { echo "::error::login with rotated password failed"; exit 1; }
    # Later steps read the token from the environment as API_KEY.
    echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"
    # GITHUB_WORKSPACE on windows-latest is a Windows path with
    # backslashes ("D:\a\unsloth\unsloth"). Bash handles it as a
    # raw string, but we cannot embed `\a` etc. in JSON without
    # JSON-string-escaping every backslash. Replace `\` with `/`
    # via bash parameter expansion -- pathlib.Path on Windows
    # accepts forward slashes natively, so Studio's loader sees
    # a normal path.
    GGUF_PATH="${GITHUB_WORKSPACE//\\//}/gguf-cache/${GGUF_FILE}"
    ls -lh "$GGUF_PATH"
    # Retry: same rationale as the OpenAI/Anthropic job.
    LOAD_OK=0
    for attempt in 1 2 3; do
      HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
        -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
        -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
        --max-time 600 \
        -d "{\"model_path\":\"$GGUF_PATH\",\"is_lora\":false,\"max_seq_length\":2048}")
      if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
      echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:"
      cat /tmp/load.json || true
      sleep 10
    done
    [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
    jq '{status, display_name}' /tmp/load.json
- name: Tool calling, server-side tools, thinking on/off
  env:
    # Derived from the job-level STUDIO_PORT so the port is defined once.
    BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }}
  run: |
    python - <<'PY'
    import json
    import os
    import urllib.request

    BASE = os.environ["BASE_URL"]
    # API_KEY is exported to GITHUB_ENV by the login step.
    KEY = os.environ["API_KEY"]
    SEED = 3407
    # Same temperature shim as the Mac job. Small Qwen3.5-2B
    # quants can degenerate at temperature=0; a small non-zero
    # temperature with a fixed seed keeps the test deterministic
    # while escaping the trap.
    TEMP = 0.2


    def build_request(path, body):
        """Build an authenticated JSON POST request for `path` on Studio."""
        return urllib.request.Request(
            f"{BASE}{path}",
            data = json.dumps(body).encode(),
            method = "POST",
            headers = {
                "Authorization": f"Bearer {KEY}",
                "Content-Type": "application/json",
            },
        )


    def post(path, body, *, timeout = 240):
        """POST `body` as JSON and return (HTTP status, decoded JSON body)."""
        with urllib.request.urlopen(build_request(path, body), timeout = timeout) as resp:
            return resp.status, json.loads(resp.read().decode())


    def post_sse(path, body, *, timeout = 600):
        """POST `body` with stream=True and return the concatenated content.

        Reads the response line by line, keeps only `data: ` lines, stops at
        `[DONE]`, skips payloads that are not valid JSON, and joins every
        non-empty `choices[].delta.content` fragment.
        """
        body = {**body, "stream": True}
        parts = []
        with urllib.request.urlopen(build_request(path, body), timeout = timeout) as resp:
            for raw in resp:
                line = raw.decode().strip()
                if not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                for choice in chunk.get("choices", []):
                    delta = choice.get("delta", {}) or {}
                    if delta.get("content"):
                        parts.append(delta["content"])
        return "".join(parts)


    # ── 1. Standard OpenAI function calling ──────────────────────
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    status, data = post("/v1/chat/completions", {
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [weather_tool],
        "tool_choice": "required",
        "stream": False,
        "temperature": TEMP,
        "seed": SEED,
        "max_tokens": 600,
    })
    assert status == 200, f"tool call status {status}: {data}"
    choice = data["choices"][0]
    tool_calls = (choice.get("message") or {}).get("tool_calls") or []
    if tool_calls:
        tc = tool_calls[0]
        assert tc["function"]["name"] == "get_weather", (
            f"unexpected tool name: {tc['function']['name']!r}"
        )
        args = json.loads(tc["function"]["arguments"])
        assert args.get("city"), f"missing city arg: {args}"
        print(f"[tools] PASS function calling -> {tc['function']['name']}({args}) finish={choice.get('finish_reason')!r}")
    else:
        # A missing tool call is only a warning: the HTTP path worked.
        print(
            f"[tools] WARN function calling: no tool_calls (finish_reason="
            f"{choice.get('finish_reason')!r}); HTTP path OK, model output drift."
        )

    # ── 2. Server-side python tool ───────────────────────────────
    content = post_sse("/v1/chat/completions", {
        "messages": [{"role": "user", "content": "What is 123 * 456? Use the python tool to compute it and tell me the number."}],
        "enable_tools": True,
        "enabled_tools": ["python"],
        "session_id": "ci-tool-calling-py",
        "temperature": TEMP,
        "seed": SEED,
        "max_tokens": 600,
    })
    if "56088" in content or "56,088" in content:
        print(f"[tools] PASS python tool ({len(content)} chars, found 56088)")
    else:
        assert content, "python tool: SSE stream empty"
        print(
            f"[tools] WARN python tool: SSE OK ({len(content)} chars) but "
            f"model didn't return 56088 -- model output drift"
        )

    # ── 3. Server-side bash (terminal) tool ──────────────────────
    # On Windows the terminal tool resolves to the system shell
    # (cmd.exe wrapper) and `echo hello-bash-tool` works the same
    # way it does on POSIX. The model still has to choose to
    # invoke the tool; assert non-empty SSE if it doesn't.
    content = post_sse("/v1/chat/completions", {
        "messages": [{"role": "user", "content": "Use the terminal tool to run `echo hello-bash-tool` and tell me the exact output."}],
        "enable_tools": True,
        "enabled_tools": ["terminal"],
        "session_id": "ci-tool-calling-bash",
        "temperature": TEMP,
        "seed": SEED,
        "max_tokens": 600,
    })
    if "hello-bash-tool" in content:
        print(f"[tools] PASS terminal tool ({len(content)} chars)")
    else:
        assert content, "terminal tool: SSE stream empty"
        print(
            f"[tools] WARN terminal tool: SSE OK ({len(content)} chars) but "
            f"model didn't echo 'hello-bash-tool' -- model output drift"
        )

    # ── 4. Server-side web_search tool ───────────────────────────
    # DuckDuckGo can be flaky from CI runners; only assert that
    # the SSE stream opens and yields any data.
    try:
        content = post_sse("/v1/chat/completions", {
            "messages": [{"role": "user", "content": "Search the web for 'unsloth ai github' and summarise."}],
            "enable_tools": True,
            "enabled_tools": ["web_search"],
            "session_id": "ci-tool-calling-web",
            "temperature": TEMP,
            "seed": SEED,
            "max_tokens": 400,
        })
        print(f"[tools] PASS web_search stream ({len(content)} chars)")
    except Exception as exc:
        print(f"[tools] WARN web_search probe failed (non-blocking): {exc}")

    # ── 5. Thinking on / off ─────────────────────────────────────
    def thinking_call(enable):
        """Ask one short question with enable_thinking=`enable`.

        Returns the message content concatenated with reasoning_content
        (either may be missing and is then treated as empty).
        """
        status, data = post("/v1/chat/completions", {
            "messages": [{"role": "user", "content": "Briefly: is 17 prime?"}],
            "stream": False,
            "enable_thinking": enable,
            "temperature": TEMP,
            "seed": SEED,
            "max_tokens": 300,
        })
        assert status == 200, f"thinking call status {status}: {data}"
        msg = data["choices"][0]["message"]
        raw = (msg.get("content") or "") + (msg.get("reasoning_content") or "")
        return raw

    on_text = thinking_call(True)
    off_text = thinking_call(False)
    # Thinking-on is a soft check (warning only); thinking-off is enforced.
    had_think_on = ("<think>" in on_text) or len(on_text) > 80
    if not had_think_on:
        print(
            f"[tools] WARN enable_thinking=True produced no thinking signal: "
            f"{on_text[:200]!r}"
        )
    assert "<think>" not in off_text, (
        f"enable_thinking=False but <think> still present: {off_text!r}"
    )
    print(f"[tools] PASS thinking on/off (on={len(on_text)} chars, off={len(off_text)} chars)")
    PY
- name: Stop Studio
if: always()
# Run as cmd so we are not running through the Git Bash shell;
# Git Bash on windows-latest has been observed to exit 143
# (SIGTERM) from any inline kill/sleep block, masking a green
# test run. The runner reclaims the Studio child process at
# job end either way, so just emit a marker and exit 0.
# %STUDIO_PID% is cmd-style expansion of the STUDIO_PID environment
# variable; this step only prints it and never signals the process.
shell: cmd
run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)
- name: Collect llama-server logs
if: always()
# A transient Windows DLL-init crash (0xC0000142) in this diagnostic
# copy must not fail an otherwise-green job.
continue-on-error: true
shell: bash
# Copy llama-server's own stdout/stderr (teed by Studio under
# ~/.unsloth/studio/logs/llama-server/) into the workspace so
# upload-artifact can pick it up. Crucial for diagnosing a
# subprocess crash where Studio's traceback only shows the
# symptom (httpx ReadError) but not the cause.
# cp's stderr is discarded and the `|| echo` fallback prints a marker
# instead of failing when there is nothing to copy.
run: |
mkdir -p logs/llama-server
cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null || \
echo "no llama-server logs to collect"
- name: Upload logs
if: always()
# Diagnostic only: a transient artifact-service drop must not fail a green job.
continue-on-error: true
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
# Artifact bundles the Studio log, the installer log, and the
# llama-server logs copied by the previous step; kept for 7 days.
name: windows-tool-calling-log
path: |
logs/studio.log
logs/install.log
logs/llama-server/*.log
retention-days: 7
# ─────────────────────────────────────────────────────────────────────
# Job 3: JSON, images
# ─────────────────────────────────────────────────────────────────────
json-images:
name: JSON, images
runs-on: windows-latest
timeout-minutes: 35
# Every `run:` step uses bash unless it sets its own `shell:`
# (the pwsh and cmd steps below do).
defaults:
run:
shell: bash
env:
# Vision model under test. GGUF_REPO and GGUF_VARIANT are sent to
# /api/inference/load and, together with MMPROJ_FILE, form the
# HF cache key; GGUF_FILE and MMPROJ_FILE are the files downloaded
# when the cache has to be primed.
GGUF_REPO: unsloth/Qwen3-VL-2B-Instruct-GGUF
GGUF_VARIANT: UD-IQ2_XXS
GGUF_FILE: Qwen3-VL-2B-Instruct-UD-IQ2_XXS.gguf
MMPROJ_FILE: mmproj-F16.gguf
# Port Studio is started on (`-p "$STUDIO_PORT"`) and that the curl
# probes in this job target. Quoted so YAML keeps it a string.
STUDIO_PORT: '18899'
# Hugging Face cache root; points at the workspace directory that
# the cache restore/save steps operate on (`path: hf-cache`).
HF_HOME: ${{ github.workspace }}/hf-cache
# Force UTF-8 for stdio (Windows defaults to cp1252; hf
# download / Studio CLI print "✓" checkmarks and crash
# otherwise).
PYTHONIOENCODING: utf-8
PYTHONUTF8: '1'
steps:
# Actions are pinned to full commit SHAs; the trailing comment records
# the corresponding release tag.
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Do not leave the checkout token in the local git config.
persist-credentials: false
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: '22'
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
# Quoted so the version stays a string rather than a YAML float.
python-version: '3.12'
# Split restore + save so a transient restore-side failure does not
# kill the whole job. See the matching block in the tool-calling job
# for the full rationale (actions/cache#1621). This is the block that
# actually broke in run 25713577488: "Cache hit for: <key>" was
# logged, the step exited non-zero in ~0.3 s without extracting the
# 3.4 GiB archive, and steps 6-15 were skipped.
- name: Restore HF_HOME cache for ${{ env.GGUF_REPO }} (model + mmproj)
id: cache-hf
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
# A failed restore is tolerated; the prime step below then re-downloads.
continue-on-error: true
with:
path: hf-cache
# Key covers OS, repo, variant and mmproj file; the trailing -v2 is a
# manual suffix (NOTE(review): presumably bumped to invalidate old caches).
key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-${{ env.MMPROJ_FILE }}-v2
- name: Prime HF_HOME with the GGUF + mmproj
id: prime-hf
# Runs when the restore missed OR when the restore step itself failed.
if: steps.cache-hf.outputs.cache-hit != 'true' || steps.cache-hf.outcome != 'success'
env:
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
# Downloads the model file, the mmproj file, and a small
# tinyllamas/stories260K.gguf from ggml-org/models.
# NOTE(review): the purpose of the third download is not visible in
# this part of the workflow -- confirm it is still needed.
run: |
python -m pip install --upgrade huggingface_hub
mkdir -p hf-cache
bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE"
bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$MMPROJ_FILE"
bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf
- name: Save HF_HOME cache for ${{ env.GGUF_REPO }} (model + mmproj)
# Save only after a successful prime, even if a later step has failed;
# when the prime step was skipped (cache hit) nothing is saved.
if: always() && steps.prime-hf.outcome == 'success'
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: hf-cache
# Must stay identical to the restore key above.
key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-${{ env.MMPROJ_FILE }}-v2
- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
shell: pwsh
# See studio-windows-update-smoke.yml for the full rationale.
# tl;dr: setup.ps1 needs npm >=11 to skip a 35 s winget Node
# reinstall, and Defender's real-time scan dominates the
# frontend / uv-pip-extract steps.
# Each Defender exclusion is best-effort: a failure is caught, logged
# as "skipped" with the exception message, and the loop continues.
run: |
$ProgressPreference = 'SilentlyContinue'
Write-Host "npm version before upgrade: $(npm -v)"
npm install -g 'npm@^11' 2>&1 | Out-Host
Write-Host "npm version after upgrade: $(npm -v)"
# NOTE: do NOT pre-create these directories. See
# studio-windows-update-smoke.yml for the full rationale --
# creating an empty studio/frontend/dist trips setup.ps1's
# mtime-based staleness check into "frontend up to date, skip
# rebuild" and Studio boots with an empty dist directory.
# Add-MpPreference accepts paths that do not yet exist.
foreach ($p in @(
"$env:USERPROFILE\.unsloth",
"$env:USERPROFILE\AppData\Local\uv",
"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
"$env:GITHUB_WORKSPACE\studio\frontend\dist"
)) {
try {
Add-MpPreference -ExclusionPath $p -ErrorAction Stop
Write-Host "Defender exclusion added: $p"
} catch {
Write-Host "Defender exclusion skipped ($($_.Exception.Message)): $p"
}
}
- name: Install Studio (--local, --no-torch)
shell: pwsh
env:
# NOTE(review): presumably consumed by the installer for authenticated
# GitHub downloads -- confirm against install.ps1 / setup.ps1.
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
# Runs the repository's install.ps1 and tees all of its output streams
# into logs/install.log, which the next step greps and the upload step
# publishes.
run: |
New-Item -ItemType Directory -Force -Path logs | Out-Null
# *>&1 captures Write-Host (Information stream) output;
# plain 2>&1 does not. setup.ps1 emits "prebuilt installed
# and validated" via Write-Host, and we grep for that.
$ProgressPreference = 'SilentlyContinue'
& ./install.ps1 --local --no-torch *>&1 | Tee-Object -FilePath logs/install.log
- name: Assert install.ps1 used the Windows llama.cpp prebuilt
# Three independent failure conditions, each with its own ::error::
# annotation and a directory/log dump: the install log mentions a
# source-build fallback, the prebuilt info JSON is missing, or
# llama-server.exe is missing. On success the info JSON is printed.
run: |
# Filesystem check; setup.ps1's stream output isn't captured.
LLAMA_DIR=~/.unsloth/llama.cpp
INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
if grep -q "falling back to source build" logs/install.log; then
echo "::error::install.ps1 fell back to source-build llama.cpp on Windows."
grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
exit 1
fi
if [ ! -f "$INFO" ]; then
echo "::error::no UNSLOTH_PREBUILT_INFO.json at $INFO."
ls -la "$LLAMA_DIR" || true
exit 1
fi
if [ ! -f "$BIN" ]; then
echo "::error::no llama-server.exe at $BIN."
ls -la "$LLAMA_DIR/build/bin" || true
exit 1
fi
echo "install.ps1 installed the Windows prebuilt llama.cpp:"
cat "$INFO"
- name: Add Studio shim to GITHUB_PATH
  # Puts the installed `unsloth.exe` shim directory on PATH for all
  # later steps. The path is converted to Windows form with cygpath
  # before being appended to GITHUB_PATH. A missing shim fails the
  # step after listing the studio directory for diagnosis.
  run: |
    SHIM_DIR=~/.unsloth/studio/bin
    [ -f "$SHIM_DIR/unsloth.exe" ] || {
      echo "::error::unsloth.exe shim not found at $SHIM_DIR"
      ls -la ~/.unsloth/studio/ || true
      exit 1
    }
    cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"
- name: Install OpenAI + Anthropic Python SDKs
# Both SDKs are imported by the heredoc test in the
# "JSON schema decoding + image input" step.
run: python -m pip install 'openai>=1.50' 'anthropic>=0.40'
- name: Reset auth + boot Studio (API-only)
# Starts Studio in the background on 127.0.0.1:$STUDIO_PORT with
# UNSLOTH_API_ONLY=1, sending stdout+stderr to logs/studio.log, and
# exports the background PID as STUDIO_PID for later steps.
run: |
unsloth studio reset-password
mkdir -p logs
UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
> logs/studio.log 2>&1 &
echo "STUDIO_PID=$!" >> "$GITHUB_ENV"
- name: Wait for /api/health, log in, change password, load model
  # Four phases, in order:
  #   1. poll /api/health until Studio reports "healthy";
  #   2. log in with the bootstrap password and rotate it;
  #   3. log in again and export the bearer token as API_KEY;
  #   4. load the GGUF through /api/inference/load (up to 3 attempts).
  run: |
    # Up to 180 polls, one second apart.
    for i in $(seq 1 180); do
      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
        jq -e '.status == "healthy"' /tmp/health.json && break
      fi
      sleep 1
    done
    # Final verdict. On failure dump the tail of Studio's own log so
    # the cause is visible in the step output, matching the no-vs-cpu
    # job's health check.
    jq -e '.status == "healthy"' /tmp/health.json || {
      echo "::error::Studio did not report healthy after 180 polls"
      tail -200 logs/studio.log || true
      exit 1
    }
    OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
    NEW="CIJson-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
    # Keep both passwords out of the job log.
    echo "::add-mask::$OLD"
    echo "::add-mask::$NEW"
    OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
      -H 'content-type: application/json' \
      -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
      -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
      -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
    TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
      -H 'content-type: application/json' \
      -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
    # `jq -r` prints the literal string "null" when the field is absent.
    # Fail here instead of exporting a bogus API_KEY that would only
    # surface later as three misleading load failures.
    [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ] || { echo "::error::login returned no access_token"; exit 1; }
    echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"
    # Retry: same rationale as the OpenAI/Anthropic and Tool calling jobs.
    LOAD_OK=0
    for attempt in 1 2 3; do
      HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
        -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
        -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
        --max-time 900 \
        -d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}")
      if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
      echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:"
      cat /tmp/load.json || true
      sleep 10
    done
    [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
    jq '{status, display_name, is_vision}' /tmp/load.json
- name: JSON schema decoding + image input
  env:
    # Derived from the job-level STUDIO_PORT so the test client always
    # targets the port Studio was actually started on; a hard-coded
    # copy of the number would silently drift if the port changes.
    BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }}
  run: |
    python - <<'PY'
    import base64
    import json
    import os
    import urllib.request
    from openai import OpenAI
    from anthropic import Anthropic
    # BASE_URL comes from this step's env; API_KEY is the bearer token
    # exported through GITHUB_ENV by the login step.
    BASE = os.environ["BASE_URL"]
    KEY = os.environ["API_KEY"]
    # Fixed seed and temperature sent with every request below.
    SEED = 3407
    TEMP = 0.2
def post(path, body, *, timeout = 240):
    """POST ``body`` as JSON to ``BASE + path`` with the bearer ``KEY``.

    Args:
        path: URL path appended to ``BASE`` (e.g. "/v1/chat/completions").
        body: JSON-serialisable request payload.
        timeout: socket timeout in seconds passed to ``urlopen``.

    Returns:
        ``(status, payload)``. For a non-2xx response the HTTP status and
        the decoded error body are returned instead of raising, so the
        callers' ``assert status == 200, f"... {data}"`` checks can report
        what the server said. A non-JSON error body is wrapped as
        ``{"error": <raw text>}``.

    Raises:
        urllib.error.URLError (and other transport errors) when no HTTP
        response was received at all.
    """
    import urllib.error  # local import: keeps this block self-contained

    req = urllib.request.Request(
        f"{BASE}{path}",
        data = json.dumps(body).encode(),
        method = "POST",
        headers = {
            "Authorization": f"Bearer {KEY}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout = timeout) as resp:
            return resp.status, json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        # urlopen raises on 4xx/5xx; without this branch the status
        # assertions in the callers could never fire with their message.
        raw = exc.read().decode(errors = "replace")
        try:
            return exc.code, json.loads(raw)
        except json.JSONDecodeError:
            return exc.code, {"error": raw}
# ── 1. response_format = json_object (JSON mode) ─────────────
# Non-streaming request with thinking disabled and JSON mode on; the
# 600 s timeout overrides post()'s 240 s default.
status, data = post("/v1/chat/completions", {
"model": "default",
"messages": [
{"role": "system", "content": 'Reply with a single JSON object of the form {"city": "...", "country": "..."}. Output ONLY the JSON, nothing else.'},
{"role": "user", "content": "What is the capital of France?"},
],
"temperature": TEMP,
"max_tokens": 600,
"seed": SEED,
"stream": False,
"enable_thinking": False,
"response_format": {"type": "json_object"},
}, timeout = 600)
# Hard requirements: HTTP 200 and a choices[0].message envelope.
assert status == 200, f"json status {status}: {data}"
assert (
isinstance(data.get("choices"), list)
and data["choices"]
and "message" in data["choices"][0]
), f"json response envelope malformed: {data}"
content = (data["choices"][0]["message"].get("content") or "").strip()
print(f"[json] raw json_object content: {content!r}")
# If the reply is wrapped in a Markdown code fence, keep only the text
# between the first pair of fences and drop a leading "json" tag.
if content.startswith("```"):
content = content.split("```", 2)[1]
if content.startswith("json"):
content = content[4:]
content = content.strip("`\n ")
# Everything about the content itself is advisory: wrong city,
# unparseable JSON and empty content all print WARN, never fail.
if content:
try:
parsed = json.loads(content)
if "paris" in str(parsed.get("city", "")).lower():
print(f"[json] PASS json_object -> {parsed}")
else:
print(f"[json] WARN json_object decoded but city!=Paris: {parsed}")
except json.JSONDecodeError as exc:
print(f"[json] WARN json_object content not parseable ({exc}); content={content!r}")
else:
print("[json] WARN json_object produced empty content")
# Same question without response_format: exercises the plain inference
# path. Only the HTTP status is a hard requirement here.
status2, data2 = post("/v1/chat/completions", {
"model": "default",
"messages": [{"role": "user", "content": "What is the capital of France? Answer with one word."}],
"temperature": TEMP,
"max_tokens": 400,
"seed": SEED,
"stream": False,
"enable_thinking": False,
}, timeout = 600)
assert status2 == 200, f"plain status {status2}: {data2}"
plain = (data2["choices"][0]["message"].get("content") or "").lower()
print(f"[json] plain capital-of-france reply: {plain!r}")
if "paris" in plain:
print("[json] PASS plain inference path (paris mentioned)")
else:
print(
f"[json] WARN plain inference returned no 'paris' -- "
f"model output drift. HTTP path validated separately above."
)
# ── 2. OpenAI image_url (data URI base64) ───────────────────
# Inline base64 PNG used as the test image (the constant's name
# describes it as a 64x64 red square), so no file or network fetch
# is needed to build the request.
PNG_64X64_RED_B64 = (
"iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAIAAAAlC+aJAAAAYklEQVR4nO3PMQ0AIADAMEAI/k"
"UhBhEcDcmqYJtn7/GzpQNeNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA"
"1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaBdCJ0BmMJ25zMAAAAASUVORK5CYII="
)
data_uri = f"data:image/png;base64,{PNG_64X64_RED_B64}"
# On Windows + the Qwen3-VL mmproj, llama.cpp's vision
# path runs on CPU (no Metal involvement). The wrapper is
# kept for resilience but the vision path is expected to
# work on Windows; an exception here is a real regression.
# NOTE(review): despite the sentence above, the except branch below
# only prints a WARN, so an exception does not fail the job.
client = OpenAI(base_url = f"{BASE}/v1", api_key = KEY)
try:
openai_resp = client.chat.completions.create(
model = "default",
temperature = TEMP,
max_tokens = 80,
seed = SEED,
messages = [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": data_uri}},
{"type": "text", "text": "What colour dominates this image? Reply in one word."},
],
}],
)
openai_text = (openai_resp.choices[0].message.content or "").lower()
print(f"[image/openai] reply: {openai_text!r}")
# Only non-emptiness is checked; the colour named is not validated.
if openai_text:
print("[image/openai] PASS image_url accepted, non-empty response")
else:
print("[image/openai] WARN image_url accepted but empty content")
except Exception as exc:
print(
f"[image/openai] WARN image_url SDK call raised: {type(exc).__name__}: "
f"{exc}. Studio successfully forwarded the request; failure here is "
f"upstream llama.cpp vision behaviour."
)
# ── 3. Anthropic source/base64 image ────────────────────────
# The SDK's api_key is a placeholder; the real credential is sent as
# an Authorization: Bearer header via default_headers. base_url is
# BASE itself (no /v1 suffix, unlike the OpenAI client above).
anthropic = Anthropic(
base_url = BASE,
api_key = "unused",
default_headers = {"Authorization": f"Bearer {KEY}"},
)
try:
a_msg = anthropic.messages.create(
model = "default",
max_tokens = 80,
temperature = TEMP,
# seed is passed through extra_body rather than as a keyword argument.
extra_body = {"seed": SEED},
messages = [{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": PNG_64X64_RED_B64,
},
},
{"type": "text", "text": "Describe this image briefly."},
],
}],
)
# Concatenate the text of every content block whose type is "text".
a_text = "".join(b.text for b in a_msg.content if getattr(b, "type", None) == "text")
print(f"[image/anthropic] reply: {a_text!r}")
if a_text:
print("[image/anthropic] PASS source/base64 accepted, non-empty response")
else:
print("[image/anthropic] WARN source/base64 accepted but empty content")
# Non-blocking: any SDK exception is downgraded to a WARN line.
except Exception as exc:
print(
f"[image/anthropic] WARN anthropic image SDK call raised: "
f"{type(exc).__name__}: {exc}. Likely upstream llama.cpp vision "
f"behaviour, NOT a Studio regression."
)
PY
- name: Stop Studio
if: always()
# Run as cmd so we are not running through the Git Bash shell;
# Git Bash on windows-latest has been observed to exit 143
# (SIGTERM) from any inline kill/sleep block, masking a green
# test run. The runner reclaims the Studio child process at
# job end either way, so just emit a marker and exit 0.
# STUDIO_PID was exported by the boot step through GITHUB_ENV; it is
# only printed here, never signalled.
shell: cmd
run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)
- name: Collect llama-server logs
if: always()
# A transient Windows DLL-init crash (0xC0000142) in this diagnostic
# copy must not fail an otherwise-green job.
continue-on-error: true
shell: bash
# Copy llama-server's own stdout/stderr (teed by Studio under
# ~/.unsloth/studio/logs/llama-server/) into the workspace so
# upload-artifact can pick it up. Crucial for diagnosing a
# subprocess crash where Studio's traceback only shows the
# symptom (httpx ReadError) but not the cause.
# cp's stderr is discarded and the `|| echo` fallback prints a marker
# instead of failing when there is nothing to copy.
run: |
mkdir -p logs/llama-server
cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null || \
echo "no llama-server logs to collect"
- name: Upload logs
if: always()
# Diagnostic only: a transient artifact-service drop must not fail a green job.
continue-on-error: true
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
# Artifact bundles the Studio log, the installer log, and the
# llama-server logs copied by the previous step; kept for 7 days.
name: windows-json-images-log
path: |
logs/studio.log
logs/install.log
logs/llama-server/*.log
retention-days: 7
# ── folded from studio-windows-no-vs-smoke.yml: install + run with no Visual Studio ──
no-vs-cpu:
name: Studio install + inference without Visual Studio
runs-on: windows-latest
timeout-minutes: 35
# Every `run:` step uses bash unless it sets its own `shell:`.
defaults:
run:
shell: bash
env:
# Text-only model used for the install + inference check. GGUF_REPO
# and GGUF_VARIANT are sent to /api/inference/load and form the HF
# cache key; GGUF_FILE is the file downloaded when priming the cache.
GGUF_REPO: unsloth/gemma-3-270m-it-GGUF
GGUF_VARIANT: UD-Q4_K_XL
GGUF_FILE: gemma-3-270m-it-UD-Q4_K_XL.gguf
# Port Studio is started on and that every curl in this job targets.
STUDIO_PORT: '18820'
HF_HOME: ${{ github.workspace }}/hf-cache
# UTF-8 stdio for Python tools on Windows.
PYTHONIOENCODING: utf-8
PYTHONUTF8: '1'
steps:
# Actions are pinned to full commit SHAs; the trailing comment records
# the corresponding release tag.
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Do not leave the checkout token in the local git config.
persist-credentials: false
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: '22'
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
# Quoted so the version stays a string rather than a YAML float.
python-version: '3.12'
- name: Restore HF_HOME for ${{ env.GGUF_REPO }}
id: cache-hf
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
# A failed restore is tolerated; the prime step below then re-downloads.
continue-on-error: true
with:
path: hf-cache
# Key covers OS, repo and variant, plus a manual -v2 suffix.
key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2
- name: Prime HF_HOME with the GGUF
id: prime-hf
# Runs when the restore missed OR when the restore step itself failed.
if: steps.cache-hf.outputs.cache-hit != 'true' || steps.cache-hf.outcome != 'success'
env:
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
# Downloads the model file and a small tinyllamas/stories260K.gguf.
# NOTE(review): the purpose of the second download is not visible in
# this part of the workflow -- confirm it is still needed.
run: |
python -m pip install --upgrade huggingface_hub
mkdir -p hf-cache
bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE"
bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf
- name: Save HF_HOME for ${{ env.GGUF_REPO }}
# Save only after a successful prime, even if a later step has failed.
if: always() && steps.prime-hf.outcome == 'success'
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: hf-cache
# Must stay identical to the restore key above.
key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2
- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
shell: pwsh
# Compact variant of the same step in the json-images job: upgrade npm
# to ^11, then add Defender exclusions for four paths. Each exclusion
# is best-effort -- the empty catch block swallows any failure silently.
run: |
$ProgressPreference = 'SilentlyContinue'
npm install -g 'npm@^11' 2>&1 | Out-Host
foreach ($p in @(
"$env:USERPROFILE\.unsloth",
"$env:USERPROFILE\AppData\Local\uv",
"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
"$env:GITHUB_WORKSPACE\studio\frontend\dist"
)) {
try { Add-MpPreference -ExclusionPath $p -ErrorAction Stop } catch { }
}
- name: Hide Visual Studio + CMake (simulate a host with no build tools)
shell: pwsh
# Renames both Visual Studio install roots to '<name>.vsoff' and every
# cmake executable found on PATH to '<name>.off'. The original cmake
# paths are joined with '|' and exported as HIDDEN_CMAKE through
# GITHUB_ENV so the "Restore Visual Studio + CMake" step can undo the
# renames. Each rename is attempted up to 6 times, 3 s apart.
run: |
$ErrorActionPreference = 'Stop'
# A Program Files dir can hold a transient handle (Defender / MSBuild node)
# so Rename-Item intermittently fails with "Access is denied"; retry to ride it out.
function Rename-WithRetry($Path, $NewName) {
for ($i = 1; $i -le 6; $i++) {
try { Rename-Item -LiteralPath $Path -NewName $NewName -ErrorAction Stop; return }
catch { if ($i -eq 6) { throw }; Start-Sleep -Seconds 3 }
}
}
# Rename the Visual Studio install roots (incl. the Installer that holds
# vswhere.exe) so Find-VsBuildTools' vswhere + filesystem scan both miss.
foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
if (Test-Path -LiteralPath $d) {
Rename-WithRetry $d ((Split-Path $d -Leaf) + '.vsoff')
Write-Host "Hid VS: $d"
}
}
# Surgically rename each cmake executable on PATH (not its parent dir --
# cmake can share a dir with other shims) so Get-Command cmake fails.
$hidden = @()
foreach ($c in (Get-Command cmake -All -ErrorAction SilentlyContinue)) {
if ($c.Source -and (Test-Path -LiteralPath $c.Source)) {
Rename-WithRetry $c.Source ((Split-Path $c.Source -Leaf) + '.off')
$hidden += $c.Source
Write-Host "Hid cmake: $($c.Source)"
}
}
("HIDDEN_CMAKE=" + ($hidden -join '|')) | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Assert Visual Studio + CMake are genuinely undetectable
shell: pwsh
# Guards the test premise. It dot-sources two detection functions
# extracted from studio/setup.ps1 (via the Get-FunctionSource helper in
# tests/studio_setup_ps1) and fails the job if Find-VsBuildTools still
# returns something, or if cmake / cl.exe still resolve on PATH.
run: |
$ErrorActionPreference = 'Stop'
. (Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1/Get-FunctionSource.ps1')
$setup = Join-Path $env:GITHUB_WORKSPACE 'studio/setup.ps1'
foreach ($fn in @('Resolve-VsGeneratorFromLabel', 'Find-VsBuildTools')) {
. ([scriptblock]::Create((Get-FunctionSource -Path $setup -Name $fn)))
}
$vs = Find-VsBuildTools
if ($vs) { Write-Error "Find-VsBuildTools still detects VS: $($vs.Generator) @ $($vs.InstallPath)"; exit 1 }
if (Get-Command cmake -ErrorAction SilentlyContinue) { Write-Error "cmake is still on PATH"; exit 1 }
if (Get-Command cl.exe -ErrorAction SilentlyContinue) { Write-Error "cl.exe is still on PATH"; exit 1 }
Write-Host "Confirmed: no Visual Studio, no cmake, no cl.exe."
- name: PyTorch CPU wheel installs and imports (no Visual Studio)
# Installs torch from the PyTorch CPU wheel index (PyPI as the extra
# index) and proves it imports by printing its version and CUDA
# availability.
run: |
python -m pip install --upgrade pip
python -m pip install torch --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple
python -c "import torch; print('torch', torch.__version__, 'cuda?', torch.cuda.is_available())"
- name: Install Studio (--local, --no-torch) with no build tools present
shell: pwsh
env:
# NOTE(review): presumably consumed by the installer for authenticated
# GitHub downloads -- confirm against install.ps1 / setup.ps1.
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }}
# `*>&1` merges every PowerShell output stream so the whole installer
# transcript lands in logs/install.log, which the next step greps.
run: |
New-Item -ItemType Directory -Force -Path logs | Out-Null
$ProgressPreference = 'SilentlyContinue'
& ./install.ps1 --local --no-torch *>&1 | Tee-Object -FilePath logs/install.log
- name: Assert prebuilt used AND no build tools were installed
# Accumulates failures in `fail` so every problem is reported in one
# run: a source-build fallback in the log, any of three build-tool
# install markers in the log (case-insensitive), a missing prebuilt
# info JSON, or a missing llama-server.exe. On failure the relevant
# install-log lines are printed before exiting 1.
run: |
LLAMA_DIR=~/.unsloth/llama.cpp
INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
fail=0
if grep -q "falling back to source build" logs/install.log; then
echo "::error::install.ps1 fell back to source-build llama.cpp without VS."; fail=1
fi
# The deferred build-tool installs must NOT run on the prebuilt path.
for pat in "Kitware.CMake" "Microsoft.VisualStudio.2022.BuildTools" "installing via winget"; do
if grep -qi "$pat" logs/install.log; then
echo "::error::unexpected build-tool install on the prebuilt path: '$pat'"; fail=1
fi
done
[ -f "$INFO" ] || { echo "::error::no UNSLOTH_PREBUILT_INFO.json"; ls -la "$LLAMA_DIR" || true; fail=1; }
[ -f "$BIN" ] || { echo "::error::no llama-server.exe"; ls -la "$LLAMA_DIR/build/bin" || true; fail=1; }
if [ "$fail" != "0" ]; then grep -iE "cmake|visual studio|prebuilt|source build" logs/install.log | tail -60; exit 1; fi
echo "Prebuilt installed with no build tools:"
cat "$INFO"
- name: Add Studio shim to GITHUB_PATH
  # Exposes the installed `unsloth.exe` shim directory to later steps.
  # The path is converted to Windows form with cygpath before being
  # appended to GITHUB_PATH. A missing shim fails the step after
  # listing the studio directory for diagnosis.
  run: |
    SHIM_DIR=~/.unsloth/studio/bin
    if [ ! -f "$SHIM_DIR/unsloth.exe" ]; then
      echo "::error::unsloth.exe shim not found"
      ls -la ~/.unsloth/studio/ || true
      exit 1
    fi
    cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"
- name: Reset auth + boot Studio (API-only)
# Starts Studio in the background on 127.0.0.1:$STUDIO_PORT with
# UNSLOTH_API_ONLY=1, sending stdout+stderr to logs/studio.log, and
# exports the background PID as STUDIO_PID for later steps.
run: |
unsloth studio reset-password
mkdir -p logs
UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
> logs/studio.log 2>&1 &
echo "STUDIO_PID=$!" >> "$GITHUB_ENV"
- name: Wait for /api/health, log in, load the GGUF
# Phases, in order: poll /api/health up to 180 times (1 s apart) and
# dump the last 200 lines of logs/studio.log if it never turns healthy;
# log in with the bootstrap password and rotate it (both passwords are
# masked in the log); log in again and export the bearer token as
# API_KEY; then try /api/inference/load up to 3 times, 10 s apart,
# exiting 22 if none returns HTTP 200.
run: |
for i in $(seq 1 180); do
if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
jq -e '.status == "healthy"' /tmp/health.json && break
fi
sleep 1
done
jq -e '.status == "healthy"' /tmp/health.json || { tail -200 logs/studio.log; exit 1; }
OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
NEW="CINoVS-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
echo "::add-mask::$OLD"
echo "::add-mask::$NEW"
OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
-H 'content-type: application/json' \
-d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
-H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
-d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
-H 'content-type: application/json' \
-d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"
LOAD_OK=0
for attempt in 1 2 3; do
HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
-X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
-H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
--max-time 600 \
-d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}")
if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
echo "::warning::/api/inference/load attempt $attempt returned $HTTP"; cat /tmp/load.json || true; sleep 10
done
[ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
jq '{status, display_name, is_gguf}' /tmp/load.json
- name: Inference works via the prebuilt llama.cpp (no VS)
# One non-streaming chat completion (temperature 0, 32 tokens max,
# 240 s cap) authenticated with the API_KEY exported by the previous
# step. The step fails when the response has no parseable message or
# when the content is empty or the literal "null"; the answer's
# correctness is not checked.
run: |
RESP=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/v1/chat/completions" \
-H "Authorization: Bearer $API_KEY" -H 'content-type: application/json' \
--max-time 240 \
-d '{"model":"default","messages":[{"role":"user","content":"What is 1+1? Answer briefly."}],"temperature":0,"max_tokens":32,"stream":false}')
echo "$RESP" | jq '.choices[0].message' || { echo "$RESP"; exit 1; }
CONTENT=$(echo "$RESP" | jq -r '.choices[0].message.content')
[ -n "$CONTENT" ] && [ "$CONTENT" != "null" ] || { echo "::error::empty completion"; exit 1; }
echo "Inference OK without Visual Studio: $CONTENT"
- name: Restore Visual Studio + CMake
# Runs even after a failure. Undoes the hide step: renames each
# '<root>.vsoff' directory back to its original name, then, for every
# path listed in HIDDEN_CMAKE (split on '|'), renames '<path>.off'
# back. Entries whose renamed file no longer exists are skipped.
if: always()
shell: pwsh
run: |
foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
$off = "$d.vsoff"
if (Test-Path -LiteralPath $off) { Rename-Item -LiteralPath $off -NewName (Split-Path $d -Leaf); Write-Host "Restored $d" }
}
if ($env:HIDDEN_CMAKE) {
foreach ($src in ($env:HIDDEN_CMAKE -split '\|')) {
if ($src -and (Test-Path -LiteralPath "$src.off")) { Rename-Item -LiteralPath "$src.off" -NewName (Split-Path $src -Leaf) }
}
}
- name: Stop Studio
if: always()
# Deliberate no-op run under cmd: it only prints the STUDIO_PID that
# the boot step exported and never signals the process.
shell: cmd
run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)
- name: Collect llama-server logs
if: always()
# Diagnostic copy only; a failure here must not fail the job.
continue-on-error: true
# Copies Studio's llama-server logs into the workspace for the upload
# step; prints a marker instead of failing when there are none.
run: |
mkdir -p logs/llama-server
cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null || echo "no llama-server logs"
- name: Upload logs
if: always()
# Diagnostic only; an upload failure must not fail the job.
continue-on-error: true
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
# Installer log, Studio log and the copied llama-server logs; kept 7 days.
name: windows-no-vs-cpu-log
path: |
logs/install.log
logs/studio.log
logs/llama-server/*.log
retention-days: 7
# ─────────────────────────────────────────────────────────────────────
# Job B: the GPU (CUDA) prebuilt path is also VS-free (resolve/availability)
# ─────────────────────────────────────────────────────────────────────
no-vs-gpu-resolve:
name: GPU prebuilt resolves without Visual Studio
runs-on: windows-latest
# Short cap: this job only queries release metadata and runs the
# resolver; it installs no Studio and loads no model.
timeout-minutes: 15
defaults:
run:
shell: bash
env:
# UTF-8 stdio for Python tools on Windows.
PYTHONIOENCODING: utf-8
PYTHONUTF8: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Do not leave the checkout token in the local git config.
persist-credentials: false
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
- name: Hide Visual Studio
shell: pwsh
# Renames both Visual Studio install roots to '<name>.vsoff', retrying
# each rename up to 6 times, 3 s apart. Unlike the no-vs-cpu job, cmake
# is not hidden here.
run: |
$ErrorActionPreference = 'Stop'
# Retry the rename: a Program Files dir can hold a transient handle that
# makes Rename-Item intermittently fail with "Access is denied".
function Rename-WithRetry($Path, $NewName) {
for ($i = 1; $i -le 6; $i++) {
try { Rename-Item -LiteralPath $Path -NewName $NewName -ErrorAction Stop; return }
catch { if ($i -eq 6) { throw }; Start-Sleep -Seconds 3 }
}
}
foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
if (Test-Path -LiteralPath $d) { Rename-WithRetry $d ((Split-Path $d -Leaf) + '.vsoff'); Write-Host "Hid VS: $d" }
}
- name: Windows CUDA and ROCm prebuilts exist in unslothai/llama.cpp (what GPU users download, no VS)
env:
# Sent as a Bearer token on the GitHub API request below.
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Fetches the latest release of unslothai/llama.cpp and requires that
# its asset names include a Windows x64 CUDA build and a Windows x64
# per-gfx ROCm build (case-insensitive match). Matching names are
# printed by grep; on a miss the full asset list is dumped.
run: |
curl -fsSL -H "Authorization: Bearer $GH_TOKEN" \
"https://api.github.com/repos/unslothai/llama.cpp/releases/latest" > /tmp/rel.json
echo "release: $(jq -r .tag_name /tmp/rel.json)"
ASSETS=$(jq -r '.assets[].name' /tmp/rel.json)
echo "$ASSETS" | grep -iE 'windows-x64-cuda[0-9]' || {
echo "::error::no Windows x64 CUDA prebuilt asset found in unslothai/llama.cpp latest release"
echo "$ASSETS"; exit 1; }
# AMD parity: hosted runners have no AMD GPU, so the resolver step below
# can't exercise the ROCm path (it resolves to CPU). Pin the per-gfx
# Windows ROCm bundles here so a release that drops them fails loudly --
# the AMD no-VS guarantee otherwise rides only on shared resolver code.
echo "$ASSETS" | grep -iE 'windows-x64-rocm-gfx' || {
echo "::error::no Windows x64 ROCm (per-gfx) prebuilt asset found in unslothai/llama.cpp latest release"
echo "$ASSETS"; exit 1; }
echo "Windows CUDA and ROCm prebuilts are available -- GPU users get them without compiling."
- name: The prebuilt resolver runs without Visual Studio
env:
# NOTE(review): presumably read by the resolver for authenticated
# GitHub API calls -- confirm against install_llama_prebuilt.py.
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Only the resolver's exit code is checked; the JSON it writes to
# /tmp/resolve.json is printed but its contents are not validated.
run: |
# Resolver-only (no GPU on hosted runners, so the host resolves to the
# CPU bundle). The point is that resolution needs no compiler/VS.
python -m pip install --upgrade huggingface_hub
python studio/install_llama_prebuilt.py --resolve-prebuilt latest --output-format json > /tmp/resolve.json || {
echo "::error::resolver exited non-zero"; cat /tmp/resolve.json || true; exit 1; }
cat /tmp/resolve.json
echo "Prebuilt resolver ran with no Visual Studio present."
- name: Restore Visual Studio
# Runs even after a failure: renames each '<root>.vsoff' directory
# back to its original name if it exists.
if: always()
shell: pwsh
run: |
foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
$off = "$d.vsoff"
if (Test-Path -LiteralPath $off) { Rename-Item -LiteralPath $off -NewName (Split-Path $d -Leaf); Write-Host "Restored $d" }
}
# ── folded from studio-setup-ps1-vs2026.yml: setup.ps1 unit tests + real-VS detection + vcredist ──
pester:
name: setup.ps1 unit tests (VS 2026 / CMake guard)
runs-on: windows-latest
timeout-minutes: 15
# No job-level `defaults:` here; each run step names its shell.
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Do not leave the checkout token in the local git config.
persist-credentials: false
- name: Install Pester v5
shell: pwsh
# Trusts PSGallery, installs Pester >= 5.5.0 for the current user,
# imports it, and prints the loaded module name and version.
run: |
Set-PSRepository PSGallery -InstallationPolicy Trusted
Install-Module Pester -MinimumVersion 5.5.0 -Force -SkipPublisherCheck -Scope CurrentUser
Import-Module Pester -MinimumVersion 5.5.0
Get-Module Pester | Select-Object Name, Version | Format-Table
- name: Run Pester suite
shell: pwsh
# Runs every test under tests/studio_setup_ps1 with detailed output
# and writes an NUnit XML report to pester-results.xml in the
# workspace root, which the next step uploads. A missing test
# directory is an immediate failure.
run: |
$ErrorActionPreference = 'Stop'
$testDir = Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1'
if (-not (Test-Path $testDir)) {
Write-Error "Test directory not found: $testDir"
exit 1
}
$cfg = New-PesterConfiguration
$cfg.Run.Path = $testDir
$cfg.Run.Exit = $true # non-zero exit => job fails
$cfg.Run.Throw = $true # also throw on test failure / 0 tests
$cfg.TestResult.Enabled = $true
$cfg.TestResult.OutputFormat = 'NUnitXml'
$cfg.TestResult.OutputPath = Join-Path $env:GITHUB_WORKSPACE 'pester-results.xml'
$cfg.Output.Verbosity = 'Detailed'
Invoke-Pester -Configuration $cfg
- name: Upload Pester results
  # Runs even when the suite failed, so the report is always available.
  if: always()
  # Diagnostic only: a transient artifact-service drop must not fail a
  # green job. The pass/fail verdict comes from the "Run Pester suite"
  # step, so tolerating an upload error cannot hide a test failure.
  # This matches every other artifact upload in this workflow.
  continue-on-error: true
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
  with:
    name: pester-results-setup-ps1
    path: pester-results.xml
    # A missing report (e.g. the suite never started) only warns.
    if-no-files-found: warn
vs-integration:
  # Real detection against the VS installed on the runner image (no mocks).
  # Each matrix entry names the runner image plus the CMake generator and VC
  # toolset folder that detection is expected to report on it.
  name: real-VS detection (${{ matrix.label }})
  strategy:
    fail-fast: false
    matrix:
      include:
        - os: windows-2022
          label: 'VS 2022'
          expectGen: 'Visual Studio 17 2022'
          expectToolset: 'v170'
        - os: windows-2025-vs2026
          label: 'VS 2026'
          expectGen: 'Visual Studio 18 2026'
          expectToolset: 'v180'
  runs-on: ${{ matrix.os }}
  timeout-minutes: 15
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: Detect the real Visual Studio with setup.ps1 functions
      shell: pwsh
      env:
        EXPECT_GEN: ${{ matrix.expectGen }}
        EXPECT_TOOLSET: ${{ matrix.expectToolset }}
      run: |
        $ErrorActionPreference = 'Stop'
        # Load only the functions under test out of studio/setup.ps1 (via the
        # Get-FunctionSource test helper) instead of running the whole script.
        . (Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1/Get-FunctionSource.ps1')
        $setup = Join-Path $env:GITHUB_WORKSPACE 'studio/setup.ps1'
        foreach ($fn in @('Resolve-VsGeneratorFromLabel', 'Get-VcBuildCustomizationsDir', 'Find-VsBuildTools')) {
          . ([scriptblock]::Create((Get-FunctionSource -Path $setup -Name $fn)))
        }
        # Ground truth from the real vswhere (independent of our code), for visibility.
        $vsw = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe"
        if (Test-Path $vsw) {
          $year = (& $vsw -latest -property catalog_productLineVersion 2>$null | Select-Object -First 1)
          $path = (& $vsw -latest -property installationPath 2>$null | Select-Object -First 1)
          Write-Host "Real vswhere: productLineVersion='$year' installPath='$path'"
        } else {
          Write-Host "vswhere not present at $vsw (relying on filesystem fallback)"
        }
        # Our detection must find the real VS and report the expected generator.
        $r = Find-VsBuildTools
        if (-not $r) { throw "Find-VsBuildTools returned null on a host with real $env:EXPECT_GEN" }
        Write-Host "Find-VsBuildTools -> Generator='$($r.Generator)' Source='$($r.Source)' InstallPath='$($r.InstallPath)'"
        if ($r.Generator -ne $env:EXPECT_GEN) {
          throw "Detection mismatch: got '$($r.Generator)', expected '$env:EXPECT_GEN'"
        }
        # Guard the empty case first: Test-Path on a null/empty path fails with a
        # parameter-binding error instead of the diagnostic below.
        if (-not $r.InstallPath -or -not (Test-Path $r.InstallPath)) { throw "Detected InstallPath does not exist: $($r.InstallPath)" }
        # Toolset path derivation must match the expected v-number...
        $bc = Get-VcBuildCustomizationsDir -VsInstallPath $r.InstallPath -Generator $r.Generator
        # Same reasoning as above: Split-Path cannot take an empty value.
        if (-not $bc) { throw "Get-VcBuildCustomizationsDir returned nothing for InstallPath='$($r.InstallPath)' Generator='$($r.Generator)'" }
        $derived = Split-Path (Split-Path $bc -Parent) -Leaf  # e.g. v170 / v180
        Write-Host "Get-VcBuildCustomizationsDir -> '$bc' (toolset='$derived')"
        if ($derived -ne $env:EXPECT_TOOLSET) {
          throw "Toolset mismatch: derived '$derived', expected '$env:EXPECT_TOOLSET'"
        }
        # ...and that v-number is a real folder on the VS install (where CUDA's
        # BuildCustomizations would land).
        $vcRoot = Join-Path $r.InstallPath 'MSBuild\Microsoft\VC'
        if (Test-Path $vcRoot) {
          $realToolsets = @((Get-ChildItem -Path $vcRoot -Directory -ErrorAction SilentlyContinue).Name)
          Write-Host "Real VC toolset dirs: $($realToolsets -join ', ')"
          if ($realToolsets -notcontains $derived) {
            throw "Derived toolset '$derived' is not present on the real $env:EXPECT_GEN install (have: $($realToolsets -join ', '))"
          }
          Write-Host "OK: toolset '$derived' exists on the real VS install."
        } else {
          # Missing MSBuild\Microsoft\VC is treated as "nothing to verify", not a failure.
          Write-Warning "VC MSBuild root absent ($vcRoot) - C++ workload not installed; skipping on-disk toolset check."
        }
        Write-Host "PASS: real $env:EXPECT_GEN detected correctly with toolset '$derived'."
vcredist-clean-box:
  # Validate Test-VCRedistInstalled + Ensure-VCRedist on a throwaway runner:
  # present on the stock image, fires on a clean box (signals removed restorably),
  # then a literal uninstall/reinstall round trip. Always restored before the end.
  name: VC++ runtime detect + install round-trip (${{ matrix.os }})
  strategy:
    fail-fast: false
    matrix:
      os: [windows-latest, windows-2025-vs2026]
  runs-on: ${{ matrix.os }}
  timeout-minutes: 20
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
    - name: Detect present, fire on a clean box, and round-trip the install
      shell: pwsh
      run: |
        $ErrorActionPreference = 'Stop'
        . (Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1/Get-FunctionSource.ps1')
        $setup = Join-Path $env:GITHUB_WORKSPACE 'studio/setup.ps1'
        # Dot-source the guard + the logging closure it reaches
        # (step/substep -> Write-StudioStdoutMirror / Get-StudioAnsi).
        $script:StudioVtOk = $false
        $script:UnslothVerbose = $false
        foreach ($fn in @('Get-StudioAnsi', 'Write-StudioStdoutMirror', 'step', 'substep',
                          'Invoke-SetupCommand', 'Refresh-Environment',
                          'Test-VCRedistInstalled', 'Ensure-VCRedist')) {
          $src = Get-FunctionSource -Path $setup -Name $fn
          if (-not $src) { throw "Function '$fn' not found in setup.ps1" }
          . ([scriptblock]::Create($src))
        }
        # Registry keys that section B backs up, deletes and restores.
        $regKeys = @(
          'HKLM\SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64',
          'HKLM\SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64'
        )
        # Prints the raw on-disk / registry state, independent of the function under test.
        function Show-GroundTruth {
          $dll = Join-Path $env:SystemRoot 'System32\vcruntime140_1.dll'
          Write-Host (" System32\vcruntime140_1.dll present: {0}" -f (Test-Path $dll))
          foreach ($k in $regKeys) {
            # Substring(5) drops the leading 'HKLM\' so the key can be read through the HKLM: drive.
            $r = Get-ItemProperty -Path "HKLM:\$($k.Substring(5))" -ErrorAction SilentlyContinue
            if ($r) { Write-Host (" {0}: Installed={1} {2}.{3}" -f $k, $r.Installed, $r.Major, $r.Minor) }
            else { Write-Host (" {0}: (absent)" -f $k) }
          }
        }
        Write-Host '== A. Detection on the stock runner (expect present) =='
        Show-GroundTruth
        if (-not (Test-VCRedistInstalled)) { throw 'Test-VCRedistInstalled reported ABSENT on a stock runner that ships the VC++ runtime (detection regression).' }
        Write-Host ' Test-VCRedistInstalled -> present OK'
        Write-Host '== B. Genuinely clean box (restorable): detection must FIRE =='
        # Simulate a clean machine: point SystemRoot at an empty scratch tree and
        # remove the registry keys after exporting them for restoration.
        $scratch = Join-Path $env:RUNNER_TEMP 'cleanwin'
        New-Item -ItemType Directory -Force -Path (Join-Path $scratch 'System32') | Out-Null
        $backup = Join-Path $env:RUNNER_TEMP 'vcreg_backup'
        New-Item -ItemType Directory -Force -Path $backup | Out-Null
        $origSysRoot = $env:SystemRoot
        try {
          for ($i = 0; $i -lt $regKeys.Count; $i++) {
            reg query $regKeys[$i] *> $null
            if ($LASTEXITCODE -eq 0) {
              $regFile = Join-Path $backup "$i.reg"
              reg export $regKeys[$i] $regFile /y *> $null
              # Never delete a key that was not backed up: the finally block can
              # only restore what was actually exported.
              if ($LASTEXITCODE -ne 0 -or -not (Test-Path $regFile)) {
                throw "Could not back up registry key '$($regKeys[$i])'; refusing to delete it without a restorable backup."
              }
              reg delete $regKeys[$i] /f *> $null
            }
          }
          $env:SystemRoot = $scratch
          if (Test-VCRedistInstalled) { throw 'Detection still PRESENT after both signals were removed (it would never trigger an install on a clean box).' }
          Write-Host ' Test-VCRedistInstalled -> absent OK (detection fires on a clean box)'
        } finally {
          $env:SystemRoot = $origSysRoot
          for ($i = 0; $i -lt $regKeys.Count; $i++) {
            $f = Join-Path $backup "$i.reg"
            if (Test-Path $f) {
              reg import $f *> $null
              # Warn, not throw: throwing here would mask the original failure.
              if ($LASTEXITCODE -ne 0) {
                Write-Warning "reg import failed for $f (exit $LASTEXITCODE); registry key '$($regKeys[$i])' may not have been restored."
              }
            }
          }
        }
        Show-GroundTruth
        if (-not (Test-VCRedistInstalled)) { throw 'Detection did not recover after restoring the registry (test restore bug).' }
        Write-Host '== C. Literal uninstall on this throwaway VM (official installer), observe detection =='
        $exe = Join-Path $env:RUNNER_TEMP 'vc_redist.x64.exe'
        Invoke-WebRequest -Uri 'https://aka.ms/vs/17/release/vc_redist.x64.exe' -OutFile $exe
        Start-Process -FilePath $exe -ArgumentList '/uninstall', '/quiet', '/norestart' -Wait
        Show-GroundTruth
        Write-Host (" Test-VCRedistInstalled after uninstall -> {0}" -f (Test-VCRedistInstalled))
        if (Test-VCRedistInstalled) {
          Write-Host ' Note: the Visual Studio on this image ref-counts the runtime, so the package'
          Write-Host ' uninstall is a no-op here; section B already proved detection on a clean box.'
        }
        Write-Host '== D. Restore via Ensure-VCRedist (winget product path), installer fallback if needed =='
        Ensure-VCRedist
        if (-not (Test-VCRedistInstalled)) {
          Write-Host ' winget path did not restore it; using the official installer to close the round trip.'
          Start-Process -FilePath $exe -ArgumentList '/install', '/quiet', '/norestart' -Wait
        }
        Show-GroundTruth
        if (-not (Test-VCRedistInstalled)) { throw 'VC++ runtime could not be restored after the uninstall round-trip.' }
        Write-Host ' Test-VCRedistInstalled -> present OK'
        Write-Host 'PASS: detection is correct on a real install, fires on a clean box, and the install round-trip restores the runtime.'
        # Every failure path above throws, so reaching this line means success.
        # Exit explicitly: the Actions pwsh wrapper otherwise ends the step with
        # the last native command's $LASTEXITCODE, which may be a stale non-zero
        # value (e.g. from a tool run inside Ensure-VCRedist before the installer
        # fallback succeeded - NOTE(review): depends on Ensure-VCRedist internals).
        exit 0