Fix Studio custom folders on Linux external drives (#6799) #9261

Workflow file for this run

.github/workflows/studio-windows-inference-smoke.yml at 01f7e14

	# SPDX-License-Identifier: AGPL-3.0-only
	# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.

	# Three end-to-end smoke jobs that boot a freshly-installed Studio and
	# exercise the surfaces real users hit through the OpenAI / Anthropic
	# SDKs and curl, on the FREE windows-latest runner. Each job picks the
	# smallest model that exercises the behaviour under test, primes
	# HF_HOME via actions/cache, and shares the install.ps1 --local
	# --no-torch bootstrap.
	#
	# 1. OpenAI, Anthropic API tests
	# gemma-3-270m-it UD-Q4_K_XL (~254 MiB).
	# 2. Tool calling Tests
	# Qwen3.5-2B UD-Q4_K_XL (~890 MiB).
	# 3. JSON, images
	# Qwen3-VL-2B-Instruct UD-IQ2_XXS + mmproj-F16 (~1.4 GiB total).
	# Within the 14 GB windows-latest SSD budget.

	name: Windows Studio GGUF CI

	# Triggers: PRs touching Studio, the installer, or any script this
	# workflow executes; every push to main/pip; and manual dispatch.
	on:
	  pull_request:
	    paths:
	      - 'studio/**'
	      - 'unsloth/**'
	      - 'unsloth_cli/**'
	      - 'install.ps1'
	      - 'pyproject.toml'
	      - 'tests/studio_setup_ps1/**'
	      # Files the jobs below run directly. Without these entries a PR
	      # that only edits them would never exercise this workflow.
	      - 'tests/studio/*.ps1'
	      - 'scripts/uninstall.ps1'
	      - '.github/scripts/hf-download-with-retry.sh'
	      - '.github/workflows/studio-windows-inference-smoke.yml'
	  push:
	    branches: [main, pip]
	  workflow_dispatch:

	# One in-flight run per workflow + ref; a newer run cancels the older one.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	# Least privilege: the jobs only need to read the repository.
	permissions:
	  contents: read

	jobs:
	# ─────────────────────────────────────────────────────────────────────
	# Job 1: OpenAI, Anthropic API tests
	# ─────────────────────────────────────────────────────────────────────
	openai-anthropic:
	name: OpenAI, Anthropic API tests
	runs-on: windows-latest
	timeout-minutes: 30
	# Every `run:` step uses bash unless the step sets its own `shell:`.
	defaults:
	run:
	shell: bash
	env:
	# Model under test: HF repo, quant variant, and the exact file name.
	GGUF_REPO: unsloth/gemma-3-270m-it-GGUF
	GGUF_VARIANT: UD-Q4_K_XL
	GGUF_FILE: gemma-3-270m-it-UD-Q4_K_XL.gguf
	# Port Studio is booted on; the curl steps below read it. Quoted so it
	# stays a string.
	STUDIO_PORT: '18888'
	# Hugging Face cache lives inside the workspace so the cache
	# restore/save steps (path: hf-cache) can persist it between runs.
	HF_HOME: ${{ github.workspace }}/hf-cache
	# Force UTF-8 for stdio (Windows defaults to cp1252; hf
	# download / Studio CLI print "✓" checkmarks and crash
	# otherwise).
	PYTHONIOENCODING: utf-8
	PYTHONUTF8: '1'
	steps:
	# Action pinned to a full commit SHA. persist-credentials: false keeps
	# the job token out of the local git config, so later steps (which run
	# checked-out code) cannot reuse it.
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	# Fast GPU-free gate: parse install.ps1 + setup.ps1 and run the PowerShell
	# unit tests (CUDA-toolkit + torch-flavor helpers) before the heavy GGUF smoke.
	- name: PowerShell installer unit tests
	  shell: pwsh
	  run: |
	    # 1. Syntax gate: any parser error fails the step immediately.
	    foreach ($f in @('install.ps1', 'studio/setup.ps1')) {
	      $errs = $null
	      [void][System.Management.Automation.Language.Parser]::ParseFile(
	        (Resolve-Path $f).Path, [ref]$null, [ref]$errs)
	      if ($errs) { $errs | ForEach-Object { $_.ToString() }; exit 1 }
	      Write-Host "$f parsed with no errors"
	    }
	    # 2. Unit tests. The runner's pwsh wrapper only propagates the exit
	    # code of the LAST native command, and $ErrorActionPreference = 'stop'
	    # does not react to a non-zero exit from a child pwsh. Check
	    # $LASTEXITCODE after every script so an early failure cannot be
	    # masked by a later pass.
	    foreach ($t in @(
	        'tests/studio/test_resolve_cuda_toolkit.ps1',
	        'tests/studio/test_torch_flavor.ps1',
	        'tests/studio/test_node_decision.ps1',
	        'tests/studio/test_node_probe_guard.ps1'
	      )) {
	      pwsh -NoProfile -File $t
	      if ($LASTEXITCODE -ne 0) {
	        Write-Host "::error::$t failed with exit code $LASTEXITCODE"
	        exit $LASTEXITCODE
	      }
	    }

	# uninstall.ps1: native uninstall must keep the shared unsloth.ico while a
	# WSL shortcut still references it (dual install), else that shortcut blanks.
	# The step first parses scripts/uninstall.ps1 with the PowerShell parser
	# (any parse error exits 1), then runs the dedicated test script; that
	# script is the last command, so its exit code decides the step result.
	- name: uninstall.ps1 unit test (dual-install icon preserve)
	shell: pwsh
	run: \|
	$errs = $null
	[void][System.Management.Automation.Language.Parser]::ParseFile(
	(Resolve-Path scripts/uninstall.ps1).Path, [ref]$null, [ref]$errs)
	if ($errs) { $errs \| ForEach-Object { $_.ToString() }; exit 1 }
	Write-Host "uninstall.ps1 parsed with no errors"
	pwsh -NoProfile -File tests/studio/test_uninstall_dual_install_icon.ps1

	# Toolchains for the job, both actions pinned by commit SHA. Versions are
	# quoted so YAML keeps them as strings (an unquoted 3.12 is a float).
	- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	with:
	node-version: '22'

	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
	with:
	python-version: '3.12'

	# Split restore + save (rather than the one-step actions/cache) so a
	# transient restore-side failure does not kill the whole job. v5 has a
	# known flake where it logs "Cache hit for: <key>" and then exits
	# non-zero without actually extracting the archive (see
	# actions/cache#1621 and github community discussion #163260).
	# continue-on-error on restore masks that failure so the Prime step
	# below can re-download from HF and the job keeps running. Save then
	# populates the cache key on a real miss only; cache keys are
	# immutable, so a corrupted cached entry persists until the version
	# suffix at the end of the key below (currently -v2) is bumped. Keep
	# this key identical to the one in the Save step.
	- name: Restore HF_HOME cache for ${{ env.GGUF_REPO }}
	id: cache-hf
	uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	continue-on-error: true
	with:
	path: hf-cache
	key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2

	- name: Prime HF_HOME with the GGUF
	id: prime-hf
	# Run on a real cache miss AND on the silent-restore-failure mode
	# described above (outcome != success).
	if: steps.cache-hf.outputs.cache-hit != 'true' \|\| steps.cache-hf.outcome != 'success'
	env:
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	# Downloads go through the repo's retry helper (arguments: repo id, file
	# name). No target directory is passed, so the files presumably land in
	# the HF_HOME cache layout -- confirm in hf-download-with-retry.sh.
	# NOTE(review): the purpose of the extra ggml-org/models
	# tinyllamas/stories260K.gguf download is not visible in this workflow.
	run: \|
	python -m pip install --upgrade huggingface_hub
	mkdir -p hf-cache
	bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE"
	bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf

	- name: Save HF_HOME cache for ${{ env.GGUF_REPO }}
	# Only write a fresh cache entry when we actually rebuilt the
	# directory (Prime ran and succeeded). Skipping when Prime is
	# skipped avoids "already exists" save warnings on the happy path.
	# always() keeps the step eligible even if some other earlier step
	# failed; the outcome check then narrows it to a successful Prime.
	if: always() && steps.prime-hf.outcome == 'success'
	uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: hf-cache
	# Must match the key used by the Restore step.
	key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2

	- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
	shell: pwsh
	# See studio-windows-update-smoke.yml for the full rationale.
	# tl;dr: setup.ps1 needs npm >=11 to skip a 35 s winget Node
	# reinstall, and Defender's real-time scan dominates the
	# frontend / uv-pip-extract steps.
	run: \|
	# Suppress PowerShell progress rendering for the commands below.
	$ProgressPreference = 'SilentlyContinue'
	Write-Host "npm version before upgrade: $(npm -v)"
	npm install -g 'npm@^11' 2>&1 \| Out-Host
	Write-Host "npm version after upgrade: $(npm -v)"
	# NOTE: do NOT pre-create these directories. See
	# studio-windows-update-smoke.yml for the full rationale --
	# creating an empty studio/frontend/dist trips setup.ps1's
	# mtime-based staleness check into "frontend up to date, skip
	# rebuild" and Studio boots with an empty dist directory.
	# Add-MpPreference accepts paths that do not yet exist.
	foreach ($p in @(
	"$env:USERPROFILE\.unsloth",
	"$env:USERPROFILE\AppData\Local\uv",
	"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
	"$env:GITHUB_WORKSPACE\studio\frontend\dist"
	)) {
	# Best-effort: -ErrorAction Stop turns a failure into an exception,
	# which the catch logs and skips, so the step itself never fails here.
	try {
	Add-MpPreference -ExclusionPath $p -ErrorAction Stop
	Write-Host "Defender exclusion added: $p"
	} catch {
	Write-Host "Defender exclusion skipped ($($_.Exception.Message)): $p"
	}
	}

	- name: Install Studio (--local, --no-torch)
	shell: pwsh
	env:
	# Job token handed to the installer; presumably used for authenticated
	# GitHub requests while fetching the prebuilt -- TODO confirm in install.ps1.
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	run: \|
	New-Item -ItemType Directory -Force -Path logs \| Out-Null
	# *>&1 merges every PowerShell stream, including Write-Host
	# (Information stream) output, into the pipeline; plain 2>&1 does
	# not. setup.ps1 reports its llama.cpp prebuilt status via Write-Host,
	# so this is what makes logs/install.log complete. The next step greps
	# that log for the "falling back to source build" message.
	$ProgressPreference = 'SilentlyContinue'
	& ./install.ps1 --local --no-torch *>&1 \| Tee-Object -FilePath logs/install.log

	- name: Assert install.ps1 used the Windows llama.cpp prebuilt
	run: \|
	# Two independent checks:
	#   1. logs/install.log must not contain the source-build fallback message.
	#   2. The prebuilt marker file and llama-server.exe must exist on disk.
	LLAMA_DIR=~/.unsloth/llama.cpp
	INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
	BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
	if grep -q "falling back to source build" logs/install.log; then
	echo "::error::install.ps1 fell back to source-build llama.cpp on Windows."
	grep -E "llama-prebuilt\|llama.cpp" logs/install.log \| tail -60
	exit 1
	fi
	if [ ! -f "$INFO" ]; then
	echo "::error::no UNSLOTH_PREBUILT_INFO.json at $INFO."
	ls -la "$LLAMA_DIR" \|\| true
	exit 1
	fi
	if [ ! -f "$BIN" ]; then
	echo "::error::no llama-server.exe at $BIN."
	ls -la "$LLAMA_DIR/build/bin" \|\| true
	exit 1
	fi
	echo "install.ps1 installed the Windows prebuilt llama.cpp:"
	cat "$INFO"

	# Makes the `unsloth` command available to every later step: the shim
	# directory is appended to GITHUB_PATH. cygpath -w converts the Git Bash
	# path to its Windows form first.
	- name: Add Studio shim to GITHUB_PATH
	run: \|
	SHIM_DIR=~/.unsloth/studio/bin
	if [ ! -f "$SHIM_DIR/unsloth.exe" ]; then
	echo "::error::unsloth.exe shim not found at $SHIM_DIR"
	ls -la ~/.unsloth/studio/ \|\| true
	exit 1
	fi
	cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"

	# Client SDKs imported by the multi-turn determinism step below.
	- name: Install OpenAI + Anthropic Python SDKs
	run: python -m pip install 'openai>=1.50' 'anthropic>=0.40'

	# Studio is started in the background (&) with stdout/stderr redirected
	# to logs/studio.log; its PID is exported through GITHUB_ENV. The
	# following step polls /api/health until the server is ready.
	- name: Reset auth + boot Studio (API-only)
	run: \|
	unsloth studio reset-password
	mkdir -p logs
	UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
	> logs/studio.log 2>&1 &
	echo "STUDIO_PID=$!" >> "$GITHUB_ENV"

	- name: Wait for /api/health
	  run: |
	    # Poll once per second for up to 180 s. Both the HTTP request and the
	    # "healthy" status check sit in the `if` condition: the step runs
	    # under `bash -e`, so a bare failing `jq -e` in the body would abort
	    # on the first reachable-but-not-yet-healthy response instead of
	    # retrying.
	    for i in $(seq 1 180); do
	      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json \
	        && jq -e '.status == "healthy"' /tmp/health.json > /dev/null; then
	        echo "Studio healthy after ${i} poll(s)"
	        exit 0
	      fi
	      sleep 1
	    done
	    echo "Studio did not become healthy in 180s"
	    tail -200 logs/studio.log
	    exit 1

	# Flow: log in with the bootstrap password, change it, verify the old
	# password is now rejected with 401, log in with the new one, and export
	# the resulting bearer token as TOKEN for the following steps.
	# NOTE(review): passwords are interpolated into the JSON bodies unescaped;
	# safe for token_urlsafe output, assumes the bootstrap password contains
	# no quotes or backslashes.
	- name: Password rotation (old must fail, new must work)
	run: \|
	OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
	NEW="CIRotated-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
	# Mask both passwords so they never appear in the job log.
	echo "::add-mask::$OLD"
	echo "::add-mask::$NEW"
	OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	-H 'content-type: application/json' \
	-d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" \| jq -r .access_token)
	[ -n "$OLD_TOKEN" ] && [ "$OLD_TOKEN" != "null" ] \|\| { echo "bootstrap login failed"; exit 1; }
	curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
	-H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
	-d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
	# No -f here on purpose: the expected 401 must be captured as a status
	# code rather than making curl fail.
	OLD_STATUS=$(curl -s -o /dev/null -w '%{http_code}' \
	-X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	-H 'content-type: application/json' \
	-d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}")
	if [ "$OLD_STATUS" != "401" ]; then
	echo "::error::Login with old password returned $OLD_STATUS, expected 401"
	exit 1
	fi
	NEW_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	-H 'content-type: application/json' \
	-d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" \| jq -r .access_token)
	[ -n "$NEW_TOKEN" ] && [ "$NEW_TOKEN" != "null" ] \|\| { echo "new login failed"; exit 1; }
	echo "TOKEN=$NEW_TOKEN" >> "$GITHUB_ENV"
	echo "password rotation OK (old=401, new=200)"

	- name: Load the GGUF (HF repo + variant, served from HF_HOME cache)
	  run: |
	    # Retry the load step a few times so a transient TCP RST during
	    # llama-server warm-up (Windows runner image churn,
	    # windows-latest -> windows-2025-vs2026 rollout) doesn't fail
	    # the whole job. The Studio backend's _wait_for_health now
	    # catches httpx.ReadError too; this retry layer covers the
	    # cases the backend can't recover from on its own.
	    LOAD_OK=0
	    for attempt in 1 2 3; do
	      # Drop the previous attempt's body so a failed attempt never prints
	      # a stale response.
	      rm -f /tmp/load.json
	      # `|| HTTP=000` is what makes the retry effective: the step runs
	      # under `bash -e`, so a curl transport failure (connection reset,
	      # timeout) would otherwise abort the script at this assignment and
	      # skip the remaining attempts.
	      HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
	        -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
	        -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
	        --max-time 600 \
	        -d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}") || HTTP=000
	      if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
	      echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:"
	      cat /tmp/load.json || true
	      sleep 10
	    done
	    [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
	    jq '{status, display_name, is_gguf, context_length}' /tmp/load.json

	- name: Multi-turn determinism via OpenAI + Anthropic SDKs
	  env:
	    # Derived from the job-level STUDIO_PORT so the port is defined once.
	    BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }}
	  run: |
	    python - <<'PY'
	    # Runs the same four-turn conversation twice through each SDK at
	    # temperature 0 with a fixed seed and asserts that both runs produce
	    # the same (whitespace-stripped) replies.
	    import os
	    from openai import OpenAI
	    from anthropic import Anthropic

	    BASE = os.environ["BASE_URL"]
	    KEY = os.environ["TOKEN"]  # bearer token exported by the password-rotation step
	    SEED = 3407

	    PROMPTS = [
	        "What is 1+1?",
	        "What did I ask before?",
	        "What is the capital of France?",
	        "Repeat the city name",
	    ]

	    def run_openai():
	        """Play PROMPTS as one growing conversation via the OpenAI SDK; return the replies."""
	        client = OpenAI(base_url = f"{BASE}/v1", api_key = KEY)
	        history, replies = [], []
	        for prompt in PROMPTS:
	            history.append({"role": "user", "content": prompt})
	            resp = client.chat.completions.create(
	                model = "default",
	                messages = history,
	                temperature = 0.0,
	                max_tokens = 80,
	                seed = SEED,
	                extra_body = {"enable_thinking": False},
	            )
	            text = resp.choices[0].message.content or ""
	            replies.append(text)
	            history.append({"role": "assistant", "content": text})
	        return replies

	    def run_anthropic():
	        """Same conversation via the Anthropic SDK; auth goes in an explicit Bearer header."""
	        client = Anthropic(
	            base_url = BASE,
	            api_key = "unused",
	            default_headers = {"Authorization": f"Bearer {KEY}"},
	        )
	        history, replies = [], []
	        for prompt in PROMPTS:
	            history.append({"role": "user", "content": prompt})
	            msg = client.messages.create(
	                model = "default",
	                max_tokens = 80,
	                messages = history,
	                temperature = 0.0,
	                extra_body = {"seed": SEED, "enable_thinking": False},
	            )
	            # Concatenate only the text blocks of the response.
	            text = "".join(b.text for b in msg.content if getattr(b, "type", None) == "text")
	            replies.append(text)
	            history.append({"role": "assistant", "content": text})
	        return replies

	    for label, runner in (("openai", run_openai), ("anthropic", run_anthropic)):
	        first = runner()
	        second = runner()
	        for i, (a, b) in enumerate(zip(first, second), start = 1):
	            print(f"[{label} turn {i}] {a!r}")
	            assert a, f"{label}: empty turn {i} response"
	            # Compare on stripped content: llama-server can vary
	            # trailing whitespace (specifically a final '\n') between
	            # otherwise-identical greedy runs depending on the
	            # batch-flush boundary at which the stream is closed. The
	            # generated tokens are identical; only the trailing
	            # whitespace differs. Keep the raw repr in the failure
	            # message so a real divergence is still legible.
	            assert a.strip() == b.strip(), (
	                f"{label} non-deterministic at turn {i} with temperature=0.0:\n"
	                f" run1: {a!r}\n run2: {b!r}"
	            )
	        joined = " ".join(first).lower()
	        # NOTE(review): the prompt is "1+1" yet this only requires a "1" in
	        # the reply, not "2" -- presumably a deliberately loose check for a
	        # 270M model; confirm the intent.
	        assert "1" in first[0], f"{label}: turn-1 answer should contain '1', got {first[0]!r}"
	        assert "paris" in joined, f"{label}: expected 'paris' somewhere in the four-turn transcript: {first}"
	        print(f"[{label}] OK -- 4 turns, run1 == run2, history grounded")
	    PY

	# Teardown + diagnostics. All three steps use `if: always()` so they run
	# even when a test step above failed; the two diagnostic steps also set
	# continue-on-error so they can never turn a green job red.
	- name: Stop Studio
	if: always()
	# Run as cmd so we are not running through the Git Bash shell;
	# Git Bash on windows-latest has been observed to exit 143
	# (SIGTERM) from any inline kill/sleep block, masking a green
	# test run. The runner reclaims the Studio child process at
	# job end either way, so just emit a marker and exit 0.
	shell: cmd
	run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)

	- name: Collect llama-server logs
	if: always()
	# A transient Windows DLL-init crash (0xC0000142) in this diagnostic
	# copy must not fail an otherwise-green job.
	continue-on-error: true
	shell: bash
	# Copy llama-server's own stdout/stderr (teed by Studio under
	# ~/.unsloth/studio/logs/llama-server/) into the workspace so
	# upload-artifact can pick it up. Crucial for diagnosing a
	# subprocess crash where Studio's traceback only shows the
	# symptom (httpx ReadError) but not the cause.
	run: \|
	mkdir -p logs/llama-server
	cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null \|\| \
	echo "no llama-server logs to collect"

	- name: Upload logs
	if: always()
	# Diagnostic only: a transient artifact-service drop must not fail a green job.
	continue-on-error: true
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	# Artifact names are distinct per job (each job uploads its own logs).
	name: windows-openai-anthropic-log
	path: \|
	logs/studio.log
	logs/install.log
	logs/llama-server/*.log
	retention-days: 7

	# ─────────────────────────────────────────────────────────────────────
	# Job 2: Tool calling Tests
	# ─────────────────────────────────────────────────────────────────────
	tool-calling:
	name: Tool calling Tests
	runs-on: windows-latest
	timeout-minutes: 30
	# Every `run:` step uses bash unless the step sets its own `shell:`.
	defaults:
	run:
	shell: bash
	env:
	# Tool calling is the highest-volume GGUF in this workflow
	# (Qwen3.5-2B at Q4_K_XL = ~1.28 GiB). The previous HF_HOME
	# cache stored xet chunks + blobs + snapshots = ~4.7 GiB --
	# 3.7x file-size inflation, dominating the post-step upload
	# (211 s on first run; subsequent runs hit the cache, but the
	# one-time cost recurs every time the cache key bumps). Use
	# main's `--local-dir gguf-cache` pattern: cache the flat .gguf
	# only, pass an absolute path to Studio's /api/inference/load.
	# The OpenAI/Anth and JSON+images jobs still cover the
	# gguf_variant resolution path.
	# Consequently this job sets neither HF_HOME nor GGUF_VARIANT.
	GGUF_REPO: unsloth/Qwen3.5-2B-GGUF
	GGUF_FILE: Qwen3.5-2B-UD-Q4_K_XL.gguf
	# Each job uses its own port (18888 / 18898 / 18899). Quoted so it
	# stays a string.
	STUDIO_PORT: '18898'
	# Force UTF-8 for stdio (Windows defaults to cp1252; hf
	# download / Studio CLI print "✓" checkmarks and crash
	# otherwise).
	PYTHONIOENCODING: utf-8
	PYTHONUTF8: '1'
	steps:
	# Checkout without persisted credentials (the job token is not left in
	# the local git config), then the Node 22 / Python 3.12 toolchains.
	# All actions are pinned by commit SHA; versions are quoted strings.
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	with:
	node-version: '22'

	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
	with:
	python-version: '3.12'

	# Split restore + save so a transient restore-side failure does not
	# kill the whole job. See the matching block in the openai-anthropic
	# job above for the full rationale (actions/cache#1621).
	# continue-on-error lets the download step below recover from a failed
	# restore. Keep this key identical to the one in the Save step.
	- name: Restore GGUF model cache
	id: cache-gguf
	uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	continue-on-error: true
	with:
	path: gguf-cache
	key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1

	- name: Download GGUF if cache miss
	id: download-gguf
	# Runs on a real cache miss and also when the restore step itself
	# failed (outcome != success).
	if: steps.cache-gguf.outputs.cache-hit != 'true' \|\| steps.cache-gguf.outcome != 'success'
	env:
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	# The third helper argument (gguf-cache) is the target directory; the
	# load step later builds the model path as gguf-cache/$GGUF_FILE.
	run: \|
	python -m pip install --upgrade huggingface_hub
	mkdir -p gguf-cache
	bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE" gguf-cache

	- name: Save GGUF model cache
	# Only save when the download step actually ran and succeeded; on a
	# cache hit that step is skipped and so is this one. always() keeps
	# the step eligible even if some other earlier step failed.
	if: always() && steps.download-gguf.outcome == 'success'
	uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: gguf-cache
	# Must match the key used by the Restore step.
	key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1

	- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
	shell: pwsh
	# See studio-windows-update-smoke.yml for the full rationale.
	# tl;dr: setup.ps1 needs npm >=11 to skip a 35 s winget Node
	# reinstall, and Defender's real-time scan dominates the
	# frontend / uv-pip-extract steps.
	run: \|
	# Suppress PowerShell progress rendering for the commands below.
	$ProgressPreference = 'SilentlyContinue'
	Write-Host "npm version before upgrade: $(npm -v)"
	npm install -g 'npm@^11' 2>&1 \| Out-Host
	Write-Host "npm version after upgrade: $(npm -v)"
	# NOTE: do NOT pre-create these directories. See
	# studio-windows-update-smoke.yml for the full rationale --
	# creating an empty studio/frontend/dist trips setup.ps1's
	# mtime-based staleness check into "frontend up to date, skip
	# rebuild" and Studio boots with an empty dist directory.
	# Add-MpPreference accepts paths that do not yet exist.
	foreach ($p in @(
	"$env:USERPROFILE\.unsloth",
	"$env:USERPROFILE\AppData\Local\uv",
	"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
	"$env:GITHUB_WORKSPACE\studio\frontend\dist"
	)) {
	# Best-effort: -ErrorAction Stop turns a failure into an exception,
	# which the catch logs and skips, so the step itself never fails here.
	try {
	Add-MpPreference -ExclusionPath $p -ErrorAction Stop
	Write-Host "Defender exclusion added: $p"
	} catch {
	Write-Host "Defender exclusion skipped ($($_.Exception.Message)): $p"
	}
	}

	- name: Install Studio (--local, --no-torch)
	shell: pwsh
	env:
	# Job token handed to the installer; presumably used for authenticated
	# GitHub requests while fetching the prebuilt -- TODO confirm in install.ps1.
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	run: \|
	New-Item -ItemType Directory -Force -Path logs \| Out-Null
	# *>&1 merges every PowerShell stream, including Write-Host
	# (Information stream) output, into the pipeline; plain 2>&1 does
	# not. setup.ps1 reports its llama.cpp prebuilt status via Write-Host,
	# so this is what makes logs/install.log complete. The next step greps
	# that log for the "falling back to source build" message.
	$ProgressPreference = 'SilentlyContinue'
	& ./install.ps1 --local --no-torch *>&1 \| Tee-Object -FilePath logs/install.log

	- name: Assert install.ps1 used the Windows llama.cpp prebuilt
	run: \|
	# Two independent checks:
	#   1. logs/install.log must not contain the source-build fallback message.
	#   2. The prebuilt marker file and llama-server.exe must exist on disk.
	LLAMA_DIR=~/.unsloth/llama.cpp
	INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
	BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
	if grep -q "falling back to source build" logs/install.log; then
	echo "::error::install.ps1 fell back to source-build llama.cpp on Windows."
	grep -E "llama-prebuilt\|llama.cpp" logs/install.log \| tail -60
	exit 1
	fi
	if [ ! -f "$INFO" ]; then
	echo "::error::no UNSLOTH_PREBUILT_INFO.json at $INFO."
	ls -la "$LLAMA_DIR" \|\| true
	exit 1
	fi
	if [ ! -f "$BIN" ]; then
	echo "::error::no llama-server.exe at $BIN."
	ls -la "$LLAMA_DIR/build/bin" \|\| true
	exit 1
	fi
	echo "install.ps1 installed the Windows prebuilt llama.cpp:"
	cat "$INFO"

	# Makes the `unsloth` command available to every later step: the shim
	# directory is appended to GITHUB_PATH. cygpath -w converts the Git Bash
	# path to its Windows form first.
	- name: Add Studio shim to GITHUB_PATH
	run: \|
	SHIM_DIR=~/.unsloth/studio/bin
	if [ ! -f "$SHIM_DIR/unsloth.exe" ]; then
	echo "::error::unsloth.exe shim not found at $SHIM_DIR"
	ls -la ~/.unsloth/studio/ \|\| true
	exit 1
	fi
	cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"

	# Studio is started in the background (&) with stdout/stderr redirected
	# to logs/studio.log; its PID is exported through GITHUB_ENV. The
	# following step polls /api/health until the server is ready.
	- name: Reset auth + boot Studio (API-only, default tool policy)
	run: \|
	unsloth studio reset-password
	mkdir -p logs
	UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
	> logs/studio.log 2>&1 &
	echo "STUDIO_PID=$!" >> "$GITHUB_ENV"

	- name: Wait for /api/health, log in, change password, load model
	  run: |
	    # ── 1. Wait (up to 180 s) for Studio to report healthy ──────────
	    HEALTHY=0
	    for i in $(seq 1 180); do
	      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json \
	        && jq -e '.status == "healthy"' /tmp/health.json > /dev/null; then
	        HEALTHY=1
	        break
	      fi
	      sleep 1
	    done
	    if [ "$HEALTHY" != "1" ]; then
	      # Show why: without this the step would fail on a bare jq exit code.
	      echo "Studio did not become healthy in 180s"
	      tail -200 logs/studio.log
	      exit 1
	    fi

	    # ── 2. Rotate the bootstrap password and obtain a bearer token ──
	    OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
	    NEW="CITool-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
	    # Mask both passwords so they never appear in the job log.
	    echo "::add-mask::$OLD"
	    echo "::add-mask::$NEW"
	    OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	      -H 'content-type: application/json' \
	      -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
	    # Same guard as the OpenAI/Anthropic job: a 200 without a token would
	    # otherwise surface later as a confusing 401 on /api/inference/load.
	    [ -n "$OLD_TOKEN" ] && [ "$OLD_TOKEN" != "null" ] || { echo "bootstrap login failed"; exit 1; }
	    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
	      -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
	      -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
	    TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	      -H 'content-type: application/json' \
	      -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
	    [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ] || { echo "new login failed"; exit 1; }
	    # Exported as API_KEY for the Python test step.
	    echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"

	    # ── 3. Load the cached GGUF by absolute path ────────────────────
	    # GITHUB_WORKSPACE on windows-latest is a Windows path with
	    # backslashes ("D:\a\unsloth\unsloth"). Bash handles it as a
	    # raw string, but we cannot embed `\a` etc. in JSON without
	    # JSON-string-escaping every backslash. Replace `\` with `/`
	    # via bash parameter expansion -- pathlib.Path on Windows
	    # accepts forward slashes natively, so Studio's loader sees
	    # a normal path.
	    GGUF_PATH="${GITHUB_WORKSPACE//\\//}/gguf-cache/${GGUF_FILE}"
	    ls -lh "$GGUF_PATH"
	    # Retry: same rationale as the OpenAI/Anthropic job.
	    LOAD_OK=0
	    for attempt in 1 2 3; do
	      # Drop the previous attempt's body so a failed attempt never prints
	      # a stale response.
	      rm -f /tmp/load.json
	      # `|| HTTP=000` is what makes the retry effective: the step runs
	      # under `bash -e`, so a curl transport failure (connection reset,
	      # timeout) would otherwise abort the script at this assignment and
	      # skip the remaining attempts.
	      HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
	        -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
	        -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
	        --max-time 600 \
	        -d "{\"model_path\":\"$GGUF_PATH\",\"is_lora\":false,\"max_seq_length\":2048}") || HTTP=000
	      if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
	      echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:"
	      cat /tmp/load.json || true
	      sleep 10
	    done
	    [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
	    jq '{status, display_name}' /tmp/load.json

	- name: Tool calling, server-side tools, thinking on/off
	  env:
	    # Derived from the job-level STUDIO_PORT so the port is defined once.
	    BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }}
	  run: |
	    python - <<'PY'
	    import json
	    import os
	    import urllib.error
	    import urllib.request

	    BASE = os.environ["BASE_URL"]
	    KEY = os.environ["API_KEY"]  # bearer token exported by the login step
	    SEED = 3407
	    # Same temperature shim as the Mac job. Small Qwen3.5-2B
	    # quants can degenerate at temperature=0; a small non-zero
	    # temperature with a fixed seed keeps the test deterministic
	    # while escaping the trap.
	    TEMP = 0.2

	    def _build_request(path, body):
	        """Build an authenticated JSON POST request for BASE + path."""
	        return urllib.request.Request(
	            f"{BASE}{path}",
	            data = json.dumps(body).encode(),
	            method = "POST",
	            headers = {
	                "Authorization": f"Bearer {KEY}",
	                "Content-Type": "application/json",
	            },
	        )

	    def post(path, body, *, timeout = 240):
	        """POST body as JSON and return (status, parsed_json).

	        urlopen raises HTTPError on non-2xx responses; that is converted
	        back into a (status, body) pair so the callers' status asserts can
	        report the server's error payload instead of a bare traceback.
	        """
	        try:
	            with urllib.request.urlopen(_build_request(path, body), timeout = timeout) as resp:
	                return resp.status, json.loads(resp.read().decode())
	        except urllib.error.HTTPError as err:
	            raw = err.read().decode(errors = "replace")
	            try:
	                return err.code, json.loads(raw)
	            except json.JSONDecodeError:
	                return err.code, {"raw": raw}

	    def post_sse(path, body, *, timeout = 600):
	        """POST with stream=True and return the concatenated delta.content text.

	        Non-"data: " lines and undecodable chunks are skipped; reading
	        stops at the "[DONE]" sentinel.
	        """
	        body = {**body, "stream": True}
	        parts = []
	        with urllib.request.urlopen(_build_request(path, body), timeout = timeout) as resp:
	            for raw in resp:
	                line = raw.decode().strip()
	                if not line.startswith("data: "):
	                    continue
	                payload = line[6:]
	                if payload == "[DONE]":
	                    break
	                try:
	                    chunk = json.loads(payload)
	                except json.JSONDecodeError:
	                    continue
	                for choice in chunk.get("choices", []):
	                    delta = choice.get("delta", {}) or {}
	                    if delta.get("content"):
	                        parts.append(delta["content"])
	        return "".join(parts)

	    # ── 1. Standard OpenAI function calling ──────────────────────
	    weather_tool = {
	        "type": "function",
	        "function": {
	            "name": "get_weather",
	            "description": "Get current weather for a city.",
	            "parameters": {
	                "type": "object",
	                "properties": {"city": {"type": "string"}},
	                "required": ["city"],
	            },
	        },
	    }

	    status, data = post("/v1/chat/completions", {
	        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
	        "tools": [weather_tool],
	        "tool_choice": "required",
	        "stream": False,
	        "temperature": TEMP,
	        "seed": SEED,
	        "max_tokens": 600,
	    })
	    assert status == 200, f"tool call status {status}: {data}"
	    choice = data["choices"][0]
	    tool_calls = (choice.get("message") or {}).get("tool_calls") or []
	    if tool_calls:
	        tc = tool_calls[0]
	        assert tc["function"]["name"] == "get_weather", (
	            f"unexpected tool name: {tc['function']['name']!r}"
	        )
	        args = json.loads(tc["function"]["arguments"])
	        assert args.get("city"), f"missing city arg: {args}"
	        print(f"[tools] PASS function calling -> {tc['function']['name']}({args}) finish={choice.get('finish_reason')!r}")
	    else:
	        # Model-quality drift is only a warning; the HTTP path worked.
	        print(
	            f"[tools] WARN function calling: no tool_calls (finish_reason="
	            f"{choice.get('finish_reason')!r}); HTTP path OK, model output drift."
	        )

	    # ── 2. Server-side python tool ───────────────────────────────
	    content = post_sse("/v1/chat/completions", {
	        "messages": [{"role": "user", "content": "What is 123 * 456? Use the python tool to compute it and tell me the number."}],
	        "enable_tools": True,
	        "enabled_tools": ["python"],
	        "session_id": "ci-tool-calling-py",
	        "temperature": TEMP,
	        "seed": SEED,
	        "max_tokens": 600,
	    })
	    if "56088" in content or "56,088" in content:
	        print(f"[tools] PASS python tool ({len(content)} chars, found 56088)")
	    else:
	        assert content, "python tool: SSE stream empty"
	        print(
	            f"[tools] WARN python tool: SSE OK ({len(content)} chars) but "
	            f"model didn't return 56088 -- model output drift"
	        )

	    # ── 3. Server-side bash (terminal) tool ──────────────────────
	    # On Windows the terminal tool resolves to the system shell
	    # (cmd.exe wrapper) and `echo hello-bash-tool` works the same
	    # way it does on POSIX. The model still has to choose to
	    # invoke the tool; assert non-empty SSE if it doesn't.
	    content = post_sse("/v1/chat/completions", {
	        "messages": [{"role": "user", "content": "Use the terminal tool to run `echo hello-bash-tool` and tell me the exact output."}],
	        "enable_tools": True,
	        "enabled_tools": ["terminal"],
	        "session_id": "ci-tool-calling-bash",
	        "temperature": TEMP,
	        "seed": SEED,
	        "max_tokens": 600,
	    })
	    if "hello-bash-tool" in content:
	        print(f"[tools] PASS terminal tool ({len(content)} chars)")
	    else:
	        assert content, "terminal tool: SSE stream empty"
	        print(
	            f"[tools] WARN terminal tool: SSE OK ({len(content)} chars) but "
	            f"model didn't echo 'hello-bash-tool' -- model output drift"
	        )

	    # ── 4. Server-side web_search tool ───────────────────────────
	    # DuckDuckGo can be flaky from CI runners; only assert that
	    # the SSE stream opens and yields any data.
	    try:
	        content = post_sse("/v1/chat/completions", {
	            "messages": [{"role": "user", "content": "Search the web for 'unsloth ai github' and summarise."}],
	            "enable_tools": True,
	            "enabled_tools": ["web_search"],
	            "session_id": "ci-tool-calling-web",
	            "temperature": TEMP,
	            "seed": SEED,
	            "max_tokens": 400,
	        })
	        print(f"[tools] PASS web_search stream ({len(content)} chars)")
	    except Exception as exc:
	        print(f"[tools] WARN web_search probe failed (non-blocking): {exc}")

	    # ── 5. Thinking on / off ─────────────────────────────────────
	    def thinking_call(enable):
	        """Return content + reasoning_content for one non-streamed completion."""
	        status, data = post("/v1/chat/completions", {
	            "messages": [{"role": "user", "content": "Briefly: is 17 prime?"}],
	            "stream": False,
	            "enable_thinking": enable,
	            "temperature": TEMP,
	            "seed": SEED,
	            "max_tokens": 300,
	        })
	        assert status == 200, f"thinking call (enable={enable}) status {status}: {data}"
	        msg = data["choices"][0]["message"]
	        raw = (msg.get("content") or "") + (msg.get("reasoning_content") or "")
	        return raw

	    on_text = thinking_call(True)
	    off_text = thinking_call(False)
	    # Heuristic: an explicit <think> tag or a long reply counts as "thought".
	    had_think_on = ("<think>" in on_text) or len(on_text) > 80
	    if not had_think_on:
	        print(
	            f"[tools] WARN enable_thinking=True produced no thinking signal: "
	            f"{on_text[:200]!r}"
	        )
	    assert "<think>" not in off_text, (
	        f"enable_thinking=False but <think> still present: {off_text!r}"
	    )
	    print(f"[tools] PASS thinking on/off (on={len(on_text)} chars, off={len(off_text)} chars)")
	    PY

	# Teardown + diagnostics. All three steps use `if: always()` so they run
	# even when a test step above failed; the two diagnostic steps also set
	# continue-on-error so they can never turn a green job red.
	- name: Stop Studio
	if: always()
	# Run as cmd so we are not running through the Git Bash shell;
	# Git Bash on windows-latest has been observed to exit 143
	# (SIGTERM) from any inline kill/sleep block, masking a green
	# test run. The runner reclaims the Studio child process at
	# job end either way, so just emit a marker and exit 0.
	shell: cmd
	run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)

	- name: Collect llama-server logs
	if: always()
	# A transient Windows DLL-init crash (0xC0000142) in this diagnostic
	# copy must not fail an otherwise-green job.
	continue-on-error: true
	shell: bash
	# Copy llama-server's own stdout/stderr (teed by Studio under
	# ~/.unsloth/studio/logs/llama-server/) into the workspace so
	# upload-artifact can pick it up. Crucial for diagnosing a
	# subprocess crash where Studio's traceback only shows the
	# symptom (httpx ReadError) but not the cause.
	run: \|
	mkdir -p logs/llama-server
	cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null \|\| \
	echo "no llama-server logs to collect"

	- name: Upload logs
	if: always()
	# Diagnostic only: a transient artifact-service drop must not fail a green job.
	continue-on-error: true
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	# Artifact names are distinct per job (each job uploads its own logs).
	name: windows-tool-calling-log
	path: \|
	logs/studio.log
	logs/install.log
	logs/llama-server/*.log
	retention-days: 7

	# ─────────────────────────────────────────────────────────────────────
	# Job 3: JSON, images
	# ─────────────────────────────────────────────────────────────────────
	json-images:
	name: JSON, images
	runs-on: windows-latest
	timeout-minutes: 35
	# Every `run:` step uses bash unless the step sets its own `shell:`.
	defaults:
	run:
	shell: bash
	env:
	# Vision model under test plus its multimodal projector file; both are
	# downloaded by the Prime step and both feed into the cache key.
	GGUF_REPO: unsloth/Qwen3-VL-2B-Instruct-GGUF
	GGUF_VARIANT: UD-IQ2_XXS
	GGUF_FILE: Qwen3-VL-2B-Instruct-UD-IQ2_XXS.gguf
	MMPROJ_FILE: mmproj-F16.gguf
	# Each job uses its own port (18888 / 18898 / 18899). Quoted so it
	# stays a string.
	STUDIO_PORT: '18899'
	# Hugging Face cache lives inside the workspace so the cache
	# restore/save steps (path: hf-cache) can persist it between runs.
	HF_HOME: ${{ github.workspace }}/hf-cache
	# Force UTF-8 for stdio (Windows defaults to cp1252; hf
	# download / Studio CLI print "✓" checkmarks and crash
	# otherwise).
	PYTHONIOENCODING: utf-8
	PYTHONUTF8: '1'
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	with:
	node-version: '22'

	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
	with:
	python-version: '3.12'

	# Split restore + save so a transient restore-side failure does not
	# kill the whole job. See the matching block in the tool-calling job
	# for the full rationale (actions/cache#1621). This is the block that
	# actually broke in run 25713577488: "Cache hit for: <key>" was
	# logged, the step exited non-zero in ~0.3 s without extracting the
	# 3.4 GiB archive, and steps 6-15 were skipped.
	- name: Restore HF_HOME cache for ${{ env.GGUF_REPO }} (model + mmproj)
	id: cache-hf
	uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	# A failed restore is tolerated; the prime step's `if:` treats any
	# non-success outcome like a cache miss and downloads the files again.
	continue-on-error: true
	with:
	path: hf-cache
	# Exact-match key only (no restore-keys). The key covers repo, variant and
	# mmproj file name; bump the trailing -v2 to invalidate existing caches.
	key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-${{ env.MMPROJ_FILE }}-v2

	- name: Prime HF_HOME with the GGUF + mmproj
	id: prime-hf
	# Runs on a cache miss, or when the restore step itself did not succeed.
	if: steps.cache-hf.outputs.cache-hit != 'true' \|\| steps.cache-hf.outcome != 'success'
	env:
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	run: \|
	python -m pip install --upgrade huggingface_hub
	mkdir -p hf-cache
	# Model weights, then the multimodal projector from the same repo.
	bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE"
	bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$MMPROJ_FILE"
	# NOTE(review): stories260K.gguf is not referenced by any later step in
	# this job and is not part of the cache key -- presumably consumed by
	# Studio or the installer; confirm.
	bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf

	- name: Save HF_HOME cache for ${{ env.GGUF_REPO }} (model + mmproj)
	# Saves only when the prime step actually ran and succeeded. On a cache
	# hit the prime step is skipped, so nothing is re-saved.
	if: always() && steps.prime-hf.outcome == 'success'
	uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: hf-cache
	# Must stay identical to the key used by the restore step.
	key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-${{ env.MMPROJ_FILE }}-v2

	- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
	shell: pwsh
	# See studio-windows-update-smoke.yml for the full rationale.
	# tl;dr: setup.ps1 needs npm >=11 to skip a 35 s winget Node
	# reinstall, and Defender's real-time scan dominates the
	# frontend / uv-pip-extract steps.
	run: \|
	$ProgressPreference = 'SilentlyContinue'
	Write-Host "npm version before upgrade: $(npm -v)"
	npm install -g 'npm@^11' 2>&1 \| Out-Host
	Write-Host "npm version after upgrade: $(npm -v)"
	# NOTE: do NOT pre-create these directories. See
	# studio-windows-update-smoke.yml for the full rationale --
	# creating an empty studio/frontend/dist trips setup.ps1's
	# mtime-based staleness check into "frontend up to date, skip
	# rebuild" and Studio boots with an empty dist directory.
	# Add-MpPreference accepts paths that do not yet exist.
	# Each exclusion is attempted independently; a failure is logged by
	# the catch block and never fails the step.
	foreach ($p in @(
	"$env:USERPROFILE\.unsloth",
	"$env:USERPROFILE\AppData\Local\uv",
	"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
	"$env:GITHUB_WORKSPACE\studio\frontend\dist"
	)) {
	try {
	Add-MpPreference -ExclusionPath $p -ErrorAction Stop
	Write-Host "Defender exclusion added: $p"
	} catch {
	Write-Host "Defender exclusion skipped ($($_.Exception.Message)): $p"
	}
	}

	- name: Install Studio (--local, --no-torch)
	shell: pwsh
	env:
	# NOTE(review): presumably lets the installer make authenticated GitHub
	# API calls; confirm in install.ps1.
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	run: \|
	New-Item -ItemType Directory -Force -Path logs \| Out-Null
	# *>&1 captures Write-Host (Information stream) output;
	# plain 2>&1 does not. setup.ps1 emits "prebuilt installed
	# and validated" via Write-Host, and we grep for that.
	# NOTE(review): the next step currently greps logs/install.log for
	# "falling back to source build", not for the message quoted above.
	$ProgressPreference = 'SilentlyContinue'
	& ./install.ps1 --local --no-torch *>&1 \| Tee-Object -FilePath logs/install.log

	- name: Assert install.ps1 used the Windows llama.cpp prebuilt
	run: \|
	# Mostly a filesystem check (prebuilt marker file + server binary);
	# the install log is scanned only for the source-build fallback message.
	LLAMA_DIR=~/.unsloth/llama.cpp
	INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
	BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
	# 1. The installer must not have fallen back to compiling llama.cpp.
	if grep -q "falling back to source build" logs/install.log; then
	echo "::error::install.ps1 fell back to source-build llama.cpp on Windows."
	grep -E "llama-prebuilt\|llama.cpp" logs/install.log \| tail -60
	exit 1
	fi
	# 2. The prebuilt marker file must exist.
	if [ ! -f "$INFO" ]; then
	echo "::error::no UNSLOTH_PREBUILT_INFO.json at $INFO."
	ls -la "$LLAMA_DIR" \|\| true
	exit 1
	fi
	# 3. The server binary must exist at the expected path.
	if [ ! -f "$BIN" ]; then
	echo "::error::no llama-server.exe at $BIN."
	ls -la "$LLAMA_DIR/build/bin" \|\| true
	exit 1
	fi
	echo "install.ps1 installed the Windows prebuilt llama.cpp:"
	cat "$INFO"

	- name: Add Studio shim to GITHUB_PATH
	# Makes the `unsloth` command available to the following steps.
	run: \|
	SHIM_DIR=~/.unsloth/studio/bin
	if [ ! -f "$SHIM_DIR/unsloth.exe" ]; then
	echo "::error::unsloth.exe shim not found at $SHIM_DIR"
	ls -la ~/.unsloth/studio/ \|\| true
	exit 1
	fi
	# cygpath -w converts the Git Bash path to a native Windows path before
	# it is appended to GITHUB_PATH.
	cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"

	- name: Install OpenAI + Anthropic Python SDKs
	# Client libraries imported by the Python test step later in this job.
	run: python -m pip install 'openai>=1.50' 'anthropic>=0.40'

	- name: Reset auth + boot Studio (API-only)
	run: \|
	# NOTE(review): presumably regenerates the bootstrap password that the
	# next step reads from ~/.unsloth/studio/auth/.bootstrap_password; confirm.
	unsloth studio reset-password
	mkdir -p logs
	# Start Studio in the background with stdout/stderr sent to
	# logs/studio.log, then export its PID for later steps.
	UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
	> logs/studio.log 2>&1 &
	echo "STUDIO_PID=$!" >> "$GITHUB_ENV"

	- name: Wait for /api/health, log in, change password, load model
	  # Polls the health endpoint, rotates the bootstrap password, exports a
	  # bearer token as API_KEY, then loads the GGUF (up to three attempts).
	  run: |
	    # Poll once per second for up to 180 tries.
	    for i in $(seq 1 180); do
	      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
	        jq -e '.status == "healthy"' /tmp/health.json && break
	      fi
	      sleep 1
	    done
	    # Final check; on failure show the tail of the Studio log in the step
	    # output (the no-VS job does the same).
	    jq -e '.status == "healthy"' /tmp/health.json || {
	      echo "::error::Studio did not report healthy in time"
	      tail -200 logs/studio.log || true
	      exit 1
	    }
	    OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
	    NEW="CIJson-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
	    echo "::add-mask::$OLD"
	    echo "::add-mask::$NEW"
	    OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	      -H 'content-type: application/json' \
	      -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
	    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
	      -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
	      -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
	    TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	      -H 'content-type: application/json' \
	      -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
	    # jq -r prints the string "null" when the field is missing; fail here
	    # instead of exporting an unusable API key.
	    if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then
	      echo "::error::login with the new password returned no access_token"
	      exit 1
	    fi
	    # Later steps print their env block, so mask the token like the passwords.
	    echo "::add-mask::$TOKEN"
	    echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"
	    # Retry: same rationale as the OpenAI/Anthropic and Tool calling jobs.
	    LOAD_OK=0
	    for attempt in 1 2 3; do
	      # The step runs under `bash -eo pipefail`. Without `|| true`, a curl
	      # transport error or timeout would abort the step on the assignment
	      # and the remaining attempts would never run. curl still prints the
	      # status code (000 on transport failure).
	      HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
	        -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
	        -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
	        --max-time 900 \
	        -d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}") || true
	      if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
	      echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:"
	      cat /tmp/load.json || true
	      sleep 10
	    done
	    [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
	    jq '{status, display_name, is_vision}' /tmp/load.json

	- name: JSON schema decoding + image input
	  env:
	    # Derived from the job-level STUDIO_PORT so the two cannot drift apart.
	    BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }}
	  run: |
	    python - <<'PY'
	    # Exercises three request shapes against the running Studio:
	    #   1. /v1/chat/completions with response_format=json_object, plus a
	    #      plain completion,
	    #   2. an OpenAI-style image_url content part (base64 data URI),
	    #   3. an Anthropic-style base64 image source block.
	    # Hard failures: non-200 status or a malformed response envelope in
	    # part 1. Model-output checks and both image calls only print
	    # PASS/WARN lines.
	    import base64
	    import json
	    import os
	    import urllib.request
	    from openai import OpenAI
	    from anthropic import Anthropic

	    BASE = os.environ["BASE_URL"]
	    KEY = os.environ["API_KEY"]
	    SEED = 3407
	    TEMP = 0.2

	    def post(path, body, *, timeout = 240):
	        """POST `body` as JSON to BASE + path with the bearer token.

	        Returns (HTTP status, decoded JSON body). urllib raises on
	        non-2xx responses, so callers only see successful statuses.
	        """
	        req = urllib.request.Request(
	            f"{BASE}{path}",
	            data = json.dumps(body).encode(),
	            method = "POST",
	            headers = {
	                "Authorization": f"Bearer {KEY}",
	                "Content-Type": "application/json",
	            },
	        )
	        with urllib.request.urlopen(req, timeout = timeout) as resp:
	            return resp.status, json.loads(resp.read().decode())

	    # ── 1. response_format = json_object (JSON mode) ─────────────
	    status, data = post("/v1/chat/completions", {
	        "model": "default",
	        "messages": [
	            {"role": "system", "content": 'Reply with a single JSON object of the form {"city": "...", "country": "..."}. Output ONLY the JSON, nothing else.'},
	            {"role": "user", "content": "What is the capital of France?"},
	        ],
	        "temperature": TEMP,
	        "max_tokens": 600,
	        "seed": SEED,
	        "stream": False,
	        "enable_thinking": False,
	        "response_format": {"type": "json_object"},
	    }, timeout = 600)
	    assert status == 200, f"json status {status}: {data}"
	    assert (
	        isinstance(data.get("choices"), list)
	        and data["choices"]
	        and "message" in data["choices"][0]
	    ), f"json response envelope malformed: {data}"
	    content = (data["choices"][0]["message"].get("content") or "").strip()
	    print(f"[json] raw json_object content: {content!r}")
	    # Tolerate a Markdown code fence (optionally tagged "json") around the payload.
	    if content.startswith("```"):
	        content = content.split("```", 2)[1]
	        if content.startswith("json"):
	            content = content[4:]
	        content = content.strip("`\n ")
	    if content:
	        try:
	            parsed = json.loads(content)
	            # The decoded value may be a list or scalar; only a dict has
	            # .get, so guard it to keep this a soft WARN rather than a crash.
	            city = parsed.get("city", "") if isinstance(parsed, dict) else ""
	            if "paris" in str(city).lower():
	                print(f"[json] PASS json_object -> {parsed}")
	            else:
	                print(f"[json] WARN json_object decoded but city!=Paris: {parsed}")
	        except json.JSONDecodeError as exc:
	            print(f"[json] WARN json_object content not parseable ({exc}); content={content!r}")
	    else:
	        print("[json] WARN json_object produced empty content")

	    # Plain (non-JSON-mode) completion on the same endpoint.
	    status2, data2 = post("/v1/chat/completions", {
	        "model": "default",
	        "messages": [{"role": "user", "content": "What is the capital of France? Answer with one word."}],
	        "temperature": TEMP,
	        "max_tokens": 400,
	        "seed": SEED,
	        "stream": False,
	        "enable_thinking": False,
	    }, timeout = 600)
	    assert status2 == 200, f"plain status {status2}: {data2}"
	    plain = (data2["choices"][0]["message"].get("content") or "").lower()
	    print(f"[json] plain capital-of-france reply: {plain!r}")
	    if "paris" in plain:
	        print("[json] PASS plain inference path (paris mentioned)")
	    else:
	        print(
	            "[json] WARN plain inference returned no 'paris' -- "
	            "model output drift. HTTP path validated separately above."
	        )

	    # ── 2. OpenAI image_url (data URI base64) ───────────────────
	    PNG_64X64_RED_B64 = (
	        "iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAIAAAAlC+aJAAAAYklEQVR4nO3PMQ0AIADAMEAI/k"
	        "UhBhEcDcmqYJtn7/GzpQNeNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA"
	        "1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaBdCJ0BmMJ25zMAAAAASUVORK5CYII="
	    )
	    data_uri = f"data:image/png;base64,{PNG_64X64_RED_B64}"

	    # On Windows + the Qwen3-VL mmproj, llama.cpp's vision
	    # path runs on CPU (no Metal involvement) and is expected
	    # to work. The check is deliberately soft: any exception
	    # is printed as a WARN line and does not fail the job, so
	    # a WARN here should be investigated by hand.
	    client = OpenAI(base_url = f"{BASE}/v1", api_key = KEY)
	    try:
	        openai_resp = client.chat.completions.create(
	            model = "default",
	            temperature = TEMP,
	            max_tokens = 80,
	            seed = SEED,
	            messages = [{
	                "role": "user",
	                "content": [
	                    {"type": "image_url", "image_url": {"url": data_uri}},
	                    {"type": "text", "text": "What colour dominates this image? Reply in one word."},
	                ],
	            }],
	        )
	        openai_text = (openai_resp.choices[0].message.content or "").lower()
	        print(f"[image/openai] reply: {openai_text!r}")
	        if openai_text:
	            print("[image/openai] PASS image_url accepted, non-empty response")
	        else:
	            print("[image/openai] WARN image_url accepted but empty content")
	    except Exception as exc:
	        print(
	            f"[image/openai] WARN image_url SDK call raised: {type(exc).__name__}: "
	            f"{exc}. Studio successfully forwarded the request; failure here is "
	            f"upstream llama.cpp vision behaviour."
	        )

	    # ── 3. Anthropic source/base64 image ────────────────────────
	    # The SDK requires an api_key; the real credential is the bearer
	    # token sent through default_headers.
	    anthropic = Anthropic(
	        base_url = BASE,
	        api_key = "unused",
	        default_headers = {"Authorization": f"Bearer {KEY}"},
	    )
	    try:
	        a_msg = anthropic.messages.create(
	            model = "default",
	            max_tokens = 80,
	            temperature = TEMP,
	            extra_body = {"seed": SEED},
	            messages = [{
	                "role": "user",
	                "content": [
	                    {
	                        "type": "image",
	                        "source": {
	                            "type": "base64",
	                            "media_type": "image/png",
	                            "data": PNG_64X64_RED_B64,
	                        },
	                    },
	                    {"type": "text", "text": "Describe this image briefly."},
	                ],
	            }],
	        )
	        # Concatenate only the text blocks of the reply.
	        a_text = "".join(b.text for b in a_msg.content if getattr(b, "type", None) == "text")
	        print(f"[image/anthropic] reply: {a_text!r}")
	        if a_text:
	            print("[image/anthropic] PASS source/base64 accepted, non-empty response")
	        else:
	            print("[image/anthropic] WARN source/base64 accepted but empty content")
	    except Exception as exc:
	        print(
	            f"[image/anthropic] WARN anthropic image SDK call raised: "
	            f"{type(exc).__name__}: {exc}. Likely upstream llama.cpp vision "
	            f"behaviour, NOT a Studio regression."
	        )
	    PY

	- name: Stop Studio
	if: always()
	# Run as cmd so we are not running through the Git Bash shell;
	# Git Bash on windows-latest has been observed to exit 143
	# (SIGTERM) from any inline kill/sleep block, masking a green
	# test run. The runner reclaims the Studio child process at
	# job end either way, so just emit a marker and exit 0.
	# cmd expands %STUDIO_PID% from the value the boot step wrote to
	# GITHUB_ENV; nothing is killed here.
	shell: cmd
	run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)

	- name: Collect llama-server logs
	if: always()
	# A transient Windows DLL-init crash (0xC0000142) in this diagnostic
	# copy must not fail an otherwise-green job.
	continue-on-error: true
	shell: bash
	# Copy llama-server's own stdout/stderr (teed by Studio under
	# ~/.unsloth/studio/logs/llama-server/) into the workspace so
	# upload-artifact can pick it up. Crucial for diagnosing a
	# subprocess crash where Studio's traceback only shows the
	# symptom (httpx ReadError) but not the cause.
	run: \|
	mkdir -p logs/llama-server
	# If no *.log exists the glob stays unmatched and cp fails; its error
	# output is discarded and the fallback echo keeps the exit status 0.
	cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null \|\| \
	echo "no llama-server logs to collect"

	- name: Upload logs
	if: always()
	# Diagnostic only: a transient artifact-service drop must not fail a green job.
	continue-on-error: true
	# The artifact bundles the Studio, installer and llama-server logs and is
	# kept for 7 days. Its name is specific to this job.
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	name: windows-json-images-log
	path: \|
	logs/studio.log
	logs/install.log
	logs/llama-server/*.log
	retention-days: 7

	# ── folded from studio-windows-no-vs-smoke.yml: install + run with no Visual Studio ──
	no-vs-cpu:
	# Hides Visual Studio and CMake on the runner, installs Studio, and checks
	# that the llama.cpp prebuilt is used and that inference works without any
	# build tools. VS and CMake are restored at the end.
	name: Studio install + inference without Visual Studio
	runs-on: windows-latest
	timeout-minutes: 35
	defaults:
	run:
	shell: bash
	env:
	# Model coordinates shared by the cache key, the download step and the
	# /api/inference/load request further down in this job.
	GGUF_REPO: unsloth/gemma-3-270m-it-GGUF
	GGUF_VARIANT: UD-Q4_K_XL
	GGUF_FILE: gemma-3-270m-it-UD-Q4_K_XL.gguf
	# Quoted so the port stays a string.
	STUDIO_PORT: '18820'
	HF_HOME: ${{ github.workspace }}/hf-cache
	# Force UTF-8 stdio, as in the other jobs of this workflow.
	PYTHONIOENCODING: utf-8
	PYTHONUTF8: '1'
	steps:
	# persist-credentials: false keeps the checkout token out of the local
	# git config, since later steps run checked-out PR code.
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	with:
	node-version: '22'

	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
	with:
	python-version: '3.12'

	- name: Restore HF_HOME for ${{ env.GGUF_REPO }}
	id: cache-hf
	uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	# A failed restore is tolerated; the prime step's `if:` treats any
	# non-success outcome like a cache miss and downloads the file again.
	continue-on-error: true
	with:
	path: hf-cache
	# Exact-match key only (no restore-keys); bump the trailing -v2 to
	# invalidate existing caches.
	key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2

	- name: Prime HF_HOME with the GGUF
	id: prime-hf
	# Runs on a cache miss, or when the restore step itself did not succeed.
	if: steps.cache-hf.outputs.cache-hit != 'true' \|\| steps.cache-hf.outcome != 'success'
	env:
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	run: \|
	python -m pip install --upgrade huggingface_hub
	mkdir -p hf-cache
	bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE"
	# NOTE(review): stories260K.gguf is not referenced by any later step in
	# this job and is not part of the cache key -- presumably consumed by
	# Studio or the installer; confirm.
	bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf

	- name: Save HF_HOME for ${{ env.GGUF_REPO }}
	# Saves only when the prime step actually ran and succeeded. On a cache
	# hit the prime step is skipped, so nothing is re-saved.
	if: always() && steps.prime-hf.outcome == 'success'
	uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: hf-cache
	# Must stay identical to the key used by the restore step.
	key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2

	- name: Pre-install Windows tweaks (npm 11 + Defender exclusions)
	# Compact variant of the same-named step in the json-images job: upgrade
	# npm to 11.x and add Defender exclusions for the install directories.
	shell: pwsh
	run: \|
	$ProgressPreference = 'SilentlyContinue'
	npm install -g 'npm@^11' 2>&1 \| Out-Host
	foreach ($p in @(
	"$env:USERPROFILE\.unsloth",
	"$env:USERPROFILE\AppData\Local\uv",
	"$env:GITHUB_WORKSPACE\studio\frontend\node_modules",
	"$env:GITHUB_WORKSPACE\studio\frontend\dist"
	)) {
	# Best effort: the empty catch silently ignores an exclusion that
	# cannot be added.
	try { Add-MpPreference -ExclusionPath $p -ErrorAction Stop } catch { }
	}

	- name: Hide Visual Studio + CMake (simulate a host with no build tools)
	shell: pwsh
	run: \|
	$ErrorActionPreference = 'Stop'
	# A Program Files dir can hold a transient handle (Defender / MSBuild node)
	# so Rename-Item intermittently fails with "Access is denied"; retry to ride it out.
	# Up to 6 attempts, 3 seconds apart; the last failure is rethrown.
	function Rename-WithRetry($Path, $NewName) {
	for ($i = 1; $i -le 6; $i++) {
	try { Rename-Item -LiteralPath $Path -NewName $NewName -ErrorAction Stop; return }
	catch { if ($i -eq 6) { throw }; Start-Sleep -Seconds 3 }
	}
	}
	# Rename the Visual Studio install roots (incl. the Installer that holds
	# vswhere.exe) so Find-VsBuildTools' vswhere + filesystem scan both miss.
	foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
	if (Test-Path -LiteralPath $d) {
	Rename-WithRetry $d ((Split-Path $d -Leaf) + '.vsoff')
	Write-Host "Hid VS: $d"
	}
	}
	# Surgically rename each cmake executable on PATH (not its parent dir --
	# cmake can share a dir with other shims) so Get-Command cmake fails.
	$hidden = @()
	foreach ($c in (Get-Command cmake -All -ErrorAction SilentlyContinue)) {
	if ($c.Source -and (Test-Path -LiteralPath $c.Source)) {
	Rename-WithRetry $c.Source ((Split-Path $c.Source -Leaf) + '.off')
	$hidden += $c.Source
	Write-Host "Hid cmake: $($c.Source)"
	}
	}
	# Export the original cmake paths, joined with a pipe character, so the
	# restore step can rename each "<path>.off" back. This is written only
	# after every rename succeeded.
	("HIDDEN_CMAKE=" + ($hidden -join '\|')) \| Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

	- name: Assert Visual Studio + CMake are genuinely undetectable
	# Guards the test setup: if detection still finds build tools, the later
	# "no Visual Studio" assertions would prove nothing.
	shell: pwsh
	run: \|
	$ErrorActionPreference = 'Stop'
	# Dot-source the test helper, then define only the two detection
	# functions from setup.ps1 instead of running the whole script.
	# NOTE(review): Get-FunctionSource presumably returns the named
	# function's source text -- confirm in the helper.
	. (Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1/Get-FunctionSource.ps1')
	$setup = Join-Path $env:GITHUB_WORKSPACE 'studio/setup.ps1'
	foreach ($fn in @('Resolve-VsGeneratorFromLabel', 'Find-VsBuildTools')) {
	. ([scriptblock]::Create((Get-FunctionSource -Path $setup -Name $fn)))
	}
	# With ErrorActionPreference = 'Stop', Write-Error already terminates
	# the step; the trailing `exit 1` is only a fallback.
	$vs = Find-VsBuildTools
	if ($vs) { Write-Error "Find-VsBuildTools still detects VS: $($vs.Generator) @ $($vs.InstallPath)"; exit 1 }
	if (Get-Command cmake -ErrorAction SilentlyContinue) { Write-Error "cmake is still on PATH"; exit 1 }
	if (Get-Command cl.exe -ErrorAction SilentlyContinue) { Write-Error "cl.exe is still on PATH"; exit 1 }
	Write-Host "Confirmed: no Visual Studio, no cmake, no cl.exe."

	- name: PyTorch CPU wheel installs and imports (no Visual Studio)
	# Runs while VS and CMake are hidden. torch is installed from the PyTorch
	# CPU wheel index, with PyPI added as an extra index; the final command
	# imports torch and prints its version and CUDA availability.
	run: \|
	python -m pip install --upgrade pip
	python -m pip install torch --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple
	python -c "import torch; print('torch', torch.__version__, 'cuda?', torch.cuda.is_available())"

	- name: Install Studio (--local, --no-torch) with no build tools present
	shell: pwsh
	env:
	# NOTE(review): presumably lets the installer make authenticated GitHub
	# API calls; confirm in install.ps1.
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	# Withheld on PR: this step runs checked-out PR code; public GGUF still downloads.
	HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN \|\| '' }}
	run: \|
	New-Item -ItemType Directory -Force -Path logs \| Out-Null
	$ProgressPreference = 'SilentlyContinue'
	# *>&1 merges every PowerShell output stream into the pipeline so that
	# logs/install.log, which the next step greps, is complete.
	& ./install.ps1 --local --no-torch *>&1 \| Tee-Object -FilePath logs/install.log

	- name: Assert prebuilt used AND no build tools were installed
	run: \|
	LLAMA_DIR=~/.unsloth/llama.cpp
	INFO="$LLAMA_DIR/UNSLOTH_PREBUILT_INFO.json"
	BIN="$LLAMA_DIR/build/bin/Release/llama-server.exe"
	# Every check sets fail=1 instead of exiting, so a single run reports
	# all problems before the step exits.
	fail=0
	if grep -q "falling back to source build" logs/install.log; then
	echo "::error::install.ps1 fell back to source-build llama.cpp without VS."; fail=1
	fi
	# The deferred build-tool installs must NOT run on the prebuilt path.
	for pat in "Kitware.CMake" "Microsoft.VisualStudio.2022.BuildTools" "installing via winget"; do
	if grep -qi "$pat" logs/install.log; then
	echo "::error::unexpected build-tool install on the prebuilt path: '$pat'"; fail=1
	fi
	done
	# The prebuilt marker file and the server binary must both exist.
	[ -f "$INFO" ] \|\| { echo "::error::no UNSLOTH_PREBUILT_INFO.json"; ls -la "$LLAMA_DIR" \|\| true; fail=1; }
	[ -f "$BIN" ] \|\| { echo "::error::no llama-server.exe"; ls -la "$LLAMA_DIR/build/bin" \|\| true; fail=1; }
	# On failure, print the relevant tail of the install log and fail the step.
	if [ "$fail" != "0" ]; then grep -iE "cmake\|visual studio\|prebuilt\|source build" logs/install.log \| tail -60; exit 1; fi
	echo "Prebuilt installed with no build tools:"
	cat "$INFO"

	- name: Add Studio shim to GITHUB_PATH
	# Makes the `unsloth` command available to the following steps.
	run: \|
	SHIM_DIR=~/.unsloth/studio/bin
	[ -f "$SHIM_DIR/unsloth.exe" ] \|\| { echo "::error::unsloth.exe shim not found"; ls -la ~/.unsloth/studio/ \|\| true; exit 1; }
	# cygpath -w converts the Git Bash path to a native Windows path before
	# it is appended to GITHUB_PATH.
	cygpath -w "$SHIM_DIR" >> "$GITHUB_PATH"

	- name: Reset auth + boot Studio (API-only)
	run: \|
	# NOTE(review): presumably regenerates the bootstrap password that the
	# next step reads from ~/.unsloth/studio/auth/.bootstrap_password; confirm.
	unsloth studio reset-password
	mkdir -p logs
	# Start Studio in the background with stdout/stderr sent to
	# logs/studio.log, then export its PID for later steps.
	UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
	> logs/studio.log 2>&1 &
	echo "STUDIO_PID=$!" >> "$GITHUB_ENV"

	- name: Wait for /api/health, log in, load the GGUF
	  # Polls the health endpoint, rotates the bootstrap password, exports a
	  # bearer token as API_KEY, then loads the GGUF (up to three attempts).
	  run: |
	    # Poll once per second for up to 180 tries.
	    for i in $(seq 1 180); do
	      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
	        jq -e '.status == "healthy"' /tmp/health.json && break
	      fi
	      sleep 1
	    done
	    # Final check; on failure show the tail of the Studio log.
	    jq -e '.status == "healthy"' /tmp/health.json || { tail -200 logs/studio.log; exit 1; }
	    OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
	    NEW="CINoVS-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
	    echo "::add-mask::$OLD"
	    echo "::add-mask::$NEW"
	    OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	      -H 'content-type: application/json' \
	      -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
	    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
	      -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
	      -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
	    TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
	      -H 'content-type: application/json' \
	      -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
	    # jq -r prints the string "null" when the field is missing; fail here
	    # instead of exporting an unusable API key.
	    if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then
	      echo "::error::login with the new password returned no access_token"
	      exit 1
	    fi
	    # Later steps print their env block, so mask the token like the passwords.
	    echo "::add-mask::$TOKEN"
	    echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"
	    LOAD_OK=0
	    for attempt in 1 2 3; do
	      # The step runs under `bash -eo pipefail`. Without `|| true`, a curl
	      # transport error or timeout would abort the step on the assignment
	      # and the remaining attempts would never run. curl still prints the
	      # status code (000 on transport failure).
	      HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \
	        -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
	        -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
	        --max-time 600 \
	        -d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}") || true
	      if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi
	      echo "::warning::/api/inference/load attempt $attempt returned $HTTP"; cat /tmp/load.json || true; sleep 10
	    done
	    [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; }
	    jq '{status, display_name, is_gguf}' /tmp/load.json

	- name: Inference works via the prebuilt llama.cpp (no VS)
	# One non-streaming chat completion at temperature 0. The step passes when
	# the returned message content is non-empty and not JSON null.
	run: \|
	# -f makes curl exit non-zero on an HTTP error status, which fails the step.
	RESP=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/v1/chat/completions" \
	-H "Authorization: Bearer $API_KEY" -H 'content-type: application/json' \
	--max-time 240 \
	-d '{"model":"default","messages":[{"role":"user","content":"What is 1+1? Answer briefly."}],"temperature":0,"max_tokens":32,"stream":false}')
	# Print the message object; if jq cannot parse the body, dump it raw.
	echo "$RESP" \| jq '.choices[0].message' \|\| { echo "$RESP"; exit 1; }
	CONTENT=$(echo "$RESP" \| jq -r '.choices[0].message.content')
	[ -n "$CONTENT" ] && [ "$CONTENT" != "null" ] \|\| { echo "::error::empty completion"; exit 1; }
	echo "Inference OK without Visual Studio: $CONTENT"

	- name: Restore Visual Studio + CMake
	  if: always()
	  # Best-effort cleanup. Rename-Item here can hit the same transient
	  # "Access is denied" that the hide step retries for, and a failed
	  # restore must not turn an otherwise-green job red.
	  continue-on-error: true
	  shell: pwsh
	  run: |
	    # Undo the ".vsoff" rename of each Visual Studio install root.
	    foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
	      $off = "$d.vsoff"
	      if (Test-Path -LiteralPath $off) { Rename-Item -LiteralPath $off -NewName (Split-Path $d -Leaf); Write-Host "Restored $d" }
	    }
	    # HIDDEN_CMAKE holds the original cmake paths joined with a pipe
	    # character; -split takes a regex, so the pipe is escaped.
	    if ($env:HIDDEN_CMAKE) {
	      foreach ($src in ($env:HIDDEN_CMAKE -split '\|')) {
	        if ($src -and (Test-Path -LiteralPath "$src.off")) { Rename-Item -LiteralPath "$src.off" -NewName (Split-Path $src -Leaf) }
	      }
	    }

	- name: Stop Studio
	if: always()
	# No-op marker: nothing is killed here. cmd only echoes the PID that the
	# boot step wrote to GITHUB_ENV; the message itself states that the runner
	# reclaims the process at job end.
	shell: cmd
	run: echo Stop Studio (no-op; runner reclaims STUDIO_PID=%STUDIO_PID% at job end)

	- name: Collect llama-server logs
	if: always()
	# Diagnostic copy only; a failure here must not fail the job.
	continue-on-error: true
	run: \|
	mkdir -p logs/llama-server
	# If no *.log exists the glob stays unmatched and cp fails; its error
	# output is discarded and the fallback echo keeps the exit status 0.
	cp -v ~/.unsloth/studio/logs/llama-server/*.log logs/llama-server/ 2>/dev/null \|\| echo "no llama-server logs"

	- name: Upload logs
	if: always()
	# Diagnostic only; an upload failure must not fail the job.
	continue-on-error: true
	# The artifact bundles the installer, Studio and llama-server logs and is
	# kept for 7 days.
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	name: windows-no-vs-cpu-log
	path: \|
	logs/install.log
	logs/studio.log
	logs/llama-server/*.log
	retention-days: 7

	# ─────────────────────────────────────────────────────────────────────
	# Job B: the GPU (CUDA) prebuilt path is also VS-free (resolve/availability)
	# ─────────────────────────────────────────────────────────────────────
	no-vs-gpu-resolve:
	# With Visual Studio hidden, checks that Windows CUDA and ROCm prebuilt
	# assets exist in the latest unslothai/llama.cpp release and that the
	# prebuilt resolver script runs. No model is downloaded and Studio is not
	# installed in this job.
	name: GPU prebuilt resolves without Visual Studio
	runs-on: windows-latest
	timeout-minutes: 15
	defaults:
	run:
	shell: bash
	env:
	# Force UTF-8 stdio, as in the other jobs of this workflow.
	PYTHONIOENCODING: utf-8
	PYTHONUTF8: '1'
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
	with:
	python-version: '3.12'

	- name: Hide Visual Studio
	# Renames both Visual Studio install roots to "<name>.vsoff"; the restore
	# step at the end of the job renames them back. CMake is left alone here.
	shell: pwsh
	run: \|
	$ErrorActionPreference = 'Stop'
	# Retry the rename: a Program Files dir can hold a transient handle that
	# makes Rename-Item intermittently fail with "Access is denied".
	# Up to 6 attempts, 3 seconds apart; the last failure is rethrown.
	function Rename-WithRetry($Path, $NewName) {
	for ($i = 1; $i -le 6; $i++) {
	try { Rename-Item -LiteralPath $Path -NewName $NewName -ErrorAction Stop; return }
	catch { if ($i -eq 6) { throw }; Start-Sleep -Seconds 3 }
	}
	}
	foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
	if (Test-Path -LiteralPath $d) { Rename-WithRetry $d ((Split-Path $d -Leaf) + '.vsoff'); Write-Host "Hid VS: $d" }
	}

	- name: Windows CUDA and ROCm prebuilts exist in unslothai/llama.cpp (what GPU users download, no VS)
	env:
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	run: \|
	# Fetch the latest release metadata from the GitHub API (authenticated
	# with the workflow token) and match asset names by regex.
	curl -fsSL -H "Authorization: Bearer $GH_TOKEN" \
	"https://api.github.com/repos/unslothai/llama.cpp/releases/latest" > /tmp/rel.json
	echo "release: $(jq -r .tag_name /tmp/rel.json)"
	ASSETS=$(jq -r '.assets[].name' /tmp/rel.json)
	# grep prints the matching asset names; no match fails the step and
	# lists every asset for diagnosis.
	echo "$ASSETS" \| grep -iE 'windows-x64-cuda[0-9]' \|\| {
	echo "::error::no Windows x64 CUDA prebuilt asset found in unslothai/llama.cpp latest release"
	echo "$ASSETS"; exit 1; }
	# AMD parity: hosted runners have no AMD GPU, so the resolver step below
	# can't exercise the ROCm path (it resolves to CPU). Pin the per-gfx
	# Windows ROCm bundles here so a release that drops them fails loudly --
	# the AMD no-VS guarantee otherwise rides only on shared resolver code.
	echo "$ASSETS" \| grep -iE 'windows-x64-rocm-gfx' \|\| {
	echo "::error::no Windows x64 ROCm (per-gfx) prebuilt asset found in unslothai/llama.cpp latest release"
	echo "$ASSETS"; exit 1; }
	echo "Windows CUDA and ROCm prebuilts are available -- GPU users get them without compiling."

	- name: The prebuilt resolver runs without Visual Studio
	env:
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	run: \|
	# Resolver-only (no GPU on hosted runners, so the host resolves to the
	# CPU bundle). The point is that resolution needs no compiler/VS.
	python -m pip install --upgrade huggingface_hub
	# Only the exit code is asserted; the JSON written to /tmp/resolve.json
	# is printed for visibility and its contents are not checked.
	python studio/install_llama_prebuilt.py --resolve-prebuilt latest --output-format json > /tmp/resolve.json \|\| {
	echo "::error::resolver exited non-zero"; cat /tmp/resolve.json \|\| true; exit 1; }
	cat /tmp/resolve.json
	echo "Prebuilt resolver ran with no Visual Studio present."

	- name: Restore Visual Studio
	  if: always()
	  # Best-effort cleanup. Rename-Item here can hit the same transient
	  # "Access is denied" that the hide step retries for, and a failed
	  # restore must not turn an otherwise-green job red.
	  continue-on-error: true
	  shell: pwsh
	  run: |
	    # Undo the ".vsoff" rename of each Visual Studio install root.
	    foreach ($d in @("$env:ProgramFiles\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")) {
	      $off = "$d.vsoff"
	      if (Test-Path -LiteralPath $off) { Rename-Item -LiteralPath $off -NewName (Split-Path $d -Leaf); Write-Host "Restored $d" }
	    }

	# ── folded from studio-setup-ps1-vs2026.yml: setup.ps1 unit tests + real-VS detection + vcredist ──
	pester:
	# Runs the Pester suite under tests/studio_setup_ps1 and uploads the
	# NUnit XML results. No job-level default shell is set; each run step
	# selects pwsh itself.
	name: setup.ps1 unit tests (VS 2026 / CMake guard)
	runs-on: windows-latest
	timeout-minutes: 15
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	- name: Install Pester v5
	shell: pwsh
	run: \|
	# Mark PSGallery as trusted so Install-Module does not prompt.
	Set-PSRepository PSGallery -InstallationPolicy Trusted
	# Install for the current user and require at least 5.5.0.
	Install-Module Pester -MinimumVersion 5.5.0 -Force -SkipPublisherCheck -Scope CurrentUser
	Import-Module Pester -MinimumVersion 5.5.0
	# Print the loaded version for the log.
	Get-Module Pester \| Select-Object Name, Version \| Format-Table

	- name: Run Pester suite
	shell: pwsh
	run: \|
	$ErrorActionPreference = 'Stop'
	$testDir = Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1'
	# Fail fast if the test directory is missing from the checkout.
	if (-not (Test-Path $testDir)) {
	Write-Error "Test directory not found: $testDir"
	exit 1
	}
	# Run every test under $testDir and write NUnit XML results to
	# pester-results.xml in the workspace root for the upload step.
	$cfg = New-PesterConfiguration
	$cfg.Run.Path = $testDir
	$cfg.Run.Exit = $true # non-zero exit => job fails
	$cfg.Run.Throw = $true # also throw on test failure / 0 tests
	$cfg.TestResult.Enabled = $true
	$cfg.TestResult.OutputFormat = 'NUnitXml'
	$cfg.TestResult.OutputPath = Join-Path $env:GITHUB_WORKSPACE 'pester-results.xml'
	$cfg.Output.Verbosity = 'Detailed'
	Invoke-Pester -Configuration $cfg

	- name: Upload Pester results
	  if: always()
	  # Diagnostic only: a transient artifact-service drop must not fail a
	  # green job (same policy as the log uploads in the other jobs).
	  continue-on-error: true
	  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	  with:
	    name: pester-results-setup-ps1
	    path: pester-results.xml
	    # A missing results file (e.g. Pester never ran) only warns.
	    if-no-files-found: warn

	vs-integration:
	# Real detection against the VS installed on the runner image (no mocks).
	name: real-VS detection (${{ matrix.label }})
	strategy:
	# Let each image finish even when the other one fails.
	fail-fast: false
	matrix:
	# One entry per runner image. expectGen and expectToolset are passed to
	# the detection step as EXPECT_GEN and EXPECT_TOOLSET.
	include:
	- { os: windows-2022, label: 'VS 2022', expectGen: 'Visual Studio 17 2022', expectToolset: 'v170' }
	- { os: windows-2025-vs2026, label: 'VS 2026', expectGen: 'Visual Studio 18 2026', expectToolset: 'v180' }
	runs-on: ${{ matrix.os }}
	timeout-minutes: 15
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	persist-credentials: false

	- name: Detect the real Visual Studio with setup.ps1 functions
	shell: pwsh
	env:
	# Expected values for this matrix entry.
	EXPECT_GEN: ${{ matrix.expectGen }}
	EXPECT_TOOLSET: ${{ matrix.expectToolset }}
	run: \|
	$ErrorActionPreference = 'Stop'
	# Dot-source the test helper, then define only the three needed
	# functions from setup.ps1 instead of running the whole script.
	# NOTE(review): Get-FunctionSource presumably returns the named
	# function's source text -- confirm in the helper.
	. (Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1/Get-FunctionSource.ps1')
	$setup = Join-Path $env:GITHUB_WORKSPACE 'studio/setup.ps1'
	foreach ($fn in @('Resolve-VsGeneratorFromLabel', 'Get-VcBuildCustomizationsDir', 'Find-VsBuildTools')) {
	. ([scriptblock]::Create((Get-FunctionSource -Path $setup -Name $fn)))
	}

	# Ground truth from the real vswhere (independent of our code), for visibility.
	# These values are only logged; nothing below asserts on them.
	$vsw = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe"
	if (Test-Path $vsw) {
	$year = (& $vsw -latest -property catalog_productLineVersion 2>$null \| Select-Object -First 1)
	$path = (& $vsw -latest -property installationPath 2>$null \| Select-Object -First 1)
	Write-Host "Real vswhere: productLineVersion='$year' installPath='$path'"
	} else {
	Write-Host "vswhere not present at $vsw (relying on filesystem fallback)"
	}

	# Our detection must find the real VS and report the expected generator.
	$r = Find-VsBuildTools
	if (-not $r) { throw "Find-VsBuildTools returned null on a host with real $env:EXPECT_GEN" }
	Write-Host "Find-VsBuildTools -> Generator='$($r.Generator)' Source='$($r.Source)' InstallPath='$($r.InstallPath)'"
	if ($r.Generator -ne $env:EXPECT_GEN) {
	throw "Detection mismatch: got '$($r.Generator)', expected '$env:EXPECT_GEN'"
	}
	if (-not (Test-Path $r.InstallPath)) { throw "Detected InstallPath does not exist: $($r.InstallPath)" }

	# Toolset path derivation must match the expected v-number...
	# The toolset name is taken as the parent folder of the returned path.
	$bc = Get-VcBuildCustomizationsDir -VsInstallPath $r.InstallPath -Generator $r.Generator
	$derived = Split-Path (Split-Path $bc -Parent) -Leaf # e.g. v170 / v180
	Write-Host "Get-VcBuildCustomizationsDir -> '$bc' (toolset='$derived')"
	if ($derived -ne $env:EXPECT_TOOLSET) {
	throw "Toolset mismatch: derived '$derived', expected '$env:EXPECT_TOOLSET'"
	}

	# ...and that v-number is a real folder on the VS install (where CUDA's
	# BuildCustomizations would land).
	# If the VC MSBuild root is absent, the on-disk check is skipped with
	# a warning rather than failing the step.
	$vcRoot = Join-Path $r.InstallPath 'MSBuild\Microsoft\VC'
	if (Test-Path $vcRoot) {
	$realToolsets = @((Get-ChildItem -Path $vcRoot -Directory -ErrorAction SilentlyContinue).Name)
	Write-Host "Real VC toolset dirs: $($realToolsets -join ', ')"
	if ($realToolsets -notcontains $derived) {
	throw "Derived toolset '$derived' is not present on the real $env:EXPECT_GEN install (have: $($realToolsets -join ', '))"
	}
	Write-Host "OK: toolset '$derived' exists on the real VS install."
	} else {
	Write-Warning "VC MSBuild root absent ($vcRoot) - C++ workload not installed; skipping on-disk toolset check."
	}

	Write-Host "PASS: real $env:EXPECT_GEN detected correctly with toolset '$derived'."

	vcredist-clean-box:
	  # Validate Test-VCRedistInstalled + Ensure-VCRedist on a throwaway runner:
	  #   A. detection reports "present" on the stock image,
	  #   B. detection reports "absent" once both signals (registry keys and the
	  #      System32 DLL lookup) are hidden, in a way that is restored afterwards,
	  #   C. a literal uninstall with the official installer,
	  #   D. a restore through Ensure-VCRedist, with the installer as fallback.
	  # The runtime is always restored before the job ends.
	  name: VC++ runtime detect + install round-trip (${{ matrix.os }})
	  strategy:
	    fail-fast: false
	    matrix:
	      os: [windows-latest, windows-2025-vs2026]
	  runs-on: ${{ matrix.os }}
	  timeout-minutes: 20
	  steps:
	    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: Detect present, fire on a clean box, and round-trip the install
	      shell: pwsh
	      run: |
	        $ErrorActionPreference = 'Stop'
	        . (Join-Path $env:GITHUB_WORKSPACE 'tests/studio_setup_ps1/Get-FunctionSource.ps1')
	        $setup = Join-Path $env:GITHUB_WORKSPACE 'studio/setup.ps1'
	        # Dot-source the guard + the logging closure it reaches
	        # (step/substep -> Write-StudioStdoutMirror / Get-StudioAnsi).
	        $script:StudioVtOk = $false
	        $script:UnslothVerbose = $false
	        foreach ($fn in @('Get-StudioAnsi', 'Write-StudioStdoutMirror', 'step', 'substep',
	                          'Invoke-SetupCommand', 'Refresh-Environment',
	                          'Test-VCRedistInstalled', 'Ensure-VCRedist')) {
	          $src = Get-FunctionSource -Path $setup -Name $fn
	          if (-not $src) { throw "Function '$fn' not found in setup.ps1" }
	          . ([scriptblock]::Create($src))
	        }

	        # Registry keys that are backed up, removed and restored in section B.
	        $regKeys = @(
	          'HKLM\SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64',
	          'HKLM\SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64'
	        )
	        # Print the raw on-disk / registry state, independent of the code under test.
	        function Show-GroundTruth {
	          $dll = Join-Path $env:SystemRoot 'System32\vcruntime140_1.dll'
	          Write-Host (" System32\vcruntime140_1.dll present: {0}" -f (Test-Path $dll))
	          foreach ($k in $regKeys) {
	            # Substring(5) drops the 'HKLM\' prefix so the key can be read via the HKLM: drive.
	            $r = Get-ItemProperty -Path "HKLM:\$($k.Substring(5))" -ErrorAction SilentlyContinue
	            if ($r) { Write-Host (" {0}: Installed={1} {2}.{3}" -f $k, $r.Installed, $r.Major, $r.Minor) }
	            else { Write-Host (" {0}: (absent)" -f $k) }
	          }
	        }

	        Write-Host '== A. Detection on the stock runner (expect present) =='
	        Show-GroundTruth
	        if (-not (Test-VCRedistInstalled)) { throw 'Test-VCRedistInstalled reported ABSENT on a stock runner that ships the VC++ runtime (detection regression).' }
	        Write-Host ' Test-VCRedistInstalled -> present OK'

	        Write-Host '== B. Genuinely clean box (restorable): detection must FIRE =='
	        # Scratch "Windows" root with an empty System32, used to hide the real DLL.
	        $scratch = Join-Path $env:RUNNER_TEMP 'cleanwin'
	        New-Item -ItemType Directory -Force -Path (Join-Path $scratch 'System32') | Out-Null
	        $backup = Join-Path $env:RUNNER_TEMP 'vcreg_backup'
	        New-Item -ItemType Directory -Force -Path $backup | Out-Null
	        $origSysRoot = $env:SystemRoot
	        try {
	          for ($i = 0; $i -lt $regKeys.Count; $i++) {
	            reg query $regKeys[$i] *> $null
	            # Key not present: nothing to back up or remove.
	            if ($LASTEXITCODE -ne 0) { continue }
	            $backupFile = Join-Path $backup "$i.reg"
	            reg export $regKeys[$i] $backupFile /y *> $null
	            # Never delete a key that was not exported successfully: the
	            # finally block can only restore keys that have a .reg file.
	            if ($LASTEXITCODE -ne 0 -or -not (Test-Path $backupFile)) {
	              throw "Could not back up $($regKeys[$i]); refusing to delete it without a restorable copy."
	            }
	            reg delete $regKeys[$i] /f *> $null
	          }
	          $env:SystemRoot = $scratch
	          if (Test-VCRedistInstalled) { throw 'Detection still PRESENT after both signals were removed (it would never trigger an install on a clean box).' }
	          Write-Host ' Test-VCRedistInstalled -> absent OK (detection fires on a clean box)'
	        } finally {
	          # Always undo the SystemRoot override and re-import every exported key.
	          $env:SystemRoot = $origSysRoot
	          for ($i = 0; $i -lt $regKeys.Count; $i++) {
	            $f = Join-Path $backup "$i.reg"
	            if (Test-Path $f) { reg import $f *> $null }
	          }
	        }
	        Show-GroundTruth
	        if (-not (Test-VCRedistInstalled)) { throw 'Detection did not recover after restoring the registry (test restore bug).' }

	        Write-Host '== C. Literal uninstall on this throwaway VM (official installer), observe detection =='
	        $exe = Join-Path $env:RUNNER_TEMP 'vc_redist.x64.exe'
	        Invoke-WebRequest -Uri 'https://aka.ms/vs/17/release/vc_redist.x64.exe' -OutFile $exe
	        Start-Process -FilePath $exe -ArgumentList '/uninstall', '/quiet', '/norestart' -Wait
	        Show-GroundTruth
	        Write-Host (" Test-VCRedistInstalled after uninstall -> {0}" -f (Test-VCRedistInstalled))
	        # Either outcome is accepted here; this section only reports what happened.
	        if (Test-VCRedistInstalled) {
	          Write-Host ' Note: the Visual Studio on this image ref-counts the runtime, so the package'
	          Write-Host ' uninstall is a no-op here; section B already proved detection on a clean box.'
	        }

	        Write-Host '== D. Restore via Ensure-VCRedist (winget product path), installer fallback if needed =='
	        Ensure-VCRedist
	        if (-not (Test-VCRedistInstalled)) {
	          Write-Host ' winget path did not restore it; using the official installer to close the round trip.'
	          Start-Process -FilePath $exe -ArgumentList '/install', '/quiet', '/norestart' -Wait
	        }
	        Show-GroundTruth
	        if (-not (Test-VCRedistInstalled)) { throw 'VC++ runtime could not be restored after the uninstall round-trip.' }
	        Write-Host ' Test-VCRedistInstalled -> present OK'
	        Write-Host 'PASS: detection is correct on a real install, fires on a clean box, and the install round-trip restores the runtime.'

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Fix Studio custom folders on Linux external drives (#6799) #9261

Workflow file

Fix Studio custom folders on Linux external drives (#6799) #9261

Uh oh!

Workflow file for this run