Fix Studio custom folders on Linux external drives (#6799) #9584
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-License-Identifier: AGPL-3.0-only | |
| # Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. | |
| # Three end-to-end smoke jobs that boot a freshly-installed Studio and | |
| # exercise the surfaces real users hit through the OpenAI / Anthropic | |
| # SDKs and curl. Each job picks the smallest model that exercises the | |
| # behaviour under test, primes HF_HOME via actions/cache, and shares | |
| # the install.sh --local --no-torch bootstrap. | |
| # | |
| # 1. OpenAI, Anthropic API tests | |
| # gemma-3-270m-it UD-Q4_K_XL (~254 MiB). | |
| # Password rotation via /api/auth/change-password (old fails, | |
| # new works), then OpenAI + Anthropic Python SDKs against /v1/* | |
| # with temperature=0 and a fixed seed. Asserts the four-turn | |
| # conversation is deterministic across two runs. | |
| # | |
| # 2. Tool calling Tests | |
| # Qwen3.5-2B UD-Q4_K_XL (~1.28 GiB). OpenAI function calling, | |
| # server-side tools (python, terminal, web_search) via | |
| # enable_tools / enabled_tools, and enable_thinking on/off. | |
| # | |
| # 3. JSON, images | |
| # Qwen3-VL-2B-Instruct UD-Q4_K_XL (~1.1 GiB) + mmproj-F16 (~780 MiB). | |
| # response_format JSON-schema decoding and OpenAI image_url | |
| # (data URI) plus Anthropic source/base64 image inputs. | |
| # | |
| # All three jobs run in parallel. Total wall time is dominated by job 3 | |
| # on a cold cache; warm cache cuts that to ~3 min. | |
| name: Studio GGUF CI | |
| on: | |
| pull_request: | |
| paths: | |
| - 'studio/**' | |
| - 'unsloth/**' | |
| - 'unsloth_cli/**' | |
| - 'install.sh' | |
| - 'pyproject.toml' | |
| - '.github/workflows/studio-inference-smoke.yml' | |
| push: | |
| branches: [main, pip] | |
| # Manual trigger for pre-warming HF_HOME caches on main, or re-running | |
| # against an arbitrary branch without pushing a no-op commit. | |
| workflow_dispatch: | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.ref }} | |
| cancel-in-progress: true | |
| permissions: | |
| contents: read | |
| jobs: | |
| # ───────────────────────────────────────────────────────────────────── | |
| # Job 1: OpenAI, Anthropic API tests | |
| # ───────────────────────────────────────────────────────────────────── | |
| openai-anthropic: | |
| name: OpenAI, Anthropic API tests | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 25 | |
| env: | |
| GGUF_REPO: unsloth/gemma-3-270m-it-GGUF | |
| GGUF_VARIANT: UD-Q4_K_XL | |
| GGUF_FILE: gemma-3-270m-it-UD-Q4_K_XL.gguf | |
| STUDIO_PORT: '18888' | |
| HF_HOME: ${{ github.workspace }}/hf-cache | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| persist-credentials: false | |
| - name: Linux deps for llama.cpp prebuilt | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y --no-install-recommends \ | |
| libcurl4-openssl-dev libssl-dev jq | |
| - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 | |
| with: | |
| node-version: '22' | |
| - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 | |
| with: | |
| python-version: '3.12' | |
| cache: 'pip' | |
| - name: Restore HF_HOME for ${{ env.GGUF_REPO }} | |
| id: cache-hf | |
| uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| continue-on-error: true | |
| with: | |
| path: hf-cache | |
| key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2 | |
| - name: Prime HF_HOME with the GGUF | |
| id: prime-hf | |
| if: steps.cache-hf.outputs.cache-hit != 'true' || steps.cache-hf.outcome != 'success' | |
| env: | |
| # Withheld on PR: this step runs checked-out PR code; public GGUF still downloads. | |
| HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }} | |
| run: | | |
| python -m pip install --upgrade huggingface_hub | |
| mkdir -p hf-cache | |
| bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE" | |
| bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf | |
| - name: Save HF_HOME for ${{ env.GGUF_REPO }} | |
| if: always() && steps.prime-hf.outcome == 'success' | |
| uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| with: | |
| path: hf-cache | |
| key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-v2 | |
| - name: Install Studio (--local, --no-torch) | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| # Withheld on PR: this step runs checked-out PR code; public GGUF still downloads. | |
| HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }} | |
| run: | | |
| mkdir -p logs | |
| set -o pipefail | |
| bash install.sh --local --no-torch 2>&1 | tee logs/install.log | |
| - name: Install OpenAI + Anthropic Python SDKs | |
| run: pip install 'openai>=1.50' 'anthropic>=0.40' | |
| - name: Reset auth + boot Studio (API-only) | |
| run: | | |
| unsloth studio reset-password | |
| mkdir -p logs | |
| UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \ | |
| > logs/studio.log 2>&1 & | |
| echo "STUDIO_PID=$!" >> "$GITHUB_ENV" | |
- name: Wait for /api/health
  run: |
    # Poll once per second for up to 180 attempts. The health check only
    # passes when the endpoint answers AND reports status == "healthy".
    # The jq test is part of the `if` condition on purpose: as a bare
    # command it would abort the step on the first not-yet-healthy
    # response under the runner's default `bash -e`, instead of retrying.
    for i in $(seq 1 180); do
      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json \
        && jq -e '.status == "healthy"' /tmp/health.json; then
        exit 0
      fi
      sleep 1
    done
    echo "Studio did not become healthy in 180s"
    tail -200 logs/studio.log
    exit 1
| - name: Password rotation (old must fail, new must work) | |
| run: | | |
| OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password) | |
| NEW="CIRotated-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')" | |
| echo "::add-mask::$OLD" | |
| echo "::add-mask::$NEW" | |
| # 1. Login with the bootstrap password. | |
| OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ | |
| -H 'content-type: application/json' \ | |
| -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token) | |
| [ -n "$OLD_TOKEN" ] && [ "$OLD_TOKEN" != "null" ] || { echo "bootstrap login failed"; exit 1; } | |
| # 2. Rotate to a fresh random password. | |
| curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \ | |
| -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \ | |
| -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null | |
| # 3. Old password must now be rejected (HTTP 401). | |
| OLD_STATUS=$(curl -s -o /dev/null -w '%{http_code}' \ | |
| -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ | |
| -H 'content-type: application/json' \ | |
| -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}") | |
| if [ "$OLD_STATUS" != "401" ]; then | |
| echo "::error::Login with old password returned $OLD_STATUS, expected 401" | |
| exit 1 | |
| fi | |
| # 4. New password must succeed; capture the JWT for downstream steps. | |
| NEW_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ | |
| -H 'content-type: application/json' \ | |
| -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token) | |
| [ -n "$NEW_TOKEN" ] && [ "$NEW_TOKEN" != "null" ] || { echo "new login failed"; exit 1; } | |
| echo "TOKEN=$NEW_TOKEN" >> "$GITHUB_ENV" | |
| echo "password rotation OK (old=401, new=200)" | |
| - name: Load the GGUF (HF repo + variant, served from HF_HOME cache) | |
| run: | | |
| curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \ | |
| -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \ | |
| --max-time 600 \ | |
| -d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}" \ | |
| | jq '{status, display_name, is_gguf, context_length}' | |
| - name: Multi-turn determinism via OpenAI + Anthropic SDKs | |
| env: | |
| BASE_URL: http://127.0.0.1:18888 | |
| run: | | |
| python - <<'PY' | |
| import json | |
| import os | |
| from openai import OpenAI | |
| from anthropic import Anthropic | |
| BASE = os.environ["BASE_URL"] | |
| KEY = os.environ["TOKEN"] # JWT also accepted as Bearer on /v1/* | |
| SEED = 3407 | |
| # Four-turn conversation: the second and fourth turns can only be | |
| # answered correctly if the model sees the prior turns, so this | |
| # also exercises the conversation-history wiring. | |
| PROMPTS = [ | |
| "What is 1+1?", | |
| "What did I ask before?", | |
| "What is the capital of France?", | |
| "Repeat the city name", | |
| ] | |
def run_openai():
    """Play PROMPTS through the OpenAI SDK as one growing conversation.

    Every user prompt is appended to a shared message list and sent to
    Studio's /v1 endpoint with temperature 0 and the fixed SEED. The
    assistant text (empty string when the SDK returns no content) is
    appended back so later turns see the earlier ones.

    Returns the assistant replies in turn order.
    """
    sdk = OpenAI(base_url = f"{BASE}/v1", api_key = KEY)
    conversation = []

    def ask(question):
        # One turn: record the user message, query, record the answer.
        conversation.append({"role": "user", "content": question})
        completion = sdk.chat.completions.create(
            model = "default",
            messages = conversation,
            temperature = 0.0,
            max_tokens = 80,
            seed = SEED,
            extra_body = {"enable_thinking": False},
        )
        answer = completion.choices[0].message.content or ""
        conversation.append({"role": "assistant", "content": answer})
        return answer

    return [ask(question) for question in PROMPTS]
def run_anthropic():
    """Play PROMPTS through the Anthropic SDK as one growing conversation.

    Mirrors run_openai: each turn appends the user prompt, requests a
    completion with temperature 0 and the fixed SEED (passed through
    extra_body), joins the text blocks of the reply, and appends that
    text back as the assistant message.

    Returns the assistant replies in turn order.
    """
    # Two SDK quirks vs. Studio:
    #   1. base_url must NOT include /v1 -- the SDK appends /v1/messages
    #      itself; otherwise the request hits /v1/v1/messages and 405s.
    #   2. The SDK sends `x-api-key` by default, but Studio's auth layer
    #      is HTTPBearer-only. Override via default_headers so
    #      Authorization: Bearer ... is sent instead.
    sdk = Anthropic(
        base_url = BASE,
        api_key = "unused",
        default_headers = {"Authorization": f"Bearer {KEY}"},
    )
    conversation = []

    def ask(question):
        # One turn: record the user message, query, record the answer.
        conversation.append({"role": "user", "content": question})
        message = sdk.messages.create(
            model = "default",
            max_tokens = 80,
            messages = conversation,
            temperature = 0.0,
            extra_body = {"seed": SEED, "enable_thinking": False},
        )
        # Keep only blocks whose type is "text".
        pieces = [
            block.text
            for block in message.content
            if getattr(block, "type", None) == "text"
        ]
        answer = "".join(pieces)
        conversation.append({"role": "assistant", "content": answer})
        return answer

    return [ask(question) for question in PROMPTS]
| for label, runner in (("openai", run_openai), ("anthropic", run_anthropic)): | |
| first = runner() | |
| second = runner() | |
| determinism_failures = [] | |
| for i, (a, b) in enumerate(zip(first, second), start = 1): | |
| print(f"[{label} turn {i}] {a!r}") | |
| # Both runs must be non-empty; small-quant drift | |
| # across runs is WARN-only (grounding asserts below | |
| # are the stronger signal). | |
| assert a, f"{label}: empty turn {i} response in first run" | |
| assert b, f"{label}: empty turn {i} response in second run" | |
| if a.strip() != b.strip(): | |
| determinism_failures.append( | |
| f"turn {i}: run1={a!r} run2={b!r}" | |
| ) | |
| if determinism_failures: | |
| print( | |
| f"[{label}] WARN non-determinism at temperature=0.0 across " | |
| f"{len(determinism_failures)} of {len(first)} turn(s); " | |
| f"small-quant model drift, not a Studio regression. " | |
| f"Details: " + " | ".join(determinism_failures) | |
| ) | |
| # Sanity: turn-2 reply should mention the earlier question, and | |
| # turn-4 reply should mention Paris (model echoes the city it | |
| # produced for turn 3). Lower-cased substring checks keep the | |
| # assertion robust to formatting jitter. | |
| joined = " ".join(first).lower() | |
| assert "1" in first[0], f"{label}: turn-1 answer should contain '1', got {first[0]!r}" | |
| assert "paris" in joined, f"{label}: expected 'paris' somewhere in the four-turn transcript: {first}" | |
| status_word = "PASS" if not determinism_failures else "PASS (with drift)" | |
| print(f"[{label}] {status_word} -- 4 turns, history grounded ('paris' present)") | |
| PY | |
| - name: Stop Studio | |
| if: always() | |
| run: | | |
| kill "${STUDIO_PID}" 2>/dev/null || true | |
| sleep 2 | |
| ss -tln | grep ":${STUDIO_PORT}" || true | |
| - name: Upload logs | |
| # Always upload so green runs are still reviewable. | |
| if: always() | |
| # Diagnostic only: a transient artifact-service drop must not fail a green job. | |
| continue-on-error: true | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: openai-anthropic-log | |
| path: | | |
| logs/studio.log | |
| logs/install.log | |
| retention-days: 7 | |
| # ───────────────────────────────────────────────────────────────────── | |
| # Job 2: Tool calling Tests | |
| # ───────────────────────────────────────────────────────────────────── | |
| tool-calling: | |
| name: Tool calling Tests | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 25 | |
| env: | |
| # Tool calling is the highest-volume GGUF in this workflow | |
| # (Qwen3.5-2B at Q4_K_XL = ~1.28 GiB). Caching HF_HOME would | |
| # store xet chunks + blobs + snapshots = ~4 GiB compressed -- | |
| # 4-5x file-size inflation, dominated by xet chunks. Use main's | |
| # `--local-dir gguf-cache` pattern to cache the flat .gguf only. | |
| # Studio's /api/inference/load accepts either a HF repo (which | |
| # uses HF_HOME) or an absolute file path; passing the absolute | |
| # path keeps the test off HF_HOME entirely so the cache size | |
| # tracks the GGUF file 1:1. The OpenAI/Anth and JSON+images | |
| # jobs still cover the gguf_variant resolution path. | |
| # Q4_K_XL, not IQ3_XXS: at IQ3_XXS this model emits malformed | |
| # tool calls that llama-server's peg-native parser rejects with a | |
| # 500. Mac/Windows already use Q4_K_XL for the same reason. | |
| GGUF_REPO: unsloth/Qwen3.5-2B-GGUF | |
| GGUF_FILE: Qwen3.5-2B-UD-Q4_K_XL.gguf | |
| STUDIO_PORT: '18889' | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| persist-credentials: false | |
| - name: Linux deps for llama.cpp prebuilt | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y --no-install-recommends \ | |
| libcurl4-openssl-dev libssl-dev jq | |
| - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 | |
| with: | |
| node-version: '22' | |
| - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 | |
| with: | |
| python-version: '3.12' | |
| cache: 'pip' | |
| - name: Restore GGUF model file | |
| id: cache-gguf | |
| uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| continue-on-error: true | |
| with: | |
| path: gguf-cache | |
| key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1 | |
| - name: Download GGUF if cache miss | |
| id: download-gguf | |
| if: steps.cache-gguf.outputs.cache-hit != 'true' || steps.cache-gguf.outcome != 'success' | |
| env: | |
| # Withheld on PR: this step runs checked-out PR code; public GGUF still downloads. | |
| HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }} | |
| run: | | |
| python -m pip install --upgrade huggingface_hub | |
| mkdir -p gguf-cache | |
| bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE" gguf-cache | |
| - name: Save GGUF model file | |
| if: always() && steps.download-gguf.outcome == 'success' | |
| uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| with: | |
| path: gguf-cache | |
| key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1 | |
| - name: Install Studio (--local, --no-torch) | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| # Withheld on PR: this step runs checked-out PR code; public GGUF still downloads. | |
| HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }} | |
| run: | | |
| mkdir -p logs | |
| set -o pipefail | |
| bash install.sh --local --no-torch 2>&1 | tee logs/install.log | |
| - name: Reset auth + boot Studio (API-only, default tool policy) | |
| # We deliberately use the API-only mode rather than | |
| # `unsloth studio run` because the latter calls | |
| # `set_tool_policy(...)` with a resolved bool: on loopback the | |
| # default resolves to True, which forces every request through | |
| # the server-side agentic loop and breaks the standard | |
| # function-calling test below. API-only mode leaves | |
| # tool_policy=None so each request's `enable_tools` field is | |
| # honoured. | |
| run: | | |
| unsloth studio reset-password | |
| mkdir -p logs | |
| UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \ | |
| > logs/studio.log 2>&1 & | |
| echo "STUDIO_PID=$!" >> "$GITHUB_ENV" | |
- name: Wait for /api/health, log in, change password, load model
  run: |
    # Poll once per second (up to 180 attempts) until /api/health reports
    # status == "healthy".
    for i in $(seq 1 180); do
      if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
        jq -e '.status == "healthy"' /tmp/health.json && break
      fi
      sleep 1
    done
    # Re-check after the loop so a timeout fails here, with the server log,
    # rather than at a later curl call.
    if ! jq -e '.status == "healthy"' /tmp/health.json; then
      echo "Studio did not become healthy in 180s"
      tail -200 logs/studio.log
      exit 1
    fi
    OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
    NEW="CITool-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')"
    echo "::add-mask::$OLD"
    echo "::add-mask::$NEW"
    # The curl | jq pipelines below report jq's exit status, so a failed
    # login yields an empty or "null" token instead of an error. Validate
    # each token explicitly.
    OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
      -H 'content-type: application/json' \
      -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -r .access_token)
    [ -n "$OLD_TOKEN" ] && [ "$OLD_TOKEN" != "null" ] || { echo "bootstrap login failed"; exit 1; }
    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
      -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \
      -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null
    TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
      -H 'content-type: application/json' \
      -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
    [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ] || { echo "new login failed"; exit 1; }
    # Exported as API_KEY for the Python test step that follows.
    echo "API_KEY=$TOKEN" >> "$GITHUB_ENV"
    # Load the flat .gguf by absolute path from the gguf-cache directory.
    GGUF_PATH="$GITHUB_WORKSPACE/gguf-cache/${GGUF_FILE}"
    ls -lh "$GGUF_PATH"
    curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
      -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
      --max-time 600 \
      -d "{\"model_path\":\"$GGUF_PATH\",\"is_lora\":false,\"max_seq_length\":2048}" \
      | jq '{status, display_name}'
| - name: Tool calling, server-side tools, thinking on/off | |
| env: | |
| BASE_URL: http://127.0.0.1:18889 | |
| run: | | |
| python - <<'PY' | |
| import json | |
| import os | |
| import urllib.request | |
| BASE = os.environ["BASE_URL"] | |
| KEY = os.environ["API_KEY"] | |
| SEED = 3407 | |
def post(path, body, *, timeout = 240):
    """Send `body` as a JSON POST to BASE + path with the bearer token.

    Intended for requests that don't go through the server-side agentic
    loop, where the response is a single JSON object.

    Returns (http_status, decoded_json_body).
    """
    auth_headers = {
        "Authorization": f"Bearer {KEY}",
        "Content-Type": "application/json",
    }
    encoded = json.dumps(body).encode()
    request = urllib.request.Request(
        f"{BASE}{path}",
        data = encoded,
        method = "POST",
        headers = auth_headers,
    )
    with urllib.request.urlopen(request, timeout = timeout) as response:
        code = response.status
        parsed = json.loads(response.read().decode())
        return code, parsed
def post_sse(path, body, *, timeout = 600):
    """POST a streaming request and collect the assistant text deltas.

    `stream` is forced to True on a copy of `body`. The server-side
    agentic loop ALWAYS returns SSE regardless of the request's `stream`
    field, so any call with enable_tools=true must use this helper.

    Returns (content, raw_payloads):
      content      -- concatenated assistant delta.content
      raw_payloads -- every raw "data: ..." event payload (JSON strings)
                      seen before "[DONE]". Callers asserting that a
                      server-side tool actually ran should inspect these
                      for tool markers / tool output, since delta.content
                      alone is not evidence that the tool path executed.
    """
    marker = "data: "
    request = urllib.request.Request(
        f"{BASE}{path}",
        data = json.dumps({**body, "stream": True}).encode(),
        method = "POST",
        headers = {
            "Authorization": f"Bearer {KEY}",
            "Content-Type": "application/json",
        },
    )
    text_pieces = []
    payloads = []
    with urllib.request.urlopen(request, timeout = timeout) as stream:
        for raw_line in stream:
            line = raw_line.decode().strip()
            # Only "data: " lines carry payloads; skip everything else.
            if not line.startswith(marker):
                continue
            payload = line[len(marker):]
            if payload == "[DONE]":
                break
            payloads.append(payload)
            try:
                chunk = json.loads(payload)
            except json.JSONDecodeError:
                # Kept in payloads, but contributes no text.
                continue
            for choice in chunk.get("choices", []):
                piece = (choice.get("delta", {}) or {}).get("content")
                if piece:
                    text_pieces.append(piece)
    return "".join(text_pieces), payloads
| _STUDIO_TOOL_TYPES = { | |
| "tool_start", "tool_end", "tool_use", "tool_result", | |
| } | |
| def _tool_invoked(events): | |
| """Structural check: True iff some SSE payload is a real | |
| tool envelope (Studio tool_start/tool_end, Anthropic | |
| tool_use/tool_result, OpenAI non-empty delta.tool_calls / | |
| message.tool_calls / finish_reason='tool_calls' / | |
| role:'tool' / function_call). tool_status is NOT | |
| evidence: Studio emits empty tool_status events on | |
| iteration boundaries even when no tool ran. | |
| """ | |
| for raw in events: | |
| try: | |
| ev = json.loads(raw) | |
| except (json.JSONDecodeError, TypeError): | |
| continue | |
| if not isinstance(ev, dict): | |
| continue | |
| if ev.get("type") in _STUDIO_TOOL_TYPES: | |
| return True | |
| for choice in ev.get("choices", []) or []: | |
| if not isinstance(choice, dict): | |
| continue | |
| if choice.get("finish_reason") == "tool_calls": | |
| return True | |
| for src_key in ("delta", "message"): | |
| src = choice.get(src_key) or {} | |
| if not isinstance(src, dict): | |
| continue | |
| tc = src.get("tool_calls") | |
| if isinstance(tc, list) and tc: | |
| return True | |
| if src.get("function_call"): | |
| return True | |
| if src.get("role") == "tool": | |
| return True | |
| for item in ev.get("output", []) or []: | |
| if isinstance(item, dict) and item.get("type") in { | |
| "tool_call", "function_call", "tool_use", | |
| }: | |
| return True | |
| content = ev.get("content") | |
| if isinstance(content, list): | |
| for blk in content: | |
| if isinstance(blk, dict) and blk.get("type") in { | |
| "tool_use", "tool_result", | |
| }: | |
| return True | |
| return False | |
| def _tool_output_contains(events, *needles): | |
| """True iff any tool_end.result / tool_result.content / | |
| tool-role message content contains a needle. Inspects | |
| the tool's own output, not the model's narration.""" | |
| for raw in events: | |
| try: | |
| ev = json.loads(raw) | |
| except (json.JSONDecodeError, TypeError): | |
| continue | |
| if not isinstance(ev, dict): | |
| continue | |
| if ev.get("type") == "tool_end": | |
| result = ev.get("result") | |
| if isinstance(result, str) and any(n in result for n in needles if n): | |
| return True | |
| if ev.get("type") == "tool_result": | |
| content = ev.get("content") | |
| if isinstance(content, str) and any(n in content for n in needles if n): | |
| return True | |
| if isinstance(content, list): | |
| for blk in content: | |
| if isinstance(blk, dict): | |
| text = blk.get("text") or blk.get("content") | |
| if isinstance(text, str) and any(n in text for n in needles if n): | |
| return True | |
| for choice in ev.get("choices", []) or []: | |
| delta = (choice or {}).get("delta") or {} | |
| msg = (choice or {}).get("message") or {} | |
| for src in (delta, msg): | |
| if src.get("role") == "tool": | |
| content = src.get("content") or "" | |
| if isinstance(content, str) and any(n in content for n in needles if n): | |
| return True | |
| return False | |
| # ── 1. Standard OpenAI function calling ────────────────────── | |
| weather_tool = { | |
| "type": "function", | |
| "function": { | |
| "name": "get_weather", | |
| "description": "Get current weather for a city.", | |
| "parameters": { | |
| "type": "object", | |
| "properties": {"city": {"type": "string"}}, | |
| "required": ["city"], | |
| }, | |
| }, | |
| } | |
| status, data = post("/v1/chat/completions", { | |
| "messages": [{"role": "user", "content": "What is the weather in Paris?"}], | |
| "tools": [weather_tool], | |
| "tool_choice": "required", | |
| "stream": False, | |
| "temperature": 0.0, | |
| "seed": SEED, | |
| "max_tokens": 120, | |
| }) | |
| assert status == 200, f"tool call status {status}: {data}" | |
| choice = data["choices"][0] | |
| assert choice["finish_reason"] == "tool_calls", f"finish_reason={choice['finish_reason']!r}" | |
| tc = choice["message"]["tool_calls"][0] | |
| assert tc["function"]["name"] == "get_weather" | |
| args = json.loads(tc["function"]["arguments"]) | |
| assert args.get("city"), f"missing city arg: {args}" | |
| print(f"[tools] PASS function calling -> {tc['function']['name']}({args})") | |
| # T=0 = deterministic argmax in llama.cpp; T>0 lets seed | |
| # rotation explore distinct trajectories on retry. | |
| TOOL_PROBE_TEMP = 0.4 | |
def _run_tool_probe(*, label, prompt, enabled, session, needles,
                    max_attempts = 4):
    """Drive a server-side tool, retrying with a rotated seed.

    Each attempt streams one chat completion with enable_tools=True,
    seed SEED + attempt index, and a per-attempt session id. An attempt
    passes when the events show structural tool invocation AND the tool
    output contains one of `needles`.

    Outcomes:
      * some attempt passes -> returns that attempt immediately;
      * no attempt passes but at least one invoked a tool -> WARN (not
        FAIL) and return the first such attempt. Small-quant Qwen3.5-2B
        can emit OpenAI tool_calls deltas without Studio's GGUF agentic
        loop intercepting them, and that GGUF-vs-OpenAI format mismatch
        is out of scope for #5642;
      * no attempt shows invocation evidence -> AssertionError.

    Returns (content, events, attempts_log).
    """
    attempts_log = []
    first_invoked = None
    for attempt_i in range(max_attempts):
        attempt_seed = SEED + attempt_i
        request_body = {
            "messages": [{"role": "user", "content": prompt}],
            "enable_tools": True,
            "enabled_tools": enabled,
            "session_id": f"{session}-att{attempt_i}",
            "temperature": TOOL_PROBE_TEMP,
            "seed": attempt_seed,
            "max_tokens": 600,
        }
        text, stream_events = post_sse("/v1/chat/completions", request_body)
        was_invoked = _tool_invoked(stream_events)
        output_ok = _tool_output_contains(stream_events, *needles)
        attempts_log.append({
            "attempt": attempt_i, "seed": attempt_seed,
            "n_events": len(stream_events),
            "tool_invoked": was_invoked, "tool_output_contains": output_ok,
            "content_len": len(text),
        })
        if was_invoked and output_ok:
            print(f"[tools] PASS {label} attempt {attempt_i}")
            return text, stream_events, attempts_log
        # Remember the earliest attempt that at least invoked a tool.
        if was_invoked and first_invoked is None:
            first_invoked = (text, stream_events)
        print(f"[tools] retry {label} attempt {attempt_i}: invoked={was_invoked} output_ok={output_ok} events={len(stream_events)}")
    if first_invoked is None:
        raise AssertionError(
            f"{label}: no structural tool-invocation evidence across "
            f"{max_attempts} attempts. enable_tools may be silently "
            f"ignored. Attempts: {attempts_log}"
        )
    print(f"[tools] WARN {label}: invoked but no tool_end.result match (small-quant flake). Attempts: {attempts_log}")
    text, stream_events = first_invoked
    return text, stream_events, attempts_log
| # ── 2. Server-side python tool ─────────────────────────────── | |
| content, events, _attempts = _run_tool_probe( | |
| label = "python tool", | |
| prompt = "What is 123 * 456? Use the python tool to compute it and tell me the number.", | |
| enabled = ["python"], | |
| session = "ci-tool-calling-py", | |
| needles = ("56088", "56,088"), | |
| ) | |
| if "56088" in content or "56,088" in content: | |
| print(f"[tools] python tool narration OK") | |
| else: | |
| print(f"[tools] python tool narration drifted -- content={content!r}") | |
| # ── 3. Server-side bash (terminal) tool ────────────────────── | |
| content, events, _attempts = _run_tool_probe( | |
| label = "bash/terminal tool", | |
| prompt = "Use the terminal tool to run `echo hello-bash-tool` and tell me the exact output.", | |
| enabled = ["terminal"], | |
| session = "ci-tool-calling-bash", | |
| needles = ("hello-bash-tool",), | |
| ) | |
| if "hello-bash-tool" in content: | |
| print(f"[tools] bash/terminal narration OK") | |
| else: | |
| print(f"[tools] bash/terminal narration dropped literal -- content={content!r}") | |
| # ── 4. Server-side web_search tool ─────────────────────────── | |
| # DuckDuckGo is flaky from CI runners and small Qwen3.5-2B | |
| # may not actually search. Only assert that the SSE stream | |
| # opens and yields any data; HTTP / parser failures already | |
| # raise above. Tool-invocation strictness is relaxed here | |
| # because (a) the search may legitimately return no results, | |
| # and (b) DuckDuckGo upstream blocks GHA IP ranges often | |
| # enough that requiring a tool_call marker would create | |
| # red-herring failures from infra rather than from Studio. | |
| try: | |
| content, events = post_sse("/v1/chat/completions", { | |
| "messages": [{"role": "user", "content": "Search the web for 'unsloth ai github' and summarise."}], | |
| "enable_tools": True, | |
| "enabled_tools": ["web_search"], | |
| "session_id": "ci-tool-calling-web", | |
| "temperature": 0.0, | |
| "seed": SEED, | |
| "max_tokens": 400, | |
| }) | |
| print( | |
| f"[tools] PASS web_search stream ({len(content)} chars in content, " | |
| f"{len(events)} raw events)" | |
| ) | |
| except Exception as exc: | |
| print(f"[tools] WARN web_search probe failed (non-blocking): {exc}") | |
| # ── 5. Thinking on / off ───────────────────────────────────── | |
| # Studio strips think blocks from message.content for tools-mode | |
| # responses, so we toggle plain chat (no enable_tools) and look | |
| # at the surfaced reasoning_content / message.thinking field. | |
def thinking_call(enable):
    """Run one plain (no enable_tools) chat completion with
    `enable_thinking` set to `enable`.

    Returns message.content concatenated with message.reasoning_content;
    a missing or None part counts as the empty string.
    """
    status, data = post("/v1/chat/completions", {
        "messages": [{"role": "user", "content": "Briefly: is 17 prime?"}],
        "stream": False,
        "enable_thinking": enable,
        "temperature": 0.0,
        "seed": SEED,
        "max_tokens": 300,
    })
    assert status == 200, f"thinking call status {status}: {data}"
    msg = data["choices"][0]["message"]
    # Studio surfaces thinking via reasoning_content (OpenAI extension).
    # Fall back to inline <think> markers for robustness across template
    # versions.
    return (msg.get("content") or "") + (msg.get("reasoning_content") or "")


on_text = thinking_call(True)
off_text = thinking_call(False)
# On-mode signal: an inline <think> marker, or combined content +
# reasoning_content longer than 80 characters.
had_think_on = ("<think>" in on_text) or len(on_text) > 80
assert had_think_on, (
    f"enable_thinking=True produced no thinking signal: {on_text!r}"
)
# Off-mode should not contain the literal <think> marker.
assert "<think>" not in off_text, (
    f"enable_thinking=False but <think> still present: {off_text!r}"
)
print(f"[tools] PASS thinking on/off (on={len(on_text)} chars, off={len(off_text)} chars)")
| PY | |
| - name: Stop Studio | |
| if: always() | |
| run: | | |
| kill "${STUDIO_PID}" 2>/dev/null || true | |
| sleep 2 | |
| ss -tln | grep ":${STUDIO_PORT}" || true | |
| # Capture backend + llama-server logs so a 500 has a server-side traceback. | |
| mkdir -p logs/server-logs | |
| cp -r ~/.unsloth/studio/logs/. logs/server-logs/ 2>/dev/null || true | |
| - name: Upload logs | |
| # Always upload so green runs are still reviewable. | |
| if: always() | |
| # Diagnostic only: a transient artifact-service drop must not fail a green job. | |
| continue-on-error: true | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: tool-calling-log | |
| # logs/server-logs/ is filled by the Stop Studio step, which copies | |
| # ~/.unsloth/studio/logs into it. | |
| path: | | |
| logs/studio.log | |
| logs/install.log | |
| logs/server-logs/ | |
| retention-days: 7 | |
| # ───────────────────────────────────────────────────────────────────── | |
| # Job 3: JSON, images | |
| # ───────────────────────────────────────────────────────────────────── | |
| json-images: | |
| name: JSON, images | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 30 | |
| env: | |
| GGUF_REPO: unsloth/Qwen3-VL-2B-Instruct-GGUF | |
| # UD-Q4_K_XL, not UD-IQ2_XXS: at 2-bit the temp-0 answer to the JSON | |
| # step's capital-of-France probe flips with the host's SIMD kernels | |
| # (GitHub runners deterministically answered France while other CPUs | |
| # answer Paris; seeds do not rescue it, 1/5 Paris at temp 0.7). The | |
| # Q4 quant answered Paris 13/13 across temps and seeds on the same | |
| # runners, so the hard Paris assertion below stays reliable. | |
| GGUF_VARIANT: UD-Q4_K_XL | |
| GGUF_FILE: Qwen3-VL-2B-Instruct-UD-Q4_K_XL.gguf | |
| MMPROJ_FILE: mmproj-F16.gguf | |
| STUDIO_PORT: '18890' | |
| HF_HOME: ${{ github.workspace }}/hf-cache | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| persist-credentials: false | |
| - name: Linux deps for llama.cpp prebuilt | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y --no-install-recommends \ | |
| libcurl4-openssl-dev libssl-dev jq | |
| - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 | |
| with: | |
| node-version: '22' | |
| - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 | |
| with: | |
| python-version: '3.12' | |
| cache: 'pip' | |
| - name: Restore HF_HOME for ${{ env.GGUF_REPO }} (model + mmproj) | |
| id: cache-hf | |
| uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| # Restore is best-effort: a failed restore does not fail the job, it | |
| # just falls through to the download step below. | |
| continue-on-error: true | |
| with: | |
| path: hf-cache | |
| key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-${{ env.MMPROJ_FILE }}-v2 | |
| - name: Prime HF_HOME with the GGUF + mmproj | |
| id: prime-hf | |
| # Download when the cache missed OR the restore step itself did not succeed. | |
| if: steps.cache-hf.outputs.cache-hit != 'true' || steps.cache-hf.outcome != 'success' | |
| env: | |
| # Withheld on PR: this step runs checked-out PR code; public GGUF still downloads. | |
| HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }} | |
| run: | | |
| python -m pip install --upgrade huggingface_hub | |
| mkdir -p hf-cache | |
| bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$GGUF_FILE" | |
| bash .github/scripts/hf-download-with-retry.sh "$GGUF_REPO" "$MMPROJ_FILE" | |
| bash .github/scripts/hf-download-with-retry.sh ggml-org/models tinyllamas/stories260K.gguf | |
| - name: Save HF_HOME for ${{ env.GGUF_REPO }} (model + mmproj) | |
| # Save only when the prime step actually ran and succeeded; on a cache | |
| # hit the prime step is skipped, so nothing is re-saved. | |
| if: always() && steps.prime-hf.outcome == 'success' | |
| uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| with: | |
| path: hf-cache | |
| key: ${{ runner.os }}-hf-${{ env.GGUF_REPO }}-${{ env.GGUF_VARIANT }}-${{ env.MMPROJ_FILE }}-v2 | |
| - name: Install Studio (--local, --no-torch) | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| # Withheld on PR: this step runs checked-out PR code; public GGUF still downloads. | |
| HF_TOKEN: ${{ github.event_name != 'pull_request' && secrets.HF_TOKEN || '' }} | |
| run: | | |
| mkdir -p logs | |
| # pipefail: without it the pipeline's exit status would be tee's, | |
| # which would hide a failing install.sh. | |
| set -o pipefail | |
| bash install.sh --local --no-torch 2>&1 | tee logs/install.log | |
| - name: Install OpenAI + Anthropic Python SDKs | |
| run: pip install 'openai>=1.50' 'anthropic>=0.40' | |
| - name: Reset auth + boot Studio (API-only) | |
| # See Job 2's comment: API-only mode keeps tool_policy=None so | |
| # response_format requests aren't routed through the agentic | |
| # tool loop. | |
| run: | | |
| unsloth studio reset-password | |
| mkdir -p logs | |
| # Start Studio in the background with stdout and stderr redirected | |
| # to logs/studio.log. | |
| UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \ | |
| > logs/studio.log 2>&1 & | |
| # Export the background PID through GITHUB_ENV so later steps can read it. | |
| echo "STUDIO_PID=$!" >> "$GITHUB_ENV" | |
| - name: Wait for /api/health, log in, change password, load model | |
| run: | | |
| # Poll /api/health once per second, up to 180 attempts, until it reports healthy. | |
| for i in $(seq 1 180); do | |
| if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then | |
| jq -e '.status == "healthy"' /tmp/health.json && break | |
| fi | |
| sleep 1 | |
| done | |
| # Hard gate: fails the step if the loop ran out without a healthy response. | |
| jq -e '.status == "healthy"' /tmp/health.json | |
| OLD=$(cat ~/.unsloth/studio/auth/.bootstrap_password) | |
| NEW="CIJson-$(python -c 'import secrets; print(secrets.token_urlsafe(12))')" | |
| echo "::add-mask::$OLD" | |
| echo "::add-mask::$NEW" | |
| # jq -e exits non-zero on a null/missing access_token and on empty input | |
| # (a failed curl), so a bad login fails here instead of handing the | |
| # literal string "null" to the requests that follow. | |
| OLD_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ | |
| -H 'content-type: application/json' \ | |
| -d "{\"username\":\"unsloth\",\"password\":\"$OLD\"}" | jq -er .access_token) | |
| curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \ | |
| -H "Authorization: Bearer $OLD_TOKEN" -H 'content-type: application/json' \ | |
| -d "{\"current_password\":\"$OLD\",\"new_password\":\"$NEW\"}" > /dev/null | |
| TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ | |
| -H 'content-type: application/json' \ | |
| -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -er .access_token) | |
| # Mask the bearer token before exporting it, matching the passwords above, | |
| # so later steps cannot leak it into the job log. | |
| echo "::add-mask::$TOKEN" | |
| echo "API_KEY=$TOKEN" >> "$GITHUB_ENV" | |
| # Retry: llama-server startup can race process teardown after a | |
| # failed attempt. Keep curl out of a pipe so HTTP failures are not | |
| # masked by jq. | |
| LOAD_OK=0 | |
| for attempt in 1 2 3; do | |
| HTTP=$(curl -s -o /tmp/load.json -w '%{http_code}' \ | |
| -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \ | |
| -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \ | |
| --max-time 900 \ | |
| -d "{\"model_path\":\"$GGUF_REPO\",\"gguf_variant\":\"$GGUF_VARIANT\",\"is_lora\":false,\"max_seq_length\":2048}") | |
| if [ "$HTTP" = "200" ]; then LOAD_OK=1; break; fi | |
| echo "::warning::/api/inference/load attempt $attempt returned $HTTP; response:" | |
| cat /tmp/load.json || true | |
| sleep 10 | |
| done | |
| [ "$LOAD_OK" = "1" ] || { echo "::error::/api/inference/load failed 3 attempts"; exit 22; } | |
| jq '{status, display_name, is_vision}' /tmp/load.json | |
| - name: JSON schema decoding + image input | |
| env: | |
| # Derived from the job-level STUDIO_PORT so the port is defined in one place. | |
| BASE_URL: http://127.0.0.1:${{ env.STUDIO_PORT }} | |
| run: | | |
| python - <<'PY' | |
| import base64 | |
| import json | |
| import os | |
| import urllib.request | |
| from openai import OpenAI | |
| from anthropic import Anthropic | |
| BASE = os.environ["BASE_URL"] | |
| KEY = os.environ["API_KEY"] | |
| SEED = 3407 | |
| # POST `body` as JSON to BASE + path with the bearer token and return | |
| # (HTTP status, decoded JSON response). `timeout` is in seconds. | |
| def post(path, body, *, timeout = 240): | |
| req = urllib.request.Request( | |
| f"{BASE}{path}", | |
| data = json.dumps(body).encode(), | |
| method = "POST", | |
| headers = { | |
| "Authorization": f"Bearer {KEY}", | |
| "Content-Type": "application/json", | |
| }, | |
| ) | |
| with urllib.request.urlopen(req, timeout = timeout) as resp: | |
| return resp.status, json.loads(resp.read().decode()) | |
| # ── 1. response_format = json_object (JSON mode) ───────────── | |
| # llama.cpp's HTTP server supports OpenAI-compatible JSON | |
| # mode: `response_format: {"type": "json_object"}` constrains | |
| # the model to emit syntactically-valid JSON. We use raw HTTP | |
| # rather than the OpenAI SDK so that the field shape Studio | |
| # forwards to llama-server is unambiguous (the SDK rewrites | |
| # response_format depending on which variant it recognises). | |
| # We deliberately do NOT pass a strict JSON schema -- on | |
| # small Gemma-4 quants the GBNF-from-schema path occasionally | |
| # produces empty output, and JSON mode is the surface we care | |
| # about exposing through Studio. | |
| status, data = post("/v1/chat/completions", { | |
| "model": "default", | |
| "messages": [ | |
| {"role": "system", "content": 'Reply with a single JSON object of the form {"city": "...", "country": "..."}. Output ONLY the JSON, nothing else.'}, | |
| {"role": "user", "content": "What is the capital of France?"}, | |
| ], | |
| "temperature": 0.0, | |
| "max_tokens": 200, | |
| "seed": SEED, | |
| "stream": False, | |
| "enable_thinking": False, | |
| "response_format": {"type": "json_object"}, | |
| }, timeout = 600) | |
| assert status == 200, f"json status {status}: {data}" | |
| content = (data["choices"][0]["message"].get("content") or "").strip() | |
| # Log the raw reply before parsing (as the image checks do) so a | |
| # json.loads failure can be diagnosed from the job log. | |
| print(f"[json] reply: {content!r}") | |
| # Some chat templates wrap JSON in ```json fences even in JSON | |
| # mode -- strip those before parsing. | |
| if content.startswith("```"): | |
| content = content.split("```", 2)[1] | |
| if content.startswith("json"): | |
| content = content[4:] | |
| content = content.strip("`\n ") | |
| parsed = json.loads(content) | |
| # A top-level array or scalar is valid JSON but not the object asked | |
| # for; fail with a clear message instead of AttributeError on .get. | |
| assert isinstance(parsed, dict), ( | |
| f"expected a JSON object, got {type(parsed).__name__}: {parsed!r}" | |
| ) | |
| assert "paris" in str(parsed.get("city", "")).lower(), ( | |
| f"city != Paris: {parsed}" | |
| ) | |
| print(f"[json] PASS json_object -> {parsed}") | |
| # ── 2. OpenAI image_url (data URI base64) ─────────────────── | |
| # 64x64 solid-red PNG. stb_image (used by Studio's image | |
| # normaliser at routes/inference.py:3410) rejects 4x4 or | |
| # smaller PNGs as truncated, so we go up to 64x64 -- still | |
| # tiny in token cost. The assertion is loose: any non-empty | |
| # response from the vision path proves multimodal end-to-end | |
| # wiring; small VL quants are weak at colour identification. | |
| PNG_64X64_RED_B64 = ( | |
| "iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAIAAAAlC+aJAAAAYklEQVR4nO3PMQ0AIADAMEAI/k" | |
| "UhBhEcDcmqYJtn7/GzpQNeNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA" | |
| "1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaA1oDWgNaBdCJ0BmMJ25zMAAAAASUVORK5CYII=" | |
| ) | |
| data_uri = f"data:image/png;base64,{PNG_64X64_RED_B64}" | |
| client = OpenAI(base_url = f"{BASE}/v1", api_key = KEY) | |
| openai_resp = client.chat.completions.create( | |
| model = "default", | |
| temperature = 0.0, | |
| max_tokens = 80, | |
| seed = SEED, | |
| messages = [{ | |
| "role": "user", | |
| "content": [ | |
| {"type": "image_url", "image_url": {"url": data_uri}}, | |
| {"type": "text", "text": "What colour dominates this image? Reply in one word."}, | |
| ], | |
| }], | |
| ) | |
| openai_text = (openai_resp.choices[0].message.content or "").lower() | |
| print(f"[image/openai] reply: {openai_text!r}") | |
| assert openai_text, "OpenAI image_url returned empty content" | |
| # We do not strictly require 'red' -- some quants of small VL | |
| # models are weak at colour names. Just require a non-empty | |
| # answer; the vision path is the part under test. | |
| print("[image/openai] PASS image_url accepted, non-empty response") | |
| # ── 3. Anthropic source/base64 image ──────────────────────── | |
| # Two SDK quirks vs. Studio: base_url must NOT include /v1 | |
| # (the SDK appends it itself; otherwise /v1/v1/messages -> 405), | |
| # and Studio's auth is HTTPBearer-only so the SDK's default | |
| # x-api-key header is ignored -- send Authorization: Bearer | |
| # via default_headers. | |
| anthropic = Anthropic( | |
| base_url = BASE, | |
| api_key = "unused", | |
| default_headers = {"Authorization": f"Bearer {KEY}"}, | |
| ) | |
| a_msg = anthropic.messages.create( | |
| model = "default", | |
| max_tokens = 80, | |
| temperature = 0.0, | |
| # The seed is forwarded in the request body via extra_body. | |
| extra_body = {"seed": SEED}, | |
| messages = [{ | |
| "role": "user", | |
| "content": [ | |
| { | |
| "type": "image", | |
| "source": { | |
| "type": "base64", | |
| "media_type": "image/png", | |
| "data": PNG_64X64_RED_B64, | |
| }, | |
| }, | |
| {"type": "text", "text": "Describe this image briefly."}, | |
| ], | |
| }], | |
| ) | |
| # Join only the text-type content blocks of the reply; any other | |
| # block type is ignored. | |
| a_text = "".join(b.text for b in a_msg.content if getattr(b, "type", None) == "text") | |
| print(f"[image/anthropic] reply: {a_text!r}") | |
| # As with the OpenAI check, only a non-empty answer is required. | |
| assert a_text, "Anthropic source/base64 returned empty content" | |
| print("[image/anthropic] PASS source/base64 accepted, non-empty response") | |
| PY | |
| - name: Stop Studio | |
| if: always() | |
| run: | | |
| # Best-effort shutdown: errors are discarded so the step stays green | |
| # when the process has already exited. | |
| kill "${STUDIO_PID}" 2>/dev/null || true | |
| sleep 2 | |
| # Diagnostic only: print any socket still listening on the Studio port. | |
| ss -tln | grep ":${STUDIO_PORT}" || true | |
| # Capture backend + llama-server logs so a 500 has a server-side traceback | |
| # (same as the tool-calling job). | |
| mkdir -p logs/server-logs | |
| cp -r ~/.unsloth/studio/logs/. logs/server-logs/ 2>/dev/null || true | |
| - name: Upload logs | |
| # Always upload so green runs are still reviewable. | |
| if: always() | |
| # Diagnostic only: a transient artifact-service drop must not fail a green job. | |
| continue-on-error: true | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: json-images-log | |
| path: | | |
| logs/studio.log | |
| logs/install.log | |
| logs/server-logs/ | |
| retention-days: 7 | |