diff --git a/src/bootstrap.ts b/src/bootstrap.ts
new file mode 100644
index 00000000..eb9f03f2
--- /dev/null
+++ b/src/bootstrap.ts
@@ -0,0 +1,22 @@
+/**
+ * Threadpool bootstrap. MUST be the first import in index.ts.
+ *
+ * Every fs operation (read_file, write_file, edit_block, config/history/log
+ * persistence) runs on libuv's threadpool, which defaults to only 4 threads.
+ * Under heavy parallel load — e.g. several agents reading/writing files on a
+ * slow or cloud-synced filesystem — 4 stalled operations exhaust the pool, and
+ * because a stalled syscall keeps its thread until the OS returns (a JS-level
+ * timeout does not cancel it), every subsequent fs op queues for minutes. That
+ * surfaced as multi-minute tool-call hangs under parallel `claude -p` load.
+ *
+ * Raising the pool size gives enough headroom that a burst of slow reads no
+ * longer starves the rest. libuv reads UV_THREADPOOL_SIZE only when the pool is
+ * first initialized (on first submitted work), so this assignment has to happen
+ * before ANY threadpool work — hence "first import". A user-provided value is
+ * always respected.
+ */
+const DEFAULT_THREADPOOL_SIZE = 16;
+
+if (!process.env.UV_THREADPOOL_SIZE) {
+  process.env.UV_THREADPOOL_SIZE = String(DEFAULT_THREADPOOL_SIZE);
+}
diff --git a/src/config-manager.ts b/src/config-manager.ts
index 1c88700c..83520533 100644
--- a/src/config-manager.ts
+++ b/src/config-manager.ts
@@ -52,6 +52,10 @@ class ConfigManager {
   private config: ServerConfig = {};
   private initialized = false;
   private _isFirstRun = false; // Track if this is the first run (config was just created)
+  // Serializes all disk writes so concurrent saves can't corrupt config.json.
+  private writeChain: Promise<void> = Promise.resolve();
+  // True while a coalesced background write is already queued (see scheduleSave).
+  private saveScheduled = false;
 
   constructor() {
     // Get user's home directory
@@ -174,15 +178,45 @@ class ConfigManager {
   }
 
   /**
-   * Save config to disk
+   * Write the current in-memory config to disk. All writes funnel through
+   * writeChain (see saveConfig / scheduleSave) so overlapping saves can never
+   * interleave and corrupt the file. Previously every tool call could fire its
+   * own independent fs.writeFile of the same path.
    */
-  private async saveConfig() {
-    try {
-      await fs.writeFile(this.configPath, JSON.stringify(this.config, null, 2), 'utf8');
-    } catch (error) {
-      console.error('Failed to save config:', error);
-      throw error;
-    }
+  private async writeConfigToDisk(): Promise<void> {
+    await fs.writeFile(this.configPath, JSON.stringify(this.config, null, 2), 'utf8');
+  }
+
+  /**
+   * Awaitable save, serialized on writeChain. Use for explicit, user-driven
+   * config changes where the caller wants on-disk confirmation.
+   */
+  private async saveConfig(): Promise<void> {
+    const write = this.writeChain.then(() => this.writeConfigToDisk());
+    // Keep the chain alive even if this write rejects, so later writes still run.
+    this.writeChain = write.catch(() => {});
+    return write;
+  }
+
+  /**
+   * Non-blocking, coalesced save. Returns immediately; the write runs in the
+   * background. A burst of calls collapses to at most one queued write behind
+   * the in-flight one, so a storm of tool calls can't storm the disk — and,
+   * critically, can't pile up behind a saturated libuv threadpool and gate the
+   * tool-call response path. Used for high-frequency, non-critical persistence
+   * such as usage stats.
+   */
+  scheduleSave(): void {
+    if (this.saveScheduled) return; // a queued write will capture the latest config
+    this.saveScheduled = true;
+    this.writeChain = this.writeChain.then(async () => {
+      this.saveScheduled = false; // let the next burst queue a fresh write
+      try {
+        await this.writeConfigToDisk();
+      } catch (error) {
+        console.error('Failed to save config (background):', error);
+      }
+    });
   }
 
   /**
@@ -236,6 +270,21 @@ class ConfigManager {
     await this.saveConfig();
   }
 
+  /**
+   * Update a value in memory and persist it WITHOUT blocking the caller.
+   * The tool-call response path must never wait on a disk write: when the libuv
+   * threadpool is saturated (e.g. many parallel reads stalled on a slow/cloud
+   * filesystem) an awaited write can't get a thread and would hang the response
+   * of even pure-memory tools. The in-memory value is updated synchronously so
+   * subsequent reads see it immediately; the write is coalesced in the
+   * background. Callers needing on-disk confirmation should use setValue.
+   */
+  async setValueNonBlocking(key: string, value: any): Promise<void> {
+    await this.init();
+    this.config[key] = value;
+    this.scheduleSave();
+  }
+
   /**
    * Update multiple configuration values at once
    */
diff --git a/src/handlers/filesystem-handlers.ts b/src/handlers/filesystem-handlers.ts
index df889c0d..1c2832d6 100644
--- a/src/handlers/filesystem-handlers.ts
+++ b/src/handlers/filesystem-handlers.ts
@@ -71,7 +71,12 @@ function getErrorFromPath(path: string): string {
  * Handle read_file command
  */
 export async function handleReadFile(args: unknown): Promise<ServerResult> {
-    const HANDLER_TIMEOUT = 60000; // 60 seconds total operation timeout
+    // Backstop for the whole handler operation. The real control is the
+    // 3-minute cancellable read timeout inside readFileFromDisk; this sits just
+    // above it (so that one fires first, with cleanup + a useful error) but
+    // still below the MCP client's ~4-minute hard cap, so we never leave the
+    // client hanging on an opaque timeout.
+    const HANDLER_TIMEOUT = 3.5 * 60 * 1000; // 3m30s
     // Add input validation
     if (args === null || args === undefined) {
         return createErrorResponse('No arguments provided for read_file command');
diff --git a/src/index.ts b/src/index.ts
index 23fda32d..995940a1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,5 +1,8 @@
 #!/usr/bin/env node
 
+// MUST be first: raises the libuv threadpool size before any fs work is
+// submitted. See src/bootstrap.ts for why import order matters.
+import './bootstrap.js';
 import { FilteredStdioServerTransport } from './custom-stdio.js';
 import { server, flushDeferredMessages } from './server.js';
 import { commandManager } from './command-manager.js';
diff --git a/src/tools/filesystem.ts b/src/tools/filesystem.ts
index 51b3a6ba..0476a7ca 100644
--- a/src/tools/filesystem.ts
+++ b/src/tools/filesystem.ts
@@ -5,7 +5,7 @@ import fetch from 'cross-fetch';
 import { execFile } from 'child_process';
 import { promisify } from 'util';
 import { capture } from '../utils/capture.js';
-import { withTimeout } from '../utils/withTimeout.js';
+import { withTimeout, runWithAbortableTimeout } from '../utils/withTimeout.js';
 import { configManager } from '../config-manager.js';
 import { getFileHandler, TextFileHandler } from '../utils/files/index.js';
 import type { ReadOptions, FileResult, PdfPageItem } from '../utils/files/base.js';
@@ -20,6 +20,14 @@ const FILE_OPERATION_TIMEOUTS = {
     FILE_READ: 30000,          // 30 seconds
 } as const;
 
+// Cap file read operations at 3 minutes. The MCP client's hard per-call limit
+// is ~4 minutes; timing out at 3m lets us abort the underlying fs op and return
+// a useful error (e.g. the cloud-storage guidance in buildPermissionError)
+// BEFORE the client gives up with an opaque "No result received after 4
+// minutes". Paired with runWithAbortableTimeout so the read is actually
+// cancelled (fd/thread released), not just abandoned.
+export const READ_OPERATION_TIMEOUT_MS = 3 * 60 * 1000; // 3 minutes
+
 const FILE_SIZE_LIMITS = {
     LINE_COUNT_LIMIT: 10 * 1024 * 1024,      // 10MB for line counting
 } as const;
@@ -495,8 +503,9 @@ export async function readFileFromDisk(
         // If we can't stat the file, continue anyway and let the read operation handle errors
     }
 
-    // Use withTimeout to handle potential hangs
-    const readOperation = async () => {
+    // Read under an abortable timeout so a hung/stalled read is cancelled
+    // (fd/thread freed) rather than leaked until the OS call returns.
+    const readOperation = async (signal: AbortSignal) => {
         // Get appropriate handler for this file type (async - includes binary detection)
         const handler = await getFileHandler(validPath);
 
@@ -506,7 +515,8 @@ export async function readFileFromDisk(
             length,
             sheet,
             range,
-            includeStatusMessage: true
+            includeStatusMessage: true,
+            signal
         });
 
         // Return with content as string
@@ -529,22 +539,20 @@ export async function readFileFromDisk(
         };
     };
 
-    // Execute with timeout
+    // Execute with a 3-minute, cancellable timeout
     let result;
     try {
-        result = await withTimeout(
-            readOperation(),
-            FILE_OPERATION_TIMEOUTS.FILE_READ,
-            `Read file operation for ${filePath}`,
-            null
+        result = await runWithAbortableTimeout(
+            (signal) => readOperation(signal),
+            READ_OPERATION_TIMEOUT_MS,
+            `Read file operation for ${filePath}`
         );
     } catch (error) {
         const err = error as NodeJS.ErrnoException;
-        // withTimeout rejects with a plain string "__ERROR__: ... timed out after N seconds"
-        // when defaultValue is null — it has no .code property, so check for that too.
-        const isWithTimeoutString = typeof error === 'string' && (error as string).startsWith('__ERROR__:');
-        if (isWithTimeoutString || err.code === 'EPERM' || err.code === 'EACCES' || err.code === 'ETIMEDOUT') {
-            throw buildPermissionError(filePath, isWithTimeoutString ? 'ETIMEDOUT' : err.code);
+        // runWithAbortableTimeout rejects with an Error whose .code is 'ETIMEDOUT'
+        // on timeout; fs rejects with EPERM/EACCES. Map all to the guidance error.
+        if (err.code === 'EPERM' || err.code === 'EACCES' || err.code === 'ETIMEDOUT') {
+            throw buildPermissionError(filePath, err.code);
         }
         throw error;
     }
@@ -602,8 +610,14 @@ export async function readFileInternal(filePath: string, offset: number = 0, len
     // preserve exact file content including original line endings.
     // We cannot use readline-based reading as it strips line endings.
 
-    // Read entire file content preserving line endings
-    const content = await fs.readFile(validPath, 'utf8');
+    // Read entire file content preserving line endings, under a 3-minute,
+    // cancellable timeout so an edit on a stalled/cloud path can't hang forever
+    // (previously this read had no timeout at all).
+    const content = await runWithAbortableTimeout(
+        (signal) => fs.readFile(validPath, { encoding: 'utf8', signal }),
+        READ_OPERATION_TIMEOUT_MS,
+        `Internal read for ${filePath}`
+    );
 
     // If we need to apply offset/length, do it while preserving line endings
     if (offset === 0 && length >= Number.MAX_SAFE_INTEGER) {
diff --git a/src/utils/files/base.ts b/src/utils/files/base.ts
index 278a784f..8f94a8d6 100644
--- a/src/utils/files/base.ts
+++ b/src/utils/files/base.ts
@@ -88,6 +88,9 @@ export interface ReadOptions {
 
     /** Whether to include status messages (default: true) */
     includeStatusMessage?: boolean;
+
+    /** Optional AbortSignal to cancel an in-flight read (frees fd/thread on timeout). */
+    signal?: AbortSignal;
 }
 
 /**
diff --git a/src/utils/files/image.ts b/src/utils/files/image.ts
index 4e97ce9b..06fd5b80 100644
--- a/src/utils/files/image.ts
+++ b/src/utils/files/image.ts
@@ -37,7 +37,7 @@ export class ImageFileHandler implements FileHandler {
 
     async read(path: string, options?: ReadOptions): Promise<FileResult> {
         // Images are always read in full, ignoring offset and length
-        const buffer = await fs.readFile(path);
+        const buffer = await fs.readFile(path, { signal: options?.signal });
         const content = buffer.toString('base64');
         const mimeType = this.getMimeType(path);
 
diff --git a/src/utils/files/text.ts b/src/utils/files/text.ts
index 52e37bff..bf9035ab 100644
--- a/src/utils/files/text.ts
+++ b/src/utils/files/text.ts
@@ -56,7 +56,7 @@ export class TextFileHandler implements FileHandler {
         const includeStatusMessage = options?.includeStatusMessage ?? true;
 
         // Binary detection is done at factory level - just read as text
-        return this.readFileWithSmartPositioning(filePath, offset, length, 'text/plain', includeStatusMessage);
+        return this.readFileWithSmartPositioning(filePath, offset, length, 'text/plain', includeStatusMessage, options?.signal);
     }
 
     async write(path: string, content: string, mode: 'rewrite' | 'append' = 'rewrite'): Promise<void> {
@@ -118,11 +118,11 @@ export class TextFileHandler implements FileHandler {
     /**
      * Get file line count (for files under size limit)
      */
-    private async getFileLineCount(filePath: string): Promise<number | undefined> {
+    private async getFileLineCount(filePath: string, signal?: AbortSignal): Promise<number | undefined> {
         try {
             const stats = await fs.stat(filePath);
             if (stats.size < FILE_SIZE_LIMITS.LINE_COUNT_LIMIT) {
-                const content = await fs.readFile(filePath, 'utf8');
+                const content = await fs.readFile(filePath, { encoding: 'utf8', signal });
                 return TextFileHandler.countLines(content);
             }
         } catch (error) {
@@ -208,12 +208,13 @@ export class TextFileHandler implements FileHandler {
         offset: number,
         length: number,
         mimeType: string,
-        includeStatusMessage: boolean = true
+        includeStatusMessage: boolean = true,
+        signal?: AbortSignal
     ): Promise<FileResult> {
         const stats = await fs.stat(filePath);
         const fileSize = stats.size;
 
-        const totalLines = await this.getFileLineCount(filePath);
+        const totalLines = await this.getFileLineCount(filePath, signal);
 
         // For negative offsets (tail behavior), use reverse reading
         if (offset < 0) {
@@ -221,20 +222,20 @@ export class TextFileHandler implements FileHandler {
 
             if (fileSize > FILE_SIZE_LIMITS.LARGE_FILE_THRESHOLD &&
                 requestedLines <= READ_PERFORMANCE_THRESHOLDS.SMALL_READ_THRESHOLD) {
-                return await this.readLastNLinesReverse(filePath, requestedLines, mimeType, includeStatusMessage, totalLines);
+                return await this.readLastNLinesReverse(filePath, requestedLines, mimeType, includeStatusMessage, totalLines, signal);
             } else {
-                return await this.readFromEndWithReadline(filePath, requestedLines, mimeType, includeStatusMessage, totalLines);
+                return await this.readFromEndWithReadline(filePath, requestedLines, mimeType, includeStatusMessage, totalLines, signal);
             }
         }
         // For positive offsets
         else {
             if (fileSize < FILE_SIZE_LIMITS.LARGE_FILE_THRESHOLD || offset === 0) {
-                return await this.readFromStartWithReadline(filePath, offset, length, mimeType, includeStatusMessage, totalLines);
+                return await this.readFromStartWithReadline(filePath, offset, length, mimeType, includeStatusMessage, totalLines, signal);
             } else {
                 if (offset > READ_PERFORMANCE_THRESHOLDS.DEEP_OFFSET_THRESHOLD) {
-                    return await this.readFromEstimatedPosition(filePath, offset, length, mimeType, includeStatusMessage, totalLines);
+                    return await this.readFromEstimatedPosition(filePath, offset, length, mimeType, includeStatusMessage, totalLines, signal);
                 } else {
-                    return await this.readFromStartWithReadline(filePath, offset, length, mimeType, includeStatusMessage, totalLines);
+                    return await this.readFromStartWithReadline(filePath, offset, length, mimeType, includeStatusMessage, totalLines, signal);
                 }
             }
         }
@@ -248,7 +249,8 @@ export class TextFileHandler implements FileHandler {
         n: number,
         mimeType: string,
         includeStatusMessage: boolean = true,
-        fileTotalLines?: number
+        fileTotalLines?: number,
+        signal?: AbortSignal
     ): Promise<FileResult> {
         const fd = await fs.open(filePath, 'r');
         try {
@@ -260,6 +262,11 @@ export class TextFileHandler implements FileHandler {
             let partialLine = '';
 
             while (position > 0 && lines.length < n) {
+                if (signal?.aborted) {
+                    const err = new Error('Read aborted') as NodeJS.ErrnoException;
+                    err.code = 'ABORT_ERR';
+                    throw err;
+                }
                 const readSize = Math.min(READ_PERFORMANCE_THRESHOLDS.CHUNK_SIZE, position);
                 position -= readSize;
 
@@ -297,10 +304,11 @@ export class TextFileHandler implements FileHandler {
         requestedLines: number,
         mimeType: string,
         includeStatusMessage: boolean = true,
-        fileTotalLines?: number
+        fileTotalLines?: number,
+        signal?: AbortSignal
     ): Promise<FileResult> {
         const rl = createInterface({
-            input: createReadStream(filePath),
+            input: createReadStream(filePath, { signal }),
             crlfDelay: Infinity
         });
 
@@ -342,10 +350,11 @@ export class TextFileHandler implements FileHandler {
         length: number,
         mimeType: string,
         includeStatusMessage: boolean = true,
-        fileTotalLines?: number
+        fileTotalLines?: number,
+        signal?: AbortSignal
     ): Promise<FileResult> {
         const rl = createInterface({
-            input: createReadStream(filePath),
+            input: createReadStream(filePath, { signal }),
             crlfDelay: Infinity
         });
 
@@ -381,11 +390,12 @@ export class TextFileHandler implements FileHandler {
         length: number,
         mimeType: string,
         includeStatusMessage: boolean = true,
-        fileTotalLines?: number
+        fileTotalLines?: number,
+        signal?: AbortSignal
     ): Promise<FileResult> {
         // First, do a quick scan to estimate lines per byte
         const rl = createInterface({
-            input: createReadStream(filePath),
+            input: createReadStream(filePath, { signal }),
             crlfDelay: Infinity
         });
 
@@ -401,7 +411,7 @@ export class TextFileHandler implements FileHandler {
         rl.close();
 
         if (sampleLines === 0) {
-            return await this.readFromStartWithReadline(filePath, offset, length, mimeType, includeStatusMessage, fileTotalLines);
+            return await this.readFromStartWithReadline(filePath, offset, length, mimeType, includeStatusMessage, fileTotalLines, signal);
         }
 
         // Estimate position
@@ -413,7 +423,7 @@ export class TextFileHandler implements FileHandler {
             const stats = await fd.stat();
             const startPosition = Math.min(estimatedBytePosition, stats.size);
 
-            const stream = createReadStream(filePath, { start: startPosition });
+            const stream = createReadStream(filePath, { start: startPosition, signal });
             const rl2 = createInterface({
                 input: stream,
                 crlfDelay: Infinity
diff --git a/src/utils/usageTracker.ts b/src/utils/usageTracker.ts
index bc210bdb..fe035451 100644
--- a/src/utils/usageTracker.ts
+++ b/src/utils/usageTracker.ts
@@ -92,10 +92,14 @@ class UsageTracker {
   }
 
   /**
-   * Save usage stats to config
+   * Save usage stats to config.
+   * Non-blocking: the tool-call return path must not wait on a disk write. The
+   * in-memory stats are already updated by the caller (getStats returns the live
+   * config object), so persistence is coalesced in the background. This is what
+   * keeps a saturated libuv threadpool from gating tool responses on every call.
    */
   private async saveStats(stats: ToolUsageStats): Promise<void> {
-    await configManager.setValue('usageStats', stats);
+    await configManager.setValueNonBlocking('usageStats', stats);
   }
 
   /**
diff --git a/src/utils/withTimeout.ts b/src/utils/withTimeout.ts
index 93ade585..13baba68 100644
--- a/src/utils/withTimeout.ts
+++ b/src/utils/withTimeout.ts
@@ -55,4 +55,45 @@ export function withTimeout<T>(
                 }
             });
     });
-}
\ No newline at end of file
+}
+
+/**
+ * Run an operation under a timeout WITH real cancellation.
+ *
+ * Unlike withTimeout (which only races a timer and leaves the underlying work
+ * running — holding its libuv thread/fd until the OS call returns), this passes
+ * an AbortSignal into the operation and aborts it when the timeout fires, so a
+ * read/stream that honors the signal is cancelled and its resources released.
+ *
+ * Rejects with an Error whose `.code` is 'ETIMEDOUT' on timeout (so existing
+ * ETIMEDOUT handling / permission-error mapping keeps working).
+ *
+ * Caveat: an operation wedged inside a single un-interruptible syscall only
+ * observes the abort once that syscall returns; library reads that ignore the
+ * signal (e.g. Excel/PDF parsers) still get the timeout rejection but keep
+ * running in the background until they finish on their own.
+ */
+export function runWithAbortableTimeout<T>(
+    operation: (signal: AbortSignal) => Promise<T>,
+    timeoutMs: number,
+    operationName: string
+): Promise<T> {
+    const controller = new AbortController();
+    let timeoutId: NodeJS.Timeout;
+
+    const timeout = new Promise<never>((_, reject) => {
+        timeoutId = setTimeout(() => {
+            controller.abort();
+            const error = new Error(`${operationName} timed out after ${timeoutMs / 1000} seconds`) as NodeJS.ErrnoException;
+            error.code = 'ETIMEDOUT';
+            reject(error);
+        }, timeoutMs);
+    });
+
+    const op = operation(controller.signal);
+    // Swallow the late abort rejection so it can't surface as an unhandled
+    // rejection after the timeout has already settled the race.
+    op.catch(() => {});
+
+    return Promise.race([op, timeout]).finally(() => clearTimeout(timeoutId));
+}
diff --git a/test/repro/test-bootstrap-threadpool.js b/test/repro/test-bootstrap-threadpool.js
new file mode 100644
index 00000000..37edfdaf
--- /dev/null
+++ b/test/repro/test-bootstrap-threadpool.js
@@ -0,0 +1,24 @@
+// Verify the SHIPPED bootstrap (dist/bootstrap.js) raises the pool before any
+// fs work, so 4 concurrently-stalled reads no longer starve a 5th fs op.
+// No UV_THREADPOOL_SIZE override here -> relies entirely on bootstrap's default.
+import '../../dist/bootstrap.js';           // first import, exactly like index.ts
+import { execSync } from 'child_process';
+import fs from 'fs/promises';
+import os from 'os';
+import path from 'path';
+
+const T0 = Date.now();
+const log = (m) => console.log(`[${Date.now() - T0}ms] ${m}`);
+const fifo = path.join(os.tmpdir(), `dc-boot-${Date.now()}`);
+execSync(`mkfifo ${fifo}`);
+log(`UV_THREADPOOL_SIZE after bootstrap = ${process.env.UV_THREADPOOL_SIZE}`);
+
+for (let i = 0; i < 4; i++) fs.readFile(fifo).catch(() => {});
+setTimeout(async () => {
+  const t = Date.now();
+  const guard = setTimeout(() => { log(`BLOCKED >3000ms -> bootstrap did NOT help`); execSync(`rm -f ${fifo}`); process.exit(1); }, 3000);
+  await fs.writeFile(path.join(os.tmpdir(), 'dc-boot-probe'), 'x');
+  clearTimeout(guard);
+  log(`trivial write completed in ${Date.now() - t}ms -> bootstrap headroom WORKS`);
+  execSync(`rm -f ${fifo}`); process.exit(0);
+}, 150);
diff --git a/test/repro/test-dc-tracking-gate.js b/test/repro/test-dc-tracking-gate.js
new file mode 100644
index 00000000..cd7d6a43
--- /dev/null
+++ b/test/repro/test-dc-tracking-gate.js
@@ -0,0 +1,47 @@
+// DC-level repro: the real per-tool-call gate.
+// server.ts CallTool handler awaits usageTracker.trackSuccess(name) before
+// returning ANY tool's result. trackSuccess -> saveStats -> configManager
+// .setValue -> saveConfig -> fs.writeFile (libuv threadpool).
+// If stalled cloud-path reads hold all threadpool threads, this awaited write
+// never resolves -> even list_processes (pure memory) never returns.
+//
+// Run: UV_THREADPOOL_SIZE=4 BLOCKERS=4 node test/test-dc-tracking-gate.js
+import { execSync } from 'child_process';
+import fs from 'fs/promises';
+import os from 'os';
+import path from 'path';
+import { configManager } from '../../dist/config-manager.js';
+import { usageTracker } from '../../dist/utils/usageTracker.js';
+
+const T0 = Date.now();
+const log = (m) => console.log(`[${Date.now() - T0}ms] ${m}`);
+const BLOCKERS = Number(process.env.BLOCKERS || process.env.UV_THREADPOOL_SIZE || 4);
+const fifo = path.join(os.tmpdir(), `dc-gate-fifo-${Date.now()}`);
+execSync(`mkfifo ${fifo}`);
+
+// Warm the config so init()'s own disk read is already done and cached.
+await configManager.getConfig();
+log(`config warmed; pool=${process.env.UV_THREADPOOL_SIZE || 4}, blockers=${BLOCKERS}`);
+
+// Simulate DC read_file/edit_block calls stuck on a stalled cloud mount:
+// each holds a threadpool thread until the (never-arriving) read returns.
+for (let i = 0; i < BLOCKERS; i++) {
+  fs.readFile(fifo).catch((e) => log(`fifo read ${i} errored: ${e.code}`));
+}
+
+// Now the exact thing the dispatcher awaits for EVERY successful tool call,
+// including list_processes:
+setTimeout(async () => {
+  log(`calling usageTracker.trackSuccess('list_processes') ...`);
+  const t = Date.now();
+  const guard = setTimeout(() => {
+    log(`trackSuccess STILL BLOCKED after 5000ms -> list_processes would hang here. GATE REPRODUCED.`);
+    execSync(`rm -f ${fifo}`);
+    process.exit(0);
+  }, 5000);
+  await usageTracker.trackSuccess('list_processes');
+  clearTimeout(guard);
+  log(`trackSuccess completed in ${Date.now() - t}ms (NOT gated)`);
+  execSync(`rm -f ${fifo}`);
+  process.exit(0);
+}, 200);
diff --git a/test/repro/test-env-threadpool-timing.js b/test/repro/test-env-threadpool-timing.js
new file mode 100644
index 00000000..133d2f26
--- /dev/null
+++ b/test/repro/test-env-threadpool-timing.js
@@ -0,0 +1,24 @@
+// Does setting process.env.UV_THREADPOOL_SIZE from inside the process (before
+// the first threadpool op) actually change the effective pool size? If yes, a
+// bootstrap module that sets it as the very first import is a valid fix.
+process.env.UV_THREADPOOL_SIZE = '8';   // set BEFORE any fs/threadpool use
+import { execSync } from 'child_process';
+import fs from 'fs/promises';
+import os from 'os';
+import path from 'path';
+
+const T0 = Date.now();
+const log = (m) => console.log(`[${Date.now() - T0}ms] ${m}`);
+const fifo = path.join(os.tmpdir(), `dc-envtest-${Date.now()}`);
+execSync(`mkfifo ${fifo}`);           // child_process, not threadpool
+log(`set UV_THREADPOOL_SIZE=8 in-process; firing 4 FIFO blockers`);
+
+for (let i = 0; i < 4; i++) fs.readFile(fifo).catch(() => {});
+setTimeout(async () => {
+  const t = Date.now();
+  const guard = setTimeout(() => { log(`BLOCKED >3000ms -> env set too late, pool still 4`); execSync(`rm -f ${fifo}`); process.exit(0); }, 3000);
+  await fs.writeFile(path.join(os.tmpdir(), 'dc-envtest-probe'), 'x');
+  clearTimeout(guard);
+  log(`write completed in ${Date.now() - t}ms -> in-process env set WORKS (pool=8)`);
+  execSync(`rm -f ${fifo}`); process.exit(0);
+}, 150);
diff --git a/test/repro/test-interact-join-stall.js b/test/repro/test-interact-join-stall.js
new file mode 100644
index 00000000..fbd6e211
--- /dev/null
+++ b/test/repro/test-interact-join-stall.js
@@ -0,0 +1,55 @@
+// Repro (v2): interact_with_process re-join()s the WHOLE buffer every 50ms poll
+// (getOutputSinceSnapshot -> outputLines.join('\n'), terminal-manager.ts).
+// Phase 1: grow the buffer to ~45MB across MANY lines (so join must concat).
+// Phase 2: trickle tiny output over ~3s; the poll loop then joins ~45MB ~60x.
+// Measure event-loop lag during Phase 2 only.
+//
+// Run: node test/test-interact-join-stall.js
+import { startProcess, interactWithProcess } from '../../dist/tools/improved-process-tools.js';
+
+const T0 = Date.now();
+const log = (m) => console.log(`[${Date.now() - T0}ms] ${m}`);
+const pidFrom = (res) => {
+  const m = (res?.content?.[0]?.text || '').match(/PID (\d+)/);
+  return m ? Number(m[1]) : null;
+};
+
+const started = await startProcess({ command: 'python3 -i -q', timeout_ms: 4000 });
+const pid = pidFrom(started);
+log(`python pid=${pid}`);
+await interactWithProcess({ pid, input: 'import time', timeout_ms: 3000 });
+
+// Phase 1: single expression (no block => no '...' continuation). ~45MB / 3000 lines.
+log(`phase 1: growing buffer to ~45MB ...`);
+await interactWithProcess({ pid, input: "print('\\n'.join('X'*1200 for _ in range(40000)))", timeout_ms: 15000 });
+log(`phase 1 done`);
+
+// Event-loop lag monitor (10ms interval; large delta => synchronous block).
+let maxLag = 0, sumLag = 0, samples = 0, last = Date.now();
+const mon = setInterval(() => {
+  const now = Date.now();
+  const lag = now - last - 10;
+  if (lag > maxLag) maxLag = lag;
+  if (lag > 0) { sumLag += lag; samples++; }
+  last = now;
+}, 10);
+
+// Phase 2: trickle 40 ticks over ~3.2s so ~60 polls each re-join the 45MB buffer.
+log(`phase 2: trickling output while polling ...`);
+const t = Date.now();
+await interactWithProcess({
+  pid,
+  input: "[ (time.sleep(0.08) or print('tick', i)) for i in range(40) ]",
+  timeout_ms: 12000
+});
+const wall = Date.now() - t;
+clearInterval(mon);
+
+log(`phase 2 wall=${wall}ms`);
+log(`event-loop lag during phase 2: max=${maxLag}ms, total blocked=${sumLag}ms over ${samples} stalls`);
+log(`blocked ${((sumLag / wall) * 100).toFixed(0)}% of phase-2 wall time`);
+log(maxLag > 150 || sumLag > wall * 0.3
+  ? `STALL REPRODUCED: per-poll whole-buffer join starves the event loop`
+  : `no significant stall observed`);
+try { await interactWithProcess({ pid, input: 'exit()', timeout_ms: 800, wait_for_prompt: false }); } catch {}
+process.exit(0);
diff --git a/test/repro/test-process-wait-client-cap.js b/test/repro/test-process-wait-client-cap.js
new file mode 100644
index 00000000..29e66a88
--- /dev/null
+++ b/test/repro/test-process-wait-client-cap.js
@@ -0,0 +1,201 @@
+import assert from 'assert';
+import { terminalManager } from '../../dist/terminal-manager.js';
+import { interactWithProcess } from '../../dist/tools/improved-process-tools.js';
+import { configManager } from '../../dist/config-manager.js';
+
+/**
+ * FAILING repro for the "No result received after 4 minutes" crash.
+ *
+ * Symptom (customer): a `Send Input to Process` / start_process call hangs and
+ * Claude Desktop reports "No result received from the Claude Desktop app after
+ * waiting 4 minutes. The local MCP server ... may be unresponsive."
+ *
+ * Verified root cause (src/terminal-manager.ts executeCommand + improved-
+ * process-tools.ts interactWithProcess): the blocking wait is bounded ONLY by
+ * the caller-supplied timeout_ms. There is no ceiling below the MCP client's
+ * hard ~4-minute (240000ms) per-call limit. So a long-running, prompt-less
+ * command (e.g. an OpenRouter multi-model sweep) with a large timeout_ms keeps
+ * the tool call pending until the client gives up at 4 minutes.
+ *
+ * These tests encode the DESIRED contract, so they FAIL against current code
+ * and should PASS once a fix lands:
+ *
+ *   The single-call blocking wait of start_process and interact_with_process
+ *   MUST be capped at min(timeout_ms, maxProcessWaitMs), where maxProcessWaitMs
+ *   is a config value defaulting BELOW the client ceiling (suggested 180000).
+ *   When the cap is hit, the tool returns an "is still running, use
+ *   read_process_output" handoff (isBlocked=true) instead of continuing to
+ *   block for the full timeout_ms.
+ *
+ * The fast tests below set maxProcessWaitMs to a tiny value so they run in ms.
+ * Set DC_REPRO_REALTIME=1 to additionally run a fix-agnostic ~200s proof that
+ * uses the real client ceiling.
+ */
+
+// The MCP client (Claude Desktop) kills a single tool call at ~4 minutes.
+const CLIENT_CEILING_MS = 240000;
+// Config key the fix should read to bound the single-call wait.
+const CAP_KEY = 'maxProcessWaitMs';
+
+// Fast-test values: tiny cap so a fixed tool returns in ms.
+const TEST_CAP_MS = 400;          // stand-in for the production maxProcessWaitMs
+const LARGE_TIMEOUT_MS = 4000;    // stand-in for "user/agent set a big timeout"
+const CHILD_LIFETIME_MS = 8000;   // child outlives both cap and timeout
+const CAP_MARGIN_MS = 1600;       // tolerance above the cap for scheduling/overhead
+
+const since = (t) => Date.now() - t;
+const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
+const cleanup = (pid) => { try { if (pid > 0) terminalManager.forceTerminate(pid); } catch {} };
+
+let ORIGINAL_CAP;
+async function setCap(v) { await configManager.setValue(CAP_KEY, v); }
+async function snapshotCap() { ORIGINAL_CAP = await configManager.getValue(CAP_KEY); }
+async function restoreCap() { await configManager.setValue(CAP_KEY, ORIGINAL_CAP); }
+
+// A silent, long-running, non-exiting process: matches none of the early-exit
+// paths (no prompt, no output, no exit) so only the wait cap can end the call.
+const SILENT_CHILD = `node -e "setTimeout(function(){}, ${CHILD_LIFETIME_MS})"`;
+
+/**
+ * Test 1 (FAILS now): start_process must cap its initial wait.
+ * A silent long process with a large timeout_ms should return at ~the cap,
+ * not block for the whole timeout_ms. Current code blocks for timeout_ms.
+ */
+async function testStartProcessCapsWait() {
+  console.log('\n📋 Test 1: start_process caps initial wait below timeout_ms...');
+  await setCap(TEST_CAP_MS);
+  const t0 = Date.now();
+  const res = await terminalManager.executeCommand(SILENT_CHILD, LARGE_TIMEOUT_MS, undefined, true);
+  const elapsed = since(t0);
+  cleanup(res.pid);
+
+  assert(res.pid > 0, 'should have spawned a process');
+  assert.strictEqual(res.isBlocked, true, 'should report still-running (isBlocked=true)');
+  assert(
+    elapsed <= TEST_CAP_MS + CAP_MARGIN_MS,
+    `start_process should return at ~the cap (<=${TEST_CAP_MS + CAP_MARGIN_MS}ms), ` +
+    `but blocked for ${elapsed}ms with timeout_ms=${LARGE_TIMEOUT_MS} and cap=${TEST_CAP_MS}. ` +
+    `Uncapped, a real ${CLIENT_CEILING_MS}ms+ timeout would blow past the client limit.`
+  );
+  console.log(`  ✅ returned in ${elapsed}ms (cap=${TEST_CAP_MS}ms)`);
+}
+
+const BUSY_MS = 5000; // child stays silent (no prompt/output) longer than LARGE_TIMEOUT_MS
+const BUSY_INPUT = `var __end=Date.now()+${BUSY_MS}; while(Date.now()<__end){}`;
+
+/**
+ * Test 2 (FAILS now): interact_with_process must cap its wait too.
+ * This is the actual crash path ("Send Input to Process"). We start a REPL,
+ * then send input that runs a long, silent computation. With wait_for_prompt
+ * (default), no prompt appears until it finishes, so the poll loop currently
+ * runs the full timeout_ms. It should instead return at ~the cap.
+ */
+async function testInteractCapsWait() {
+  console.log('\n📋 Test 2: interact_with_process caps its wait below timeout_ms...');
+  // Start a REPL (returns fast via prompt detection, well under any cap).
+  const start = await terminalManager.executeCommand('node -i', 3000, undefined, false);
+  const pid = start.pid;
+  assert(pid > 0, 'should have started a REPL session');
+
+  await setCap(TEST_CAP_MS);
+  const t0 = Date.now();
+  await interactWithProcess({ pid, input: BUSY_INPUT, timeout_ms: LARGE_TIMEOUT_MS });
+  const elapsed = since(t0);
+  cleanup(pid);
+
+  assert(
+    elapsed <= TEST_CAP_MS + CAP_MARGIN_MS,
+    `interact_with_process should return at ~the cap (<=${TEST_CAP_MS + CAP_MARGIN_MS}ms), ` +
+    `but blocked for ${elapsed}ms with timeout_ms=${LARGE_TIMEOUT_MS} and cap=${TEST_CAP_MS}. ` +
+    `This is exactly the path that produces the 4-minute "no result" crash.`
+  );
+  console.log(`  ✅ returned in ${elapsed}ms (cap=${TEST_CAP_MS}ms)`);
+}
+
+/**
+ * Test 3 (guard, passes now AND after the fix): the cap must not slow the
+ * normal fast path. With a generous cap, a prompt-emitting process still
+ * returns early via prompt detection, nowhere near the cap or the timeout.
+ * This stops a "fix" that just blanket-shortens every wait.
+ */
+async function testPromptStillReturnsEarly() {
+  console.log('\n📋 Test 3 (guard): prompt-emitting process still returns early...');
+  await setCap(60000); // generous cap, larger than the timeout below
+  const bigTimeout = 5000;
+  const t0 = Date.now();
+  const res = await terminalManager.executeCommand(
+    `node -e "process.stdout.write('>>> ');setTimeout(function(){}, ${CHILD_LIFETIME_MS})"`,
+    bigTimeout,
+    undefined,
+    true
+  );
+  const elapsed = since(t0);
+  cleanup(res.pid);
+
+  assert.strictEqual(res.isBlocked, true, 'prompt means waiting for input');
+  assert(elapsed < 1000, `should return quickly via prompt, took ${elapsed}ms`);
+  assert(
+    res.timingInfo.exitReason.startsWith('early_exit'),
+    `expected an early_exit reason, got "${res.timingInfo.exitReason}"`
+  );
+  console.log(`  ✅ returned early in ${elapsed}ms (${res.timingInfo.exitReason})`);
+}
+
+/**
+ * Test 4 (fix-agnostic, slow, opt-in via DC_REPRO_REALTIME=1): uses the real
+ * client ceiling. A silent process with a 5-minute timeout_ms must not keep
+ * the call pending past a client-safe ceiling (200s, under the 240s cap).
+ * Does not depend on any particular fix knob. ~200s to run.
+ */
+async function testRealTimeClientCeiling() {
+  console.log('\n📋 Test 4 (real-time): single call must stay under the client ceiling...');
+  await restoreCap(); // use production default, not the tiny test cap
+  const SAFE_CEILING_MS = 200000;
+  const HUGE_TIMEOUT_MS = 300000;
+
+  const t0 = Date.now();
+  const pending = terminalManager.executeCommand(SILENT_CHILD, HUGE_TIMEOUT_MS, undefined, false);
+  await sleep(800);
+  const sessions = terminalManager.listActiveSessions();
+  const pid = sessions.length ? sessions[sessions.length - 1].pid : -1;
+  // Safety net so the test can never hang forever; also resolves `pending`.
+  const killer = setTimeout(() => cleanup(pid), SAFE_CEILING_MS);
+  await pending;
+  clearTimeout(killer);
+  const elapsed = since(t0);
+  cleanup(pid);
+
+  assert(
+    elapsed < SAFE_CEILING_MS,
+    `call stayed pending ${elapsed}ms — past the ${SAFE_CEILING_MS}ms safe ceiling ` +
+    `(client kills at ${CLIENT_CEILING_MS}ms). timeout_ms was ${HUGE_TIMEOUT_MS}.`
+  );
+  console.log(`  ✅ returned in ${elapsed}ms (under ${SAFE_CEILING_MS}ms)`);
+}
+
+async function runAllTests() {
+  console.log('🚀 Starting process-wait client-ceiling repro tests...');
+  await snapshotCap();
+  let ok = true;
+  try {
+    await testStartProcessCapsWait();   // FAILS until fix
+    await testInteractCapsWait();       // FAILS until fix
+    await testPromptStillReturnsEarly();// guard, passes
+    if (process.env.DC_REPRO_REALTIME === '1') {
+      await testRealTimeClientCeiling(); // FAILS until fix (~200s)
+    } else {
+      console.log('\n⏭️  Test 4 (real-time, ~200s) skipped. Set DC_REPRO_REALTIME=1 to run it.');
+    }
+    console.log('\n🎉 All process-wait cap tests passed!');
+  } catch (error) {
+    ok = false;
+    console.error('\n❌ Test failed:', error.message);
+  } finally {
+    await restoreCap();
+  }
+  return ok;
+}
+
+runAllTests()
+  .then((success) => process.exit(success ? 0 : 1))
+  .catch((error) => { console.error('Test error:', error); process.exit(1); });
diff --git a/test/repro/test-read-abort-frees-thread.js b/test/repro/test-read-abort-frees-thread.js
new file mode 100644
index 00000000..2fff2c8d
--- /dev/null
+++ b/test/repro/test-read-abort-frees-thread.js
@@ -0,0 +1,43 @@
+// Demonstration: runWithAbortableTimeout actually CANCELS the underlying read
+// (it rejects quickly instead of returning the full buffer), whereas withTimeout
+// abandons the promise while the read runs to completion in the background.
+//
+// Deterministic part: the abortable read must reject (read cut short), proving
+// the AbortSignal reaches fs and stops it.
+//
+// Timing note: on a fast local SSD the "thread stays held" cost is invisible
+// because the file is served from the page cache at memory bandwidth. The cost
+// is real only on slow/network/cloud paths — exactly the case this fix targets,
+// and one that can't be faithfully reproduced on a local disk. So we assert the
+// cancellation (deterministic) and print timing as informational only.
+//
+// Run: UV_THREADPOOL_SIZE=1 node test/repro/test-read-abort-frees-thread.js
+import assert from 'assert';
+import { execSync } from 'child_process';
+import fs from 'fs/promises';
+import os from 'os';
+import path from 'path';
+import { withTimeout, runWithAbortableTimeout } from '../../dist/utils/withTimeout.js';
+
+const big = path.join(os.tmpdir(), `dc-big-${Date.now()}.bin`);
+console.log('creating 800MB temp file...');
+execSync(`dd if=/dev/zero of=${big} bs=1m count=800 2>/dev/null`);
+
+// Abortable read with a 0ms budget: must reject (read cancelled), NOT return 800MB.
+let rejected = false;
+let returnedBytes = -1;
+try {
+  const buf = await runWithAbortableTimeout(
+    (signal) => fs.readFile(big, { signal }),
+    0,
+    'abortable read'
+  );
+  returnedBytes = buf.length;
+} catch (e) {
+  rejected = true;
+  console.log(`abortable read rejected with code=${e.code} (read was cut short) ✓`);
+}
+execSync(`rm -f ${big}`);
+assert.ok(rejected, `expected abortable read to be cancelled, but it returned ${returnedBytes} bytes`);
+console.log('PASS: AbortController propagated to fs and cancelled the read.');
+process.exit(0);
diff --git a/test/repro/test-start-process-timeout-block.js b/test/repro/test-start-process-timeout-block.js
new file mode 100644
index 00000000..0f49e841
--- /dev/null
+++ b/test/repro/test-start-process-timeout-block.js
@@ -0,0 +1,135 @@
+import assert from 'assert';
+import { terminalManager } from '../../dist/terminal-manager.js';
+
+/**
+ * Repro / characterization test for issue #310:
+ *   "start_process blocks ... causing Claude Desktop crashes"
+ *
+ * Root cause (src/terminal-manager.ts executeCommand): the call resolves early
+ * ONLY via one of:
+ *   - a quick prompt pattern (>>> > $ #) on a stdout chunk
+ *   - analyzeProcessState() detecting a REPL prompt (guarded by output)
+ *   - process exit
+ *   - the timeoutMs fallback
+ *
+ * A long-running process that produces no prompt-like output and does not exit
+ * hits none of the early paths, so executeCommand is held for the FULL
+ * timeoutMs. With a large timeout_ms this is the multi-minute pending tool call
+ * users observe.
+ *
+ * NOTE: this is an async wait, not a literal event-loop block. The issue's
+ * "blocks Electron main thread" framing is inaccurate, but the user-visible
+ * symptom (tool call pending for the whole timeout) is real and is what this
+ * test reproduces. If a cap-the-initial-wait fix lands, Tests 1 & 2 should be
+ * updated to assert the capped duration instead.
+ */
+
+const TIMEOUT_MS = 800;        // short, keeps the test fast
+const PROC_LIFETIME_MS = 6000; // child lives well past the timeout
+
+const since = (t) => Date.now() - t;
+const cleanup = (pid) => { try { terminalManager.forceTerminate(pid); } catch {} };
+
+/**
+ * Test 1 (core repro): a SILENT long-running process produces no output, so
+ * neither the quick-pattern nor the periodic analyzeProcessState path can fire
+ * (the periodic check is guarded by `output.trim()`). The only thing that ends
+ * the call is the timeout fallback => the call is held for the full timeoutMs.
+ */
+async function testSilentProcessWaitsFullTimeout() {
+  console.log('\n📋 Test 1: silent long-running process is held for the full timeout...');
+  const t0 = Date.now();
+  const res = await terminalManager.executeCommand(
+    `node -e "setTimeout(function(){}, ${PROC_LIFETIME_MS})"`,
+    TIMEOUT_MS,
+    undefined,
+    true // collectTiming -> populates timingInfo.exitReason
+  );
+  const elapsed = since(t0);
+  cleanup(res.pid);
+
+  assert(res.pid > 0, 'should have spawned a process');
+  assert.strictEqual(res.isBlocked, true, 'should report isBlocked=true');
+  assert.strictEqual(res.output.trim(), '', 'silent process should produce no output');
+  assert.strictEqual(res.timingInfo.exitReason, 'timeout',
+    `expected exitReason "timeout", got "${res.timingInfo.exitReason}"`);
+  assert(elapsed >= TIMEOUT_MS - 75,
+    `should wait ~the full timeout (>=${TIMEOUT_MS}ms), only waited ${elapsed}ms`);
+  assert(elapsed < PROC_LIFETIME_MS - 500,
+    `should return via timeout, not process exit (elapsed ${elapsed}ms)`);
+  console.log(`  ✅ held for ${elapsed}ms via timeout (timeout=${TIMEOUT_MS}ms, proc=${PROC_LIFETIME_MS}ms)`);
+}
+
+/**
+ * Test 2 (reporter's scenario): a CHATTY but prompt-less long-running process
+ * (the "Python test script that didn't output prompt-like patterns"). It emits
+ * plain progress lines that match no REPL prompt and no completion pattern, so
+ * it is also held for the full timeout despite producing output.
+ */
+async function testChattyNonPromptProcessWaitsFullTimeout() {
+  console.log('\n📋 Test 2: chatty (no-prompt) long-running process is held for the full timeout...');
+  const t0 = Date.now();
+  const res = await terminalManager.executeCommand(
+    `node -e "setInterval(function(){console.log('progress')},100);setTimeout(function(){},${PROC_LIFETIME_MS})"`,
+    TIMEOUT_MS,
+    undefined,
+    true
+  );
+  const elapsed = since(t0);
+  cleanup(res.pid);
+
+  assert.strictEqual(res.isBlocked, true, 'should report isBlocked=true');
+  assert(res.output.includes('progress'), 'should have captured progress output');
+  assert.strictEqual(res.timingInfo.exitReason, 'timeout',
+    `non-prompt output must not trigger early exit; got "${res.timingInfo.exitReason}"`);
+  assert(elapsed >= TIMEOUT_MS - 75,
+    `should wait ~the full timeout (>=${TIMEOUT_MS}ms), only waited ${elapsed}ms`);
+  assert(elapsed < PROC_LIFETIME_MS - 500,
+    `should return via timeout, not process exit (elapsed ${elapsed}ms)`);
+  console.log(`  ✅ held for ${elapsed}ms despite output (exitReason=${res.timingInfo.exitReason})`);
+}
+
+/**
+ * Test 3 (contrast / regression guard): when the process DOES emit a recognized
+ * prompt, executeCommand returns promptly via the quick-pattern path, well
+ * before the (large) timeout. This proves the bug is specific to prompt-less
+ * processes and guards the early-exit path from regressing.
+ */
+async function testPromptProcessReturnsEarly() {
+  console.log('\n📋 Test 3: prompt-emitting process returns early (not at timeout)...');
+  const bigTimeout = 5000;
+  const t0 = Date.now();
+  const res = await terminalManager.executeCommand(
+    `node -e "process.stdout.write('>>> ');setTimeout(function(){},${PROC_LIFETIME_MS})"`,
+    bigTimeout,
+    undefined,
+    true
+  );
+  const elapsed = since(t0);
+  cleanup(res.pid);
+
+  assert.strictEqual(res.isBlocked, true, 'prompt means blocked/waiting for input');
+  assert(elapsed < 1000, `should return quickly via prompt, took ${elapsed}ms`);
+  assert(res.timingInfo.exitReason.startsWith('early_exit'),
+    `expected an early_exit reason, got "${res.timingInfo.exitReason}"`);
+  console.log(`  ✅ returned early after ${elapsed}ms (exitReason=${res.timingInfo.exitReason})`);
+}
+
+async function runAllTests() {
+  console.log('🚀 Starting #310 start_process timeout-block tests...');
+  try {
+    await testSilentProcessWaitsFullTimeout();
+    await testChattyNonPromptProcessWaitsFullTimeout();
+    await testPromptProcessReturnsEarly();
+    console.log('\n🎉 All #310 timeout-block tests passed!');
+    return true;
+  } catch (error) {
+    console.error('\n❌ Test failed:', error.message);
+    console.error(error.stack);
+    return false;
+  }
+}
+
+runAllTests()
+  .then(success => process.exit(success ? 0 : 1))
+  .catch(error => { console.error('Test error:', error); process.exit(1); });
diff --git a/test/repro/test-threadpool-starvation.js b/test/repro/test-threadpool-starvation.js
new file mode 100644
index 00000000..730b5b62
--- /dev/null
+++ b/test/repro/test-threadpool-starvation.js
@@ -0,0 +1,50 @@
+// Repro: libuv threadpool exhaustion makes a trivial fs op hang.
+// Models the real bug: stalled cloud-path fs.reads occupy all UV threads,
+// and the per-tool-call config fs.writeFile (usageTracker.saveStats) queues
+// behind them -> even a "light" tool (list_processes) can't respond.
+//
+// Run: UV_THREADPOOL_SIZE=4 node test/test-threadpool-starvation.js
+import { execSync } from 'child_process';
+import fs from 'fs/promises';
+import os from 'os';
+import path from 'path';
+
+const POOL = Number(process.env.UV_THREADPOOL_SIZE || 4);
+const BLOCKERS = Number(process.env.BLOCKERS || POOL);
+const fifo = path.join(os.tmpdir(), `dc-fifo-${Date.now()}`);
+const probe = path.join(os.tmpdir(), `dc-probe-${Date.now()}.json`);
+
+function log(m) { console.log(`[${Date.now() - T0}ms] ${m}`); }
+const T0 = Date.now();
+
+try { execSync(`mkfifo ${fifo}`); } catch (e) { console.error('mkfifo failed', e.message); process.exit(1); }
+log(`pool=${POOL}, blockers=${BLOCKERS}, fifo=${fifo}`);
+
+// Occupy BLOCKERS threads with reads that never resolve (nobody writes to the
+// FIFO) -- exactly like a cloud read that never returns.
+for (let i = 0; i < BLOCKERS; i++) {
+  fs.readFile(fifo).then(() => log(`fifo read ${i} resolved (unexpected)`))
+                   .catch((e) => log(`fifo read ${i} errored: ${e.code}`));
+}
+
+// After threads are grabbed, time a trivial write == the per-call config write.
+setTimeout(async () => {
+  log(`firing trivial write (proxy for list_processes' config save)...`);
+  const t = Date.now();
+  const guard = setTimeout(() => {
+    log(`STILL BLOCKED after 5000ms -> STARVATION REPRODUCED. Exiting.`);
+    try { execSync(`rm -f ${fifo} ${probe}`); } catch {}
+    process.exit(0);
+  }, 5000);
+  try {
+    await fs.writeFile(probe, '{}');
+    clearTimeout(guard);
+    log(`trivial write completed in ${Date.now() - t}ms (NOT starved)`);
+    execSync(`rm -f ${fifo} ${probe}`);
+    process.exit(0);
+  } catch (e) {
+    clearTimeout(guard);
+    log(`trivial write errored: ${e.message}`);
+    process.exit(1);
+  }
+}, 200);
diff --git a/test/repro/test-withtimeout-leak.js b/test/repro/test-withtimeout-leak.js
new file mode 100644
index 00000000..91c89a92
--- /dev/null
+++ b/test/repro/test-withtimeout-leak.js
@@ -0,0 +1,39 @@
+// Repro: withTimeout() rejects on schedule but does NOT cancel the underlying
+// fs op, so the libuv thread stays held. This is why a 30s read "timeout" does
+// not free capacity for 30s worth of relief -- the thread is occupied for the
+// REAL (cloud) duration, which can be minutes.
+//
+// Run: UV_THREADPOOL_SIZE=1 node test/test-withtimeout-leak.js
+import { execSync } from 'child_process';
+import fs from 'fs/promises';
+import os from 'os';
+import path from 'path';
+import { withTimeout } from '../../dist/utils/withTimeout.js';
+
+const T0 = Date.now();
+const log = (m) => console.log(`[${Date.now() - T0}ms] ${m}`);
+const fifo = path.join(os.tmpdir(), `dc-leak-fifo-${Date.now()}`);
+execSync(`mkfifo ${fifo}`);
+log(`pool=${process.env.UV_THREADPOOL_SIZE || 'default(4)'} (using 1 thread)`);
+
+// One read stuck on the FIFO, wrapped exactly like DC wraps file reads.
+const timed = withTimeout(fs.readFile(fifo), 1000, 'Read file operation', null)
+  .then((v) => log(`withTimeout resolved: ${v}`))
+  .catch((e) => log(`withTimeout REJECTED (as designed): ${String(e).slice(0, 40)}...`));
+
+// After the timeout "fires", try another fs op. If the thread were freed, this
+// would run immediately. It does not -- the un-cancelled read still owns it.
+await timed;
+log(`timeout fired; now trying a fresh fs.writeFile on the single thread...`);
+const t = Date.now();
+const guard = setTimeout(() => {
+  log(`next fs op STILL BLOCKED ${Date.now() - t}ms after timeout -> THREAD LEAKED`);
+  log(`(withTimeout freed the JS promise, not the OS thread)`);
+  execSync(`rm -f ${fifo}`);
+  process.exit(0);
+}, 4000);
+await fs.writeFile(path.join(os.tmpdir(), 'dc-leak-probe'), 'x');
+clearTimeout(guard);
+log(`next fs op completed in ${Date.now() - t}ms (thread was freed)`);
+execSync(`rm -f ${fifo}`);
+process.exit(0);
diff --git a/test/test-nonblocking-config-save.js b/test/test-nonblocking-config-save.js
new file mode 100644
index 00000000..1a761b53
--- /dev/null
+++ b/test/test-nonblocking-config-save.js
@@ -0,0 +1,61 @@
+import assert from 'assert';
+import { readFileSync } from 'fs';
+import { configManager } from '../dist/config-manager.js';
+import { CONFIG_FILE } from '../dist/config.js';
+
+/**
+ * Regression test for the parallel-load tool-call hang.
+ *
+ * Root cause: the CallTool handler awaited usageTracker.trackSuccess() before
+ * returning any tool result; that chained through configManager.setValue ->
+ * fs.writeFile on EVERY call. Under a saturated libuv threadpool (many parallel
+ * reads stalled on a slow/cloud filesystem) the awaited write could not get a
+ * thread, so even pure-memory tools (list_processes) hung until the client's
+ * ~4-minute cap. Each call also fired an independent write of the same file,
+ * risking a corrupted config.json.
+ *
+ * Fix: stats persist via configManager.setValueNonBlocking() — in-memory update
+ * is synchronous, the disk write is coalesced and serialized on a single write
+ * chain, and the caller never waits on it.
+ *
+ * This test is fast and cross-platform (no FIFO/python); the FIFO-based proof
+ * that the response no longer blocks under a starved pool lives in test/repro/.
+ */
+
+const KEY = '__nonblockingSaveRegressionTest';
+let passed = 0;
+
+async function run() {
+  await configManager.getConfig(); // warm init so disk read is cached
+
+  // 1) A burst of non-blocking saves must resolve effectively immediately —
+  //    they must not each wait on a disk write.
+  const BURST = 100;
+  const t0 = Date.now();
+  await Promise.all(
+    Array.from({ length: BURST }, (_, i) => configManager.setValueNonBlocking(KEY, i))
+  );
+  const elapsed = Date.now() - t0;
+  assert.ok(elapsed < 200, `burst of ${BURST} non-blocking saves took ${elapsed}ms (expected < 200ms)`);
+  passed++; console.log(`✓ ${BURST} non-blocking saves resolved in ${elapsed}ms`);
+
+  // 2) The in-memory value is visible immediately (synchronously updated).
+  assert.strictEqual(await configManager.getValue(KEY), BURST - 1);
+  passed++; console.log('✓ in-memory value reflects the latest write immediately');
+
+  // 3) After the background flush window, config.json is valid JSON (no torn
+  //    write from overlapping saves) and holds the final coalesced value.
+  await new Promise((r) => setTimeout(r, 300));
+  let parsed;
+  assert.doesNotThrow(() => { parsed = JSON.parse(readFileSync(CONFIG_FILE, 'utf8')); },
+    'config.json must remain valid JSON after concurrent writes');
+  assert.strictEqual(parsed[KEY], BURST - 1, 'final value must be persisted to disk');
+  passed++; console.log('✓ config.json is valid and holds the coalesced final value');
+
+  // cleanup: remove the test key (undefined is dropped by JSON.stringify)
+  await configManager.setValue(KEY, undefined);
+}
+
+run()
+  .then(() => { console.log(`\nPASS (${passed}/3)`); process.exit(0); })
+  .catch((e) => { console.error(`\nFAIL: ${e.message}`); process.exit(1); });
diff --git a/test/test-read-abort-timeout.js b/test/test-read-abort-timeout.js
new file mode 100644
index 00000000..09cc9f1d
--- /dev/null
+++ b/test/test-read-abort-timeout.js
@@ -0,0 +1,92 @@
+import assert from 'assert';
+import os from 'os';
+import path from 'path';
+import fsp from 'fs/promises';
+import { runWithAbortableTimeout } from '../dist/utils/withTimeout.js';
+import { READ_OPERATION_TIMEOUT_MS, readFile } from '../dist/tools/filesystem.js';
+import { configManager } from '../dist/config-manager.js';
+
+/**
+ * Regression tests for the abortable, 3-minute read timeout.
+ *
+ * Follow-up to the parallel-load hang fix: withTimeout() rejected on a timer
+ * but left the underlying fs op running, holding its libuv thread until the OS
+ * call returned. runWithAbortableTimeout() passes an AbortSignal into the op
+ * and aborts it on timeout. The read timeout is a flat 3 minutes, chosen to sit
+ * below the MCP client's ~4-minute hard cap so we abort + return a useful error
+ * before the client reports an opaque timeout.
+ */
+
+let passed = 0;
+const ok = (msg) => { passed++; console.log(`✓ ${msg}`); };
+
+async function run() {
+  // 1) A fast operation resolves normally and is NOT aborted.
+  {
+    let sawSignal;
+    const val = await runWithAbortableTimeout(async (signal) => {
+      sawSignal = signal;
+      return 'done';
+    }, 1000, 'fast op');
+    assert.strictEqual(val, 'done');
+    assert.strictEqual(sawSignal.aborted, false);
+    ok('fast operation resolves and signal is not aborted');
+  }
+
+  // 2) A slow operation times out with code ETIMEDOUT AND its signal is aborted
+  //    (this is the "cleanup" — the op is told to stop, not just abandoned).
+  {
+    let sawSignal;
+    let err;
+    try {
+      await runWithAbortableTimeout((signal) => {
+        sawSignal = signal;
+        // Never settles on its own — only the timeout's abort should end the
+        // race (mirrors a real fs read, which rejects asynchronously after
+        // abort, so the ETIMEDOUT rejection wins).
+        return new Promise(() => {});
+      }, 100, 'slow op');
+      assert.fail('expected timeout rejection');
+    } catch (e) {
+      err = e;
+    }
+    assert.strictEqual(err.code, 'ETIMEDOUT', `expected ETIMEDOUT, got ${err.code}`);
+    assert.strictEqual(sawSignal.aborted, true, 'operation signal must be aborted on timeout');
+    ok('slow operation rejects ETIMEDOUT and aborts the operation signal');
+  }
+
+  // 3) The read timeout is 3 minutes — safely below the client's ~4-min cap.
+  {
+    assert.strictEqual(READ_OPERATION_TIMEOUT_MS, 3 * 60 * 1000, 'read timeout must be 3 minutes');
+    assert.ok(READ_OPERATION_TIMEOUT_MS < 4 * 60 * 1000, 'must stay below the ~4-min client cap');
+    ok('read timeout is 3 minutes, below the client hard cap');
+  }
+
+  // 4) Integration: a normal read still works end-to-end (signal threading did
+  //    not break the happy path). Hermetic: uses its own temp dir + allowed-dir
+  //    config so it doesn't depend on the ambient allowedDirectories, and
+  //    restores config afterward.
+  {
+    const original = await configManager.getConfig();
+    const originalAllowed = original.allowedDirectories;
+    // realpath so the allowed dir matches what validatePath resolves the file
+    // to (e.g. macOS /tmp -> /private/tmp), avoiding a symlink mismatch.
+    const tmpDir = await fsp.realpath(await fsp.mkdtemp(path.join(os.tmpdir(), 'dc-read-abort-')));
+    const tmpFile = path.join(tmpDir, 'sample.txt');
+    await fsp.writeFile(tmpFile, 'line1\nMARKER runWithAbortableTimeout\nline3\n');
+    try {
+      await configManager.setValue('allowedDirectories', [tmpDir]);
+      const res = await readFile(tmpFile, { offset: 0, length: 5 });
+      const text = typeof res.content === 'string' ? res.content : res.content.toString('utf8');
+      assert.ok(text.includes('MARKER runWithAbortableTimeout'), 'normal read returns file content');
+      ok('normal read_file still works with the signal threaded through');
+    } finally {
+      await configManager.setValue('allowedDirectories', originalAllowed);
+      await fsp.rm(tmpDir, { recursive: true, force: true });
+    }
+  }
+}
+
+run()
+  .then(() => { console.log(`\nPASS (${passed}/4)`); process.exit(0); })
+  .catch((e) => { console.error(`\nFAIL: ${e.message}`); process.exit(1); });