Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 96 additions & 95 deletions apps/whispering/src-tauri/Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion apps/whispering/src-tauri/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ tauri-plugin-opener = "2"
tauri-plugin-clipboard-manager = "2"
tauri-plugin-dialog = "2"
tauri-plugin-fs = "2"
tauri-plugin-http = "2"
tauri-plugin-http = "2.5.7"
tauri-plugin-notification = "2"
tauri-plugin-os = "2"
tauri-plugin-process = "2"
Expand Down
151 changes: 126 additions & 25 deletions apps/whispering/src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,35 +220,136 @@ async fn write_text(app: tauri::AppHandle, text: String) -> Result<(), String> {
// Small delay to ensure clipboard is updated
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;

// 3. Simulate paste operation using virtual key codes (layout-independent)
let mut enigo = Enigo::new(&Settings::default()).map_err(|e| e.to_string())?;

// Use virtual key codes for V to work with any keyboard layout
#[cfg(target_os = "macos")]
let (modifier, v_key) = (Key::Meta, Key::Other(9)); // Virtual key code for V on macOS
#[cfg(target_os = "windows")]
let (modifier, v_key) = (Key::Control, Key::Other(0x56)); // VK_V on Windows
// 3. Simulate paste operation
#[cfg(target_os = "linux")]
let (modifier, v_key) = (Key::Control, Key::Unicode('v')); // Fallback for Linux
{
// On Linux, use xdotool instead of enigo for reliable key simulation.
// xdotool --clearmodifiers prevents stuck modifier keys from interfering
// and --window targets the correct window regardless of focus changes.
let target_window = std::process::Command::new("xdotool")
.args(["getactivewindow"])
.output()
.ok()
.and_then(|o| String::from_utf8(o.stdout).ok())
.map(|s| s.trim().to_string());

// Detect terminals by WM_CLASS — they need Ctrl+Shift+V instead of Ctrl+V.
// xprop output format: WM_CLASS(STRING) = "instance", "class"
// We extract only the quoted values to avoid false matches on the
// "STRING" prefix (e.g. "st" matching "STRING").
let is_terminal = target_window
.as_ref()
.and_then(|id| {
std::process::Command::new("xprop")
.args(["-id", id, "WM_CLASS"])
.output()
.ok()
})
.and_then(|o| String::from_utf8(o.stdout).ok())
.map(|raw| {
// Extract the values after "=" and check only those
let values = raw
.split('=')
.nth(1)
.unwrap_or("")
.to_lowercase();
[
"gnome-terminal",
"konsole",
"xfce4-terminal",
"mate-terminal",
"terminator",
"tilix",
"alacritty",
"kitty",
"wezterm",
"foot",
"st-256color",
"st",
"urxvt",
"xterm",
"sakura",
"guake",
"yakuake",
"tilda",
"hyper",
"termux",
"rio",
"ghostty",
"contour",
"blackbox",
]
.iter()
.any(|t| {
// For short names like "st", match exact quoted values
// to avoid substring false positives
if t.len() <= 3 {
values.contains(&format!("\"{}\"", t))
} else {
values.contains(*t)
}
})
|| values.contains("terminal")
|| values.contains("term")
})
.unwrap_or(false);

let paste_key = if is_terminal {
"ctrl+shift+v"
} else {
"ctrl+v"
};

// Press modifier + V
enigo
.key(modifier, Direction::Press)
.map_err(|e| format!("Failed to press modifier key: {}", e))?;
enigo
.key(v_key, Direction::Press)
.map_err(|e| format!("Failed to press V key: {}", e))?;
info!(
"write_text: target_window={:?}, is_terminal={}, paste_key={}",
target_window, is_terminal, paste_key
);

// Release V + modifier (in reverse order for proper cleanup)
enigo
.key(v_key, Direction::Release)
.map_err(|e| format!("Failed to release V key: {}", e))?;
enigo
.key(modifier, Direction::Release)
.map_err(|e| format!("Failed to release modifier key: {}", e))?;
// Refocus the target window in case focus shifted during clipboard operations.
// Also dismiss any active menus (Alt-based shortcuts can activate menu bars).
if let Some(ref id) = target_window {
let _ = std::process::Command::new("xdotool")
.args(["windowactivate", "--sync", id])
.output();
let _ = std::process::Command::new("xdotool")
.args(["key", "--clearmodifiers", "Escape"])
.output();
// Small delay for the app to process the Escape and restore focus
std::thread::sleep(std::time::Duration::from_millis(50));
}

std::process::Command::new("xdotool")
.args(["key", "--clearmodifiers", paste_key])
.output()
.map_err(|e| format!("Failed to simulate paste via xdotool: {}", e))?;
}

#[cfg(not(target_os = "linux"))]
{
let mut enigo = Enigo::new(&Settings::default()).map_err(|e| e.to_string())?;

#[cfg(target_os = "macos")]
let (modifier, v_key) = (Key::Meta, Key::Other(9));
#[cfg(target_os = "windows")]
let (modifier, v_key) = (Key::Control, Key::Other(0x56));

enigo
.key(modifier, Direction::Press)
.map_err(|e| format!("Failed to press modifier key: {}", e))?;
enigo
.key(v_key, Direction::Press)
.map_err(|e| format!("Failed to press V key: {}", e))?;
enigo
.key(v_key, Direction::Release)
.map_err(|e| format!("Failed to release V key: {}", e))?;
enigo
.key(modifier, Direction::Release)
.map_err(|e| format!("Failed to release modifier key: {}", e))?;
}

// Small delay to ensure paste completes
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
// Delay before restoring clipboard — heavy apps like VS Code (Electron) need
// more time to process the paste event and read the clipboard content.
tokio::time::sleep(tokio::time::Duration::from_millis(300)).await;

// 4. Restore original clipboard content
if let Some(content) = original_clipboard {
Expand Down
40 changes: 40 additions & 0 deletions docs/git-workflow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Git Workflow

This repo is a fork of [EpicenterHQ/epicenter](https://github.com/EpicenterHQ/epicenter) with custom enhancements for Linux terminal paste support and dependency fixes.

| Remote | URL | Purpose |
|--------|-----|---------|
| `origin` | `https://github.com/rob88pt/epicenter.git` | Your fork -- push changes here |
| `upstream` | `https://github.com/EpicenterHQ/epicenter.git` | Original repo -- pull updates from here |

## Pushing Your Changes
```bash
git add <files> && git commit -m "message" && git push origin main
```

## Pulling Upstream Updates
```bash
git fetch upstream && git merge upstream/main
```

## Viewing Upstream Changes Before Merging
```bash
git fetch upstream
git log upstream/main --oneline --not main
git diff main upstream/main
```

## Tagging a Release
```bash
git tag -a v1.0.0 -m "Description" && git push origin v1.0.0
```

## Likely Conflict Files

These files contain our custom changes and are most likely to conflict when syncing upstream:

- `apps/whispering/src-tauri/src/lib.rs` -- terminal paste detection (Ctrl+Shift+V for terminals on Linux)
- `apps/whispering/src-tauri/Cargo.toml` -- pinned `tauri-plugin-http` version to 2.5.7
- `apps/whispering/src-tauri/Cargo.lock` -- dependency lock changes from the plugin update

When resolving conflicts in these files, prioritize keeping our terminal detection logic and HTTP plugin version while incorporating upstream feature changes.
24 changes: 24 additions & 0 deletions memory/active_context.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Active Context

## Current Focus
Whispering desktop app — debugging and fixing Linux paste issues. Need to start fresh from upstream and apply a clean, minimal terminal fix.

## Recent Changes
- [2026-04-02] Discovered the "st" matching bug: `"st"` (suckless terminal) in the terminal list was matching `STRING` in xprop output prefix `WM_CLASS(STRING) = ...`, causing EVERY window to be detected as terminal → Ctrl+Shift+V sent everywhere
- [2026-04-02] Replaced enigo with xdotool for Linux paste (more reliable with `--clearmodifiers`)
- [2026-04-02] Added an Escape key workaround and increased the clipboard restore delay — both turned out to be unnecessary; the paste failures they targeted were caused by the Alt+D shortcut activating the VS Code menu bar
- [2026-04-02] Pushed changes to `fix/terminal-paste-and-http-plugin` branch but plan to start fresh

## Next Steps
- [ ] Reset fork to upstream/main (clean slate)
- [ ] Apply ONLY the terminal detection fix (minimal change)
- [ ] Test thoroughly in terminal, VS Code, Chrome, text editor, LibreOffice before adding more changes
- [ ] Keep changes minimal to avoid merge conflicts with upstream

## Blockers / Open Questions
- Alt-based shortcuts (e.g. Alt+D) activate menu bars in apps like VS Code — this is expected OS behavior, NOT a bug in our code. User should use non-Alt shortcuts for push-to-talk if using VS Code.
- The "2.0.0 update" notification in Whispering was a false alarm — upstream latest is still v7.11.0

## Session Notes
- User prefers minimal changes to avoid merge conflicts with upstream
- Ghost-chasing: spent time debugging paste issues that were actually caused by the `"st"` matching bug and the Alt+D shortcut, not by enigo, timing, or focus issues
61 changes: 61 additions & 0 deletions memory/changelog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Changelog

## [2026-04-02] - Terminal Detection Bug Fix and Paste Rework

### Fixed
- **Critical bug**: `"st"` (suckless terminal) in the terminal emulator list was matching `STRING` in xprop output prefix `WM_CLASS(STRING) = ...`, causing every window to be falsely detected as terminal. Ctrl+Shift+V was sent to all apps.

### Changed
- Replaced enigo with xdotool for Linux key simulation — uses `--clearmodifiers` to prevent modifier interference
- Terminal detection now parses only the values after `=` in xprop output, not the full prefix
- Short terminal names (≤3 chars like "st") now use exact quoted matching (`"st"`) to avoid substring false positives
- Added `windowactivate --sync` to refocus target window before paste
- Added Escape key press before paste to dismiss Alt-activated menus (workaround)
- Increased clipboard restore delay from 100ms to 300ms

### Discarded (unnecessary, should be reverted)
- The Escape key workaround — VS Code issue was caused by user's Alt+D shortcut activating the menu bar, not a code bug
- The 300ms delay — VS Code paste failed because of menu focus, not timing
- The windowactivate call — focus wasn't the core problem

### Files Affected
- `apps/whispering/src-tauri/src/lib.rs` — write_text command reworked for Linux

### Plan for Next Session
Reset fork to upstream/main and apply ONLY the essential terminal fix:
1. Parse xprop values after `=` (not the full prefix)
2. Use exact quoted matching for short terminal names
3. Keep the change minimal — one small diff on top of upstream

---

## [2026-04-01] - Terminal Paste Fix and HTTP Plugin Update

### Added
- Terminal window detection on Linux via `xprop WM_CLASS` in `write_text` command
- List of 23 known terminal emulator class names for matching
- Fallback matching on "terminal" and "term" substrings in WM_CLASS

### Changed
- `tauri-plugin-http` pinned from `"2"` to `"2.5.7"` in Cargo.toml
- Cargo.lock updated with tauri-plugin-http 2.5.7 and transitive dependency updates (tauri 2.9.4 -> 2.10.3, wry 0.53.5 -> 0.54.2, etc.)

### Fixed
- Clipboard paste now sends Ctrl+Shift+V in terminal emulators on Linux (was Ctrl+V which terminals ignore)
- Groq cloud transcription fixed -- `fetch_cancel_body` command was missing because JS plugin (2.5.7) was ahead of Rust plugin (2.5.4)

### Decisions
- Used `xprop -id $(xdotool getactivewindow) WM_CLASS` instead of `xdotool getwindowclassname` because xdotool v3.20160805 (shipped with Ubuntu/Mint) doesn't have `getwindowclassname`
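- Required version `2.5.7` rather than `"2"` to prevent future version mismatches. Note: Cargo interprets `"2.5.7"` as the caret requirement `^2.5.7` (`>=2.5.7, <3.0.0`), so this sets a minimum version rather than an exact pin; an exact pin would be written `"=2.5.7"`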
- Put detection logic in `#[cfg(target_os = "linux")]` block -- macOS and Windows unaffected

### Problems & Solutions
- `xdotool getwindowclassname` not found -> used `xprop` via shell subprocess instead
- `http.fetch_cancel_body not allowed` -> root cause was JS plugin v2.5.7 calling a command that Rust plugin v2.5.4 didn't have; fixed by updating Rust side
- Dev build `localStorage` uses a different origin than production (`http_localhost_1420` vs `tauri_localhost_0`) -> settings/API keys don't transfer; user must re-enter them
- Missing system deps for Tauri build: libssl-dev, libclang-dev, libasound2-dev, libvulkan-dev, glslc, and various GTK/WebKit libs

### Files Affected
- `apps/whispering/src-tauri/src/lib.rs` - terminal detection and Ctrl+Shift+V logic
- `apps/whispering/src-tauri/Cargo.toml` - pinned tauri-plugin-http version
- `apps/whispering/src-tauri/Cargo.lock` - dependency updates
18 changes: 18 additions & 0 deletions memory/changelog_summary.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Changelog Summary

## Current State
Fork has extra changes on `fix/terminal-paste-and-http-plugin` branch that should be discarded. Plan: reset to upstream/main and apply only the essential terminal detection fix (parse xprop values correctly, exact match for short names like "st").

## Major Milestones
- **[2026-04-01]** Forked repo, fixed terminal paste and Groq transcription, opened upstream PR #1575
- **[2026-04-02]** Found and fixed critical "st" matching bug in terminal detection. Discovered several unnecessary changes made while debugging — plan to start fresh.

## Key Decisions
- Use `xprop` for WM_CLASS detection instead of newer xdotool commands (see [decisions.md](decisions.md), ADR-001)
- Pin `tauri-plugin-http = "2.5.7"` explicitly to prevent JS/Rust version drift
- Keep changes minimal to avoid merge conflicts with upstream
- Alt-based shortcuts (Alt+D) activate menu bars in VS Code — not a code bug, user should use non-Alt shortcuts

## Recent Focus
- Debugging paste issues across different Linux apps
- Isolating the real bug ("st" matching "STRING") from symptoms (menu activation from Alt shortcuts)
42 changes: 42 additions & 0 deletions memory/decisions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Architecture Decision Records

## ADR-001: Use xprop for Terminal Window Detection
- **Date:** 2026-04-01
- **Status:** accepted

### Context
Whispering sends Ctrl+V to paste transcribed text, but terminals on Linux use Ctrl+Shift+V. Need to detect whether the focused window is a terminal at paste time.

### Decision
Use `xprop -id $(xdotool getactivewindow) WM_CLASS` to get the focused window's class, then check against a list of known terminal emulator names.

### Rationale
- `xdotool getwindowclassname` would be simpler but doesn't exist in xdotool v3.20160805 shipped with Ubuntu 24.04 / Linux Mint
- `xprop` is universally available on X11 systems
- Checking WM_CLASS against a list plus "terminal"/"term" substring fallback covers most emulators
- Runs as a shell subprocess which adds ~5ms latency -- acceptable since it runs once per paste

### Consequences
- Positive: Works on all common Linux distros without extra dependencies
- Positive: Fallback matching catches most terminals not in the explicit list
- Negative: Won't work on pure Wayland (no X11) -- would need a different approach
- Negative: Shell subprocess per paste has minor overhead

## ADR-002: Pin tauri-plugin-http Version
- **Date:** 2026-04-01
- **Status:** accepted

### Context
The JS `@tauri-apps/plugin-http` (v2.5.7) called `fetch_cancel_body` but the Rust `tauri-plugin-http` (v2.5.4 via `"2"` semver) didn't have that command, causing all HTTP requests to fail silently.

### Decision
Pin `tauri-plugin-http = "2.5.7"` explicitly in Cargo.toml instead of using `"2"`.

### Rationale
- The semver `"2"` resolved to 2.5.4 due to Cargo.lock pinning, while bun installed JS plugin 2.5.7
- Explicit version prevents future mismatches between JS and Rust sides
- Both sides should always be on the same patch version (2.5.4 and 2.5.7 already shared the minor version 2.5 and were still incompatible)

### Consequences
- Positive: Prevents silent HTTP failures from version drift
- Negative: Requires manual version bumps when updating the JS plugin
Loading