Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 75 additions & 4 deletions conductor/bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path

Expand Down Expand Up @@ -347,6 +348,63 @@ def split_message(text: str, max_len: int = TG_MAX_LENGTH) -> list[str]:
return chunks


# ---------------------------------------------------------------------------
# Voice transcription (local parakeet-mlx via subprocess)
# ---------------------------------------------------------------------------

# Path to the STT worker script (next to this file)
STT_WORKER = Path(__file__).parent / "stt_worker.py"
# Python interpreter in the bridge venv (venv lives alongside bridge.py)
VENV_PYTHON = Path(__file__).parent / ".venv" / "bin" / "python3"


async def transcribe_voice(voice: types.Voice, bot: Bot) -> str | None:
"""Transcribe a Telegram voice message using local parakeet-mlx."""
tmp_path = None
try:
# Download voice file via Telegram Bot API
file = await bot.get_file(voice.file_id)
bio = await bot.download_file(file.file_path)
audio_data = bio.read()

# Save to temp file
with tempfile.NamedTemporaryFile(
suffix=".ogg", prefix="voice_", delete=False
) as tmp:
tmp.write(audio_data)
tmp_path = tmp.name

# Run STT worker as subprocess (crash-isolated, off the event loop)
proc = await asyncio.create_subprocess_exec(
str(VENV_PYTHON), str(STT_WORKER), tmp_path,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(), timeout=60
)
except asyncio.TimeoutError:
proc.kill()
await proc.wait()
log.error("STT worker timed out (60s)")
return None

if proc.returncode != 0:
log.error("STT worker failed: %s", stderr.decode().strip())
return None

text = stdout.decode().strip()
return text if text else None

except Exception as e:
log.error("Voice transcription error: %s", e)
return None
finally:
if tmp_path:
Path(tmp_path).unlink(missing_ok=True)


# ---------------------------------------------------------------------------
# Telegram bot setup
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -482,20 +540,33 @@ async def cmd_restart(message: types.Message):

@dp.message()
async def handle_message(message: types.Message):
"""Forward any text message to the conductor and return its response."""
"""Forward text or voice messages to the conductor and return its response."""
if not is_authorized(message):
return
if not message.text:

text = message.text

# Transcribe voice messages
if message.voice and not text:
await message.answer("Transcribing...")
text = await transcribe_voice(message.voice, bot)
if not text:
await message.answer(
"[Could not transcribe voice message.]"
)
return

if not text:
return

# Determine target profile from message prefix
target_profile, cleaned_msg = parse_profile_prefix(
message.text, profiles
text, profiles
)
if target_profile is None:
target_profile = default_profile
if not cleaned_msg:
cleaned_msg = message.text
cleaned_msg = text

session_title = conductor_session_title(target_profile)

Expand Down
71 changes: 71 additions & 0 deletions conductor/stt_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
STT worker: transcribes an audio file using parakeet-mlx CLI.
Runs as a subprocess to isolate inference from the bridge event loop.

Usage:
python stt_worker.py /path/to/audio.ogg
python stt_worker.py --warmup
"""

import os
import subprocess
import sys
import tempfile
from pathlib import Path

VENV_BIN = Path.home() / ".agent-deck" / "conductor" / ".venv" / "bin"
PARAKEET_CLI = str(VENV_BIN / "parakeet-mlx")


def transcribe(audio_path: str) -> str:
"""Transcribe audio file using parakeet-mlx CLI."""
with tempfile.TemporaryDirectory(prefix="stt_") as tmp_dir:
result = subprocess.run(
[
PARAKEET_CLI, audio_path,
"--output-format", "txt",
"--output-dir", tmp_dir,
],
capture_output=True, text=True, timeout=60,
)
if result.returncode != 0:
print(f"parakeet-mlx error: {result.stderr}", file=sys.stderr)
sys.exit(1)

# Read the output txt file
txt_files = list(Path(tmp_dir).glob("*.txt"))
if not txt_files:
print("No transcription output file found", file=sys.stderr)
sys.exit(1)
return txt_files[0].read_text().strip()


def main():
if len(sys.argv) < 2:
print("Usage: stt_worker.py <audio_file> | --warmup", file=sys.stderr)
sys.exit(1)

if sys.argv[1] == "--warmup":
print("Warming up parakeet-mlx...", file=sys.stderr)
# Just check the CLI is accessible
result = subprocess.run(
[PARAKEET_CLI, "--help"],
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
print("CLI accessible.", file=sys.stderr)
print("")
return

audio_path = sys.argv[1]
if not Path(audio_path).exists():
print(f"File not found: {audio_path}", file=sys.stderr)
sys.exit(1)

text = transcribe(audio_path)
print(text)


if __name__ == "__main__":
main()