Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions samples/js/live-audio-transcription/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//
// Usage: node app.js

import { FoundryLocalManager } from 'foundry-local-sdk';
import { FoundryLocalManager, CoreError } from 'foundry-local-sdk';

console.log('╔══════════════════════════════════════════════════════════╗');
console.log('║ Foundry Local — Live Audio Transcription (JS SDK) ║');
Expand Down Expand Up @@ -39,9 +39,16 @@ console.log('Loading model...');
await model.load();
console.log('✓ Model loaded');

// Graceful-shutdown coordinator. Set ONCE on the session via
// createLiveTranscriptionSession(signal) — every subsequent
// start() / append() / getTranscriptionStream() / stop() call picks it
// up automatically, so we don't have to thread the signal through every
// callsite.
const shutdown = new AbortController();

// Create live transcription session (same pattern as C# sample).
const audioClient = model.createAudioClient();
const session = audioClient.createLiveTranscriptionSession();
const session = audioClient.createLiveTranscriptionSession(shutdown.signal);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of creating a new class to represent the parameters passed into the session, can we make signal an optional parameter of createLiveTranscriptionSession? That would remove the need for the LiveAudioTranscriptionSessionOptions class. The new optional signal parameter can behave similarly to a CancellationToken parameter.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I will update the cancellation pattern. Thanks.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 6ab38c4. Dropped LiveAudioTranscriptionSessionOptions and made signal a plain optional parameter on createLiveTranscriptionSession, matching C#'s CancellationToken shape:

const shutdown = new AbortController();
const session = audioClient.createLiveTranscriptionSession(shutdown.signal);

(was createLiveTranscriptionSession({ signal: shutdown.signal }))

JS 19/19 tests pass; sample updated.

session.settings.sampleRate = 16000; // Default is 16000; shown here for clarity
session.settings.channels = 1;
Expand All @@ -67,9 +74,22 @@ const readPromise = (async () => {
}
}
} catch (err) {
if (err.name !== 'AbortError') {
console.error('Stream error:', err.message);
// AbortError is expected on Ctrl+C; ignore quietly.
if (err.name === 'AbortError') return;

// CoreError surfaces native-core failure metadata (code + isTransient).
// Use it to tell transient blips (logged as a warning) apart from other
// failures (logged as an error). Both branches return, so no retry happens
// here. Without CoreError the only signal would be err.message.
if (err instanceof CoreError) {
if (err.isTransient) {
console.warn(`\n⚠ Transient ASR error (${err.code}): ${err.message}. Continuing...`);
return;
}
console.error(`\n✗ Stream error [${err.code}]: ${err.message}`);
return;
}

console.error('\n✗ Stream error:', err.message);
}
})();

Expand Down Expand Up @@ -108,14 +128,18 @@ try {
try {
while (appendQueue.length > 0) {
const pcm = appendQueue.shift();
// Session-level signal (set in createLiveTranscriptionSession)
// applies automatically — no need to pass it here.
await session.append(pcm);
}
} catch (err) {
// Aborted via Ctrl+C — exit quietly.
if (err.name === 'AbortError') return;
console.error('append error:', err.message);
} finally {
pumping = false;
// Handle race where new data arrived after loop exit.
if (appendQueue.length > 0) {
if (appendQueue.length > 0 && !shutdown.signal.aborted) {
void pumpAudio();
}
}
Expand Down Expand Up @@ -182,9 +206,14 @@ try {
process.exit(0);
}

// Handle graceful shutdown
// Handle graceful shutdown.
//
// The AbortController fires the shared `shutdown` signal so any in-flight
// session.append() / getTranscriptionStream() iteration fails promptly with
// an AbortError instead of waiting for stop() to finish draining the queue.
process.on('SIGINT', async () => {
console.log('\n\nStopping...');
shutdown.abort();
if (audioInput) {
audioInput.quit();
}
Expand Down
66 changes: 53 additions & 13 deletions samples/python/live-audio-transcription/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import time

from foundry_local_sdk import Configuration, FoundryLocalManager
from foundry_local_sdk.exception import FoundryLocalException
from foundry_local_sdk.openai.live_audio_transcription_types import CoreErrorResponse

use_synth = "--synth" in sys.argv

Expand All @@ -41,8 +43,16 @@
model.load()
print("done.")

# Graceful-shutdown coordinator. Set ONCE on the session via
# create_live_transcription_session(cancel_event=...) — every subsequent
# start() / append() / stop() / get_transcription_stream() call picks it
# up automatically, so we don't have to thread the event through every
# callsite. SIGINT just calls shutdown_event.set() and the in-flight
# session work unwinds cleanly.
shutdown_event = threading.Event()

audio_client = model.get_audio_client()
session = audio_client.create_live_transcription_session()
session = audio_client.create_live_transcription_session(cancel_event=shutdown_event)
session.settings.sample_rate = 16000
session.settings.channels = 1
session.settings.language = "en"
Expand All @@ -52,14 +62,30 @@

# --- Background thread reads transcription results (mirrors JS readPromise) ---


def read_results():
for result in session.get_transcription_stream():
text = result.content[0].text if result.content else ""
if result.is_final:
print()
print(f" [FINAL] {text}")
elif text:
print(text, end="", flush=True)
try:
for result in session.get_transcription_stream():
text = result.content[0].text if result.content else ""
if result.is_final:
print()
print(f" [FINAL] {text}")
elif text:
print(text, end="", flush=True)
except FoundryLocalException as ex:
# Cancelled via shutdown_event -> generator returns cleanly (no exception).
# We only land here on a real native-side push failure.
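# Use CoreErrorResponse to inspect structured error metadata (code +
# is_transient) and decide how to report the error: transient failures
# get a warning, other parsed failures an error line. Every branch
# returns, so no retry happens here.
# Without it, the only signal would be str(ex).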
info = CoreErrorResponse.try_parse(str(ex))
if info and info.is_transient:
print(f"\n⚠ Transient ASR error ({info.code}): {info.message}. Continuing...")
return
if info:
print(f"\n✗ Stream error [{info.code}]: {info.message}")
return
print(f"\n✗ Stream error: {ex}")


read_thread = threading.Thread(target=read_results, daemon=True)
Expand All @@ -72,7 +98,6 @@ def read_results():
CHANNELS = 1
CHUNK = RATE // 10 # 100ms of audio = 1600 frames

stop_event = threading.Event()
mic_active = False
pa = None
stream = None
Expand Down Expand Up @@ -100,14 +125,21 @@ def read_results():
print()

def capture_mic():
while not stop_event.is_set():
while not shutdown_event.is_set():
try:
pcm_data = stream.read(CHUNK, exception_on_overflow=False)
if pcm_data:
# Session-level cancel_event applies — if shutdown
# fires while append() is blocked on backpressure,
# it raises FoundryLocalException("cancelled") instead
# of waiting for the queue to drain.
session.append(pcm_data)
except FoundryLocalException:
# Session was cancelled — exit the capture loop cleanly.
break
except Exception as e:
print(f"\n[ERROR] Microphone capture failed: {e}")
stop_event.set()
shutdown_event.set()
break

capture_thread = threading.Thread(target=capture_mic, daemon=True)
Expand Down Expand Up @@ -148,9 +180,17 @@ def capture_mic():

# --- Graceful shutdown (mirrors JS SIGINT handler / C++ SignalHandler) ---


def shutdown(*_args):
print("\n\nStopping...")
stop_event.set()
# Setting shutdown_event:
# - exits the mic capture loop on its next iteration
# - aborts any in-flight session.append() blocked on backpressure
# with FoundryLocalException("cancelled")
# - ends session.get_transcription_stream() iteration cleanly in
# the read thread
# - short-circuits session.stop()'s drain wait below
shutdown_event.set()

if stream:
stream.stop_stream()
Expand All @@ -169,6 +209,6 @@ def shutdown(*_args):

if mic_active:
# Block until Ctrl+C
stop_event.wait()
shutdown_event.wait()
else:
shutdown()
1 change: 1 addition & 0 deletions sdk/js/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
export { EmbeddingClient } from './openai/embeddingClient.js';
export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioTranscriptionClient.js';
export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTranscriptionTypes.js';
export { CoreError } from './openai/liveAudioTranscriptionTypes.js';
export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
export { ModelLoadManager } from './detail/modelLoadManager.js';
/** @internal */
Expand Down
20 changes: 18 additions & 2 deletions sdk/js/src/openai/audioClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,26 @@ export class AudioClient {

/**
* Creates a LiveAudioTranscriptionSession for real-time audio streaming ASR.
*
* @param signal - Optional AbortSignal applied to **all** subsequent
* ``start`` / ``append`` / ``stop`` /
* ``getTranscriptionStream`` calls on the returned session.
* Behaves like C#'s ``CancellationToken`` parameter.
* @returns A LiveAudioTranscriptionSession instance.
*
* @example
* ```ts
* const shutdown = new AbortController();
* const session = audioClient.createLiveTranscriptionSession(shutdown.signal);
* await session.start();
* await session.append(pcm);
* for await (const r of session.getTranscriptionStream()) { ... }
*
* process.on('SIGINT', () => shutdown.abort());
* ```
*/
public createLiveTranscriptionSession(): LiveAudioTranscriptionSession {
return new LiveAudioTranscriptionSession(this.modelId, this.coreInterop);
public createLiveTranscriptionSession(signal?: AbortSignal): LiveAudioTranscriptionSession {
return new LiveAudioTranscriptionSession(this.modelId, this.coreInterop, signal);
}

/**
Expand Down
Loading
Loading