Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
b2cd9d3
feat: add deepgram tts extension with voice-assistant integration
Apr 7, 2026
a92239c
refactor: rewrite deepgram tts client with duplex websocket pattern
Apr 7, 2026
27c71be
fix: reconnect websocket per request_id to fix interleaved requests
Apr 7, 2026
cefa2f3
fix: remove reconnect-per-request, rely on cancel drain instead
Apr 7, 2026
261949a
refactor: align progressive disclosure docs with PD standard
Apr 7, 2026
2fd1ace
fix: address codex review — connect fail-fast and error handling
Apr 7, 2026
ace659f
test: add state machine, recovery, and redaction tests
Apr 7, 2026
2a65917
fix: eliminate double error emission on auth failure, add targeted tests
Apr 7, 2026
f197de3
docs: add tar sync method, cache cleanup, fix guarder test count
Apr 7, 2026
8a32823
fix: address code review — 401 detection, dead code, dump writes
Apr 7, 2026
319d504
fix: resolve pylint W1404 implicit string concatenation warnings
Apr 7, 2026
ede8cff
fix: reconnect on server errors, break after finalize, cleanup
Apr 7, 2026
35e9b11
fix: cancel finalization, exception cleanup, test bootstrap
Apr 7, 2026
cc9fbad
chore: remove progressive disclosure docs from deepgram tts PR scope
Apr 7, 2026
d66c783
fix: move cancel flag reset to just before ws.send
Apr 7, 2026
76c781d
fix: remove dual finalization path, dead config code, simplify
Apr 7, 2026
09265e3
feat: add vendor params passthrough to deepgram websocket URL
Apr 8, 2026
13b76a1
fix: add clarifying comments for event constant gap and sent_ts overw…
Apr 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@
},
{
"path": "../../../ten_packages/extension/oracle_tts_python"
},
{
"path": "../../../ten_packages/extension/deepgram_tts"
}
],
"scripts": {
Expand Down
184 changes: 184 additions & 0 deletions ai_agents/agents/examples/voice-assistant/tenapp/property.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,190 @@
]
}
},
{
"name": "voice_assistant_deepgram_tts",
"auto_start": false,
"graph": {
"nodes": [
{
"type": "extension",
"name": "agora_rtc",
"addon": "agora_rtc",
"extension_group": "default",
"property": {
"app_id": "${env:AGORA_APP_ID}",
"app_certificate": "${env:AGORA_APP_CERTIFICATE|}",
"channel": "ten_agent_test",
"stream_id": 1234,
"remote_stream_id": 123,
"subscribe_audio": true,
"publish_audio": true,
"publish_data": true,
"enable_agora_asr": false
}
},
{
"type": "extension",
"name": "stt",
"addon": "deepgram_asr_python",
"extension_group": "stt",
"property": {
"params": {
"api_key": "${env:DEEPGRAM_API_KEY}",
"language": "en-US",
"model": "nova-3"
}
}
},
{
"type": "extension",
"name": "llm",
"addon": "openai_llm2_python",
"extension_group": "chatgpt",
"property": {
"base_url": "https://api.openai.com/v1",
"api_key": "${env:OPENAI_API_KEY}",
"frequency_penalty": 0.9,
"model": "${env:OPENAI_MODEL}",
"max_tokens": 512,
"prompt": "",
"proxy_url": "${env:OPENAI_PROXY_URL|}",
"greeting": "TEN Agent connected. How can I help you today?",
"max_memory_length": 10
}
},
{
"type": "extension",
"name": "tts",
"addon": "deepgram_tts",
"extension_group": "tts",
"property": {
"dump": false,
"dump_path": "/tmp",
"params": {
"api_key": "${env:DEEPGRAM_API_KEY}",
"model": "aura-2-thalia-en",
"encoding": "linear16",
"sample_rate": 24000
}
}
},
{
"type": "extension",
"name": "main_control",
"addon": "main_python",
"extension_group": "control",
"property": {
"greeting": "TEN Agent connected. How can I help you today?"
}
},
{
"type": "extension",
"name": "message_collector",
"addon": "message_collector2",
"extension_group": "transcriber",
"property": {}
},
{
"type": "extension",
"name": "weatherapi_tool_python",
"addon": "weatherapi_tool_python",
"extension_group": "default",
"property": {
"api_key": "${env:WEATHERAPI_API_KEY|}"
}
},
{
"type": "extension",
"name": "streamid_adapter",
"addon": "streamid_adapter",
"property": {}
}
],
"connections": [
{
"extension": "main_control",
"cmd": [
{
"names": [
"on_user_joined",
"on_user_left"
],
"source": [
{
"extension": "agora_rtc"
}
]
},
{
"names": [
"tool_register"
],
"source": [
{
"extension": "weatherapi_tool_python"
}
]
}
],
"data": [
{
"name": "asr_result",
"source": [
{
"extension": "stt"
}
]
}
]
},
{
"extension": "agora_rtc",
"audio_frame": [
{
"name": "pcm_frame",
"dest": [
{
"extension": "streamid_adapter"
}
]
},
{
"name": "pcm_frame",
"source": [
{
"extension": "tts"
}
]
}
],
"data": [
{
"name": "data",
"source": [
{
"extension": "message_collector"
}
]
}
]
},
{
"extension": "streamid_adapter",
"audio_frame": [
{
"name": "pcm_frame",
"dest": [
{
"extension": "stt"
}
]
}
]
}
]
}
},
{
"name": "voice_assistant_oracle",
"auto_start": false,
Expand Down
103 changes: 103 additions & 0 deletions ai_agents/agents/ten_packages/extension/deepgram_tts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Deepgram TTS Extension

A TEN Framework extension that provides Text-to-Speech (TTS) capabilities using Deepgram's Aura streaming API.

## Features

- Real-time streaming TTS via WebSocket
- Multiple voice models (Aura-2 series)
- Configurable sample rates (8000, 16000, 24000, 48000 Hz)
- Linear16 PCM audio output
- TTFB (Time to First Byte) metrics reporting
- Audio dump capability for debugging

## Configuration

### Properties

| Property | Type | Default | Description |
|----------|------|---------|-------------|
| `params.api_key` | string | Required | Deepgram API key |
| `params.model` | string | `aura-2-thalia-en` | Voice model to use |
| `params.encoding` | string | `linear16` | Audio encoding format |
| `params.sample_rate` | int | `24000` | Output sample rate in Hz |
| `params.base_url` | string | `wss://api.deepgram.com/v1/speak` | WebSocket endpoint |
| `params.<deepgram_query_param>` | scalar | Optional | Any additional scalar key, passed through to Deepgram as a WebSocket query parameter |
| `dump` | bool | `false` | Enable audio dumping |
| `dump_path` | string | `/tmp` | Path for audio dump files |

### Example Configuration

```json
{
"params": {
"api_key": "${env:DEEPGRAM_API_KEY}",
"model": "aura-2-thalia-en",
"encoding": "linear16",
"sample_rate": 24000,
"container": "none"
},
"dump": false,
"dump_path": "/tmp"
}
```

Known extension-owned keys such as `api_key`, `base_url`, `model`, `encoding`,
and `sample_rate` are normalized onto the config object. Any remaining scalar
keys under `params` are appended to the Deepgram WebSocket query string.

## Available Voice Models

Deepgram Aura-2 voices:
- `aura-2-thalia-en` - Female, English (default)
- `aura-2-luna-en` - Female, English
- `aura-2-stella-en` - Female, English
- `aura-2-athena-en` - Female, English
- `aura-2-hera-en` - Female, English
- `aura-2-orion-en` - Male, English
- `aura-2-arcas-en` - Male, English
- `aura-2-perseus-en` - Male, English
- `aura-2-angus-en` - Male, English
- `aura-2-orpheus-en` - Male, English
- `aura-2-helios-en` - Male, English
- `aura-2-zeus-en` - Male, English

## Supported Sample Rates

- 8000 Hz
- 16000 Hz
- 24000 Hz (recommended)
- 48000 Hz

## API Interface

This extension implements the standard TEN TTS interface:

### Input Data
- `tts_text_input` - Text to synthesize
- `tts_flush` - Flush pending audio

### Output Data
- `tts_audio_start` - Audio generation started
- `tts_audio_end` - Audio generation completed
- `metrics` - Performance metrics (TTFB, duration)
- `error` - Error information

### Output Audio
- `pcm_frame` - PCM audio data (16-bit, mono)

## Running Tests

```bash
cd deepgram_tts
tman -y install --standalone
./tests/bin/start
```

## Environment Variables

- `DEEPGRAM_API_KEY` - Your Deepgram API key

## License

Apache License, Version 2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#
# This file is part of TEN Framework, an open source project.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for more information.
#
from . import addon
20 changes: 20 additions & 0 deletions ai_agents/agents/ten_packages/extension/deepgram_tts/addon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# This file is part of TEN Framework, an open source project.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for more information.
#
from ten_runtime import (
Addon,
register_addon_as_extension,
TenEnv,
)


@register_addon_as_extension("deepgram_tts")
class DeepgramTTSExtensionAddon(Addon):
    """Addon registered under the name ``deepgram_tts``.

    Its only job is to construct a ``DeepgramTTSExtension`` when the runtime
    asks for a new instance.
    """

    def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
        """Create the extension instance and report it back to the runtime.

        Args:
            ten_env: Runtime environment used for logging and for signalling
                that instance creation has finished.
            name: Name passed to the ``DeepgramTTSExtension`` constructor.
            context: Opaque value forwarded unchanged to
                ``on_create_instance_done``.
        """
        # The extension module is imported inside the method rather than at
        # module top level (presumably to defer loading it until an instance
        # is actually requested -- confirm against other TEN addons).
        from .extension import DeepgramTTSExtension

        ten_env.log_info("DeepgramTTSExtensionAddon on_create_instance")

        extension = DeepgramTTSExtension(name)
        ten_env.on_create_instance_done(extension, context)
Loading
Loading