Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions docs/setup/vendors.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ Alternatively vendors can be configured manually in `config.toml` file.

The table summarizes vendor alternative for core AI service and optional RAI modules:

| Module | Open source | Alternative | Why to consider alternative? | More information |
| ----------------------------------------------- | ------------------ | ----------------------- | ------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [LLM service](#llm-model-configuration-in-rai) | Ollama | OpenAI, Bedrock | Overall performance of the LLM models, supported modalities and features | [LangChain models](https://docs.langchain4j.dev/integrations/language-models/) |
| Module | Open source | Alternative | Why to consider alternative? | More information |
| ----------------------------------------------- | ------------------ | ------------------------------ | ------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [LLM service](#llm-model-configuration-in-rai) | Ollama | OpenAI, Bedrock, MiniMax | Overall performance of the LLM models, supported modalities and features | [LangChain models](https://python.langchain.com/docs/integrations/chat/) |
| **Optional:** [Tracing tool](./tracing.md) | Langfuse | LangSmith | Better integration with LangChain | [Comparison](https://langfuse.com/faq/all/langsmith-alternative) |
| **Optional:** [Text to speech](#text-to-speech) | KokoroTTS, OpenTTS | ElevenLabs | Arguably, significantly better voice synthesis | <li> [KokoroTTS](https://huggingface.co/hexgrad/Kokoro-82M#usage) </li><li> [OpenTTS GitHub](https://github.com/synesthesiam/opentts) </li><li> [RAI voice interface][s2s] </li> |
| **Optional:** [Speech to text](#speech-to-text) | Whisper | OpenAI Whisper (hosted) | When suitable local GPU is not an option | <li> [Whisper GitHub](https://github.com/openai/whisper) </li><li> [RAI voice interface][s2s] </li> |
Expand Down Expand Up @@ -67,6 +67,34 @@ Ollama can be used to host models locally.

2. Use [RAI Configurator][configurator] -> `Model Selection` -> `bedrock` vendor

### MiniMax

MiniMax provides high-capacity cloud LLM models (`MiniMax-M2.7`, `MiniMax-M2.7-highspeed`) with a
204K-token context window via an OpenAI-compatible API.

1. Obtain a MiniMax API key from [https://www.minimax.io](https://www.minimax.io) and set it:

```bash
export MINIMAX_API_KEY="your-api-key"
```

2. In `config.toml`, set the vendor and model:

```toml
[vendor]
simple_model = "minimax"
complex_model = "minimax"

[minimax]
simple_model = "MiniMax-M2.7-highspeed"
complex_model = "MiniMax-M2.7"
base_url = "https://api.minimax.io/v1"
```

> [!NOTE]
> MiniMax does not provide a public embeddings API. Use a different vendor
> (e.g. `openai` or `ollama`) for the `embeddings_model` setting.

## Complex LLM Model Configuration

For custom setups please use LangChain API.
Expand Down
5 changes: 5 additions & 0 deletions src/rai_core/rai/initialization/config_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@
complex_model = "gemini-3-pro"
embeddings_model = "text-embedding-004"

[minimax]
simple_model = "MiniMax-M2.7-highspeed"
complex_model = "MiniMax-M2.7"
base_url = "https://api.minimax.io/v1"

[tracing]
project = "rai"

Expand Down
50 changes: 50 additions & 0 deletions src/rai_core/rai/initialization/model_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from langchain_openai import ChatOpenAI
from langsmith import Client

_MINIMAX_DEFAULT_BASE_URL = "https://api.minimax.io/v1"

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
coloredlogs.install(level="INFO") # type: ignore
Expand Down Expand Up @@ -67,6 +69,13 @@ class GoogleConfig(ModelConfig):
pass


@dataclass
class MiniMaxConfig:
    """Configuration for the MiniMax vendor (OpenAI-compatible API).

    Unlike the other vendor configs this one carries no ``embeddings_model``:
    MiniMax does not expose a public embeddings API, and
    ``get_embeddings_model`` rejects the vendor explicitly.
    """

    # Model used for simple/fast requests (e.g. "MiniMax-M2.7-highspeed").
    simple_model: str
    # Model used for complex reasoning requests (e.g. "MiniMax-M2.7").
    complex_model: str
    # OpenAI-compatible endpoint. Defaulted so a partial [minimax] section in
    # config.toml (one that omits base_url) no longer raises a TypeError;
    # mirrors the module-level _MINIMAX_DEFAULT_BASE_URL value.
    base_url: str = "https://api.minimax.io/v1"


@dataclass
class LangfuseConfig:
use_langfuse: bool
Expand All @@ -93,6 +102,7 @@ class RAIConfig:
openai: OpenAIConfig
ollama: OllamaConfig
google: GoogleConfig
minimax: MiniMaxConfig
tracing: TracingConfig


Expand All @@ -109,6 +119,16 @@ def load_config(config_path: Optional[str] = None) -> RAIConfig:
openai=OpenAIConfig(**config_dict["openai"]),
ollama=OllamaConfig(**config_dict["ollama"]),
google=GoogleConfig(**config_dict["google"]),
minimax=MiniMaxConfig(
**config_dict.get(
"minimax",
{
"simple_model": "MiniMax-M2.7-highspeed",
"complex_model": "MiniMax-M2.7",
"base_url": _MINIMAX_DEFAULT_BASE_URL,
},
)
),
tracing=TracingConfig(
project=config_dict["tracing"]["project"],
langfuse=LangfuseConfig(**config_dict["tracing"]["langfuse"]),
Expand Down Expand Up @@ -170,6 +190,18 @@ def get_llm_model(

model_config = cast(GoogleConfig, model_config)
return ChatGoogleGenerativeAI(model=model, **kwargs)
elif vendor == "minimax":
from langchain_openai import ChatOpenAI

model_config = cast(MiniMaxConfig, model_config)
if "temperature" in kwargs and kwargs["temperature"] <= 0.0:
kwargs["temperature"] = 0.01
return ChatOpenAI(
model=model,
base_url=model_config.base_url,
api_key=os.environ.get("MINIMAX_API_KEY", ""),
**kwargs,
)
else:
raise ValueError(f"Unknown LLM vendor: {vendor}")

Expand Down Expand Up @@ -212,6 +244,18 @@ def get_llm_model_direct(

model_config = cast(GoogleConfig, model_config)
return ChatGoogleGenerativeAI(model=model_name, **kwargs)
elif vendor == "minimax":
from langchain_openai import ChatOpenAI

model_config = cast(MiniMaxConfig, model_config)
if "temperature" in kwargs and kwargs["temperature"] <= 0.0:
kwargs["temperature"] = 0.01
return ChatOpenAI(
model=model_name,
base_url=model_config.base_url,
api_key=os.environ.get("MINIMAX_API_KEY", ""),
**kwargs,
)
else:
raise ValueError(f"Unknown LLM vendor: {vendor}")

Expand All @@ -225,6 +269,12 @@ def get_embeddings_model(

model_config = getattr(config, vendor)

if vendor == "minimax":
raise ValueError(
"MiniMax does not provide a public embeddings API. "
"Please use a different vendor for embeddings (e.g., 'openai', 'ollama')."
)

logger.info(f"Using embeddings model: {vendor}-{model_config.embeddings_model}")
if vendor == "openai":
from langchain_openai import OpenAIEmbeddings
Expand Down
Loading
Loading