-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Added NVIDIA NIM model support in interpreter/core/llm/llm.py.
#1728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| --- | ||
| title: NVIDIA NIM | ||
| --- | ||
|
|
||
| Open Interpreter supports NVIDIA NIM via LiteLLM's OpenAI-compatible provider route. | ||
|
|
||
| Use the `nvidia_nim/` model prefix, for example: | ||
|
|
||
| <CodeGroup> | ||
|
|
||
| ```bash Terminal | ||
| interpreter --model nvidia_nim/meta/llama-3.1-8b-instruct | ||
| ``` | ||
|
|
||
| ```python Python | ||
| from interpreter import interpreter | ||
|
|
||
| interpreter.llm.model = "nvidia_nim/meta/llama-3.1-8b-instruct" | ||
| interpreter.chat() | ||
| ``` | ||
|
|
||
| </CodeGroup> | ||
|
|
||
| ## Shorthand aliases | ||
|
|
||
| Open Interpreter also supports these shorthand model names: | ||
|
|
||
| - `llama-3.1-8b` → `nvidia_nim/meta/llama-3.1-8b-instruct` | ||
| - `llama-3.1-70b` → `nvidia_nim/meta/llama-3.1-70b-instruct` | ||
| - `llama-3.1-405b` → `nvidia_nim/meta/llama-3.1-405b-instruct` | ||
| - `llama-3.3-70b` → `nvidia_nim/meta/llama-3.3-70b-instruct` | ||
| - `llama-4-maverick` → `nvidia_nim/meta/llama-4-maverick-17b-128e-instruct` | ||
| - `nemotron-70b` → `nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct` | ||
| - `nemotron-ultra` → `nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1` | ||
| - `nemotron-340b` → `nvidia_nim/nvidia/nemotron-4-340b-instruct` | ||
| - `deepseek-v3` → `nvidia_nim/deepseek-ai/deepseek-v3.2` | ||
| - `qwen3-coder` → `nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct` | ||
|
|
||
| You can also use `nvidia/...` and Open Interpreter will normalize it to `nvidia_nim/...`. | ||
|
|
||
| ## Required environment variables | ||
|
|
||
| Set one of the following environment variables: | ||
|
|
||
| | Environment Variable | Description | | ||
| | ---------------------- | ----------------------------------- | | ||
| | `NVIDIA_API_KEY` | NVIDIA API key for NIM requests | | ||
| | `NVIDIA_NIM_API_KEY` | Alternate environment variable name | | ||
|
|
||
| ## Optional settings | ||
|
|
||
| By default, Open Interpreter uses: | ||
|
|
||
| - `api_base = https://integrate.api.nvidia.com/v1` | ||
| - Model-specific `context_window` values for common NIM models | ||
| - `max_tokens` default of 20% of context window (capped at 4096) | ||
|
|
||
| ## Notes | ||
|
|
||
| NIM is OpenAI-compatible, so Open Interpreter uses the standard LiteLLM request path. |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -116,6 +116,23 @@ def run(self, messages): | |||||
| ]: | ||||||
| model = "claude-3-5-sonnet-20240620" | ||||||
| self.model = "claude-3-5-sonnet-20240620" | ||||||
|
|
||||||
| NVIDIA_MODEL_ALIASES = { | ||||||
| "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", | ||||||
| "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", | ||||||
| "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", | ||||||
| "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", | ||||||
| "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", | ||||||
| "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", | ||||||
| "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", | ||||||
| "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", | ||||||
| "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", | ||||||
| "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", | ||||||
| } | ||||||
| if model in NVIDIA_MODEL_ALIASES: | ||||||
| model = NVIDIA_MODEL_ALIASES[model] | ||||||
| self.model = model | ||||||
|
|
||||||
| # Setup our model endpoint | ||||||
| if model == "i": | ||||||
| model = "openai/i" | ||||||
|
|
@@ -340,6 +357,24 @@ def load(self): | |||||
| if self._is_loaded: | ||||||
| return | ||||||
|
|
||||||
| NVIDIA_MODEL_ALIASES = { | ||||||
| "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", | ||||||
| "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", | ||||||
| "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", | ||||||
| "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", | ||||||
| "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", | ||||||
| "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", | ||||||
| "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", | ||||||
| "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", | ||||||
| "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", | ||||||
| "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", | ||||||
| } | ||||||
| if self.model in NVIDIA_MODEL_ALIASES: | ||||||
| self.model = NVIDIA_MODEL_ALIASES[self.model] | ||||||
|
|
||||||
|
||||||
| if self.model.startswith("nvidia/"): | ||||||
| self.model = self.model.replace("nvidia/", "nvidia_nim/", 1) | ||||||
|
|
||||||
| if self.model.startswith("ollama/") and not ":" in self.model: | ||||||
| self.model = self.model + ":latest" | ||||||
|
|
||||||
|
|
@@ -402,6 +437,43 @@ def load(self): | |||||
|
|
||||||
| self.interpreter.display_message("*Model loaded.*\n") | ||||||
|
|
||||||
| if self.model.startswith("nvidia_nim/"): | ||||||
| model_name = self.model.replace("nvidia_nim/", "", 1) | ||||||
|
|
||||||
| if not self.api_base: | ||||||
| self.api_base = "https://integrate.api.nvidia.com/v1" | ||||||
|
|
||||||
| if not self.api_key: | ||||||
| self.api_key = os.getenv("NVIDIA_API_KEY") or os.getenv( | ||||||
| "NVIDIA_NIM_API_KEY" | ||||||
| ) | ||||||
| if not self.api_key: | ||||||
| self.interpreter.display_message( | ||||||
| f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY`.\n" | ||||||
| ) | ||||||
| exit() | ||||||
|
|
||||||
| nvidia_context_windows = { | ||||||
| "meta/llama-3.1-8b-instruct": 128000, | ||||||
| "meta/llama-3.1-70b-instruct": 128000, | ||||||
| "meta/llama-3.1-405b-instruct": 128000, | ||||||
| "meta/llama-3.3-70b-instruct": 128000, | ||||||
| "meta/llama-4-maverick-17b-128e-instruct": 1000000, | ||||||
| "nvidia/llama-3.1-nemotron-70b-instruct": 32768, | ||||||
| "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000, | ||||||
| "nvidia/nemotron-4-340b-instruct": 4096, | ||||||
| "mistralai/mixtral-8x22b-instruct-v0.1": 65536, | ||||||
| "mistralai/mistral-large-2-instruct": 128000, | ||||||
| "deepseek-ai/deepseek-v3.2": 131072, | ||||||
| "qwen/qwen3-coder-480b-a35b-instruct": 32768, | ||||||
| } | ||||||
| model_context_window = nvidia_context_windows.get(model_name, 8192) | ||||||
|
|
||||||
| if self.context_window == None: | ||||||
| self.context_window = model_context_window | ||||||
| if self.max_tokens is None: | ||||||
|
||||||
| if self.max_tokens is None: | |
| if self.max_tokens == None: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`NVIDIA_MODEL_ALIASES` is defined inline inside `run()`; the same mapping is duplicated again in `load()`. This creates maintenance risk (the two copies can drift) and adds unnecessary per-call dict allocation. Consider moving the alias map (and any prefix normalization) to a single module-level constant/helper, and applying it in one place (typically `load()`).