Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
60fe271
feat: add MobiusModelBuilder Olive pass
Apr 9, 2026
5921223
test: extend EP_MAP coverage to all four EPs; add CPU example config
Apr 9, 2026
9d77132
docs: correct Gemma4 model IDs and annotate audio support
Apr 9, 2026
5ba5b1f
fix: correct example config format (engine.target, remove invalid fie…
Apr 9, 2026
cd86ba3
fix: readability improvements for MobiusModelBuilder pass
Apr 10, 2026
3ee4a23
fix: additional readability fixes for MobiusModelBuilder pass
Apr 10, 2026
c82f407
feat: add output validation and trust_remote_code warning to MobiusMo…
Apr 10, 2026
8c1259c
docs: clarify _patch_build comment on lazy import patch target
Apr 10, 2026
2eb7de5
fix: address all open PR review comments on MobiusModelBuilder
Apr 10, 2026
209b616
fix: update mobius PyPI package name to mobius-ai
Apr 10, 2026
0c4a3cf
fix: remove unused noqa directives (RUF100)
Apr 10, 2026
be13f27
fix: get trust_remote_code from model load_kwargs and add additional_…
Copilot Apr 10, 2026
ee7fbd4
fix: use .get(key, default) over or False for trust_remote_code; clar…
Copilot Apr 10, 2026
e02b3f3
fix: remove unsupported 'comment' field from Gemma4 example configs
justinchuby Apr 23, 2026
dca7795
fix: use OnnxBlockWiseRtnQuantization for Gemma4 INT4 pipeline
justinchuby Apr 23, 2026
2af889f
fix: add MobiusEP enum for execution_provider validation
justinchuby Apr 23, 2026
f1c0a1a
Merge origin/main
justinchuby Apr 23, 2026
16f74dd
chore: move gemma4 example configs to olive-recipes
justinchuby Apr 23, 2026
68ed349
feat: generate ORT GenAI configs by default in MobiusModelBuilder
justinchuby Apr 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,15 @@
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
},
"MobiusModelBuilder": {
"module_path": "olive.passes.onnx.mobius_model_builder.MobiusModelBuilder",
"supported_providers": [ "*" ],
"supported_accelerators": [ "*" ],
"supported_precisions": [ "fp32", "fp16", "bf16" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "mobius-ai", "onnx-ir" ]
},
Comment thread
justinchuby marked this conversation as resolved.
"LoftQ": {
"module_path": "olive.passes.pytorch.lora.LoftQ",
"supported_providers": [ "*" ],
Expand Down Expand Up @@ -682,6 +691,8 @@
"inc": [ "neural-compressor" ],
"lora": [ "accelerate>=0.30.0", "peft", "scipy" ],
"diffusers": [ "accelerate>=0.30.0", "peft", "diffusers" ],
"mobius-ai": [ "mobius-ai" ],
"onnx-ir": [ "onnx-ir" ],
"nvmo": [ "nvidia-modelopt[onnx]" ],
"openvino": [
"openvino>=2025.4.1",
Expand Down
250 changes: 250 additions & 0 deletions olive/passes/onnx/mobius_model_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
# -------------------------------------------------------------------------

Check warning

Code scanning / lintrunner

RUFF-FORMAT/format Warning

Run lintrunner -a to apply this patch.
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Build ONNX models from HuggingFace model IDs using the mobius package."""

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar

from olive.common.utils import StrEnumBase
from olive.constants import Precision
from olive.hardware.constants import ExecutionProvider
from olive.model import HfModelHandler, ONNXModelHandler
from olive.model.handler.composite import CompositeModelHandler
from olive.passes import Pass
from olive.passes.olive_pass import PassConfigParam

if TYPE_CHECKING:
from olive.hardware.accelerator import AcceleratorSpec
from olive.passes.pass_config import BasePassConfig

logger = logging.getLogger(__name__)

# Maps Olive Precision values to mobius dtype strings.
# "f32"  = 32-bit float (torch.float32), standard full precision.
# "f16"  = 16-bit float (torch.float16), half precision — good for GPU inference.
# "bf16" = bfloat16 (torch.bfloat16), brain float — preferred over f16 on newer hardware.
# For INT4/INT8 quantization, use a downstream Olive quantization pass (e.g. OnnxMatMulNBits)
# after this pass rather than setting precision here.
# NOTE: keys are Precision enum members (see the Precision.FP32 keys below and the
# `.get(config.precision, ...)` lookup in MobiusModelBuilder), not plain strings.
_PRECISION_TO_DTYPE: dict[Precision, str] = {
    Precision.FP32: "f32",
    Precision.FP16: "f16",
    Precision.BF16: "bf16",
}


class MobiusModelBuilder(Pass):
    """Olive pass that uses mobius to build ONNX models from HuggingFace model IDs.

    Supports all model architectures registered in mobius (LLMs, VLMs, speech
    models, diffusion models). For multi-component models (e.g. vision-language
    models that produce ``model``, ``vision``, and ``embedding`` sub-graphs) the
    pass returns a :class:`~olive.model.handler.composite.CompositeModelHandler`
    whose components are individual :class:`~olive.model.ONNXModelHandler` objects.
    Single-component models return a plain :class:`~olive.model.ONNXModelHandler`.

    Requires ``mobius-ai`` to be installed::

        pip install mobius-ai
    """

    class MobiusRuntime(StrEnumBase):
        """Target runtimes for genai config generation."""

        NONE = "none"
        ORT_GENAI = "ort-genai"

    class MobiusEP(StrEnumBase):
        """Execution providers supported by mobius."""

        DEFAULT = "default"
        CPU = "cpu"
        CUDA = "cuda"
        DML = "dml"
        WEBGPU = "webgpu"
        TRT_RTX = "trt-rtx"
        ONNX_STANDARD = "onnx-standard"

    # Maps Olive ExecutionProvider enum values to mobius EP names. EPs absent
    # from this map (e.g. QNN, OpenVINO) are not supported by this pass.
    EP_MAP: ClassVar[dict[ExecutionProvider, str]] = {
        ExecutionProvider.CPUExecutionProvider: "cpu",
        ExecutionProvider.CUDAExecutionProvider: "cuda",
        ExecutionProvider.DmlExecutionProvider: "dml",
        ExecutionProvider.WebGpuExecutionProvider: "webgpu",
    }

    @classmethod
    def is_accelerator_agnostic(cls, accelerator_spec: AcceleratorSpec) -> bool:
        # EP selection determines which fused ops are emitted, so this pass is
        # EP-specific.
        return False

    @classmethod
    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
        return {
            "precision": PassConfigParam(
                type_=Precision,
                required=False,
                default_value=Precision.FP32,
                description=(
                    "Model weight / compute precision. One of: fp32, fp16, bf16. "
                    "Defaults to fp32. For INT4 quantization, run an Olive "
                    "quantization pass (e.g. OnnxMatMulNBits) after this pass."
                ),
            ),
            "execution_provider": PassConfigParam(
                type_=MobiusModelBuilder.MobiusEP,
                required=False,
                default_value=None,
                description=(
                    "Override the mobius execution provider. "
                    "When None (default), the EP is auto-detected from the Olive "
                    "accelerator spec."
                ),
            ),
            "runtime": PassConfigParam(
                type_=MobiusModelBuilder.MobiusRuntime,
                required=False,
                default_value=MobiusModelBuilder.MobiusRuntime.ORT_GENAI,
                description=(
                    "Target runtime. 'ort-genai' (default) generates "
                    "genai_config.json, tokenizer files, and processor "
                    "configs alongside the ONNX models. 'none' to skip."
                ),
            ),
        }

    def _resolve_ep(self, config: type[BasePassConfig]) -> str:
        """Resolve the mobius EP name for this run.

        Precedence: explicit ``execution_provider`` config override, then the
        EP from the Olive accelerator spec.

        Raises:
            ValueError: when the accelerator spec's EP has no mobius mapping.
                The Olive engine expects a pass to raise for an unsupported EP
                rather than silently falling back to a different target.
        """
        if config.execution_provider is not None:
            return str(config.execution_provider)
        accel_ep = self.accelerator_spec.execution_provider
        ep = self.EP_MAP.get(accel_ep)
        if ep is None:
            raise ValueError(
                f"MobiusModelBuilder does not support execution provider '{accel_ep}'. "
                f"Supported providers: {sorted(self.EP_MAP.values())}. "
                "Set the pass 'execution_provider' option to override."
            )
        return ep

    @staticmethod
    def _component_handler(
        component_dir: Path,
        failure_detail: str,
        extra_attributes: dict,
    ) -> ONNXModelHandler:
        """Wrap ``<component_dir>/model.onnx`` in an :class:`ONNXModelHandler`.

        Validates that the expected ONNX file exists and records every other
        file in the directory (tokenizer/processor/genai configs, but not the
        external-data file) as ``additional_files`` so downstream passes carry
        them along.
        """
        onnx_path = component_dir / "model.onnx"
        if not onnx_path.exists():
            raise RuntimeError(
                f"MobiusModelBuilder: expected output file not found: {onnx_path}. {failure_detail}"
            )
        additional_files = sorted(
            {str(fp) for fp in component_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"}
        )
        return ONNXModelHandler(
            model_path=str(component_dir),
            onnx_file_name="model.onnx",
            model_attributes={"additional_files": additional_files, **extra_attributes},
        )

    def _run_for_config(
        self,
        model: HfModelHandler,
        config: type[BasePassConfig],
        output_model_path: str,
    ) -> ONNXModelHandler | CompositeModelHandler:
        """Build the ONNX model(s) for ``model`` with mobius.

        Returns a plain :class:`ONNXModelHandler` for single-component models
        and a :class:`CompositeModelHandler` for multi-component ones.

        Raises:
            ImportError: if ``mobius-ai`` is not installed.
            ValueError: for a non-HF input model or an unsupported EP.
            RuntimeError: if mobius did not produce the expected output files.
        """
        try:
            from mobius import build
        except ImportError as exc:
            raise ImportError(
                "mobius-ai is required to run MobiusModelBuilder. Install with: pip install mobius-ai"
            ) from exc

        if not isinstance(model, HfModelHandler):
            raise ValueError(f"MobiusModelBuilder requires an HfModelHandler input, got {type(model).__name__}.")

        ep_str: str = self._resolve_ep(config)
        dtype_str: str = _PRECISION_TO_DTYPE.get(config.precision, "f32")
        model_id: str = model.model_name_or_path

        # Read trust_remote_code from the model's HuggingFace load kwargs.
        trust_remote_code: bool = model.get_load_kwargs().get("trust_remote_code", False)

        logger.info(
            "MobiusModelBuilder: building '%s' (ep=%s, dtype=%s)",
            model_id,
            ep_str,
            dtype_str,
        )

        if trust_remote_code:
            logger.warning("MobiusModelBuilder: trust_remote_code=True — only use with trusted model sources.")

        output_dir = Path(output_model_path)
        output_dir.mkdir(parents=True, exist_ok=True)

        pkg = build(
            model_id,
            dtype=dtype_str,
            execution_provider=ep_str,
            load_weights=True,
            trust_remote_code=trust_remote_code,
        )

        # ModelPackage.save() handles both single and multi-component layouts:
        #   single component → <output_dir>/model.onnx
        #   multi-component  → <output_dir>/<name>/model.onnx for each key
        pkg.save(str(output_dir))

        # Generate ORT GenAI config artifacts (genai_config.json, tokenizer
        # files, processor configs) when runtime is set to ort-genai.
        if config.runtime == self.MobiusRuntime.ORT_GENAI:
            self._write_genai_config(pkg, str(output_dir), model_id, ep_str)

        package_keys = list(pkg.keys())
        logger.info("MobiusModelBuilder: saved components %s to '%s'", package_keys, output_dir)

        if len(package_keys) == 1:
            # Single-component model (most LLMs): return a plain ONNXModelHandler.
            return self._component_handler(
                output_dir,
                "mobius.build() may have failed silently or saved to an unexpected path.",
                {"mobius_package_keys": package_keys, **(model.model_attributes or {})},
            )

        # Multi-component model (VLMs, encoder-decoders, diffusion pipelines):
        # mobius saves each component to <output_dir>/<key>/model.onnx.
        components = [
            self._component_handler(
                output_dir / key,
                f"mobius.build() may have failed silently for component '{key}'.",
                {"mobius_component": key, **(model.model_attributes or {})},
            )
            for key in package_keys
        ]

        return CompositeModelHandler(
            model_components=components,
            model_component_names=package_keys,
            model_path=str(output_dir),
            model_attributes={
                "mobius_package_keys": package_keys,
                **(model.model_attributes or {}),
            },
        )

    @staticmethod
    def _write_genai_config(pkg, output_dir: str, model_id: str, ep: str) -> None:
        """Generate ORT GenAI config artifacts alongside the ONNX models."""
        from mobius.integrations.ort_genai import write_ort_genai_config

        genai_artifacts = write_ort_genai_config(pkg, output_dir, hf_model_id=model_id, ep=ep)
        logger.info(
            "MobiusModelBuilder: wrote ORT GenAI config: %s",
            list(genai_artifacts.keys()),
        )
Loading
Loading