Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
60fe271
feat: add MobiusModelBuilder Olive pass
Apr 9, 2026
5921223
test: extend EP_MAP coverage to all four EPs; add CPU example config
Apr 9, 2026
9d77132
docs: correct Gemma4 model IDs and annotate audio support
Apr 9, 2026
5ba5b1f
fix: correct example config format (engine.target, remove invalid fie…
Apr 9, 2026
cd86ba3
fix: readability improvements for MobiusModelBuilder pass
Apr 10, 2026
3ee4a23
fix: additional readability fixes for MobiusModelBuilder pass
Apr 10, 2026
c82f407
feat: add output validation and trust_remote_code warning to MobiusMo…
Apr 10, 2026
8c1259c
docs: clarify _patch_build comment on lazy import patch target
Apr 10, 2026
2eb7de5
fix: address all open PR review comments on MobiusModelBuilder
Apr 10, 2026
209b616
fix: update mobius PyPI package name to mobius-ai
Apr 10, 2026
0c4a3cf
fix: remove unused noqa directives (RUF100)
Apr 10, 2026
be13f27
fix: get trust_remote_code from model load_kwargs and add additional_…
Copilot Apr 10, 2026
ee7fbd4
fix: use .get(key, default) over or False for trust_remote_code; clar…
Copilot Apr 10, 2026
e02b3f3
fix: remove unsupported 'comment' field from Gemma4 example configs
justinchuby Apr 23, 2026
dca7795
fix: use OnnxBlockWiseRtnQuantization for Gemma4 INT4 pipeline
justinchuby Apr 23, 2026
2af889f
fix: add MobiusEP enum for execution_provider validation
justinchuby Apr 23, 2026
f1c0a1a
Merge origin/main
justinchuby Apr 23, 2026
16f74dd
chore: move gemma4 example configs to olive-recipes
justinchuby Apr 23, 2026
68ed349
feat: generate ORT GenAI configs by default in MobiusModelBuilder
justinchuby Apr 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions examples/gemma4/gemma4_fp32_cpu.json
Comment thread
justinchuby marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
    "input_model": { "type": "HfModel", "model_path": "google/gemma-4-E2B-it", "task": "text-generation" },
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ]
        }
    },
    "passes": { "mobius_build": { "type": "MobiusModelBuilder", "precision": "fp32" } },
    "engine": { "target": "local_system", "output_dir": "models/gemma4-e2b-fp32-cpu", "log_severity_level": 1 }
}
15 changes: 15 additions & 0 deletions examples/gemma4/gemma4_int4_cuda.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
    "input_model": { "type": "HfModel", "model_path": "google/gemma-4-E2B-it", "task": "text-generation" },
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ]
        }
    },
    "passes": {
        "mobius_build": { "type": "MobiusModelBuilder", "precision": "fp16" },
        "int4_quantize": { "type": "GptqQuantizer", "bits": 4, "group_size": 128, "sym": true }
    },
    "engine": { "target": "local_system", "output_dir": "models/gemma4-e2b-int4-cuda", "log_severity_level": 1 }
}
11 changes: 11 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,15 @@
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
},
"MobiusModelBuilder": {
"module_path": "olive.passes.onnx.mobius_model_builder.MobiusModelBuilder",
"supported_providers": [ "*" ],
"supported_accelerators": [ "*" ],
"supported_precisions": [ "fp32", "fp16", "bf16" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "mobius-ai", "onnx-ir" ]
},
Comment thread
justinchuby marked this conversation as resolved.
"LoftQ": {
"module_path": "olive.passes.pytorch.lora.LoftQ",
"supported_providers": [ "*" ],
Expand Down Expand Up @@ -665,6 +674,8 @@
"inc": [ "neural-compressor" ],
"lora": [ "accelerate>=0.30.0", "peft", "scipy" ],
"diffusers": [ "accelerate>=0.30.0", "peft", "diffusers" ],
"mobius-ai": [ "mobius-ai" ],
"onnx-ir": [ "onnx-ir" ],
"nvmo": [ "nvidia-modelopt[onnx]" ],
"openvino": [
"openvino>=2025.4.1",
Expand Down
196 changes: 196 additions & 0 deletions olive/passes/onnx/mobius_model_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# -------------------------------------------------------------------------

Check warning

Code scanning / lintrunner

RUFF/format Warning

Run lintrunner -a to apply this patch.
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Build ONNX models from HuggingFace model IDs using the mobius package."""

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar

from olive.constants import Precision
from olive.hardware.constants import ExecutionProvider
from olive.model import HfModelHandler, ONNXModelHandler
from olive.model.handler.composite import CompositeModelHandler
from olive.passes import Pass
from olive.passes.olive_pass import PassConfigParam

if TYPE_CHECKING:
from olive.hardware.accelerator import AcceleratorSpec
from olive.passes.pass_config import BasePassConfig

logger = logging.getLogger(__name__)

# Maps Olive Precision enum members to mobius dtype strings.
#   "f32"  -> torch.float32, standard full precision.
#   "f16"  -> torch.float16, half precision — good for GPU inference.
#   "bf16" -> torch.bfloat16, brain float — preferred over f16 on newer hardware.
# INT4/INT8 are deliberately absent: quantize with a downstream Olive pass
# (e.g. OnnxMatMulNBits) after this pass rather than setting precision here.
_PRECISION_TO_DTYPE: dict[Precision, str] = {
    Precision.FP32: "f32",
    Precision.FP16: "f16",
    Precision.BF16: "bf16",
}


class MobiusModelBuilder(Pass):
    """Olive pass that uses mobius to build ONNX models from HuggingFace model IDs.

    Supports all model architectures registered in mobius (LLMs, VLMs, speech
    models, diffusion models). For multi-component models (e.g. vision-language
    models that produce ``model``, ``vision``, and ``embedding`` sub-graphs) the
    pass returns a :class:`~olive.model.handler.composite.CompositeModelHandler`
    whose components are individual :class:`~olive.model.ONNXModelHandler` objects.
    Single-component models return a plain :class:`~olive.model.ONNXModelHandler`.

    Requires ``mobius-ai`` to be installed::

        pip install mobius-ai
    """

    # Maps Olive ExecutionProvider enum values to mobius EP names.
    EP_MAP: ClassVar[dict[ExecutionProvider, str]] = {
        ExecutionProvider.CPUExecutionProvider: "cpu",
        ExecutionProvider.CUDAExecutionProvider: "cuda",
        ExecutionProvider.DmlExecutionProvider: "dml",
        ExecutionProvider.WebGpuExecutionProvider: "webgpu",
    }

    @classmethod
    def is_accelerator_agnostic(cls, accelerator_spec: AcceleratorSpec) -> bool:
        """Return False: EP selection determines which fused ops are emitted, so this pass is EP-specific."""
        return False

    @classmethod
    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
        """Declare the configurable parameters of this pass.

        Returns a mapping of parameter name to :class:`PassConfigParam`:
        ``precision`` (fp32/fp16/bf16, default fp32), ``execution_provider``
        (optional override of the auto-detected mobius EP), and
        ``trust_remote_code`` (forwarded to the HuggingFace config loader).
        """
        return {
            "precision": PassConfigParam(
                type_=Precision,
                required=False,
                default_value=Precision.FP32,
                description=(
                    "Model weight / compute precision. One of: fp32, fp16, bf16. "
                    "Defaults to fp32. For INT4 quantization, run an Olive "
                    "quantization pass (e.g. OnnxMatMulNBits) after this pass."
                ),
            ),
            "execution_provider": PassConfigParam(
                type_=str,
                required=False,
                default_value=None,
                description=(
                    "Override the mobius execution provider (cpu, cuda, dml, webgpu). "
                    "When None (default), the EP is auto-detected from the Olive "
                    "accelerator spec."
                ),
            ),
            "trust_remote_code": PassConfigParam(
                type_=bool,
                required=False,
                default_value=False,
                description="Pass trust_remote_code=True to the HuggingFace config loader.",
            ),
        }

    def _run_for_config(
        self,
        model: HfModelHandler,
        config: type[BasePassConfig],
        output_model_path: str,
    ) -> ONNXModelHandler | CompositeModelHandler:
        """Build ``model`` into one or more ONNX graphs via ``mobius.build``.

        Saves the resulting package under ``output_model_path`` and wraps it
        in an :class:`ONNXModelHandler` (single component) or a
        :class:`CompositeModelHandler` (multiple components).

        Raises:
            ImportError: if the ``mobius`` package is not installed.
            ValueError: if ``model`` is not an :class:`HfModelHandler`.
            RuntimeError: if an expected output ONNX file is missing after the build.
        """
        try:
            from mobius import build
        except ImportError as exc:
            raise ImportError(
                "mobius-ai is required to run MobiusModelBuilder. Install with: pip install mobius-ai"
            ) from exc

        if not isinstance(model, HfModelHandler):
            raise ValueError(f"MobiusModelBuilder requires an HfModelHandler input, got {type(model).__name__}.")

        # Resolve EP: explicit config override > accelerator spec > fallback to cpu.
        ep_str: str = config.execution_provider or self.EP_MAP.get(self.accelerator_spec.execution_provider, "cpu")
        if not config.execution_provider and self.accelerator_spec.execution_provider not in self.EP_MAP:
            # NOTE(review): the Olive engine may expect a pass to raise when the
            # accelerator-spec EP is unsupported; surfacing the fallback here
            # keeps behavior backward-compatible while making it visible.
            logger.warning(
                "MobiusModelBuilder: accelerator EP '%s' is not supported by mobius; falling back to 'cpu'.",
                self.accelerator_spec.execution_provider,
            )

        dtype_str: str = _PRECISION_TO_DTYPE.get(config.precision, "f32")
        model_id: str = model.model_name_or_path

        logger.info(
            "MobiusModelBuilder: building '%s' (ep=%s, dtype=%s)",
            model_id,
            ep_str,
            dtype_str,
        )

        if config.trust_remote_code:
            logger.warning("MobiusModelBuilder: trust_remote_code=True — only use with trusted model sources.")

        output_dir = Path(output_model_path)
        output_dir.mkdir(parents=True, exist_ok=True)

        pkg = build(
            model_id,
            dtype=dtype_str,
            execution_provider=ep_str,
            load_weights=True,
            trust_remote_code=config.trust_remote_code,
        )

        # ModelPackage.save() handles both single and multi-component layouts:
        #   single component -> <output_dir>/model.onnx
        #   multi-component  -> <output_dir>/<name>/model.onnx for each key
        pkg.save(str(output_dir))

        package_keys = list(pkg.keys())
        logger.info("MobiusModelBuilder: saved components %s to '%s'", package_keys, output_dir)

        if len(package_keys) == 1:
            # Single-component model (most LLMs): return a plain ONNXModelHandler.
            onnx_path = output_dir / "model.onnx"
            if not onnx_path.exists():
                raise RuntimeError(
                    f"MobiusModelBuilder: expected output file not found: {onnx_path}. "
                    "mobius.build() may have failed silently or saved to an unexpected path."
                )
            return ONNXModelHandler(
                model_path=str(output_dir),
                onnx_file_name="model.onnx",
                model_attributes={
                    "mobius_package_keys": package_keys,
                    # Preserve attributes carried by the input HF model.
                    **(model.model_attributes or {}),
                },
            )

        # Multi-component model (VLMs, encoder-decoders, diffusion pipelines):
        # mobius saves each component to <output_dir>/<key>/model.onnx.
        components = []
        for key in package_keys:
            component_dir = output_dir / key
            onnx_path = component_dir / "model.onnx"
            if not onnx_path.exists():
                raise RuntimeError(
                    f"MobiusModelBuilder: expected output file not found: {onnx_path}. "
                    f"mobius.build() may have failed silently for component '{key}'."
                )
            components.append(
                ONNXModelHandler(
                    model_path=str(component_dir),
                    onnx_file_name="model.onnx",
                    model_attributes={"mobius_component": key},
                )
            )

        return CompositeModelHandler(
            model_components=components,
            model_component_names=package_keys,
            model_path=str(output_dir),
            model_attributes={
                "mobius_package_keys": package_keys,
                **(model.model_attributes or {}),
            },
        )
Loading
Loading