diff --git a/olive/olive_config.json b/olive/olive_config.json index 97ca9bd02..0ec5bd07c 100644 --- a/olive/olive_config.json +++ b/olive/olive_config.json @@ -205,6 +205,15 @@ "supported_algorithms": [ ], "supported_quantization_encodings": [ ] }, + "MobiusModelBuilder": { + "module_path": "olive.passes.onnx.mobius_model_builder.MobiusModelBuilder", + "supported_providers": [ "*" ], + "supported_accelerators": [ "*" ], + "supported_precisions": [ "fp32", "fp16", "bf16" ], + "supported_algorithms": [ ], + "supported_quantization_encodings": [ ], + "extra_dependencies": [ "mobius-ai", "onnx-ir" ] + }, "LoftQ": { "module_path": "olive.passes.pytorch.lora.LoftQ", "supported_providers": [ "*" ], @@ -682,6 +691,8 @@ "inc": [ "neural-compressor" ], "lora": [ "accelerate>=0.30.0", "peft", "scipy" ], "diffusers": [ "accelerate>=0.30.0", "peft", "diffusers" ], + "mobius-ai": [ "mobius-ai" ], + "onnx-ir": [ "onnx-ir" ], "nvmo": [ "nvidia-modelopt[onnx]" ], "openvino": [ "openvino>=2025.4.1", diff --git a/olive/passes/onnx/mobius_model_builder.py b/olive/passes/onnx/mobius_model_builder.py new file mode 100644 index 000000000..dfc225a07 --- /dev/null +++ b/olive/passes/onnx/mobius_model_builder.py @@ -0,0 +1,250 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +"""Build ONNX models from HuggingFace model IDs using the mobius package.""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import TYPE_CHECKING, ClassVar + +from olive.common.utils import StrEnumBase +from olive.constants import Precision +from olive.hardware.constants import ExecutionProvider +from olive.model import HfModelHandler, ONNXModelHandler +from olive.model.handler.composite import CompositeModelHandler +from olive.passes import Pass +from olive.passes.olive_pass import PassConfigParam + +if TYPE_CHECKING: + from olive.hardware.accelerator import AcceleratorSpec + from olive.passes.pass_config import BasePassConfig + +logger = logging.getLogger(__name__) + +# Maps Olive Precision values to mobius dtype strings. +# "f32" = 32-bit float (torch.float32), standard full precision. +# "f16" = 16-bit float (torch.float16), half precision — good for GPU inference. +# "bf16" = bfloat16 (torch.bfloat16), brain float — preferred over f16 on newer hardware. +# For INT4/INT8 quantization, use a downstream Olive quantization pass (e.g. OnnxMatMulNBits) +# after this pass rather than setting precision here. +_PRECISION_TO_DTYPE: dict[str, str] = { + Precision.FP32: "f32", + Precision.FP16: "f16", + Precision.BF16: "bf16", +} + + +class MobiusModelBuilder(Pass): + """Olive pass that uses mobius to build ONNX models from HuggingFace model IDs. + + Supports all model architectures registered in mobius (LLMs, VLMs, speech + models, diffusion models). For multi-component models (e.g. vision-language + models that produce ``model``, ``vision``, and ``embedding`` sub-graphs) the + pass returns a :class:`~olive.model.handler.composite.CompositeModelHandler` + whose components are individual :class:`~olive.model.ONNXModelHandler` objects. + Single-component models return a plain :class:`~olive.model.ONNXModelHandler`. 
+ + Requires ``mobius-ai`` to be installed:: + + pip install mobius-ai + + See https://github.com/microsoft/mobius + """ + + class MobiusRuntime(StrEnumBase): + """Target runtimes for genai config generation.""" + + NONE = "none" + ORT_GENAI = "ort-genai" + + class MobiusEP(StrEnumBase): + """Execution providers supported by mobius.""" + + DEFAULT = "default" + CPU = "cpu" + CUDA = "cuda" + DML = "dml" + WEBGPU = "webgpu" + TRT_RTX = "trt-rtx" + ONNX_STANDARD = "onnx-standard" + + # Maps Olive ExecutionProvider enum values to mobius EP names. + EP_MAP: ClassVar[dict[ExecutionProvider, str]] = { + ExecutionProvider.CPUExecutionProvider: "cpu", + ExecutionProvider.CUDAExecutionProvider: "cuda", + ExecutionProvider.DmlExecutionProvider: "dml", + ExecutionProvider.WebGpuExecutionProvider: "webgpu", + } + + @classmethod + def is_accelerator_agnostic(cls, accelerator_spec: AcceleratorSpec) -> bool: + # EP selection determines which fused ops are emitted, so this pass is + # EP-specific. + return False + + @classmethod + def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]: + return { + "precision": PassConfigParam( + type_=Precision, + required=False, + default_value=Precision.FP32, + description=( + "Model weight / compute precision. One of: fp32, fp16, bf16. " + "Defaults to fp32. For INT4 quantization, run an Olive " + "quantization pass (e.g. OnnxMatMulNBits) after this pass." + ), + ), + "execution_provider": PassConfigParam( + type_=MobiusModelBuilder.MobiusEP, + required=False, + default_value=None, + description=( + "Override the mobius execution provider. " + "When None (default), the EP is auto-detected from the Olive " + "accelerator spec." + ), + ), + "runtime": PassConfigParam( + type_=MobiusModelBuilder.MobiusRuntime, + required=False, + default_value=MobiusModelBuilder.MobiusRuntime.ORT_GENAI, + description=( + "Target runtime. 
'ort-genai' (default) generates "
+                    "genai_config.json, tokenizer files, and processor "
+                    "configs alongside the ONNX models. 'none' to skip."
+                ),
+            ),
+        }
+
+    def _run_for_config(
+        self,
+        model: HfModelHandler,
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> ONNXModelHandler | CompositeModelHandler:
+        try:
+            from mobius import build
+        except ImportError as exc:
+            raise ImportError(
+                "mobius-ai is required to run MobiusModelBuilder. Install with: pip install mobius-ai"
+            ) from exc
+
+        if not isinstance(model, HfModelHandler):
+            raise ValueError(f"MobiusModelBuilder requires an HfModelHandler input, got {type(model).__name__}.")
+
+        # Resolve EP: explicit config override > accelerator spec > fallback to cpu.
+        ep_str: str = config.execution_provider or self.EP_MAP.get(self.accelerator_spec.execution_provider, "cpu")
+
+        dtype_str: str = _PRECISION_TO_DTYPE.get(config.precision, "f32")
+        model_id: str = model.model_name_or_path
+
+        # Read trust_remote_code from the model's HuggingFace load kwargs.
+        trust_remote_code: bool = model.get_load_kwargs().get("trust_remote_code", False)
+
+        logger.info(
+            "MobiusModelBuilder: building '%s' (ep=%s, dtype=%s)",
+            model_id,
+            ep_str,
+            dtype_str,
+        )
+
+        if trust_remote_code:
+            logger.warning("MobiusModelBuilder: trust_remote_code=True — only use with trusted model sources.")
+
+        output_dir = Path(output_model_path)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        pkg = build(
+            model_id,
+            dtype=dtype_str,
+            execution_provider=ep_str,
+            load_weights=True,
+            trust_remote_code=trust_remote_code,
+        )
+
+        # ModelPackage.save() handles both single and multi-component layouts:
+        #     single component  → <output_dir>/model.onnx
+        #     multi-component   → <output_dir>/<key>/model.onnx for each key
+        pkg.save(str(output_dir))
+
+        # Generate ORT GenAI config artifacts (genai_config.json, tokenizer
+        # files, processor configs) when runtime is set to ort-genai.
+        if config.runtime == self.MobiusRuntime.ORT_GENAI:
+            self._write_genai_config(pkg, str(output_dir), model_id, ep_str)
+
+        package_keys = list(pkg.keys())
+        logger.info("MobiusModelBuilder: saved components %s to '%s'", package_keys, output_dir)
+
+        if len(package_keys) == 1:
+            # Single-component model (most LLMs): return a plain ONNXModelHandler.
+            onnx_path = output_dir / "model.onnx"
+            if not onnx_path.exists():
+                raise RuntimeError(
+                    f"MobiusModelBuilder: expected output file not found: {onnx_path}. "
+                    "mobius.build() may have failed silently or saved to an unexpected path."
+                )
+            additional_files = sorted(
+                {str(fp) for fp in output_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"}
+            )
+            return ONNXModelHandler(
+                model_path=str(output_dir),
+                onnx_file_name="model.onnx",
+                model_attributes={
+                    "mobius_package_keys": package_keys,
+                    "additional_files": additional_files,
+                    **(model.model_attributes or {}),
+                },
+            )
+
+        # Multi-component model (VLMs, encoder-decoders, diffusion pipelines):
+        # mobius saves each component to <output_dir>/<key>/model.onnx.
+        components = []
+        for key in package_keys:
+            component_dir = output_dir / key
+            onnx_path = component_dir / "model.onnx"
+            if not onnx_path.exists():
+                raise RuntimeError(
+                    f"MobiusModelBuilder: expected output file not found: {onnx_path}. "
+                    f"mobius.build() may have failed silently for component '{key}'."
+ ) + additional_files = sorted( + {str(fp) for fp in component_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"} + ) + components.append( + ONNXModelHandler( + model_path=str(component_dir), + onnx_file_name="model.onnx", + model_attributes={ + "mobius_component": key, + "additional_files": additional_files, + **(model.model_attributes or {}), + }, + ) + ) + + return CompositeModelHandler( + model_components=components, + model_component_names=package_keys, + model_path=str(output_dir), + model_attributes={ + "mobius_package_keys": package_keys, + **(model.model_attributes or {}), + }, + ) + + @staticmethod + def _write_genai_config(pkg, output_dir: str, model_id: str, ep: str) -> None: + """Generate ORT GenAI config artifacts alongside the ONNX models.""" + from mobius.integrations.ort_genai import write_ort_genai_config + + genai_artifacts = write_ort_genai_config( + pkg, output_dir, hf_model_id=model_id, ep=ep, + ) + logger.info( + "MobiusModelBuilder: wrote ORT GenAI config: %s", + list(genai_artifacts.keys()), + ) diff --git a/test/passes/onnx/test_mobius_model_builder.py b/test/passes/onnx/test_mobius_model_builder.py new file mode 100644 index 000000000..085642e83 --- /dev/null +++ b/test/passes/onnx/test_mobius_model_builder.py @@ -0,0 +1,364 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +"""Unit tests for the MobiusModelBuilder Olive pass.""" + +from __future__ import annotations + +import sys +import types +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from olive.hardware.accelerator import AcceleratorSpec, Device +from olive.hardware.constants import ExecutionProvider +from olive.model import HfModelHandler, ONNXModelHandler +from olive.model.handler.composite import CompositeModelHandler +from olive.passes.olive_pass import create_pass_from_dict +from olive.passes.onnx.mobius_model_builder import MobiusModelBuilder + + +@pytest.fixture(autouse=True, scope="module") +def _stub_mobius_module(): + """Stub the optional mobius package into sys.modules for the duration of this module. + + patch("mobius.build") resolves the module via sys.modules, so it works correctly + even in environments where mobius-ai is not installed (e.g. Olive CI). + The stub is only injected when mobius is absent; if the real package is installed, + this fixture is a no-op. + """ + if "mobius" in sys.modules: + yield + return + fake = types.ModuleType("mobius") + fake.build = None # overridden per-test by patch("mobius.build") + sys.modules["mobius"] = fake + yield + sys.modules.pop("mobius", None) + + +@pytest.fixture(autouse=True) +def mock_hf_config(): + """Prevent HfModelHandler.__init__ from making network calls to resolve model configs.""" + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + with ( + patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), + patch.object(HfModelHandler, "get_load_kwargs", return_value={}), + ): + yield + + +def _make_hf_model(model_path: str, load_kwargs: dict | None = None) -> HfModelHandler: + model = HfModelHandler(model_path=model_path) + if load_kwargs: + # Patch get_load_kwargs on the instance to return the given kwargs. 
+        model.get_load_kwargs = lambda: load_kwargs
+    return model
+
+
+def _make_pass(ep: str = ExecutionProvider.CPUExecutionProvider) -> MobiusModelBuilder:
+    accelerator_spec = AcceleratorSpec(accelerator_type=Device.CPU, execution_provider=ep)
+    return create_pass_from_dict(
+        MobiusModelBuilder,
+        {"precision": "fp32"},
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+
+def _fake_pkg(keys: list[str], _output_dir: Path) -> MagicMock:
+    """Create a fake ModelPackage that writes dummy .onnx files when .save() is called."""
+
+    def _save(directory: str, **_kwargs):
+        out = Path(directory)
+        if len(keys) == 1:
+            # Single-component: saved as <output_dir>/model.onnx
+            (out / "model.onnx").write_text("dummy")
+        else:
+            # Multi-component: saved as <output_dir>/<key>/model.onnx
+            for k in keys:
+                (out / k).mkdir(parents=True, exist_ok=True)
+                (out / k / "model.onnx").write_text("dummy")
+
+    pkg = MagicMock()
+    pkg.keys.return_value = keys
+    pkg.__iter__ = MagicMock(return_value=iter(keys))
+    pkg.items.return_value = [(k, MagicMock()) for k in keys]
+    pkg.save.side_effect = _save
+    return pkg
+
+
+def _patch_build(pkg: MagicMock):
+    # Patch mobius.build directly — lazy import inside _run_for_config means
+    # patching the module attribute, not the local binding.
+    # Also patch _write_genai_config since the default runtime is ort-genai.
+ return _combine_patches( + patch("mobius.build", return_value=pkg), + patch.object(MobiusModelBuilder, "_write_genai_config"), + ) + + +class _combine_patches: + """Combine multiple patch context managers into one.""" + + def __init__(self, *patches): + self._patches = patches + self._mocks = [] + + def __enter__(self): + self._mocks = [p.__enter__() for p in self._patches] + return self._mocks[0] # return the build mock + + def __exit__(self, *args): + for p in reversed(self._patches): + p.__exit__(*args) + + +# --------------------------------------------------------------------------- +# Configuration tests +# --------------------------------------------------------------------------- + + +def test_default_config_params(): + """MobiusModelBuilder must declare precision and execution_provider, and must not declare trust_remote_code.""" + accelerator_spec = AcceleratorSpec( + accelerator_type=Device.CPU, execution_provider=ExecutionProvider.CPUExecutionProvider + ) + config = MobiusModelBuilder._default_config(accelerator_spec) # pylint: disable=protected-access + assert "precision" in config + assert "execution_provider" in config + assert "trust_remote_code" not in config + + +def test_is_not_accelerator_agnostic(): + """Pass must be EP-specific because it chooses fused ops based on the EP.""" + accelerator_spec = AcceleratorSpec( + accelerator_type=Device.CPU, execution_provider=ExecutionProvider.CPUExecutionProvider + ) + assert MobiusModelBuilder.is_accelerator_agnostic(accelerator_spec) is False + + +def test_ep_map_covers_common_providers(): + assert ExecutionProvider.CPUExecutionProvider in MobiusModelBuilder.EP_MAP + assert ExecutionProvider.CUDAExecutionProvider in MobiusModelBuilder.EP_MAP + assert ExecutionProvider.DmlExecutionProvider in MobiusModelBuilder.EP_MAP + assert ExecutionProvider.WebGpuExecutionProvider in MobiusModelBuilder.EP_MAP + assert MobiusModelBuilder.EP_MAP[ExecutionProvider.CPUExecutionProvider] == "cpu" + assert 
MobiusModelBuilder.EP_MAP[ExecutionProvider.CUDAExecutionProvider] == "cuda" + assert MobiusModelBuilder.EP_MAP[ExecutionProvider.DmlExecutionProvider] == "dml" + assert MobiusModelBuilder.EP_MAP[ExecutionProvider.WebGpuExecutionProvider] == "webgpu" + + +# --------------------------------------------------------------------------- +# Single-component model tests +# --------------------------------------------------------------------------- + + +def test_single_component_returns_onnx_handler(tmp_path): + """Single-component package (e.g. LLM) → ONNXModelHandler.""" + out = tmp_path / "out" + pkg = _fake_pkg(["model"], out) + + with _patch_build(pkg) as mock_build: + p = _make_pass() + result = p.run(_make_hf_model("meta-llama/Llama-3-8B"), out) + + assert isinstance(result, ONNXModelHandler) + assert not isinstance(result, CompositeModelHandler) + assert Path(result.model_path).exists() + mock_build.assert_called_once() + call_kwargs = mock_build.call_args.kwargs + assert call_kwargs["execution_provider"] == "cpu" + assert call_kwargs["dtype"] == "f32" + + +def test_model_onnx_exists_after_run(tmp_path): + """The saved model.onnx file must exist on disk.""" + out = tmp_path / "out" + pkg = _fake_pkg(["model"], out) + + with _patch_build(pkg): + p = _make_pass() + result = p.run(_make_hf_model("org/model"), out) + + # ONNXModelHandler.model_path already points to the .onnx file + assert Path(result.model_path).exists() + + +# --------------------------------------------------------------------------- +# Multi-component model tests +# --------------------------------------------------------------------------- + + +def test_multi_component_returns_composite_handler(tmp_path): + """Multi-component package (VLM) → CompositeModelHandler with one component per key.""" + out = tmp_path / "out" + keys = ["model", "vision", "embedding"] + pkg = _fake_pkg(keys, out) + + with _patch_build(pkg): + p = _make_pass() + result = p.run(_make_hf_model("microsoft/phi-4-vision"), out) 
+ + assert isinstance(result, CompositeModelHandler) + assert result.model_component_names == keys + components = list(result.model_components) + assert len(components) == 3 + for comp in components: + assert isinstance(comp, ONNXModelHandler) + + +# --------------------------------------------------------------------------- +# EP auto-detection tests +# --------------------------------------------------------------------------- + + +def test_ep_auto_detected_from_accelerator(tmp_path): + """When execution_provider config is None, use the Olive accelerator EP.""" + out = tmp_path / "out" + pkg = _fake_pkg(["model"], out) + + accelerator_spec = AcceleratorSpec( + accelerator_type=Device.GPU, execution_provider=ExecutionProvider.CUDAExecutionProvider + ) + p = create_pass_from_dict( + MobiusModelBuilder, + {"precision": "fp16"}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with _patch_build(pkg) as mock_build: + p.run(_make_hf_model("org/model"), out) + + call_kwargs = mock_build.call_args.kwargs + assert call_kwargs["execution_provider"] == "cuda" + assert call_kwargs["dtype"] == "f16" + + +def test_ep_override_from_config(tmp_path): + """Explicit execution_provider in config overrides the accelerator EP.""" + out = tmp_path / "out" + pkg = _fake_pkg(["model"], out) + + accelerator_spec = AcceleratorSpec( + accelerator_type=Device.GPU, execution_provider=ExecutionProvider.CUDAExecutionProvider + ) + p = create_pass_from_dict( + MobiusModelBuilder, + {"precision": "fp32", "execution_provider": "webgpu"}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with _patch_build(pkg) as mock_build: + p.run(_make_hf_model("org/model"), out) + + assert mock_build.call_args.kwargs["execution_provider"] == "webgpu" + + +# --------------------------------------------------------------------------- +# Input validation tests +# --------------------------------------------------------------------------- + + +def 
test_non_hf_model_raises(tmp_path): + """Passing a non-HfModelHandler must raise ValueError.""" + out = tmp_path / "out" + out.mkdir() + (out / "model.onnx").write_bytes(b"") + + onnx_model = ONNXModelHandler(model_path=str(out), onnx_file_name="model.onnx") + p = _make_pass() + with pytest.raises(ValueError, match="HfModelHandler"): + p.run(onnx_model, tmp_path / "result") + + +def test_import_error_raised_when_mobius_missing(tmp_path): + """ImportError must surface clearly when mobius is not installed.""" + p = _make_pass() + with patch.dict(sys.modules, {"mobius": None}), pytest.raises(ImportError, match="mobius"): + p.run(_make_hf_model("org/model"), tmp_path / "out") + + +# --------------------------------------------------------------------------- +# Output validation tests +# --------------------------------------------------------------------------- + + +def test_missing_output_file_raises_runtime_error(tmp_path): + """RuntimeError must be raised if pkg.save() does not produce model.onnx.""" + out = tmp_path / "out" + # _fake_pkg normally writes the file; use a pkg whose save() does nothing. 
+ pkg = MagicMock() + pkg.keys.return_value = ["model"] + pkg.__iter__ = MagicMock(return_value=iter(["model"])) + pkg.save.return_value = None # save() succeeds but writes nothing + + with _patch_build(pkg), pytest.raises(RuntimeError, match="expected output file not found"): + _make_pass().run(_make_hf_model("org/model"), out) + + +def test_missing_component_file_raises_runtime_error(tmp_path): + """RuntimeError for multi-component if any component's model.onnx is missing.""" + out = tmp_path / "out" + keys = ["model", "vision", "embedding"] + pkg = MagicMock() + pkg.keys.return_value = keys + pkg.__iter__ = MagicMock(return_value=iter(keys)) + + # save() only creates 'model' component, skips 'vision' and 'embedding' + def _partial_save(directory: str, **_kwargs): + d = Path(directory) / "model" + d.mkdir(parents=True) + (d / "model.onnx").write_text("dummy") + + pkg.save.side_effect = _partial_save + + with _patch_build(pkg), pytest.raises(RuntimeError, match="expected output file not found"): + _make_pass().run(_make_hf_model("org/vlm"), out) + + +# --------------------------------------------------------------------------- +# Security / trust_remote_code tests +# --------------------------------------------------------------------------- + + +def test_trust_remote_code_warning_logged(tmp_path): + """trust_remote_code=True on the model must emit a warning about trusted model sources.""" + out = tmp_path / "out" + pkg = _fake_pkg(["model"], out) + p = create_pass_from_dict( + MobiusModelBuilder, + {"precision": "fp32"}, + disable_search=True, + accelerator_spec=AcceleratorSpec( + accelerator_type=Device.CPU, execution_provider=ExecutionProvider.CPUExecutionProvider + ), + ) + with ( + _patch_build(pkg), + patch("olive.passes.onnx.mobius_model_builder.logger") as mock_logger, + ): + p.run(_make_hf_model("org/model", load_kwargs={"trust_remote_code": True}), out) + + warning_messages = [call.args[0] for call in mock_logger.warning.call_args_list] + assert 
any("trust_remote_code" in msg for msg in warning_messages) + + +def test_no_warning_when_trust_remote_code_false(tmp_path): + """No trust_remote_code warning must be emitted when the model does not set trust_remote_code.""" + out = tmp_path / "out" + pkg = _fake_pkg(["model"], out) + with ( + _patch_build(pkg), + patch("olive.passes.onnx.mobius_model_builder.logger") as mock_logger, + ): + _make_pass().run(_make_hf_model("org/model"), out) + + warning_messages = [call.args[0] for call in mock_logger.warning.call_args_list] + assert not any("trust_remote_code" in msg for msg in warning_messages)