Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ classifiers = [
dependencies = [
# MLX - Required for Apple Silicon GPU acceleration
"mlx>=0.29.2; platform_system == 'Darwin' and platform_machine == 'arm64'",
"mlx-lm>=0.28.4; platform_system == 'Darwin' and platform_machine == 'arm64'",
"mlx-lm>=0.30.8; platform_system == 'Darwin' and platform_machine == 'arm64'",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change mixes two conflicting strategies: it raises the minimum mlx-lm version to one that should already include native qwen3_5* support, and also adds a runtime alias shim for older versions.

"mlx-vlm>=0.3.0; platform_system == 'Darwin' and platform_machine == 'arm64'", # Vision-language model support
# Model loading and weights
"transformers>=4.40.0",
Expand Down
44 changes: 44 additions & 0 deletions vllm_metal/v1/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
"""

import hashlib
import importlib
import math
import os
import sys
import time
from array import array
from dataclasses import dataclass
Expand Down Expand Up @@ -61,6 +63,47 @@
# Process-wide cache of loaded models so repeated load_model() calls for the
# same model name reuse the already-loaded weights and tokenizer.
_model_cache: dict[str, tuple[Any, Any]] = {}  # model_name -> (model, tokenizer)
# Guards _model_cache against concurrent loads from multiple threads.
_model_cache_lock = Lock()


def _ensure_mlx_lm_model_aliases() -> None:
    """Register temporary model-type aliases for mlx_lm compatibility.

    Some newer model snapshots report a ``model_type`` that is not yet
    available as a module in the currently bundled ``mlx_lm`` release.
    In that case we register a best-effort import alias so ``mlx_lm`` can
    still resolve the architecture.

    Raises:
        ModuleNotFoundError: re-raised when an import fails for a reason
            other than the attempted module (or a parent package) being
            absent — e.g. a broken import *inside* an existing
            ``mlx_lm.models`` module. Such failures must not be masked.
    """
    alias_map = {
        # Qwen3.5 MoE currently reuses the same implementation shape as qwen3_moe.
        "qwen3_5_moe": "qwen3_moe",
    }

    def _is_absent(exc: ModuleNotFoundError, module_name: str) -> bool:
        """Return True iff *exc* means *module_name* itself is not installed.

        ``exc.name`` is the module that could not be found. Only suppress the
        error when that is exactly the module we attempted, or one of its
        parent packages (e.g. ``mlx_lm`` missing entirely). Any other name
        means a real import problem inside an existing module.
        """
        return exc.name is not None and (
            exc.name == module_name or module_name.startswith(exc.name + ".")
        )

    for alias, target in alias_map.items():
        alias_module = f"mlx_lm.models.{alias}"
        target_module = f"mlx_lm.models.{target}"

        if alias_module in sys.modules:
            continue

        # If mlx_lm already ships this architecture, do not override it.
        try:
            importlib.import_module(alias_module)
            continue
        except ModuleNotFoundError as exc:
            if not _is_absent(exc, alias_module):
                raise

        try:
            resolved = importlib.import_module(target_module)
        except ModuleNotFoundError as exc:
            if not _is_absent(exc, target_module):
                raise
            logger.debug(
                "Skipping mlx_lm alias registration for %s (target %s missing)",
                alias,
                target,
            )
            continue

        sys.modules[alias_module] = resolved
        logger.info("Registered mlx_lm model alias: %s -> %s", alias, target)

# Try to import Rust extension for high-performance token state management
try:
from vllm_metal._rs import RequestStateManager as RustRequestStateManager
Expand Down Expand Up @@ -645,6 +688,7 @@ def load_model(self) -> None:
self._is_vlm = True
else:
# Load model and tokenizer using mlx_lm for text-only models
_ensure_mlx_lm_model_aliases()
self.model, self.tokenizer = mlx_lm_load(
model_name,
tokenizer_config={
Expand Down