From 990f5226a99bbf774e404695c9cf56b54b6dae02 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 12 Dec 2025 18:44:27 +0100 Subject: [PATCH 01/64] update to transformers v5 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docker/Dockerfile | 4 ++-- requirements/nightly_torch_test.txt | 4 ++-- requirements/test.in | 4 ++-- requirements/test.txt | 24 ++++++++++++++++-------- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0d50d97e54c6..64b7a8261c66 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -341,7 +341,7 @@ COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ + uv pip install --pre --python /opt/venv/bin/python3 -r requirements/dev.txt \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') #################### DEV IMAGE #################### @@ -533,7 +533,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ - uv pip install --system -r requirements/dev.txt \ + uv pip install --pre --system -r requirements/dev.txt \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.'); \ fi diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index 7b2c665448a3..01e9bbc1f67a 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -29,8 +29,8 @@ opencv-python-headless >= 4.11.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test mteb>=1.38.11, <2 # required for mteb test -transformers==4.57.3 -tokenizers==0.22.0 +transformers==5.0.0rc1 +tokenizers==0.22.1 schemathesis>=3.39.15 # Required for openai schema test. # quantization bitsandbytes>=0.46.1 diff --git a/requirements/test.in b/requirements/test.in index dfae5b75821f..8b49865c6c43 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -37,8 +37,8 @@ datamodel_code_generator # required for minicpm3 test # TODO: Use lm-eval[api]==0.4.10 once released lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test -transformers==4.57.3 -tokenizers==0.22.0 +transformers==5.0.0rc1 +tokenizers==0.22.1 schemathesis>=3.39.15 # Required for openai schema test. 
# quantization bitsandbytes==0.46.1 diff --git a/requirements/test.txt b/requirements/test.txt index 571194e05c1b..3e5ee09944ac 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -122,6 +122,7 @@ click==8.1.7 # ray # schemathesis # typer + # typer-slim # uvicorn click-plugins==1.1.1.2 # via @@ -306,7 +307,7 @@ h5py==3.13.0 # via terratorch harfile==0.3.0 # via schemathesis -hf-xet==1.1.7 +hf-xet==1.2.0 # via huggingface-hub hiredis==3.0.0 # via tensorizer @@ -317,8 +318,9 @@ httpcore==1.0.6 httpx==0.27.2 # via # -r requirements/test.in + # huggingface-hub # schemathesis -huggingface-hub==0.34.3 +huggingface-hub==1.2.3 # via # accelerate # datasets @@ -711,7 +713,6 @@ pillow==10.4.0 # mistral-common # scikit-image # segmentation-models-pytorch - # sentence-transformers # torchgeo # torchvision platformdirs==4.3.6 @@ -928,7 +929,6 @@ requests==2.32.3 # google-api-core # google-cloud-storage # gpt-oss - # huggingface-hub # lightly # lm-eval # mistral-common @@ -1010,7 +1010,7 @@ segmentation-models-pytorch==0.4.0 # via # terratorch # torchgeo -sentence-transformers==3.2.1 +sentence-transformers==5.2.0 # via # -r requirements/test.in # mteb @@ -1024,7 +1024,9 @@ shapely==2.1.1 # geopandas # torchgeo shellingham==1.5.4 - # via typer + # via + # huggingface-hub + # typer six==1.16.0 # via # junit-xml @@ -1115,7 +1117,7 @@ timm==1.0.17 # segmentation-models-pytorch # terratorch # torchgeo -tokenizers==0.22.0 +tokenizers==0.22.1 # via # -r requirements/test.in # transformers @@ -1196,7 +1198,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.57.3 +transformers==5.0.0rc1 # via # -r requirements/test.in # genai-perf @@ -1219,6 +1221,10 @@ typepy==1.3.2 # tabledata typer==0.15.2 # via fastsafetensors +typer-slim==0.20.0 + # via + # huggingface-hub + # transformers types-python-dateutil==2.9.0.20241206 # via arrow typeshed-client==2.8.2 @@ -1246,10 +1252,12 @@ typing-extensions==4.15.0 # pydantic-core # pydantic-extra-types 
# pytorch-lightning + # sentence-transformers # sqlalchemy # torch # torchgeo # typer + # typer-slim # typeshed-client # typing-inspection typing-inspection==0.4.2 From 933bef9e83ead84f3467aeaee9c313abb43afbe8 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 27 Jan 2026 09:30:47 +0100 Subject: [PATCH 02/64] Allow Transformer v5 in `common.txt` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/common.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/common.txt b/requirements/common.txt index 2cf54e0fd014..c0996f043b22 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -7,7 +7,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.56.0, < 5 +transformers >= 4.56.0 tokenizers >= 0.21.1 # Required for fast incremental detokenization. protobuf >= 6.30.0 # Required by LlamaTokenizer, gRPC. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. 
From 769d43658599b878c6f30cd3e579f26292819979 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 27 Jan 2026 18:32:23 +0100 Subject: [PATCH 03/64] Update PEFT pin to avoid bad import Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.in | 2 +- requirements/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/test.in b/requirements/test.in index 6d5caac7a7d6..7b83fa46bb2b 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -17,7 +17,7 @@ httpx librosa # required for audio tests vector_quantize_pytorch # required for minicpmo_26 test vocos # required for minicpmo_26 test -peft>=0.15.0 # required for phi-4-mm test +peft>=0.18.1 # required for phi-4-mm test pqdm ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests sentence-transformers>=5.2.0 # required for embedding tests diff --git a/requirements/test.txt b/requirements/test.txt index 9749813ed676..be2ae8f556f8 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -724,7 +724,7 @@ pathvalidate==3.2.1 # via pytablewriter patsy==1.0.1 # via statsmodels -peft==0.16.0 +peft==0.18.1 # via # -r requirements/test.in # lm-eval From 214c373127ec5817de05822a1a151ebd29e5c778 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 28 Jan 2026 00:31:33 +0100 Subject: [PATCH 04/64] Update lm-eval Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/nightly_torch_test.txt | 2 +- requirements/rocm-test.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 24 ++++++------------------ 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index e369e8904b0c..c884d5e7292e 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -27,7 +27,7 @@ 
mistral_common[image,audio] >= 1.8.8 # required for voxtral test num2words # required for smolvlm test opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test -lm-eval[api]>=0.4.9.2 # required for model evaluation test +lm-eval[api]>=0.4.10 # required for model evaluation test mteb>=1.38.11, <2 # required for mteb test transformers==5.0.0 tokenizers==0.22.2 diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 540d97cc4bb4..15b011c93b11 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -58,7 +58,7 @@ schemathesis==3.39.15 # OpenAI schema test # Evaluation and benchmarking -lm-eval[api]>=0.4.9.2 +lm-eval[api]>=0.4.10 jiwer==4.0.0 # Required for multiprocessed tests that use spawn method, Datasets and Evaluate Test diff --git a/requirements/test.in b/requirements/test.in index 7b83fa46bb2b..d5ad17cfc3b4 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -35,7 +35,7 @@ num2words # required for smolvlm test open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test -lm-eval[api]>=0.4.9.2 # required for model evaluation test +lm-eval[api]>=0.4.10 # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test transformers==5.0.0 tokenizers==0.22.2 diff --git a/requirements/test.txt b/requirements/test.txt index be2ae8f556f8..5b0e6c50a0b6 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -3,9 +3,7 @@ absl-py==2.1.0 # via rouge-score accelerate==1.0.1 - # via - # lm-eval - # peft + # via peft aenum==3.1.16 # via lightly affine==2.4.0 @@ -145,7 +143,6 @@ colorama==0.4.6 # perceptron # sacrebleu # schemathesis - # tqdm-multiprocess colorful==0.5.6 # via ray colorlog==6.10.1 @@ -396,6 +393,7 @@ jinja2==3.1.6 # datamodel-code-generator # flask # genai-perf + # lm-eval # mlflow # torch 
jiwer==3.0.5 @@ -460,7 +458,7 @@ lightning-utilities==0.14.3 # torchmetrics llvmlite==0.44.0 # via numba -lm-eval==0.4.9.2 +lm-eval==0.4.10 # via -r requirements/test.in lxml==5.3.0 # via @@ -533,8 +531,6 @@ numba==0.61.2 # via # -r requirements/test.in # librosa -numexpr==2.10.1 - # via lm-eval numpy==2.2.6 # via # -r requirements/test.in @@ -558,12 +554,12 @@ numpy==2.2.6 # librosa # lightly # lightly-utils + # lm-eval # matplotlib # mistral-common # mlflow # mteb # numba - # numexpr # opencv-python-headless # optuna # pandas @@ -725,9 +721,7 @@ pathvalidate==3.2.1 patsy==1.0.1 # via statsmodels peft==0.18.1 - # via - # -r requirements/test.in - # lm-eval + # via -r requirements/test.in perceptron==0.1.4 # via -r requirements/test.in perf-analyzer==0.1.0 @@ -805,8 +799,6 @@ pyasn1==0.6.1 # rsa pyasn1-modules==0.4.2 # via google-auth -pybind11==2.13.6 - # via lm-eval pycocotools==2.0.8 # via terratorch pycountry==24.6.1 @@ -1169,7 +1161,6 @@ torch==2.9.1+cu129 # kornia # lightly # lightning - # lm-eval # mteb # open-clip-torch # peft @@ -1228,15 +1219,11 @@ tqdm==4.66.6 # pytorch-lightning # segmentation-models-pytorch # sentence-transformers - # tqdm-multiprocess # transformers -tqdm-multiprocess==0.0.11 - # via lm-eval transformers==5.0.0 # via # -r requirements/test.in # genai-perf - # lm-eval # peft # sentence-transformers # transformers-stream-generator @@ -1276,6 +1263,7 @@ typing-extensions==4.15.0 # librosa # lightning # lightning-utilities + # lm-eval # mistral-common # mlflow-skinny # mteb From ec4ffa9db82df3318df4fd8a2bc4e057274a3366 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 28 Jan 2026 01:28:14 +0100 Subject: [PATCH 05/64] `HF_HUB_ENABLE_HF_TRANSFER` -> `HF_XET_HIGH_PERFORMANCE` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docker/Dockerfile | 6 ++--- docker/Dockerfile.nightly_torch | 4 +--- docker/Dockerfile.rocm | 4 +--- docker/Dockerfile.xpu | 2 +- 
.../installation/gpu.rocm.inc.md | 2 +- tests/model_executor/test_weight_utils.py | 22 +------------------ .../model_loader/weight_utils.py | 16 ++------------ 7 files changed, 9 insertions(+), 47 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 5f9649144a0f..743abb829245 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -627,7 +627,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ else \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ fi; \ - uv pip install --system accelerate hf_transfer modelscope \ + uv pip install --system accelerate modelscope \ "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}" # ============================================================ @@ -752,9 +752,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -e tests/vllm_test_utils # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER 1 +ENV HF_XET_HIGH_PERFORMANCE 1 # Copy in the v1 package for testing (it isn't distributed yet) COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch index 7731c0477f5f..a0546dde117c 100644 --- a/docker/Dockerfile.nightly_torch +++ b/docker/Dockerfile.nightly_torch @@ -273,9 +273,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -e tests/vllm_test_utils # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER 1 +ENV HF_XET_HIGH_PERFORMANCE 1 RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/nightly_torch_test.txt diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index dc4c8deafd3e..ffd0b8beb93f 100644 --- a/docker/Dockerfile.rocm +++ 
b/docker/Dockerfile.rocm @@ -317,9 +317,7 @@ RUN cd /vllm-workspace \ && python3 -m pip install pytest-shard # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER=1 +ENV HF_XET_HIGH_PERFORMANCE=1 # install audio decode package `torchcodec` from source (required due to # ROCm and torch version mismatch) for tests with datasets package diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu index f63ce2c5037f..416b1894c4d1 100644 --- a/docker/Dockerfile.xpu +++ b/docker/Dockerfile.xpu @@ -76,7 +76,7 @@ FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/pip \ - pip install accelerate hf_transfer pytest pytest_asyncio lm_eval[api] modelscope + pip install accelerate pytest pytest_asyncio lm_eval[api] modelscope # install development dependencies (for testing) RUN python3 -m pip install -e tests/vllm_test_utils diff --git a/docs/getting_started/installation/gpu.rocm.inc.md b/docs/getting_started/installation/gpu.rocm.inc.md index 65fb7ba5ffef..06e1cacd7ad0 100644 --- a/docs/getting_started/installation/gpu.rocm.inc.md +++ b/docs/getting_started/installation/gpu.rocm.inc.md @@ -149,7 +149,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.14.1/rocm700 # Install dependencies pip install --upgrade numba \ scipy \ - huggingface-hub[cli,hf_transfer] \ + huggingface-hub[cli] \ setuptools_scm pip install -r requirements/rocm.txt diff --git a/tests/model_executor/test_weight_utils.py b/tests/model_executor/test_weight_utils.py index 6dc120ddbac9..dd07f2d73fcf 100644 --- a/tests/model_executor/test_weight_utils.py +++ b/tests/model_executor/test_weight_utils.py @@ -1,32 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os import tempfile import huggingface_hub.constants import pytest from 
huggingface_hub.utils import LocalEntryNotFoundError -from vllm.model_executor.model_loader.weight_utils import ( - download_weights_from_hf, - enable_hf_transfer, -) - - -def test_hf_transfer_auto_activation(): - if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ: - # in case it is already set, we can't test the auto activation - pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation") - enable_hf_transfer() - try: - # enable hf hub transfer if available - import hf_transfer # type: ignore # noqa - - HF_TRANSFER_ACTIVE = True - except ImportError: - HF_TRANSFER_ACTIVE = False - assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE +from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf def test_download_weights_from_hf(): @@ -62,5 +43,4 @@ def test_download_weights_from_hf(): if __name__ == "__main__": - test_hf_transfer_auto_activation() test_download_weights_from_hf() diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 7ea3bb2ebd19..0cbf2891a297 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -65,20 +65,8 @@ # system reboots, so users will not complain about annoying lock files temp_dir = tempfile.gettempdir() - -def enable_hf_transfer(): - """automatically activates hf_transfer""" - if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ: - try: - # enable hf hub transfer if available - import hf_transfer # type: ignore # noqa - - huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True - except ImportError: - pass - - -enable_hf_transfer() +# Automatically activates `hf-xet` high performance mode +huggingface_hub.constants.HF_XET_HIGH_PERFORMANCE = True class DisabledTqdm(tqdm): From 94e14293775f8fd4c69e4cc706fa7507af581ab5 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:53:17 +0100 Subject: 
[PATCH 06/64] Skip custom model which uses old imports Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index fd6e4ecb1763..317755e39cce 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -866,7 +866,12 @@ def check_available_online( "nano_vl_dummy", is_available_online=False, trust_remote_code=True ), "OpenCUAForConditionalGeneration": _HfExamplesInfo( - "xlangai/OpenCUA-7B", trust_remote_code=True + "xlangai/OpenCUA-7B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "HF model uses remote code that is not compatible with latest Transformers" # noqa: E501 + }, ), "Ovis": _HfExamplesInfo( "AIDC-AI/Ovis2-1B", From fbb843a42b2a5d88b99118d8286a260af75b0553 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:38:24 +0100 Subject: [PATCH 07/64] Update some more lm-eval pins Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh | 2 +- .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh | 2 +- .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh | 2 +- .buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh | 2 +- .buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh | 2 +- .buildkite/scripts/hardware_ci/run-tpu-v1-test.sh | 2 +- docs/features/quantization/fp8.md | 2 +- docs/features/quantization/int4.md | 2 +- docs/features/quantization/int8.md | 2 +- docs/features/quantization/quark.md | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh index 0745da8dc418..dc8eb9f62fc7 100755 --- 
a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh @@ -2,7 +2,7 @@ # We can use this script to compute baseline accuracy on chartqa for vllm. # # Make sure you have lm-eval-harness installed: -# pip install "lm-eval[api]>=0.4.9.2" +# pip install "lm-eval[api]>=0.4.10" usage() { echo`` diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh index 5c17a06245bc..bc39f575d89a 100755 --- a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh @@ -2,7 +2,7 @@ # We can use this script to compute baseline accuracy on GSM for transformers. # # Make sure you have lm-eval-harness installed: -# pip install "lm-eval[api]>=0.4.9.2" +# pip install "lm-eval[api]>=0.4.10" usage() { echo`` diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh index 1b617ff17c41..3a91aca77df6 100644 --- a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh @@ -3,7 +3,7 @@ # We use this for fp8, which HF does not support. # # Make sure you have lm-eval-harness installed: -# pip install "lm-eval[api]>=0.4.9.2" +# pip install "lm-eval[api]>=0.4.10" usage() { echo`` diff --git a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh index 12336d7f85bc..7ccb35bae1b7 100644 --- a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh @@ -3,7 +3,7 @@ # We use this for fp8, which HF does not support. 
# # Make sure you have lm-eval-harness installed: -# pip install "lm-eval[api]>=0.4.9.2" +# pip install "lm-eval[api]>=0.4.10" usage() { echo`` diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh index 6959f81eab37..9235e42fbac0 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh @@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR" echo "--- Installing Python dependencies ---" python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \ && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \ - && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.9.2" \ + && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.10" \ && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0 echo "--- Python dependencies installed ---" diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh index eafc82b98439..9e28325d9b8f 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh @@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR" echo "--- Installing Python dependencies ---" python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \ && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \ - && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.9.2" \ + && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.10" \ && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0 echo "--- Python dependencies installed ---" diff --git a/docs/features/quantization/fp8.md b/docs/features/quantization/fp8.md index f17ef89a5cbf..e8c45af4e499 100644 --- a/docs/features/quantization/fp8.md +++ b/docs/features/quantization/fp8.md @@ -84,7 +84,7 @@ Since simple 
RTN does not require data for weight quantization and the activatio Install `vllm` and `lm-evaluation-harness` for evaluation: ```bash -pip install vllm "lm-eval[api]>=0.4.9.2" +pip install vllm "lm-eval[api]>=0.4.10" ``` Load and run the model in `vllm`: diff --git a/docs/features/quantization/int4.md b/docs/features/quantization/int4.md index 049a7ceed079..b737de10e335 100644 --- a/docs/features/quantization/int4.md +++ b/docs/features/quantization/int4.md @@ -18,7 +18,7 @@ pip install llmcompressor Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: ```bash -pip install vllm "lm-eval[api]>=0.4.9.2" +pip install vllm "lm-eval[api]>=0.4.10" ``` ## Quantization Process diff --git a/docs/features/quantization/int8.md b/docs/features/quantization/int8.md index 8af3e24c7357..7677cdf03f18 100644 --- a/docs/features/quantization/int8.md +++ b/docs/features/quantization/int8.md @@ -23,7 +23,7 @@ pip install llmcompressor Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: ```bash -pip install vllm "lm-eval[api]>=0.4.9.2" +pip install vllm "lm-eval[api]>=0.4.10" ``` ## Quantization Process diff --git a/docs/features/quantization/quark.md b/docs/features/quantization/quark.md index bbab97740ff1..05d82e468fd0 100644 --- a/docs/features/quantization/quark.md +++ b/docs/features/quantization/quark.md @@ -20,7 +20,7 @@ for more installation details. 
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: ```bash -pip install vllm "lm-eval[api]>=0.4.9.2" +pip install vllm "lm-eval[api]>=0.4.10" ``` ## Quantization Process From 352a2740c1b5de8d04ab875db0255a95c079d9b1 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:58:57 +0100 Subject: [PATCH 08/64] Fix timtout issues from `huggingface-hub` v1 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docker/Dockerfile | 3 +++ requirements/test.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9eec04ed530c..82a385c8a5c5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -754,6 +754,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # enable fast downloads from hf (for testing) ENV HF_XET_HIGH_PERFORMANCE 1 +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 + # Copy in the v1 package for testing (it isn't distributed yet) COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 diff --git a/requirements/test.txt b/requirements/test.txt index 7e5f9dedaf3b..580cdf517b66 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -331,7 +331,7 @@ httpx==0.27.2 # huggingface-hub # perceptron # schemathesis -huggingface-hub==1.3.4 +huggingface-hub==1.3.5 # via # accelerate # datasets From 7c81a9c9585d72818137d67baa864326a301888c Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:37:22 +0100 Subject: [PATCH 09/64] Add `HF_HUB_DOWNLOAD_TIMEOUT` to other test images Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docker/Dockerfile.cpu | 6 ++++++ docker/Dockerfile.nightly_torch | 3 +++ docker/Dockerfile.rocm | 3 +++ 3 files changed, 12 insertions(+) diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 98f99d0892d2..ec6746cc6813 100644 --- 
a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -201,6 +201,12 @@ ADD ./.buildkite/ ./.buildkite/ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install -e tests/vllm_test_utils +# enable fast downloads from hf (for testing) +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 + ######################### RELEASE IMAGE ######################### FROM base AS vllm-openai diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch index a0546dde117c..89749358df77 100644 --- a/docker/Dockerfile.nightly_torch +++ b/docker/Dockerfile.nightly_torch @@ -275,6 +275,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # enable fast downloads from hf (for testing) ENV HF_XET_HIGH_PERFORMANCE 1 +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 + RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/nightly_torch_test.txt diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 7ca0e93ec0e5..8b3d4bb23db1 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -319,6 +319,9 @@ RUN cd /vllm-workspace \ # enable fast downloads from hf (for testing) ENV HF_XET_HIGH_PERFORMANCE=1 +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 + # install audio decode package `torchcodec` from source (required due to # ROCm and torch version mismatch) for tests with datasets package COPY tools/install_torchcodec_rocm.sh /tmp/install_torchcodec.sh From eea0d7c4c4f29c459604d0099269862a70ed9c94 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 30 Jan 2026 14:07:07 +0100 Subject: [PATCH 10/64] Update missed ROCM pin Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/rocm-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt 
index 15b011c93b11..3572593d99ad 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -93,4 +93,4 @@ timm==1.0.17 # Required for plugins test albumentations==1.4.6 # Pin transformers version -transformers==4.57.3 +transformers==5.0.0 From 30d8b3d37522fad91f7ee67c27d0b85870f857e9 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 30 Jan 2026 14:09:14 +0100 Subject: [PATCH 11/64] Install transformers from main temporarily Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docker/Dockerfile | 2 +- docker/Dockerfile.cpu | 2 +- requirements/nightly_torch_test.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 3 +-- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 82a385c8a5c5..a0ee4bd0da23 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -474,7 +474,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \ - -r requirements/dev.txt \ + -r requirements/dev.txt --pre \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ else \ echo "Installing dev requirements..." 
\ diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index ec6746cc6813..53ae7fefc8ad 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -177,7 +177,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements/dev.txt && \ + uv pip install -r requirements/dev.txt --pre && \ pre-commit install --hook-type pre-commit --hook-type commit-msg ENTRYPOINT ["bash"] diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index 606abc4f3b93..dae378e3950a 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -29,7 +29,7 @@ opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.10 # required for model evaluation test mteb>=1.38.11, <2 # required for mteb test -transformers==5.0.0 +transformers @ git+https://github.com/huggingface/transformers.git@main tokenizers==0.22.2 schemathesis>=3.39.15 # Required for openai schema test. # quantization diff --git a/requirements/test.in b/requirements/test.in index 707155279c5d..cc6e1f770709 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -37,7 +37,7 @@ opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.10 # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test -transformers==5.0.0 +transformers @ git+https://github.com/huggingface/transformers.git@main tokenizers==0.22.2 schemathesis>=3.39.15 # Required for openai schema test. 
# quantization diff --git a/requirements/test.txt b/requirements/test.txt index 580cdf517b66..b8483cf4c584 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -227,7 +227,6 @@ filelock==3.16.1 # huggingface-hub # ray # torch - # transformers # virtualenv fiona==1.10.1 # via torchgeo @@ -1220,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers==5.0.0 +transformers @ git+https://github.com/huggingface/transformers.git@6bc84bb3f9563ae3dfb5528f6a1f084812aa146d # via # -r requirements/test.in # genai-perf From 17ad8ca4e6815a3d00522c1bdda4c08502a02130 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 30 Jan 2026 18:13:57 +0100 Subject: [PATCH 12/64] new main pin Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index b8483cf4c584..f4ed3f76b900 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1219,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@6bc84bb3f9563ae3dfb5528f6a1f084812aa146d +transformers @ git+https://github.com/huggingface/transformers.git@16eca6b5d2067975e1ecb7a3283cda6593100fae # via # -r requirements/test.in # genai-perf From 489d5d9aa7acf15b07c61f5430d70c807d0a607a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 2 Feb 2026 14:09:15 +0100 Subject: [PATCH 13/64] Add backward compatibility test as copy of nightly test Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/test_areas/models_basic.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml index 
aa6161ffa66b..ab2c25f659ad 100644 --- a/.buildkite/test_areas/models_basic.yaml +++ b/.buildkite/test_areas/models_basic.yaml @@ -64,3 +64,18 @@ steps: - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl # Whisper needs spawn method to avoid deadlock - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper + +- label: Transformers Backward Compatibility Models + working_dir: "/vllm-workspace/" + optional: true + soft_fail: true + commands: + - pip install transformers==4.57.5 + - pytest -v -s tests/models/test_initialization.py + - pytest -v -s tests/models/test_transformers.py + - pytest -v -s tests/models/multimodal/processing/ + - pytest -v -s tests/models/multimodal/test_mapping.py + - python3 examples/offline_inference/basic/chat.py + - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl + # Whisper needs spawn method to avoid deadlock + - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper From c3abbd733685941c83fce46953cc15e4e539c713 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 2 Feb 2026 14:12:24 +0100 Subject: [PATCH 14/64] Skip `MiniCPMV` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/models/registry.py b/tests/models/registry.py index 2af8780391e3..4d73c6c20a09 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -841,6 +841,13 @@ def check_available_online( "4.0": "openbmb/MiniCPM-V-4", "4.5": "openbmb/MiniCPM-V-4_5", }, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": ( + "MiniCPMVBatchFeature is incompatible with its base class in " + "Transformers v5. 
See https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/discussions/78" + ) + }, trust_remote_code=True, ), "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo( From 97bdae09a0f15a746217640d3ac67107ec1c3287 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 3 Feb 2026 09:18:30 +0100 Subject: [PATCH 15/64] bump huggingface-hub Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/rocm-test.txt | 2 +- requirements/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 9879a0c6326a..955c94fcb11a 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -95,4 +95,4 @@ albumentations==1.4.6 # Pin transformers version transformers==5.0.0 # Pin HF Hub version -huggingface-hub==1.3.5 +huggingface-hub==1.3.7 diff --git a/requirements/test.txt b/requirements/test.txt index d8c0e458af8f..9a5d93a8059d 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -330,7 +330,7 @@ httpx==0.27.2 # huggingface-hub # perceptron # schemathesis -huggingface-hub==1.3.5 +huggingface-hub==1.3.7 # via # accelerate # datasets From ede39e67c3fdb40d4a94604455fdc4ac77f7b5f2 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 3 Feb 2026 11:45:12 +0100 Subject: [PATCH 16/64] Bump accelerate version Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index 9a5d93a8059d..c7b9e662c83a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -2,7 +2,7 @@ # uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu129 --python-platform x86_64-manylinux_2_28 --python-version 3.12 absl-py==2.1.0 # via rouge-score -accelerate==1.0.1 
+accelerate==1.1.0 # via peft aenum==3.1.16 # via lightly @@ -1219,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@aefa23ad1c52de9c115f3d762fe1a1eda643275a +transformers @ git+https://github.com/huggingface/transformers.git@b6a202f868d261c7404d331cf9d8ce03aec12fe2 # via # -r requirements/test.in # genai-perf From 113b5eebfa309e958654de06d8876aa030667f1b Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 3 Feb 2026 17:17:19 +0100 Subject: [PATCH 17/64] bump transformers main pin Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index c7b9e662c83a..3597845c5680 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1219,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@b6a202f868d261c7404d331cf9d8ce03aec12fe2 +transformers @ git+https://github.com/huggingface/transformers.git@01e860ebc6b827c88e2d75e70864d1b618364653 # via # -r requirements/test.in # genai-perf From 9ee40ac9f36f2c760991d03fc1a73c41d61fe83a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 4 Feb 2026 16:20:53 +0100 Subject: [PATCH 18/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 22ba25ae6a68..6afc3b37520a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1219,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ 
git+https://github.com/huggingface/transformers.git@01e860ebc6b827c88e2d75e70864d1b618364653 +transformers @ git+https://github.com/huggingface/transformers.git@8dce31003b16946d0e2ee035b94a5e73e7dee7cd # via # -r requirements/test.in # genai-perf From 84447bdd837c2d7b0eaa0e6790bcfb3cbe7a2f9f Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 6 Feb 2026 09:12:18 +0100 Subject: [PATCH 19/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 6afc3b37520a..1644f16ee73b 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1219,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@8dce31003b16946d0e2ee035b94a5e73e7dee7cd +transformers @ git+https://github.com/huggingface/transformers.git@ecd0536d5fec7904db4f35f67ac95227e440282e # via # -r requirements/test.in # genai-perf From ccc8b3e5e42422be08abf71a43e25c8a8defd598 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 6 Feb 2026 09:17:01 +0100 Subject: [PATCH 20/64] Skip experimental Transformers backend features, fix later Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/test_transformers.py | 2 +- tests/v1/e2e/test_spec_decode.py | 2 +- vllm/model_executor/models/transformers/moe.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 15ebb5f4a38f..f21c426bacf5 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -78,7 +78,7 @@ def test_models( from packaging.version import Version installed = Version(transformers.__version__) - required = Version("5.0.0") + required 
= Version("5.0.1.dev0") if model == "allenai/OLMoE-1B-7B-0924" and installed < required: pytest.skip( "MoE models with the Transformers modeling backend require " diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index a141e9da08a1..3ccd03dd98ff 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -452,7 +452,7 @@ def test_eagle_correctness( from packaging.version import Version installed = Version(transformers.__version__) - required = Version("5.0.0") + required = Version("5.0.1.dev0") if installed < required: pytest.skip( "Eagle3 with the Transformers modeling backend requires " diff --git a/vllm/model_executor/models/transformers/moe.py b/vllm/model_executor/models/transformers/moe.py index c636da211c2c..22b1896ef177 100644 --- a/vllm/model_executor/models/transformers/moe.py +++ b/vllm/model_executor/models/transformers/moe.py @@ -118,7 +118,7 @@ def transformers_moe_forward_fake( class MoEMixin(MixtureOfExperts): def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""): - self.check_version("5.0.0", "MoE models support") + self.check_version("5.0.1.dev0", "MoE models support") # Skip MixtureOfExperts.__init__ and call the next class in MRO super(MixtureOfExperts, self).__init__(vllm_config=vllm_config, prefix=prefix) From ee4c25cc0347ce6e388a5fc553f579b35808322e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 6 Feb 2026 10:38:31 +0100 Subject: [PATCH 21/64] bump hf hub Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/rocm-test.txt | 2 +- requirements/test.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index fcef017f4f68..eb1b6749abd2 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -95,4 +95,4 @@ albumentations==1.4.6 # Pin transformers version transformers==5.0.0 # Pin HF Hub version 
-huggingface-hub==1.3.7 +huggingface-hub==1.4.1 diff --git a/requirements/test.txt b/requirements/test.txt index 1644f16ee73b..73fe8766298d 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -330,7 +330,7 @@ httpx==0.27.2 # huggingface-hub # perceptron # schemathesis -huggingface-hub==1.3.7 +huggingface-hub==1.4.1 # via # accelerate # datasets @@ -1219,7 +1219,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@ecd0536d5fec7904db4f35f67ac95227e440282e +transformers @ git+https://github.com/huggingface/transformers.git@0b2900dd7ae8c6024f820db777830415bb70d44e # via # -r requirements/test.in # genai-perf From d7dd270ce79cff2b35fe7694e0df7adfd39bb04e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 6 Feb 2026 11:35:59 +0100 Subject: [PATCH 22/64] bump hf experimental version Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/test_transformers.py | 2 +- tests/v1/e2e/test_spec_decode.py | 2 +- vllm/model_executor/models/transformers/moe.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index f21c426bacf5..37e6919faac7 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -78,7 +78,7 @@ def test_models( from packaging.version import Version installed = Version(transformers.__version__) - required = Version("5.0.1.dev0") + required = Version("5.2.0.dev0") if model == "allenai/OLMoE-1B-7B-0924" and installed < required: pytest.skip( "MoE models with the Transformers modeling backend require " diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index 3ccd03dd98ff..a401266bde7d 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -452,7 +452,7 @@ def test_eagle_correctness( from
packaging.version import Version installed = Version(transformers.__version__) - required = Version("5.0.1.dev0") + required = Version("5.2.0.dev0") if installed < required: pytest.skip( "Eagle3 with the Transformers modeling backend requires " diff --git a/vllm/model_executor/models/transformers/moe.py b/vllm/model_executor/models/transformers/moe.py index 22b1896ef177..b2f0ae710b54 100644 --- a/vllm/model_executor/models/transformers/moe.py +++ b/vllm/model_executor/models/transformers/moe.py @@ -118,7 +118,7 @@ def transformers_moe_forward_fake( class MoEMixin(MixtureOfExperts): def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""): - self.check_version("5.0.1.dev0", "MoE models support") + self.check_version("5.2.0.dev0", "MoE models support") # Skip MixtureOfExperts.__init__ and call the next class in MRO super(MixtureOfExperts, self).__init__(vllm_config=vllm_config, prefix=prefix) From 4da0a8315feb5962030096ea074c90312d8c5ceb Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:00:21 +0100 Subject: [PATCH 23/64] OpenCUA should be fixed now Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 1d1b11c5e22d..3373dd4c9de3 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -892,12 +892,7 @@ def check_available_online( "nano_vl_dummy", is_available_online=False, trust_remote_code=True ), "OpenCUAForConditionalGeneration": _HfExamplesInfo( - "xlangai/OpenCUA-7B", - trust_remote_code=True, - max_transformers_version="4.57", - transformers_version_reason={ - "hf": "HF model uses remote code that is not compatible with latest Transformers" # noqa: E501 - }, + "xlangai/OpenCUA-7B", trust_remote_code=True ), "OpenPanguVLForConditionalGeneration": _HfExamplesInfo( "FreedomIntelligence/openPangu-VL-7B", 
From f7ac9c24bea3254c1a3fb77a596326ff57e3e073 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 10 Feb 2026 10:38:21 +0100 Subject: [PATCH 24/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 72fa22e4e5d0..c9aaa42203bc 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1229,7 +1229,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@fc9137225880a9d03f130634c20f9dbe36a7b8bf +transformers @ git+https://github.com/huggingface/transformers.git@b2028e775a52bf57ac2b6bd71b49ce61fa3adde6 # via # -r requirements/test.in # genai-perf From 093999bd645691563cf126f015a183b2bfee759f Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 10 Feb 2026 15:09:31 +0100 Subject: [PATCH 25/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index c9aaa42203bc..9babb15c3971 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1229,7 +1229,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@b2028e775a52bf57ac2b6bd71b49ce61fa3adde6 +transformers @ git+https://github.com/huggingface/transformers.git@520fad98fe370c69807481e2cf2e2dce946f9374 # via # -r requirements/test.in # genai-perf From 06a569f52b6b23aea076fc8edd675fa737abde56 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 10 Feb 2026 19:22:26 +0100 Subject: [PATCH 26/64] Skip Molmo2
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 710d00ce8b82..2d1df5efea6c 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -859,7 +859,7 @@ def check_available_online( }, max_transformers_version="4.57", transformers_version_reason={ - "vllm": ( + "hf": ( "MiniCPMVBatchFeature is incompatible with its base class in " "Transformers v5. See https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/discussions/78" ) @@ -887,6 +887,14 @@ def check_available_online( "allenai/Molmo2-8B", extras={"olmo": "allenai/Molmo2-O-7B"}, min_transformers_version="4.51", + max_transformers_version="4.57", + transformers_version_reason={ + "hf": ( + "Molmo2Processor uses deprecated optional_attributes and passes " + "arbitrary kwargs to ProcessorMixin.__init__ which is no longer " + "supported in Transformers v5." + ) + }, trust_remote_code=True, # required by current PrefixLM implementation max_num_batched_tokens=31872, From af9715397d1a643a1b3c4109dc49fafc036a3d04 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Feb 2026 16:19:05 +0100 Subject: [PATCH 27/64] Skip openpangu Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/models/registry.py b/tests/models/registry.py index 2d1df5efea6c..2958c1186e73 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -915,6 +915,13 @@ def check_available_online( trust_remote_code=True, max_model_len=4096, enforce_eager=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": ( + "OpenPanguVLVideoProcessorInitKwargs does not specify total=False, " + "making all kwargs required. 
See https://huggingface.co/FreedomIntelligence/openPangu-VL-7B/discussions/2" + ) + }, ), "Ovis": _HfExamplesInfo( "AIDC-AI/Ovis2-1B", From c0ac4cdf4535bab5948d3073854def965276a829 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Feb 2026 16:30:01 +0100 Subject: [PATCH 28/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 8f85cfb64c52..d772cd9a1f21 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1228,7 +1228,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@520fad98fe370c69807481e2cf2e2dce946f9374 +transformers @ git+https://github.com/huggingface/transformers.git@64e41924f45d37593c8297b50578f432b6f893da # via # -r requirements/test.in # genai-perf From 6e6fa6f13da8c109adbe75e2b431336878ab3b3d Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Feb 2026 16:49:47 +0100 Subject: [PATCH 29/64] glmasr is no longer remote code in v5 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 507410578630..64c3081e51d4 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -725,7 +725,6 @@ def check_available_online( "Gemma3nForConditionalGeneration": _HfExamplesInfo("google/gemma-3n-E2B-it"), "GlmAsrForConditionalGeneration": _HfExamplesInfo( "zai-org/GLM-ASR-Nano-2512", - trust_remote_code=True, min_transformers_version="5.0.0", ), "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"), From ced047ccded7842de394f68830ea518cafc3be41 Mon Sep 17 00:00:00 2001 From: Harry Mellor 
<19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Feb 2026 17:40:07 +0100 Subject: [PATCH 30/64] skip OpenCUA Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 64c3081e51d4..729f08a833a2 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -911,7 +911,12 @@ def check_available_online( "nano_vl_dummy", is_available_online=False, trust_remote_code=True ), "OpenCUAForConditionalGeneration": _HfExamplesInfo( - "xlangai/OpenCUA-7B", trust_remote_code=True + "xlangai/OpenCUA-7B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom model code is not compatible with Transformers v5." + }, ), "OpenPanguVLForConditionalGeneration": _HfExamplesInfo( "FreedomIntelligence/openPangu-VL-7B", From 148c40e651066c211b48ecd48521db4a2bc8ed55 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Feb 2026 17:48:15 +0100 Subject: [PATCH 31/64] Skip HCXVisionForCausalLM Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/models/registry.py b/tests/models/registry.py index 729f08a833a2..fd49f13cbfe9 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -753,6 +753,11 @@ def check_available_online( "HCXVisionForCausalLM": _HfExamplesInfo( "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom model code import ChatTemplateLoadKwargs which was removed " + "in Transformers v5." 
+ }, ), "HunYuanVLForConditionalGeneration": _HfExamplesInfo( "tencent/HunyuanOCR", From c46b56d9d24ebbe0e319747334f31bafa10fa484 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Feb 2026 22:03:23 +0100 Subject: [PATCH 32/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index d772cd9a1f21..cb6b39bb0ee0 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1228,7 +1228,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@64e41924f45d37593c8297b50578f432b6f893da +transformers @ git+https://github.com/huggingface/transformers.git@ae05b2ae619aa28fdfdcb8244009d585b7e1fed7 # via # -r requirements/test.in # genai-perf From f0f00aa1a391a4cd2f9a2ab60dcf2d7a92535427 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:23:48 +0100 Subject: [PATCH 33/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index cb6b39bb0ee0..a8b89888545b 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1228,7 +1228,7 @@ tqdm==4.66.6 # segmentation-models-pytorch # sentence-transformers # transformers -transformers @ git+https://github.com/huggingface/transformers.git@ae05b2ae619aa28fdfdcb8244009d585b7e1fed7 +transformers @ git+https://github.com/huggingface/transformers.git@d0c054bae1c0a83173dba18cf2b17996a0f8dae1 # via # -r requirements/test.in # genai-perf From 37c707dfed703d67c7730bea6bcf09a0405c24cb Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> 
Date: Mon, 16 Feb 2026 16:15:10 +0100 Subject: [PATCH 34/64] Skip broken custom models for processor tests Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../processing/test_tensor_schema.py | 6 +-- tests/models/registry.py | 38 ++++++++++++++++--- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/tests/models/multimodal/processing/test_tensor_schema.py b/tests/models/multimodal/processing/test_tensor_schema.py index 8f79936478da..0120bd93f954 100644 --- a/tests/models/multimodal/processing/test_tensor_schema.py +++ b/tests/models/multimodal/processing/test_tensor_schema.py @@ -163,11 +163,7 @@ def test_model_tensor_schema(model_id: str): model_info = HF_EXAMPLE_MODELS.find_hf_info(model_id) model_info.check_available_online(on_fail="skip") - model_info.check_transformers_version( - on_fail="skip", - check_max_version=False, - check_version_reason="vllm", - ) + model_info.check_transformers_version(on_fail="skip") model_arch = next( arch for arch, info in HF_EXAMPLE_MODELS.hf_models.items() if info == model_info diff --git a/tests/models/registry.py b/tests/models/registry.py index 809fe41b9e22..0b277e55efe3 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -688,7 +688,7 @@ def check_available_online( "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0" ), "MusicFlamingoForConditionalGeneration": _HfExamplesInfo( - "nvidia/music-flamingo-2601-hf", min_transformers_version="5.0.0.dev" + "nvidia/music-flamingo-2601-hf", min_transformers_version="5.3.0" ), "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"), "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"), @@ -786,11 +786,20 @@ def check_available_online( extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"}, ), "InternS1ForConditionalGeneration": _HfExamplesInfo( - "internlm/Intern-S1", trust_remote_code=True + "internlm/Intern-S1", + trust_remote_code=True, + 
max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom tokenizer code is not compatible with Transformers v5." + }, ), "InternS1ProForConditionalGeneration": _HfExamplesInfo( "internlm/Intern-S1-Pro", trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom model code is not compatible with Transformers v5." + }, ), "InternVLChatModel": _HfExamplesInfo( "OpenGVLab/InternVL2-1B", @@ -870,7 +879,14 @@ def check_available_online( "MiDashengLMModel": _HfExamplesInfo( "mispeech/midashenglm-7b", trust_remote_code=True ), - "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", trust_remote_code=True), + "MiniCPMO": _HfExamplesInfo( + "openbmb/MiniCPM-o-2_6", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom processor code is not compatible with Transformers v5." + }, + ), "MiniCPMV": _HfExamplesInfo( "openbmb/MiniCPM-Llama3-V-2_5", extras={ @@ -959,12 +975,24 @@ def check_available_online( "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B", }, ), - "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True), + "Ovis2_5": _HfExamplesInfo( + "AIDC-AI/Ovis2.5-2B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom processor code is not compatible with Transformers v5." + }, + ), "Ovis2_6ForCausalLM": _HfExamplesInfo( "AIDC-AI/Ovis2.6-2B", is_available_online=False, trust_remote_code=True ), "Ovis2_6_MoeForCausalLM": _HfExamplesInfo( - "AIDC-AI/Ovis2.6-30B-A3B", trust_remote_code=True + "AIDC-AI/Ovis2.6-30B-A3B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom processor code is not compatible with Transformers v5." 
+ }, ), "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo( "PaddlePaddle/PaddleOCR-VL", From 567e00ff5bce5c2127cb2ab4092ed060529eda33 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 16 Feb 2026 16:15:35 +0100 Subject: [PATCH 35/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index f81089b0948b..b03a2b9a690d 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1221,7 +1221,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@9a9231632eeb8be48f31db992b6f212ce34ab30b +transformers @ git+https://github.com/huggingface/transformers.git@53f8a08290bf835c9891094352f9efd7da0ccece # via # -r requirements/test.in # genai-perf From c0f2e1b65b3c1e2c4e1804b9f232f0626fde3b47 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 16 Feb 2026 16:27:25 +0100 Subject: [PATCH 36/64] Leave these version limits alone Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/test_transformers.py | 2 +- tests/v1/e2e/test_spec_decode.py | 2 +- vllm/model_executor/models/transformers/moe.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 37e6919faac7..15ebb5f4a38f 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -78,7 +78,7 @@ def test_models( from packaging.version import Version installed = Version(transformers.__version__) - required = Version("5.2.0.dev0") + required = Version("5.0.0") if model == "allenai/OLMoE-1B-7B-0924" and installed < required: pytest.skip( "MoE models with the Transformers modeling backend require " diff --git 
a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index a401266bde7d..a141e9da08a1 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -452,7 +452,7 @@ def test_eagle_correctness( from packaging.version import Version installed = Version(transformers.__version__) - required = Version("5.2.0.dev0") + required = Version("5.0.0") if installed < required: pytest.skip( "Eagle3 with the Transformers modeling backend requires " diff --git a/vllm/model_executor/models/transformers/moe.py b/vllm/model_executor/models/transformers/moe.py index b5fcdfbb56e1..320bbab085ed 100644 --- a/vllm/model_executor/models/transformers/moe.py +++ b/vllm/model_executor/models/transformers/moe.py @@ -118,7 +118,7 @@ def transformers_moe_forward_fake( class MoEMixin(MixtureOfExperts): def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""): - self.check_version("5.2.0.dev0", "MoE models support") + self.check_version("5.0.0", "MoE models support") # Skip MixtureOfExperts.__init__ and call the next class in MRO super(MixtureOfExperts, self).__init__(vllm_config=vllm_config, prefix=prefix) From 4159b7fe222428c25f8e445086f2e7870186e66c Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 17 Feb 2026 12:30:24 +0100 Subject: [PATCH 37/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index b03a2b9a690d..2dfda2f29cfc 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1221,7 +1221,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@53f8a08290bf835c9891094352f9efd7da0ccece +transformers @ git+https://github.com/huggingface/transformers.git@4355bc790e473e9a158f0b33001b192fd8b63a34 # via # -r requirements/test.in # 
genai-perf @@ -1243,10 +1243,9 @@ typer==0.15.2 # via # fastsafetensors # perceptron -typer-slim==0.20.0 - # via - # huggingface-hub # transformers +typer-slim==0.20.0 + # via huggingface-hub types-python-dateutil==2.9.0.20241206 # via arrow typeshed-client==2.8.2 From a1fb41b725f8a79594953276844ddb17a6dbee72 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 19 Feb 2026 17:11:19 +0100 Subject: [PATCH 38/64] bump transformers main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 2dfda2f29cfc..a57028a1ebc5 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1221,7 +1221,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@4355bc790e473e9a158f0b33001b192fd8b63a34 +transformers @ git+https://github.com/huggingface/transformers.git@3532437769f416c5cc7981c3c5f1a14f7d376360 # via # -r requirements/test.in # genai-perf From b0d99c9e3c01c89956525237843599e8c9573c60 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 20 Feb 2026 15:19:50 +0100 Subject: [PATCH 39/64] Fix Flamingo min versions Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 0b277e55efe3..efadb6a75437 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -685,10 +685,18 @@ def check_available_online( # [Decoder-only] "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"), "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo( - "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0" + "nvidia/audio-flamingo-3-hf", + 
min_transformers_version="5.3.0", + transformers_version_reason={ + "vllm": "Needs https://github.com/huggingface/transformers/pull/43538" + }, ), "MusicFlamingoForConditionalGeneration": _HfExamplesInfo( - "nvidia/music-flamingo-2601-hf", min_transformers_version="5.3.0" + "nvidia/music-flamingo-2601-hf", + min_transformers_version="5.3.0", + transformers_version_reason={ + "vllm": "Needs https://github.com/huggingface/transformers/pull/43538" + }, ), "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"), "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"), From 5f1d9f9e38a772344250ef7c5ee41009a35e011c Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 20 Feb 2026 15:25:31 +0100 Subject: [PATCH 40/64] Fix Qwen3.5 min version and availability of checkpoints Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index efadb6a75437..a87142c2b4ca 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -1063,22 +1063,26 @@ def check_available_online( "Qwen3_5ForConditionalGeneration": _HfExamplesInfo( "Qwen/Qwen3.5-9B-Instruct", max_model_len=4096, - min_transformers_version="5.1.0", + min_transformers_version="5.2.0", + is_available_online=False, ), "Qwen3_5MoeForConditionalGeneration": _HfExamplesInfo( "Qwen/Qwen3.5-35B-A3B-Instruct", max_model_len=4096, - min_transformers_version="5.1.0", + min_transformers_version="5.2.0", + is_available_online=False, ), "Qwen3_5MTP": _HfExamplesInfo( "Qwen/Qwen3.5-9B-Instruct", speculative_model="Qwen/Qwen3.5-9B-Instruct", - min_transformers_version="5.1.0", + min_transformers_version="5.2.0", + is_available_online=False, ), "Qwen3_5MoeMTP": _HfExamplesInfo( "Qwen/Qwen3.5-35B-A3B-Instruct", speculative_model="Qwen/Qwen3.5-35B-A3B-Instruct", 
- min_transformers_version="5.1.0", + min_transformers_version="5.2.0", + is_available_online=False, ), "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo( "Qwen/Qwen3-Omni-30B-A3B-Instruct", From a2fc2723baeb579235d10b615cb662637a91c8e8 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 20 Feb 2026 15:47:05 +0100 Subject: [PATCH 41/64] Skip Plamo2 for HF (vLLM should still run ok) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/models/registry.py b/tests/models/registry.py index a87142c2b4ca..8a426f19cc4f 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -451,6 +451,13 @@ def check_available_online( "Plamo2ForCausalLM": _HfExamplesInfo( "pfnet/plamo-2-1b", trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": ( + "Custom model code uses `_tied_weight_keys: list[str]` but " + "Transformers v5 now expects `_tied_weight_keys: dict[str, str]`" + ) + }, ), "Plamo3ForCausalLM": _HfExamplesInfo( "pfnet/plamo-3-nict-2b-base", From 6b563d477889926cfcac4703e57e304e3e254fc4 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 20 Feb 2026 16:05:59 +0100 Subject: [PATCH 42/64] Leave tensor schema skip alone and add another for hf reasons Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/multimodal/processing/test_tensor_schema.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/models/multimodal/processing/test_tensor_schema.py b/tests/models/multimodal/processing/test_tensor_schema.py index e749d3ac7556..83c8f1dd9a78 100644 --- a/tests/models/multimodal/processing/test_tensor_schema.py +++ b/tests/models/multimodal/processing/test_tensor_schema.py @@ -166,7 +166,12 @@ def test_model_tensor_schema(model_id: str): model_info = 
HF_EXAMPLE_MODELS.find_hf_info(model_id) model_info.check_available_online(on_fail="skip") - model_info.check_transformers_version(on_fail="skip") + model_info.check_transformers_version( + on_fail="skip", + check_max_version=False, + check_version_reason="vllm", + ) + model_info.check_requirements(on_fail="skip", check_version_reason="hf") model_arch = next( arch for arch, info in HF_EXAMPLE_MODELS.hf_models.items() if info == model_info From 64fa2e2e94329f3e9ca147e40f7f0a3f36b8ec54 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 20 Feb 2026 18:43:15 +0100 Subject: [PATCH 43/64] Remove hf skip for tensor schema test Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/multimodal/processing/test_tensor_schema.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/multimodal/processing/test_tensor_schema.py b/tests/models/multimodal/processing/test_tensor_schema.py index 83c8f1dd9a78..c81a8fe09d30 100644 --- a/tests/models/multimodal/processing/test_tensor_schema.py +++ b/tests/models/multimodal/processing/test_tensor_schema.py @@ -171,7 +171,6 @@ def test_model_tensor_schema(model_id: str): check_max_version=False, check_version_reason="vllm", ) - model_info.check_requirements(on_fail="skip", check_version_reason="hf") model_arch = next( arch for arch, info in HF_EXAMPLE_MODELS.hf_models.items() if info == model_info From 773ad0e73af58bf9d9248fe0c7198181439a57fc Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 20 Feb 2026 18:44:06 +0100 Subject: [PATCH 44/64] `MiniCPMV` version reason should stop it working in vLLM, not just HF Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 8a426f19cc4f..ba8cb20f7943 100644 --- a/tests/models/registry.py +++ 
b/tests/models/registry.py @@ -911,7 +911,7 @@ def check_available_online( }, max_transformers_version="4.57", transformers_version_reason={ - "hf": ( + "vllm": ( "MiniCPMVBatchFeature is incompatible with its base class in " "Transformers v5. See https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/discussions/78" ) From 445c7fe15b119a5192665df149c7a39db536ce1b Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Sat, 21 Feb 2026 09:38:37 +0100 Subject: [PATCH 45/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 6c3788d4d108..c12687b9868b 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1222,7 +1222,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@3532437769f416c5cc7981c3c5f1a14f7d376360 +transformers @ git+https://github.com/huggingface/transformers.git@147b7aa040812b079f467e777a2d2e1284167de0 # via # -r requirements/test.in # genai-perf From cfaa2ed5a964f66f3d9fe1c3514282ad34d717ac Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 23 Feb 2026 16:46:13 +0100 Subject: [PATCH 46/64] Unskip models which should now work Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 73040e81db7f..a5b105156a89 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -787,11 +787,6 @@ def check_available_online( "HCXVisionForCausalLM": _HfExamplesInfo( "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", trust_remote_code=True, - max_transformers_version="4.57", - transformers_version_reason={ - "hf": "Custom model code import 
ChatTemplateLoadKwargs which was removed " - "in Transformers v5." - }, ), "HunYuanVLForConditionalGeneration": _HfExamplesInfo( "tencent/HunyuanOCR", @@ -945,14 +940,6 @@ def check_available_online( "allenai/Molmo2-8B", extras={"olmo": "allenai/Molmo2-O-7B"}, min_transformers_version="4.51", - max_transformers_version="4.57", - transformers_version_reason={ - "hf": ( - "Molmo2Processor uses deprecated optional_attributes and passes " - "arbitrary kwargs to ProcessorMixin.__init__ which is no longer " - "supported in Transformers v5." - ) - }, trust_remote_code=True, # required by current PrefixLM implementation max_num_batched_tokens=31872, From 04692c2a98adcfee4896128ce2c6c8620679355a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 23 Feb 2026 17:35:22 +0100 Subject: [PATCH 47/64] Ovis doesn't work in vLLM actually Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index a5b105156a89..cd12bbc3da73 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -988,7 +988,7 @@ def check_available_online( trust_remote_code=True, max_transformers_version="4.57", transformers_version_reason={ - "hf": "Custom processor code is not compatible with Transformers v5." + "vllm": "Custom processor code is not compatible with Transformers v5." }, ), "Ovis2_6ForCausalLM": _HfExamplesInfo( @@ -999,7 +999,7 @@ def check_available_online( trust_remote_code=True, max_transformers_version="4.57", transformers_version_reason={ - "hf": "Custom processor code is not compatible with Transformers v5." + "vllm": "Custom processor code is not compatible with Transformers v5." 
}, ), "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo( From f7c7f5e8926ea80e8b2161f2146c30cf1f6271ca Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Feb 2026 10:08:32 +0100 Subject: [PATCH 48/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 927bc5557e6f..6b7cd4aa7495 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1227,7 +1227,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@a3dcad9e25af4c8747a365ec3f9a6b33e4b9abc1 +transformers @ git+https://github.com/huggingface/transformers.git@91d7b6456c5ef62d72ffd9faac5d21260b91df5b # via # -r requirements/test.in # genai-perf From d99f3b5b47f3b27431e459a6b3f1a2a259f9a20d Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:17:12 +0100 Subject: [PATCH 49/64] Skip InternS1 properly Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index cd12bbc3da73..5ffed9e5c278 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -806,7 +806,7 @@ def check_available_online( trust_remote_code=True, max_transformers_version="4.57", transformers_version_reason={ - "hf": "Custom tokenizer code is not compatible with Transformers v5." + "vllm": "Custom tokenizer code is not compatible with Transformers v5." 
}, ), "InternS1ProForConditionalGeneration": _HfExamplesInfo( From a7f676c85ed4b3380bf2e5714b9f84f4c237a480 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:17:48 +0100 Subject: [PATCH 50/64] InternS1Pro can work Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 5ffed9e5c278..6f8f7f130ea5 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -812,10 +812,6 @@ def check_available_online( "InternS1ProForConditionalGeneration": _HfExamplesInfo( "internlm/Intern-S1-Pro", trust_remote_code=True, - max_transformers_version="4.57", - transformers_version_reason={ - "hf": "Custom model code is not compatible with Transformers v5." - }, ), "InternVLChatModel": _HfExamplesInfo( "OpenGVLab/InternVL2-1B", From 44b75040083ddb8811f5fa4cf8af8ce74c493f47 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:55:13 +0100 Subject: [PATCH 51/64] Update OpenCUA skip Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 6f8f7f130ea5..e17ccdd8e506 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -953,7 +953,7 @@ def check_available_online( trust_remote_code=True, max_transformers_version="4.57", transformers_version_reason={ - "hf": "Custom model code is not compatible with Transformers v5." + "vllm": "Tokenizer cannot be initialised in Transformers v5." 
}, ), "OpenPanguVLForConditionalGeneration": _HfExamplesInfo( From a6d41005792bfd043f5ea65919998e5107719176 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:56:20 +0100 Subject: [PATCH 52/64] Update OpenPanguVL skip Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index e17ccdd8e506..c7cc832fdab4 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -963,7 +963,7 @@ def check_available_online( enforce_eager=True, max_transformers_version="4.57", transformers_version_reason={ - "hf": ( + "vllm": ( "OpenPanguVLVideoProcessorInitKwargs does not specify total=False, " "making all kwargs required. See https://huggingface.co/FreedomIntelligence/openPangu-VL-7B/discussions/2" ) From 6f6ee9e9b95a294e5259148036baf3425db97627 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:01:52 +0100 Subject: [PATCH 53/64] Skip `ExaoneMoeMTP` because it's not compatible with the test harness... 
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index c7cc832fdab4..0755c6f553c1 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -1211,11 +1211,12 @@ def check_available_online( trust_remote_code=True, speculative_model="baidu/ERNIE-4.5-21B-A3B-PT", ), - "ExaoneMoeMTP": _HfExamplesInfo( - "LGAI-EXAONE/K-EXAONE-236B-A23B", - speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B", - min_transformers_version="5.1.0", - ), + # TODO: Re-enable once it supports prefix caching + # "ExaoneMoeMTP": _HfExamplesInfo( + # "LGAI-EXAONE/K-EXAONE-236B-A23B", + # speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B", + # min_transformers_version="5.1.0", + # ), "Glm4MoeMTPModel": _HfExamplesInfo( "zai-org/GLM-4.5", speculative_model="zai-org/GLM-4.5", From d35c05dbfec153a875258b8a1d563bd8b08aceb3 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 27 Feb 2026 09:02:25 +0100 Subject: [PATCH 54/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 6b7cd4aa7495..34c5ef768ee2 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1227,7 +1227,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@91d7b6456c5ef62d72ffd9faac5d21260b91df5b +transformers @ git+https://github.com/huggingface/transformers.git@710cfdb0af09542df087e1aaca8059fadcd8f364 # via # -r requirements/test.in # genai-perf From b0d6bb384eb95034e261d735aa495546e775335c Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 3 Mar 2026 15:50:06 +0100 Subject: [PATCH 55/64] bump main 
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 34c5ef768ee2..f9ec92f4bb6a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1227,7 +1227,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@710cfdb0af09542df087e1aaca8059fadcd8f364 +transformers @ git+https://github.com/huggingface/transformers.git@24c5bc4b1b6186a5d95e6e7359a21e48a4e9def2 # via # -r requirements/test.in # genai-perf From bd8cc8be5e33859c4a9529a845a20f83f850c923 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 6 Mar 2026 09:44:33 +0100 Subject: [PATCH 56/64] bump transformers Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/rocm-test.txt | 2 +- requirements/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index b6fa38e2537d..af83f9163cf4 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -91,7 +91,7 @@ timm==1.0.17 # Required for plugins test albumentations==1.4.6 # Pin transformers version -transformers==5.0.0 +transformers==5.3.0 # Pin HF Hub version huggingface-hub==1.4.1 # Pin Mistral Common diff --git a/requirements/test.txt b/requirements/test.txt index a1bfe86bac8e..a29fd5e9ef34 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1229,7 +1229,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@fd6bc380c8854a370fbc9f68a157895d84dce7d7 +transformers @ git+https://github.com/huggingface/transformers.git@4f91111b8ef37bd227f33c7facb92c41aa77604d # via # -r requirements/test.in # genai-perf From db2c8006e981322b944f70658966e35e7f501d87 Mon Sep 17 00:00:00 2001 
From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:39:34 +0100 Subject: [PATCH 57/64] bump transformers Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index a29fd5e9ef34..8bb367f1e772 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -930,7 +930,7 @@ referencing==0.35.1 # via # jsonschema # jsonschema-specifications -regex==2024.9.11 +regex==2026.2.28 # via # diffusers # nltk @@ -1229,7 +1229,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@4f91111b8ef37bd227f33c7facb92c41aa77604d +transformers @ git+https://github.com/huggingface/transformers.git@1a50a3b13b6d17c2637fe19e94a8c459bd4208a5 # via # -r requirements/test.in # genai-perf From 91f54acaf06c571625c6141f774587f624e797e4 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 10 Mar 2026 17:20:22 +0100 Subject: [PATCH 58/64] bump transformers Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 3f57829cf322..039db4410259 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1224,7 +1224,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@1a50a3b13b6d17c2637fe19e94a8c459bd4208a5 +transformers @ git+https://github.com/huggingface/transformers.git@1bd97f246318456c1b87cf8ef8dc043ec1a53fff # via # -r requirements/test.in # genai-perf From 121b6819007ed9689cffd30724da06272f819927 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 10 Mar 2026 17:22:47 +0100 Subject: [PATCH 59/64] Put 
ExaoneMoe back, we'll fix it another way Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 16aa8fe0ae61..0f3b96b4c5d2 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -1243,12 +1243,11 @@ def check_available_online( trust_remote_code=True, speculative_model="baidu/ERNIE-4.5-21B-A3B-PT", ), - # TODO: Re-enable once it supports prefix caching - # "ExaoneMoeMTP": _HfExamplesInfo( - # "LGAI-EXAONE/K-EXAONE-236B-A23B", - # speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B", - # min_transformers_version="5.1.0", - # ), + "ExaoneMoeMTP": _HfExamplesInfo( + "LGAI-EXAONE/K-EXAONE-236B-A23B", + speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B", + min_transformers_version="5.1.0", + ), "ExtractHiddenStatesModel": _HfExamplesInfo( "Qwen/Qwen3-8B", speculative_method="extract_hidden_states", From 489aeda0decf76645286ad06044ccd4403687b78 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 11 Mar 2026 09:59:04 +0100 Subject: [PATCH 60/64] bump transformers Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 039db4410259..b796c58d9850 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1224,7 +1224,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@1bd97f246318456c1b87cf8ef8dc043ec1a53fff +transformers @ git+https://github.com/huggingface/transformers.git@ff2ba441a8bc9f7636bf22def908b53bfa4e1db2 # via # -r requirements/test.in # genai-perf From 4c138ee78cfaddb6f7e12277eaa6fbd14e3089bd Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: 
Fri, 13 Mar 2026 19:24:52 +0100 Subject: [PATCH 61/64] bump transformers Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index c47cc4e180f8..cd0358a622ae 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1230,7 +1230,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@ff2ba441a8bc9f7636bf22def908b53bfa4e1db2 +transformers @ git+https://github.com/huggingface/transformers.git@064f0e97c69ca2ac865be78ddff5ce73c54ab071 # via # -r requirements/test.in # genai-perf From b99bedc737166ae5ca98cb9e3534b96e0c8c69aa Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Sat, 14 Mar 2026 19:35:07 +0100 Subject: [PATCH 62/64] bump transformers Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 4f548a88ff27..f87e0a67b214 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1233,7 +1233,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@064f0e97c69ca2ac865be78ddff5ce73c54ab071 +transformers @ git+https://github.com/huggingface/transformers.git@c368e139aade3ee7cdfa29387f3249168a912e5c # via # -r requirements/test.in # genai-perf From 0c515b017fa3f90eaa5d4586c65a96ffe0ac85cb Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:13:15 +0100 Subject: [PATCH 63/64] Bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 
bcc60638629d..bb21576d7769 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1263,7 +1263,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@4ec84a022d2ba1efb2cbbdc9eb415e4190113d22 +transformers @ git+https://github.com/huggingface/transformers.git@cecacd374f575ad7ffe37dcd69a98cf00b551011 # via # -r requirements/test.in # genai-perf From 1786f7fcf5992e7b461f9601fe987e63596ed80b Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:12:55 +0100 Subject: [PATCH 64/64] bump main Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index bb21576d7769..3750d26a19bf 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1263,7 +1263,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers @ git+https://github.com/huggingface/transformers.git@cecacd374f575ad7ffe37dcd69a98cf00b551011 +transformers @ git+https://github.com/huggingface/transformers.git@b96f8a98965a744ef5137dd25efd2e280cddcc25 # via # -r requirements/test.in # genai-perf