diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml index c1cc9e9a36e0..a22a0723ce34 100644 --- a/.buildkite/test_areas/models_basic.yaml +++ b/.buildkite/test_areas/models_basic.yaml @@ -69,3 +69,18 @@ steps: - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl # Whisper needs spawn method to avoid deadlock - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper + +- label: Transformers Backward Compatibility Models + working_dir: "/vllm-workspace/" + optional: true + soft_fail: true + commands: + - pip install transformers==4.57.5 + - pytest -v -s tests/models/test_initialization.py + - pytest -v -s tests/models/test_transformers.py + - pytest -v -s tests/models/multimodal/processing/ + - pytest -v -s tests/models/multimodal/test_mapping.py + - python3 examples/offline_inference/basic/chat.py + - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl + # Whisper needs spawn method to avoid deadlock + - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper diff --git a/docker/Dockerfile b/docker/Dockerfile index 2abf03515fb9..b5f617d6411e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -474,7 +474,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \ - -r requirements/dev.txt \ + -r requirements/dev.txt --pre \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ else \ echo "Installing dev requirements..." 
\ @@ -627,7 +627,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ else \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ fi; \ - uv pip install --system accelerate hf_transfer modelscope \ + uv pip install --system accelerate modelscope \ "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}" # ============================================================ @@ -750,9 +750,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -e tests/vllm_test_utils # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER 1 +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 # Copy in the v1 package for testing (it isn't distributed yet) COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 5f819acc6aea..9808177a385e 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -171,7 +171,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements/dev.txt && \ + uv pip install -r requirements/dev.txt --pre && \ pre-commit install --hook-type pre-commit --hook-type commit-msg ENTRYPOINT ["bash"] @@ -195,6 +195,12 @@ ADD ./.buildkite/ ./.buildkite/ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install -e tests/vllm_test_utils +# enable fast downloads from hf (for testing) +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 + ######################### RELEASE IMAGE ######################### FROM base AS vllm-openai diff --git a/docker/Dockerfile.nightly_torch 
b/docker/Dockerfile.nightly_torch index 5c424980ee2d..0a8a35bdce59 100644 --- a/docker/Dockerfile.nightly_torch +++ b/docker/Dockerfile.nightly_torch @@ -269,9 +269,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -e tests/vllm_test_utils # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER 1 +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/nightly_torch_test.txt diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index f8a4274a179f..7d9162a11bfe 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -356,9 +356,10 @@ RUN cd /vllm-workspace \ && python3 -m pip install pytest-shard # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER=1 +ENV HF_XET_HIGH_PERFORMANCE=1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT=60 # install audio decode package `torchcodec` from source (required due to # ROCm and torch version mismatch) for tests with datasets package diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu index d4c98bf7405d..b158d0428c3f 100644 --- a/docker/Dockerfile.xpu +++ b/docker/Dockerfile.xpu @@ -113,7 +113,7 @@ FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install accelerate hf_transfer pytest pytest_asyncio lm_eval[api] modelscope + uv pip install accelerate pytest pytest_asyncio lm_eval[api] modelscope # install development dependencies (for testing) RUN uv pip install -e tests/vllm_test_utils diff --git a/docs/getting_started/installation/gpu.rocm.inc.md
b/docs/getting_started/installation/gpu.rocm.inc.md index 1f36ceba617a..564fa1efc28e 100644 --- a/docs/getting_started/installation/gpu.rocm.inc.md +++ b/docs/getting_started/installation/gpu.rocm.inc.md @@ -147,7 +147,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700 # Install dependencies pip install --upgrade numba \ scipy \ - huggingface-hub[cli,hf_transfer] \ + huggingface-hub[cli] \ setuptools_scm pip install -r requirements/rocm.txt diff --git a/requirements/common.txt b/requirements/common.txt index 05666c5d14b0..cded2c0433b4 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -7,7 +7,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.56.0, < 5 +transformers >= 4.56.0 tokenizers >= 0.21.1 # Required for fast incremental detokenization. protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index ca9c5bd1cace..5e40c4cd9f23 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -29,8 +29,8 @@ opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.11 # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test -transformers==4.57.5 -tokenizers==0.22.0 +transformers @ git+https://github.com/huggingface/transformers.git@main +tokenizers==0.22.2 schemathesis>=3.39.15 # Required for openai schema test. 
# quantization bitsandbytes>=0.49.2 diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 9a7bd9f59bcd..ada2a1d5268e 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -93,9 +93,9 @@ timm==1.0.17 # Required for plugins test albumentations==1.4.6 # Pin transformers version -transformers==4.57.5 +transformers==5.3.0 # Pin HF Hub version -huggingface-hub==0.36.2 +huggingface-hub==1.4.1 # Pin Mistral Common mistral-common[image,audio]==1.10.0 # Required for Prithvi tests diff --git a/requirements/test.in b/requirements/test.in index 8bd00514435b..e36493949e17 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -18,7 +18,7 @@ httpx librosa # required for audio tests vector_quantize_pytorch # required for minicpmo_26 test vocos # required for minicpmo_26 test -peft>=0.15.0 # required for phi-4-mm test +peft>=0.18.1 # required for phi-4-mm test pqdm ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests sentence-transformers>=5.2.0 # required for embedding tests @@ -38,8 +38,8 @@ opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.11 # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test -transformers==4.57.5 -tokenizers==0.22.0 +transformers @ git+https://github.com/huggingface/transformers.git@main +tokenizers==0.22.2 schemathesis>=3.39.15 # Required for openai schema test. 
# quantization bitsandbytes==0.49.2 diff --git a/requirements/test.txt b/requirements/test.txt index e2f9040beecc..3750d26a19bf 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,7 +4,7 @@ absl-py==2.1.0 # via # rouge-score # tensorboard -accelerate==1.0.1 +accelerate==1.1.0 # via peft aenum==3.1.16 # via lightly @@ -240,7 +240,6 @@ filelock==3.16.1 # huggingface-hub # ray # torch - # transformers # virtualenv fiona==1.10.1 # via torchgeo @@ -323,7 +322,7 @@ h5py==3.13.0 # via terratorch harfile==0.3.0 # via schemathesis -hf-xet==1.1.7 +hf-xet==1.4.2 # via huggingface-hub hiredis==3.0.0 # via tensorizer @@ -337,9 +336,10 @@ httpx==0.27.2 # via # -r requirements/test.in # diffusers + # huggingface-hub # perceptron # schemathesis -huggingface-hub==0.36.2 +huggingface-hub==1.7.1 # via # accelerate # datasets @@ -738,7 +738,7 @@ pathvalidate==3.2.1 # via pytablewriter patsy==1.0.1 # via statsmodels -peft==0.16.0 +peft==0.18.1 # via -r requirements/test.in perceptron==0.1.4 # via -r requirements/test.in @@ -961,7 +961,7 @@ referencing==0.35.1 # via # jsonschema # jsonschema-specifications -regex==2024.9.11 +regex==2026.2.28 # via # diffusers # nltk @@ -980,7 +980,6 @@ requests==2.32.3 # google-api-core # google-cloud-storage # gpt-oss - # huggingface-hub # lightly # lm-eval # mistral-common @@ -993,7 +992,6 @@ requests==2.32.3 # starlette-testclient # tacoreader # tiktoken - # transformers # wandb responses==0.25.3 # via genai-perf @@ -1189,7 +1187,7 @@ timm==1.0.17 # segmentation-models-pytorch # terratorch # torchgeo -tokenizers==0.22.0 +tokenizers==0.22.2 # via # -r requirements/test.in # transformers @@ -1265,7 +1263,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers==4.57.5 +transformers @ git+https://github.com/huggingface/transformers.git@b96f8a98965a744ef5137dd25efd2e280cddcc25 # via # -r requirements/test.in # genai-perf @@ -1286,7 +1284,9 @@ typepy==1.3.2 typer==0.15.2 # via # fastsafetensors + # huggingface-hub # 
perceptron + # transformers types-python-dateutil==2.9.0.20241206 # via arrow typeshed-client==2.8.2 diff --git a/tests/model_executor/test_weight_utils.py b/tests/model_executor/test_weight_utils.py index 93535ae0aacd..260ebdcefb3b 100644 --- a/tests/model_executor/test_weight_utils.py +++ b/tests/model_executor/test_weight_utils.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os import tempfile import huggingface_hub.constants @@ -10,26 +9,10 @@ from vllm.model_executor.model_loader.weight_utils import ( download_weights_from_hf, - enable_hf_transfer, maybe_remap_kv_scale_name, ) -def test_hf_transfer_auto_activation(): - if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ: - # in case it is already set, we can't test the auto activation - pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation") - enable_hf_transfer() - try: - # enable hf hub transfer if available - import hf_transfer # type: ignore # noqa - - HF_TRANSFER_ACTIVE = True - except ImportError: - HF_TRANSFER_ACTIVE = False - assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE - - def test_download_weights_from_hf(): with tempfile.TemporaryDirectory() as tmpdir: # assert LocalEntryNotFoundError error is thrown @@ -178,5 +161,4 @@ def test_missing_target_returns_none(self): if __name__ == "__main__": - test_hf_transfer_auto_activation() test_download_weights_from_hf() diff --git a/tests/models/registry.py b/tests/models/registry.py index aac707a9065b..1e02e40bfc60 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -465,6 +465,13 @@ def check_available_online( "Plamo2ForCausalLM": _HfExamplesInfo( "pfnet/plamo-2-1b", trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": ( + "Custom model code uses `_tied_weight_keys: list[str]` but " + "Transformers v5 now expects `_tied_weight_keys: dict[str, str]`" + ) + }, ), 
"Plamo3ForCausalLM": _HfExamplesInfo( "pfnet/plamo-3-nict-2b-base", @@ -744,10 +751,18 @@ def check_available_online( # [Decoder-only] "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"), "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo( - "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0" + "nvidia/audio-flamingo-3-hf", + min_transformers_version="5.3.0", + transformers_version_reason={ + "vllm": "Needs https://github.com/huggingface/transformers/pull/43538" + }, ), "MusicFlamingoForConditionalGeneration": _HfExamplesInfo( - "nvidia/music-flamingo-2601-hf", min_transformers_version="5.0.0.dev" + "nvidia/music-flamingo-2601-hf", + min_transformers_version="5.3.0", + transformers_version_reason={ + "vllm": "Needs https://github.com/huggingface/transformers/pull/43538" + }, ), "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"), "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"), @@ -847,7 +862,12 @@ def check_available_online( extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"}, ), "InternS1ForConditionalGeneration": _HfExamplesInfo( - "internlm/Intern-S1", trust_remote_code=True + "internlm/Intern-S1", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Custom tokenizer code is not compatible with Transformers v5." + }, ), "InternS1ProForConditionalGeneration": _HfExamplesInfo( "internlm/Intern-S1-Pro", @@ -936,7 +956,14 @@ def check_available_online( "MiDashengLMModel": _HfExamplesInfo( "mispeech/midashenglm-7b", trust_remote_code=True ), - "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", trust_remote_code=True), + "MiniCPMO": _HfExamplesInfo( + "openbmb/MiniCPM-o-2_6", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom processor code is not compatible with Transformers v5." 
+ }, + ), "MiniCPMV": _HfExamplesInfo( "openbmb/MiniCPM-Llama3-V-2_5", extras={ @@ -944,6 +971,13 @@ def check_available_online( "4.0": "openbmb/MiniCPM-V-4", "4.5": "openbmb/MiniCPM-V-4_5", }, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": ( + "MiniCPMVBatchFeature is incompatible with its base class in " + "Transformers v5. See https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/discussions/78" + ) + }, trust_remote_code=True, ), "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo( @@ -980,13 +1014,25 @@ def check_available_online( "nano_vl_dummy", is_available_online=False, trust_remote_code=True ), "OpenCUAForConditionalGeneration": _HfExamplesInfo( - "xlangai/OpenCUA-7B", trust_remote_code=True + "xlangai/OpenCUA-7B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Tokenizer cannot be initialised in Transformers v5." + }, ), "OpenPanguVLForConditionalGeneration": _HfExamplesInfo( "FreedomIntelligence/openPangu-VL-7B", trust_remote_code=True, max_model_len=4096, enforce_eager=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": ( + "OpenPanguVLVideoProcessorInitKwargs does not specify total=False, " + "making all kwargs required. See https://huggingface.co/FreedomIntelligence/openPangu-VL-7B/discussions/2" + ) + }, ), "Ovis": _HfExamplesInfo( "AIDC-AI/Ovis2-1B", @@ -998,12 +1044,24 @@ def check_available_online( "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B", }, ), - "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True), + "Ovis2_5": _HfExamplesInfo( + "AIDC-AI/Ovis2.5-2B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Custom processor code is not compatible with Transformers v5." 
+ }, + ), "Ovis2_6ForCausalLM": _HfExamplesInfo( "AIDC-AI/Ovis2.6-2B", is_available_online=False, trust_remote_code=True ), "Ovis2_6_MoeForCausalLM": _HfExamplesInfo( - "AIDC-AI/Ovis2.6-30B-A3B", trust_remote_code=True + "AIDC-AI/Ovis2.6-30B-A3B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Custom processor code is not compatible with Transformers v5." + }, ), "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo( "PaddlePaddle/PaddleOCR-VL",