diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml index c1cc9e9a36e0..a22a0723ce34 100644 --- a/.buildkite/test_areas/models_basic.yaml +++ b/.buildkite/test_areas/models_basic.yaml @@ -69,3 +69,18 @@ steps: - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl # Whisper needs spawn method to avoid deadlock - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper + +- label: Transformers Backward Compatibility Models + working_dir: "/vllm-workspace/" + optional: true + soft_fail: true + commands: + - pip install transformers==4.57.5 + - pytest -v -s tests/models/test_initialization.py + - pytest -v -s tests/models/test_transformers.py + - pytest -v -s tests/models/multimodal/processing/ + - pytest -v -s tests/models/multimodal/test_mapping.py + - python3 examples/offline_inference/basic/chat.py + - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl + # Whisper needs spawn method to avoid deadlock + - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper diff --git a/docker/Dockerfile b/docker/Dockerfile index 2abf03515fb9..b5f617d6411e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -474,7 +474,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \ - -r requirements/dev.txt \ + -r requirements/dev.txt --pre \ --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ else \ echo "Installing dev requirements..." 
\ @@ -627,7 +627,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ else \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ fi; \ - uv pip install --system accelerate hf_transfer modelscope \ + uv pip install --system accelerate modelscope \ "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}" # ============================================================ @@ -750,9 +750,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -e tests/vllm_test_utils # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER 1 +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 # Copy in the v1 package for testing (it isn't distributed yet) COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 5f819acc6aea..9808177a385e 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -171,7 +171,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements/dev.txt && \ + uv pip install -r requirements/dev.txt --pre && \ pre-commit install --hook-type pre-commit --hook-type commit-msg ENTRYPOINT ["bash"] @@ -195,6 +195,12 @@ ADD ./.buildkite/ ./.buildkite/ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install -e tests/vllm_test_utils +# enable fast downloads from hf (for testing) +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 + ######################### RELEASE IMAGE ######################### FROM base AS vllm-openai diff --git a/docker/Dockerfile.nightly_torch 
b/docker/Dockerfile.nightly_torch index 5c424980ee2d..0a8a35bdce59 100644 --- a/docker/Dockerfile.nightly_torch +++ b/docker/Dockerfile.nightly_torch @@ -269,9 +269,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -e tests/vllm_test_utils # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER 1 +ENV HF_XET_HIGH_PERFORMANCE 1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT 60 RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/nightly_torch_test.txt diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index f8a4274a179f..7d9162a11bfe 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -356,9 +356,10 @@ RUN cd /vllm-workspace \ && python3 -m pip install pytest-shard # enable fast downloads from hf (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system hf_transfer -ENV HF_HUB_ENABLE_HF_TRANSFER=1 +ENV HF_XET_HIGH_PERFORMANCE=1 + +# increase timeout for hf downloads (for testing) +ENV HF_HUB_DOWNLOAD_TIMEOUT=60 # install audio decode package `torchcodec` from source (required due to # ROCm and torch version mismatch) for tests with datasets package diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu index d4c98bf7405d..b158d0428c3f 100644 --- a/docker/Dockerfile.xpu +++ b/docker/Dockerfile.xpu @@ -113,7 +113,7 @@ FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install accelerate hf_transfer pytest pytest_asyncio lm_eval[api] modelscope + uv pip install accelerate pytest pytest_asyncio lm_eval[api] modelscope # install development dependencies (for testing) RUN uv pip install -e tests/vllm_test_utils diff --git a/docs/getting_started/installation/gpu.rocm.inc.md
b/docs/getting_started/installation/gpu.rocm.inc.md index 1f36ceba617a..564fa1efc28e 100644 --- a/docs/getting_started/installation/gpu.rocm.inc.md +++ b/docs/getting_started/installation/gpu.rocm.inc.md @@ -147,7 +147,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700 # Install dependencies pip install --upgrade numba \ scipy \ - huggingface-hub[cli,hf_transfer] \ + huggingface-hub[cli] \ setuptools_scm pip install -r requirements/rocm.txt diff --git a/requirements/common.txt b/requirements/common.txt index 05666c5d14b0..cded2c0433b4 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -7,7 +7,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.56.0, < 5 +transformers >= 4.56.0 tokenizers >= 0.21.1 # Required for fast incremental detokenization. protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index ca9c5bd1cace..5e40c4cd9f23 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -29,8 +29,8 @@ opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.11 # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test -transformers==4.57.5 -tokenizers==0.22.0 +transformers @ git+https://github.com/huggingface/transformers.git@main +tokenizers==0.22.2 schemathesis>=3.39.15 # Required for openai schema test. 
# quantization bitsandbytes>=0.49.2 diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 9a7bd9f59bcd..ada2a1d5268e 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -93,9 +93,9 @@ timm==1.0.17 # Required for plugins test albumentations==1.4.6 # Pin transformers version -transformers==4.57.5 +transformers==5.3.0 # Pin HF Hub version -huggingface-hub==0.36.2 +huggingface-hub==1.4.1 # Pin Mistral Common mistral-common[image,audio]==1.10.0 # Required for Prithvi tests diff --git a/requirements/test.in b/requirements/test.in index 8bd00514435b..e36493949e17 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -18,7 +18,7 @@ httpx librosa # required for audio tests vector_quantize_pytorch # required for minicpmo_26 test vocos # required for minicpmo_26 test -peft>=0.15.0 # required for phi-4-mm test +peft>=0.18.1 # required for phi-4-mm test pqdm ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests sentence-transformers>=5.2.0 # required for embedding tests @@ -38,8 +38,8 @@ opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.11 # required for model evaluation test mteb[bm25s]>=2, <3 # required for mteb test -transformers==4.57.5 -tokenizers==0.22.0 +transformers @ git+https://github.com/huggingface/transformers.git@main +tokenizers==0.22.2 schemathesis>=3.39.15 # Required for openai schema test. 
# quantization bitsandbytes==0.49.2 diff --git a/requirements/test.txt b/requirements/test.txt index e2f9040beecc..3750d26a19bf 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,7 +4,7 @@ absl-py==2.1.0 # via # rouge-score # tensorboard -accelerate==1.0.1 +accelerate==1.1.0 # via peft aenum==3.1.16 # via lightly @@ -240,7 +240,6 @@ filelock==3.16.1 # huggingface-hub # ray # torch - # transformers # virtualenv fiona==1.10.1 # via torchgeo @@ -323,7 +322,7 @@ h5py==3.13.0 # via terratorch harfile==0.3.0 # via schemathesis -hf-xet==1.1.7 +hf-xet==1.4.2 # via huggingface-hub hiredis==3.0.0 # via tensorizer @@ -337,9 +336,10 @@ httpx==0.27.2 # via # -r requirements/test.in # diffusers + # huggingface-hub # perceptron # schemathesis -huggingface-hub==0.36.2 +huggingface-hub==1.7.1 # via # accelerate # datasets @@ -738,7 +738,7 @@ pathvalidate==3.2.1 # via pytablewriter patsy==1.0.1 # via statsmodels -peft==0.16.0 +peft==0.18.1 # via -r requirements/test.in perceptron==0.1.4 # via -r requirements/test.in @@ -961,7 +961,7 @@ referencing==0.35.1 # via # jsonschema # jsonschema-specifications -regex==2024.9.11 +regex==2026.2.28 # via # diffusers # nltk @@ -980,7 +980,6 @@ requests==2.32.3 # google-api-core # google-cloud-storage # gpt-oss - # huggingface-hub # lightly # lm-eval # mistral-common @@ -993,7 +992,6 @@ requests==2.32.3 # starlette-testclient # tacoreader # tiktoken - # transformers # wandb responses==0.25.3 # via genai-perf @@ -1189,7 +1187,7 @@ timm==1.0.17 # segmentation-models-pytorch # terratorch # torchgeo -tokenizers==0.22.0 +tokenizers==0.22.2 # via # -r requirements/test.in # transformers @@ -1265,7 +1263,7 @@ tqdm==4.67.3 # tacoreader # terratorch # transformers -transformers==4.57.5 +transformers @ git+https://github.com/huggingface/transformers.git@b96f8a98965a744ef5137dd25efd2e280cddcc25 # via # -r requirements/test.in # genai-perf @@ -1286,7 +1284,9 @@ typepy==1.3.2 typer==0.15.2 # via # fastsafetensors + # huggingface-hub # 
perceptron + # transformers types-python-dateutil==2.9.0.20241206 # via arrow typeshed-client==2.8.2 diff --git a/tests/model_executor/test_weight_utils.py b/tests/model_executor/test_weight_utils.py index 93535ae0aacd..260ebdcefb3b 100644 --- a/tests/model_executor/test_weight_utils.py +++ b/tests/model_executor/test_weight_utils.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os import tempfile import huggingface_hub.constants @@ -10,26 +9,10 @@ from vllm.model_executor.model_loader.weight_utils import ( download_weights_from_hf, - enable_hf_transfer, maybe_remap_kv_scale_name, ) -def test_hf_transfer_auto_activation(): - if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ: - # in case it is already set, we can't test the auto activation - pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation") - enable_hf_transfer() - try: - # enable hf hub transfer if available - import hf_transfer # type: ignore # noqa - - HF_TRANSFER_ACTIVE = True - except ImportError: - HF_TRANSFER_ACTIVE = False - assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE - - def test_download_weights_from_hf(): with tempfile.TemporaryDirectory() as tmpdir: # assert LocalEntryNotFoundError error is thrown @@ -178,5 +161,4 @@ def test_missing_target_returns_none(self): if __name__ == "__main__": - test_hf_transfer_auto_activation() test_download_weights_from_hf() diff --git a/tests/models/registry.py b/tests/models/registry.py index aac707a9065b..1e02e40bfc60 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -465,6 +465,13 @@ def check_available_online( "Plamo2ForCausalLM": _HfExamplesInfo( "pfnet/plamo-2-1b", trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": ( + "Custom model code uses `_tied_weight_keys: list[str]` but " + "Transformers v5 now expects `_tied_weight_keys: dict[str, str]`" + ) + }, ), 
"Plamo3ForCausalLM": _HfExamplesInfo( "pfnet/plamo-3-nict-2b-base", @@ -744,10 +751,18 @@ def check_available_online( # [Decoder-only] "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"), "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo( - "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0" + "nvidia/audio-flamingo-3-hf", + min_transformers_version="5.3.0", + transformers_version_reason={ + "vllm": "Needs https://github.com/huggingface/transformers/pull/43538" + }, ), "MusicFlamingoForConditionalGeneration": _HfExamplesInfo( - "nvidia/music-flamingo-2601-hf", min_transformers_version="5.0.0.dev" + "nvidia/music-flamingo-2601-hf", + min_transformers_version="5.3.0", + transformers_version_reason={ + "vllm": "Needs https://github.com/huggingface/transformers/pull/43538" + }, ), "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"), "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"), @@ -847,7 +862,12 @@ def check_available_online( extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"}, ), "InternS1ForConditionalGeneration": _HfExamplesInfo( - "internlm/Intern-S1", trust_remote_code=True + "internlm/Intern-S1", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Custom tokenizer code is not compatible with Transformers v5." + }, ), "InternS1ProForConditionalGeneration": _HfExamplesInfo( "internlm/Intern-S1-Pro", @@ -936,7 +956,14 @@ def check_available_online( "MiDashengLMModel": _HfExamplesInfo( "mispeech/midashenglm-7b", trust_remote_code=True ), - "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", trust_remote_code=True), + "MiniCPMO": _HfExamplesInfo( + "openbmb/MiniCPM-o-2_6", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "hf": "Custom processor code is not compatible with Transformers v5." 
+ }, + ), "MiniCPMV": _HfExamplesInfo( "openbmb/MiniCPM-Llama3-V-2_5", extras={ @@ -944,6 +971,13 @@ def check_available_online( "4.0": "openbmb/MiniCPM-V-4", "4.5": "openbmb/MiniCPM-V-4_5", }, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": ( + "MiniCPMVBatchFeature is incompatible with its base class in " + "Transformers v5. See https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/discussions/78" + ) + }, trust_remote_code=True, ), "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo( @@ -980,13 +1014,25 @@ def check_available_online( "nano_vl_dummy", is_available_online=False, trust_remote_code=True ), "OpenCUAForConditionalGeneration": _HfExamplesInfo( - "xlangai/OpenCUA-7B", trust_remote_code=True + "xlangai/OpenCUA-7B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Tokenizer cannot be initialised in Transformers v5." + }, ), "OpenPanguVLForConditionalGeneration": _HfExamplesInfo( "FreedomIntelligence/openPangu-VL-7B", trust_remote_code=True, max_model_len=4096, enforce_eager=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": ( + "OpenPanguVLVideoProcessorInitKwargs does not specify total=False, " + "making all kwargs required. See https://huggingface.co/FreedomIntelligence/openPangu-VL-7B/discussions/2" + ) + }, ), "Ovis": _HfExamplesInfo( "AIDC-AI/Ovis2-1B", @@ -998,12 +1044,24 @@ def check_available_online( "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B", }, ), - "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True), + "Ovis2_5": _HfExamplesInfo( + "AIDC-AI/Ovis2.5-2B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Custom processor code is not compatible with Transformers v5." 
+ }, + ), "Ovis2_6ForCausalLM": _HfExamplesInfo( "AIDC-AI/Ovis2.6-2B", is_available_online=False, trust_remote_code=True ), "Ovis2_6_MoeForCausalLM": _HfExamplesInfo( - "AIDC-AI/Ovis2.6-30B-A3B", trust_remote_code=True + "AIDC-AI/Ovis2.6-30B-A3B", + trust_remote_code=True, + max_transformers_version="4.57", + transformers_version_reason={ + "vllm": "Custom processor code is not compatible with Transformers v5." + }, ), "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo( "PaddlePaddle/PaddleOCR-VL",