diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index 7f23be921b..b694785cd5 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -48,7 +49,10 @@ jobs: echo "$DEPS" | grep 'scikit-learn==1.1.0' echo "$DEPS" | grep 'duckdb==1.1' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],duckdb + coverage combine + coverage report --fail-under=50 pretty_old_versions: strategy: @@ -86,7 +90,10 @@ jobs: echo "$DEPS" | grep 'scikit-learn==1.1.0' echo "$DEPS" | grep 'duckdb==1.2' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],duckdb + coverage combine + coverage report --fail-under=50 not_so_old_versions: strategy: @@ -123,7 +130,10 @@ jobs: echo "$DEPS" | grep 'dask==2024.10' echo "$DEPS" | grep 'duckdb==1.3' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage combine + coverage report --fail-under=50 nightlies: strategy: @@ -179,5 +189,6 @@ jobs: echo "$DEPS" | grep 'dask.*@' - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow \ - --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage run -m pytest 
tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage combine + coverage report --fail-under=50 diff --git a/.github/workflows/pytest-ibis.yml b/.github/workflows/pytest-ibis.yml index 61ca076a3d..74132f8828 100644 --- a/.github/workflows/pytest-ibis.yml +++ b/.github/workflows/pytest-ibis.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -40,4 +41,4 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors ibis + run: pytest tests --nw-backends ibis diff --git a/.github/workflows/pytest-modin.yml b/.github/workflows/pytest-modin.yml index e20aff8539..923ce920e6 100644 --- a/.github/workflows/pytest-modin.yml +++ b/.github/workflows/pytest-modin.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -38,4 +39,4 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors modin[pyarrow] + run: pytest tests --nw-backends modin[pyarrow] diff --git a/.github/workflows/pytest-pyspark.yml b/.github/workflows/pytest-pyspark.yml index 7aae7c599d..bf4e412e4a 100644 --- a/.github/workflows/pytest-pyspark.yml +++ b/.github/workflows/pytest-pyspark.yml @@ -14,6 +14,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -44,7 +45,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals/_spark_like --cov-fail-under=95 --runslow --constructors pyspark + run: | + coverage run -m pytest tests --runslow --nw-backends pyspark + coverage combine + coverage report --fail-under=95 --include "narwhals/_spark_like/*" pytest-pyspark-min-version-constructor: @@ -71,7 +75,7 @@ jobs: - name: 
show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors pyspark + run: pytest tests --nw-backends pyspark pytest-pyspark-connect-constructor: strategy: @@ -137,7 +141,10 @@ jobs: echo "Spark Connect server started" - name: Run pytest - run: pytest tests --cov=narwhals/_spark_like --cov-fail-under=95 --runslow --constructors "pyspark[connect]" + run: | + coverage run -m pytest tests --runslow --nw-backends "pyspark[connect]" + coverage combine + coverage report --fail-under=95 --include="narwhals/_spark_like/*" - name: Stop Spark Connect server if: always() diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9da8bf293b..b308015a51 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -34,7 +35,15 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 --constructors=pandas,pyarrow,polars[eager],polars[lazy] + env: + # coverage's execv/fork patches raise on Windows; collapse to `subprocess` + # there (coverage dedupes) and keep the default values on Linux. + COVERAGE_PATCH_EXECV: ${{ matrix.os == 'windows-latest' && 'subprocess' || 'execv' }} + COVERAGE_PATCH_FORK: ${{ matrix.os == 'windows-latest' && 'subprocess' || 'fork' }} + run: | + coverage run -m pytest tests --nw-backends=pandas,pyarrow,polars[eager],polars[lazy] + coverage combine + coverage report --fail-under=75 - name: install-test-plugin run: uv pip install -e test-plugin/. @@ -44,6 +53,11 @@ jobs: python-version: ["3.10", "3.12"] os: [windows-latest] runs-on: ${{ matrix.os }} + env: + # coverage's execv/fork patches raise on Windows; collapse them to `subprocess` + # in the pyproject `patch` list (coverage dedupes). 
+ COVERAGE_PATCH_EXECV: subprocess + COVERAGE_PATCH_FORK: subprocess steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -64,7 +78,9 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=95 pytest-full-coverage: strategy: @@ -95,7 +111,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=100 - name: Run doctests # reprs differ between versions, so we only run doctests on the latest Python if: matrix.python-version == '3.13' @@ -124,20 +143,20 @@ jobs: uv pip install -e ".[pandas]" --group tests uv pip freeze - name: Run pytest (pandas and pandas[nullable]) - run: pytest tests --runslow --constructors=pandas,pandas[nullable] + run: pytest tests --runslow --nw-backends=pandas,pandas[nullable] - name: install-more-reqs run: | uv pip install -U pyarrow uv pip freeze - name: Run pytest (pandas[pyarrow] and pyarrow) - run: pytest tests --runslow --constructors=pandas[pyarrow],pyarrow + run: pytest tests --runslow --nw-backends=pandas[pyarrow],pyarrow - name: install-polars run: | uv pip uninstall pandas pyarrow uv pip install 
polars uv pip freeze - name: Run pytest (polars) - run: pytest tests --runslow --constructors=polars[eager],polars[lazy] + run: pytest tests --runslow --nw-backends=polars[eager],polars[lazy] python-314: strategy: @@ -161,7 +180,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --runslow --durations=30 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --cov-fail-under=50 + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=50 python-314t: strategy: @@ -187,4 +209,7 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --runslow --durations=30 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow --cov-fail-under=50 + run: | + coverage run -m pytest tests --runslow --durations=30 --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow + coverage combine + coverage report --fail-under=50 diff --git a/.github/workflows/random_ci_pytest.yml b/.github/workflows/random_ci_pytest.yml index e48909d3f6..74ae375309 100644 --- a/.github/workflows/random_ci_pytest.yml +++ b/.github/workflows/random_ci_pytest.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml permissions: contents: read @@ -39,5 +40,6 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 \ - --constructors=pandas,pyarrow,polars[eager],polars[lazy] + coverage run -m pytest tests --nw-backends=pandas,pyarrow,polars[eager],polars[lazy] + coverage combine + coverage report --fail-under=75 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98a274e4ed..3dc3129c66 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -149,7 
+149,7 @@ If you add code that should be tested, please add tests. - To run tests, run `pytest`. To check coverage: `pytest --cov=narwhals` - To run tests on the doctests, use `pytest narwhals --doctest-modules` -- To run unit tests and doctests at the same time, run `pytest tests narwhals --cov=narwhals --doctest-modules` +- To run unit tests and doctests at the same time, run `pytest tests narwhals --doctest-modules` - To run tests multiprocessed, you may also want to use [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) (optional) -- To choose which backends to run tests with you, you can use the `--constructors` flag: -- To only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars` +- To choose which backends to run tests with, you can use the `--nw-backends` flag: +- To only run tests for pandas, Polars, and PyArrow, use `pytest --nw-backends=pandas,pyarrow,polars` diff --git a/Makefile b/Makefile index 90538ea1a0..cdc85888f3 100644 --- a/Makefile +++ b/Makefile @@ -41,3 +41,15 @@ docs-serve: # Build and serve the docs locally $(VENV_BIN)/uv run --no-sync utils/generate_backend_completeness.py $(VENV_BIN)/uv run --no-sync utils/generate_zen_content.py $(VENV_BIN)/uv run --no-sync zensical serve + +.PHONY: test +test: ## Run unit tests + $(VENV_BIN)/uv pip install \ + --upgrade \ + --editable test-plugin/. 
\ + --editable .[ibis,modin,pyspark] \ + --group core \ + --group tests + $(VENV_BIN)/uv run --no-sync coverage run -m pytest tests --all-nw-backends --numprocesses=logical + $(VENV_BIN)/uv run --no-sync coverage combine + $(VENV_BIN)/uv run --no-sync coverage report --fail-under=95 diff --git a/docs/api-reference/testing.md b/docs/api-reference/testing.md index db83c6930e..0ee8ec5f36 100644 --- a/docs/api-reference/testing.md +++ b/docs/api-reference/testing.md @@ -1,8 +1,87 @@ # `narwhals.testing` +## Assertions + ::: narwhals.testing handler: python options: + show_root_heading: false + heading_level: 3 members: - assert_frame_equal - assert_series_equal + +## `pytest` plugin + +Narwhals registers a pytest plugin that exposes parametrized fixtures with callables +to build Narwhals frames from a column-oriented Python `dict`. + +### Available fixtures + +| Fixture | Backends | +|---|---| +| `nw_frame` | every selected backend (eager + lazy) | +| `nw_lazyframe` | only lazy backends | +| `nw_dataframe` | only eager backends | +| `nw_pandas_like_frame` | pandas-like backends | + +### Pytest options + +The backend selection is controlled by the following CLI options: + +* `--nw-backends=pandas,polars[lazy],duckdb`: comma-separated list. + Defaults to the following list: `pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe,ibis` + intersected with the backends installed in the current environment. +* `--all-nw-backends`: shortcut for "every **CPU** backend that is installed". +* `--use-nw-external-constructor`: Skip narwhals.testing's parametrisation and let + another plugin provide the `constructor*` fixtures. + +Set the `NARWHALS_DEFAULT_BACKENDS` environment variable to override the default +list (useful e.g. when running under `cudf.pandas`). + +### Quick start + +The plugin auto-loads as soon as you `pip install narwhals`. 
Just write a test: + +```python +from typing import TYPE_CHECKING + +import narwhals as nw +import narwhals.stable.v2 as nw_v2 + +if TYPE_CHECKING: + from narwhals.testing.typing import Data, DataFrameConstructor, LazyFrameConstructor + + +def test_shape(nw_dataframe: DataFrameConstructor) -> None: + data: Data = {"x": [1, 2, 3]} + df = nw_dataframe(data, namespace=nw) + assert df.shape == (3, 1) + + +def test_laziness(nw_lazyframe: LazyFrameConstructor) -> None: + data: Data = {"x": [1, 2, 3]} + lf = nw_lazyframe(data, namespace=nw_v2) + assert isinstance(lf, nw_v2.LazyFrame) +``` + +The fixtures are parametrised against every supported backend that is installed +in the current environment. Filter the matrix on the command line: + +```bash +pytest --nw-backends="pandas,polars[lazy]" +pytest --all-nw-backends +``` + +## Type aliases + +::: narwhals.testing.typing + handler: python + options: + show_root_heading: false + heading_level: 3 + members: + - Data + - FrameConstructor + - DataFrameConstructor + - LazyFrameConstructor diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index a154a1a3f8..bc0a1ca26a 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -8,7 +8,12 @@ if TYPE_CHECKING: from typing_extensions import TypeAlias - from narwhals._native import NativeDataFrame, NativeDuckDB, NativeLazyFrame + from narwhals._native import ( + NativeDataFrame, + NativeDuckDB, + NativeIbis, + NativeLazyFrame, + ) from narwhals.stable.v1 import DataFrame, Expr, LazyFrame, Series class DataFrameLike(Protocol): @@ -25,7 +30,9 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike", "NativeDuckDB"] +IntoDataFrame: TypeAlias = Union[ + "NativeDataFrame", "DataFrameLike", "NativeDuckDB", "NativeIbis" +] """Anything which can be converted to a Narwhals DataFrame. 
Use this if your function accepts a narwhalifiable object but doesn't care about its backend. diff --git a/narwhals/testing/__init__.py b/narwhals/testing/__init__.py index 649463383f..6eb8c0b0d0 100644 --- a/narwhals/testing/__init__.py +++ b/narwhals/testing/__init__.py @@ -2,5 +2,6 @@ from narwhals.testing.asserts.frame import assert_frame_equal from narwhals.testing.asserts.series import assert_series_equal +from narwhals.testing.constructors import frame_constructor -__all__ = ("assert_frame_equal", "assert_series_equal") +__all__ = ("assert_frame_equal", "assert_series_equal", "frame_constructor") diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py index 64eec42abc..9386abad13 100644 --- a/narwhals/testing/asserts/frame.py +++ b/narwhals/testing/asserts/frame.py @@ -13,7 +13,6 @@ if TYPE_CHECKING: from narwhals._typing import Arrow, IntoBackend, Pandas, Polars - from narwhals.typing import DataFrameT, LazyFrameT GUARANTEES_ROW_ORDER = { Implementation.PANDAS, @@ -26,8 +25,8 @@ def assert_frame_equal( - left: DataFrameT | LazyFrameT, - right: DataFrameT | LazyFrameT, + left: DataFrame[Any] | LazyFrame[Any], + right: DataFrame[Any] | LazyFrame[Any], *, check_row_order: bool = True, check_column_order: bool = True, @@ -145,8 +144,8 @@ def assert_frame_equal( def _check_correct_input_type( # noqa: RET503 - left: DataFrameT | LazyFrameT, - right: DataFrameT | LazyFrameT, + left: DataFrame[Any] | LazyFrame[Any], + right: DataFrame[Any] | LazyFrame[Any], backend: IntoBackend[Polars | Pandas | Arrow] | None, ) -> tuple[DataFrame[Any], DataFrame[Any]]: # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/py-polars/src/polars/testing/asserts/frame.py#L15-L17 @@ -165,8 +164,8 @@ def _check_correct_input_type( # noqa: RET503 def _assert_dataframe_equal( - left: DataFrameT, - right: DataFrameT, + left: DataFrame[Any], + right: DataFrame[Any], impl: Implementation, *, check_row_order: bool, @@ -232,7 
+231,11 @@ def _assert_dataframe_equal( def _check_schema_equal( - left: DataFrameT, right: DataFrameT, *, check_dtypes: bool, check_column_order: bool + left: DataFrame[Any], + right: DataFrame[Any], + *, + check_dtypes: bool, + check_column_order: bool, ) -> None: """Compares DataFrame schema based on specified criteria. diff --git a/narwhals/testing/constructors.py b/narwhals/testing/constructors.py new file mode 100644 index 0000000000..1ea6737d29 --- /dev/null +++ b/narwhals/testing/constructors.py @@ -0,0 +1,701 @@ +"""Constructor registry for `narwhals.testing`. + +Each constructor wraps one backend library (pandas, Polars, DuckDB, ...) and +knows how to turn a column-oriented `dict` into a native frame. + +Registration is explicit: wrap a plain builder function with `@frame_constructor.register(...)`. +The decorator instantiates a [`narwhals.testing.frame_constructor`][] with the +declared metadata and stores it in the shared `_registry`. + +## Adding a new constructor + +```py +from narwhals.testing import frame_constructor + + +@frame_constructor.register( + name="my_backend", + implementation=Implementation.MY_BACKEND, + requirements=("my_backend",), +) +def my_backend_lazy_constructor(obj: Data, /, **kwds: Any) -> IntoLazyFrame: + import my_backend + + return my_backend.from_dict(obj) +``` +""" + +from __future__ import annotations + +import os +import uuid +import warnings +from copy import deepcopy +from functools import lru_cache +from importlib.util import find_spec +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Generic, + Literal, + TypeVar, + cast, + overload, +) + +from narwhals._utils import Implementation, generate_temporary_column_name + +if TYPE_CHECKING: + from collections.abc import Iterable + from types import ModuleType + + import ibis + import pandas as pd + import polars as pl + import pyarrow as pa + from ibis.backends.duckdb import Backend as IbisDuckDBBackend + from pyspark.sql import SparkSession + from 
sqlframe.duckdb import DuckDBSession + from typing_extensions import Concatenate, TypeAlias + + from narwhals import DataFrame, LazyFrame + from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame + from narwhals.testing.typing import Data + from narwhals.typing import ( + IntoDataFrame, + IntoDataFrameT, + IntoFrame, + IntoLazyFrame, + IntoLazyFrameT, + ) + + +__all__ = ( + "available_backends", + "available_cpu_backends", + "frame_constructor", + "get_backend_constructor", + "is_backend_available", + "prepare_backends", + "pyspark_session", + "sqlframe_session", +) + +T_co = TypeVar("T_co", covariant=True, bound="IntoFrame") +R = TypeVar("R", bound="IntoFrame") + + +class frame_constructor(Generic[T_co]): # noqa: N801 + """Callable wrapper around a backend frame builder. + + Turns a column-oriented `dict` (typed as [`Data`][narwhals.testing.typing.Data]) + into a native frame. Metadata (implementation, requirements, eager/lazy, + nullability, GPU need) lives on the instance, alongside the wrapped + `func`. Equality and hashing are keyed on `(type, name)`, so two lookups + of the same registered constructor compare equal. + + Warning: + Instances should be created via [`narwhals.testing.constructors.frame_constructor.register`][], + which is the only supported entry point. + + Direct instantiation is allowed but **does not** register the instance. + """ + + _registry: ClassVar[dict[str, frame_constructor[IntoFrame]]] = {} + + func: Callable[Concatenate[Data, ...], T_co] + + def __init__( + self, + func: Callable[Concatenate[Data, ...], T_co], + /, + *, + name: str, + implementation: Implementation, + requirements: tuple[str, ...] 
= (), + is_eager: bool = False, + is_nullable: bool = True, + needs_gpu: bool = False, + ) -> None: + self.func = func + self.name = name + self.implementation = implementation + self.requirements = requirements + self.is_eager = is_eager + self.is_nullable = is_nullable + self.needs_gpu = needs_gpu + + @classmethod + def register( + cls, + *, + name: str, + implementation: Implementation, + requirements: tuple[str, ...] = (), + is_eager: bool = False, + is_nullable: bool = True, + needs_gpu: bool = False, + ) -> Callable[[Callable[Concatenate[Data, ...], R]], frame_constructor[R]]: + """Decorator: register `func` as the constructor named `name`. + + Arguments: + name: The string identifier of the constructor (e.g. `"pandas[pyarrow]"`). + implementation: The [`Implementation`][] this constructor belongs to. + requirements: Package names that must be importable for this constructor + to be available (checked via `importlib.util.find_spec`). + is_eager: Whether the backend returns an eager dataframe. + is_nullable: Whether the backend has native null support. + needs_gpu: Whether the backend requires GPU hardware. + + Returns: + A decorator that replaces `func` with a `frame_constructor` + instance registered into the shared `_registry`. + """ + + def decorator(func: Callable[Concatenate[Data, ...], R]) -> frame_constructor[R]: + inst: frame_constructor[R] = frame_constructor( + func, + name=name, + implementation=implementation, + requirements=requirements, + is_eager=is_eager, + is_nullable=is_nullable, + needs_gpu=needs_gpu, + ) + cls._registry[name] = inst + return inst + + return decorator + + @overload + def __call__( + self: frame_constructor[IntoDataFrameT], + obj: Data, + /, + namespace: ModuleType, + **kwds: Any, + ) -> DataFrame[IntoDataFrameT]: ... + @overload + def __call__( + self: frame_constructor[IntoLazyFrameT], + obj: Data, + /, + namespace: ModuleType, + **kwds: Any, + ) -> LazyFrame[IntoLazyFrameT]: ... 
+ @overload + def __call__( + self: frame_constructor[IntoFrame], + obj: Data, + /, + namespace: ModuleType, + **kwds: Any, + ) -> DataFrame[Any] | LazyFrame[Any]: ... + + def __call__( + self, obj: Data, /, namespace: ModuleType, **kwds: Any + ) -> DataFrame[Any] | LazyFrame[Any]: + """Build a native frame and wrap it with `namespace.from_native`. + + Arguments: + obj: Column-oriented mapping passed to the wrapped builder. + namespace: A narwhals namespace (e.g. `narwhals`, `narwhals.stable.v1`) + whose `from_native` performs the wrapping. + **kwds: Forwarded to the wrapped builder. + """ + native = self.func(obj, **kwds) + return namespace.from_native(native) # type: ignore[no-any-return] + + @property + def identifier(self) -> str: + """Instance-level string identifier for test IDs.""" + return self.name + + @property + def is_lazy(self) -> bool: + """Whether this constructor produces a lazy native frame.""" + return not self.is_eager + + @property + def is_pandas(self) -> bool: + """Whether this is one of the pandas constructors.""" + return self.implementation.is_pandas() + + @property + def is_modin(self) -> bool: + """Whether this is one of the modin constructors.""" + return self.implementation.is_modin() + + @property + def is_cudf(self) -> bool: + """Whether this is the cudf constructor.""" + return self.implementation.is_cudf() + + @property + def is_pandas_like(self) -> bool: + """Whether this constructor produces a pandas-like dataframe (pandas, modin, cudf).""" + return self.implementation.is_pandas_like() + + @property + def is_polars(self) -> bool: + """Whether this is one of the polars constructors.""" + return self.implementation.is_polars() + + @property + def is_pyarrow(self) -> bool: + """Whether this is the pyarrow table constructor.""" + return self.implementation.is_pyarrow() + + @property + def is_dask(self) -> bool: + """Whether this is the dask constructor.""" + return self.implementation.is_dask() + + @property + def is_duckdb(self) -> 
bool: + """Whether this is the duckdb constructor.""" + return self.implementation.is_duckdb() + + @property + def is_pyspark(self) -> bool: + """Whether this is one of the pyspark constructors.""" + impl = self.implementation + return impl.is_pyspark() or impl.is_pyspark_connect() + + @property + def is_sqlframe(self) -> bool: + """Whether this is the sqlframe constructor.""" + return self.implementation.is_sqlframe() + + @property + def is_ibis(self) -> bool: + """Whether this is the ibis constructor.""" + return self.implementation.is_ibis() + + @property + def is_spark_like(self) -> bool: + """Whether this constructor uses a spark-like backend (pyspark, sqlframe).""" + return self.implementation.is_spark_like() + + @property + def needs_pyarrow(self) -> bool: + """Whether this constructor requires `pyarrow` to be installed.""" + return "pyarrow" in self.requirements + + @property + def is_available(self) -> bool: + """Whether every package this constructor needs is importable.""" + return is_backend_available(*self.requirements) + + def __str__(self) -> str: + # NOTE: This is a temporary hack + # TODO(FBruzzesi): Remove once all the `"backend" in str(constructor)` + # statements in the test suite are properly replaced + return self.func.__name__ + + def __repr__(self) -> str: + return f"{type(self).__name__}(name={self.name!r})" + + def __hash__(self) -> int: + return hash((type(self), self.name)) + + def __eq__(self, other: object) -> bool: + return isinstance(other, frame_constructor) and self.name == other.name + + +# Eager constructors + + +@frame_constructor.register( + name="pandas", + implementation=Implementation.PANDAS, + requirements=("pandas",), + is_eager=True, + is_nullable=False, +) +def pandas_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds) + + +@frame_constructor.register( + name="pandas[nullable]", + implementation=Implementation.PANDAS, + requirements=("pandas",), + 
is_eager=True, +) +def pandas_nullable_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds).convert_dtypes(dtype_backend="numpy_nullable") + + +@frame_constructor.register( + name="pandas[pyarrow]", + implementation=Implementation.PANDAS, + requirements=("pandas", "pyarrow"), + is_eager=True, +) +def pandas_pyarrow_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds).convert_dtypes(dtype_backend="pyarrow") + + +@frame_constructor.register( + name="pyarrow", + implementation=Implementation.PYARROW, + requirements=("pyarrow",), + is_eager=True, +) +def pyarrow_table_constructor(obj: Data, /, **kwds: Any) -> pa.Table: + import pyarrow as pa + + return pa.table(obj, **kwds) + + +@frame_constructor.register( + name="modin", + implementation=Implementation.MODIN, + requirements=("modin",), + is_eager=True, + is_nullable=False, +) +def modin_constructor(obj: Data, /, **kwds: Any) -> IntoDataFrame: # pragma: no cover + import modin.pandas as mpd + import pandas as pd + + return cast("IntoDataFrame", mpd.DataFrame(pd.DataFrame(obj, **kwds))) + + +@frame_constructor.register( + name="modin[pyarrow]", + implementation=Implementation.MODIN, + requirements=("modin", "pyarrow"), + is_eager=True, +) +def modin_pyarrow_constructor( + obj: Data, /, **kwds: Any +) -> IntoDataFrame: # pragma: no cover + import modin.pandas as mpd + import pandas as pd + + df = mpd.DataFrame(pd.DataFrame(obj, **kwds)).convert_dtypes(dtype_backend="pyarrow") + return cast("IntoDataFrame", df) + + +@frame_constructor.register( + name="cudf", + implementation=Implementation.CUDF, + requirements=("cudf",), + is_eager=True, + needs_gpu=True, +) +def cudf_constructor(obj: Data, /, **kwds: Any) -> IntoDataFrame: # pragma: no cover + import cudf + + return cast("IntoDataFrame", cudf.DataFrame(obj, **kwds)) + + +@frame_constructor.register( + name="polars[eager]", + 
implementation=Implementation.POLARS, + requirements=("polars",), + is_eager=True, +) +def polars_eager_constructor(obj: Data, /, **kwds: Any) -> pl.DataFrame: + import polars as pl + + return pl.DataFrame(obj, **kwds) + + +# Lazy constructors + + +@frame_constructor.register( + name="polars[lazy]", implementation=Implementation.POLARS, requirements=("polars",) +) +def polars_lazy_constructor(obj: Data, /, **kwds: Any) -> pl.LazyFrame: + import polars as pl + + return pl.LazyFrame(obj, **kwds) + + +@frame_constructor.register( + name="dask", + implementation=Implementation.DASK, + requirements=("dask",), + is_nullable=False, +) +def dask_lazy_p2_constructor( + obj: Data, /, npartitions: int = 2, **kwds: Any +) -> NativeDask: # pragma: no cover + import dask.dataframe as dd + + return cast("NativeDask", dd.from_dict(obj, npartitions=npartitions, **kwds)) + + +@frame_constructor.register( + name="duckdb", + implementation=Implementation.DUCKDB, + requirements=("duckdb", "pyarrow"), +) +def duckdb_lazy_constructor(obj: Data, /, **kwds: Any) -> NativeDuckDB: + import duckdb + import pyarrow as pa + + duckdb.sql("""set timezone = 'UTC'""") + _df = pa.table(obj, **kwds) + return duckdb.sql("select * from _df") + + +def _pyspark_build(obj: Data, /, **kwds: Any) -> NativePySpark: # pragma: no cover + session = _pyspark_session_lazy() + _obj = deepcopy(obj) + index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) + _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) + result = ( + session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()], **kwds) + .repartition(2) + .orderBy(index_col_name) + .drop(index_col_name) + ) + return cast("NativePySpark", result) + + +@frame_constructor.register( + name="pyspark", implementation=Implementation.PYSPARK, requirements=("pyspark",) +) +def pyspark_lazy_constructor( + obj: Data, /, **kwds: Any +) -> NativePySpark: # pragma: no cover + return _pyspark_build(obj, **kwds) + + 
+@frame_constructor.register( + name="pyspark[connect]", + implementation=Implementation.PYSPARK_CONNECT, + requirements=("pyspark",), +) +def pyspark_connect_lazy_constructor( + obj: Data, /, **kwds: Any +) -> NativePySpark: # pragma: no cover + return _pyspark_build(obj, **kwds) + + +@frame_constructor.register( + name="sqlframe", + implementation=Implementation.SQLFRAME, + requirements=("sqlframe", "duckdb"), +) +def sqlframe_pyspark_lazy_constructor(obj: Data, /, **kwds: Any) -> NativeSQLFrame: + session = sqlframe_session() + return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()], **kwds) + + +@frame_constructor.register( + name="ibis", + implementation=Implementation.IBIS, + requirements=("ibis", "duckdb", "pyarrow"), +) +def ibis_lazy_constructor(obj: Data, /, **kwds: Any) -> ibis.Table: # pragma: no cover + import pyarrow as pa + + table = pa.table(obj) + table_name = str(uuid.uuid4()) + return _ibis_backend().create_table(table_name, table, **kwds) + + +DEFAULT_BACKENDS: frozenset[str] = frozenset( + { + "pandas", + "pandas[pyarrow]", + "polars[eager]", + "pyarrow", + "duckdb", + "sqlframe", + "ibis", + } +) +"""Subset of backends enabled by default for parametrised tests when the +user does not pass `--nw-backends` (mirrors the historical Narwhals defaults). +""" + + +def available_backends() -> frozenset[str]: + """Return the names of every constructor whose backend is importable. + + Examples: + >>> from narwhals.testing.constructors import available_backends + >>> "pandas" in available_backends() + True + """ + return frozenset( + name for name, c in frame_constructor._registry.items() if c.is_available + ) + + +def available_cpu_backends() -> frozenset[str]: # pragma: no cover + """Return the names of every CPU constructor whose backend is importable. 
+ + Examples: + >>> from narwhals.testing.constructors import available_cpu_backends + >>> "pandas" in available_cpu_backends() + True + """ + return frozenset( + name + for name, c in frame_constructor._registry.items() + if c.is_available and not c.needs_gpu + ) + + +EagerName: TypeAlias = Literal[ + "pandas", + "pandas[nullable]", + "pandas[pyarrow]", + "modin", + "modin[pyarrow]", + "cudf", + "polars[eager]", + "pyarrow", +] +LazyName: TypeAlias = Literal[ + "polars[lazy]", "dask", "duckdb", "pyspark", "pyspark[connect]", "sqlframe", "ibis" +] + + +@overload +def get_backend_constructor(name: EagerName) -> frame_constructor[IntoDataFrame]: ... +@overload +def get_backend_constructor(name: LazyName) -> frame_constructor[IntoLazyFrame]: ... +@overload +def get_backend_constructor(name: str) -> frame_constructor[IntoFrame]: ... + + +def get_backend_constructor(name: str) -> frame_constructor[IntoFrame]: + """Return the registered constructor for `name`. + + Arguments: + name: The string identifier of a registered constructor + (e.g. `"pandas[pyarrow]"`). + + Raises: + ValueError: If `name` is not a registered constructor identifier. + + Examples: + >>> from narwhals.testing.constructors import get_backend_constructor + >>> get_backend_constructor("pandas") + frame_constructor(name='pandas') + """ + try: + return frame_constructor._registry[name] + except KeyError as exc: + valid = sorted(frame_constructor._registry) + msg = f"Unknown constructor {name!r}. Expected one of: {valid}." + raise ValueError(msg) from exc + + +def prepare_backends( + *, include: Iterable[str] | None = None, exclude: Iterable[str] | None = None +) -> list[frame_constructor[IntoFrame]]: + """Return available constructors, optionally filtered. + + Note: + `exclude` is given precedence in the selection. + + Arguments: + include: If given, only return backends whose name is in this set. + exclude: If given, remove backends whose name is in this set. 
+ + Examples: + >>> from narwhals.testing.constructors import prepare_backends + >>> backends = prepare_backends(include=["pandas", "polars[eager]"]) + """ + available = available_backends() + candidates: list[frame_constructor[Any]] = [ + c for name, c in frame_constructor._registry.items() if name in available + ] + + include_set: frozenset[str] = ( + frozenset(include) if include is not None else frozenset() + ) + exclude_set: frozenset[str] = ( + frozenset(exclude) if exclude is not None else frozenset() + ) + + if unknown := (include_set.union(exclude_set).difference(available)): + msg = f"The following names are not known constructors: {sorted(unknown)}" + raise ValueError(msg) + + if include is not None: + candidates = [c for c in candidates if c.name in include_set] + if exclude is not None: + candidates = [c for c in candidates if c.name not in exclude_set] + return sorted(candidates, key=lambda c: c.name) + + +def is_backend_available(*packages: str) -> bool: + """Whether every package in `packages` can be imported in this environment. 
+ + Examples: + >>> from narwhals.testing.constructors import is_backend_available + >>> is_backend_available("pandas") + True + """ + return all(find_spec(pkg) is not None for pkg in packages) + + +def sqlframe_session() -> DuckDBSession: + """Return a fresh in-memory `sqlframe` DuckDB session.""" + from sqlframe.duckdb import DuckDBSession + + # NOTE: `__new__` override inferred by `pyright` only + # https://github.com/eakmanrq/sqlframe/blob/772b3a6bfe5a1ffd569b7749d84bea2f3a314510/sqlframe/base/session.py#L181-L184 + return cast("DuckDBSession", DuckDBSession()) # type: ignore[redundant-cast] + + +def pyspark_session() -> SparkSession: # pragma: no cover + """Return a singleton local `pyspark` (or pyspark[connect]) session.""" + if is_spark_connect := os.environ.get("SPARK_CONNECT", None): + from pyspark.sql.connect.session import SparkSession + else: + from pyspark.sql import SparkSession + builder = cast("SparkSession.Builder", SparkSession.builder).appName("unit-tests") + builder = ( + builder.remote(f"sc://localhost:{os.environ.get('SPARK_PORT', '15002')}") + if is_spark_connect + else builder.master("local[1]").config("spark.ui.enabled", "false") + ) + return ( + builder.config("spark.default.parallelism", "1") + .config("spark.sql.shuffle.partitions", "2") + .config("spark.sql.session.timeZone", "UTC") + .getOrCreate() + ) + + +@lru_cache(maxsize=1) +def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover + """Cached singleton in-memory ibis backend, so all tables share one database.""" + import ibis + + return ibis.duckdb.connect() + + +@lru_cache(maxsize=1) +def _pyspark_session_lazy() -> SparkSession: # pragma: no cover + """Cached pyspark session; created on first use, stopped at interpreter exit.""" + from atexit import register + + with warnings.catch_warnings(): + # The spark session seems to trigger a polars warning. 
+ warnings.filterwarnings( + "ignore", r"Using fork\(\) can cause Polars", category=RuntimeWarning + ) + session = pyspark_session() + register(session.stop) + return session diff --git a/narwhals/testing/pytest_plugin.py b/narwhals/testing/pytest_plugin.py new file mode 100644 index 0000000000..28e9701742 --- /dev/null +++ b/narwhals/testing/pytest_plugin.py @@ -0,0 +1,138 @@ +"""Narwhals pytest plugin - auto-parametrises fixtures. + +NOTE: All imports from `narwhals.*` are deferred inside the hook functions so that +the entry-point module can be loaded by pytest without pulling in the narwhals package tree. + +This is critical because entry-point plugins are loaded *before* `coveragepy` starts +coverage measurement; any narwhals module imported at that stage would have its +module-level code (class definitions, constants, etc.) executed outside the coverage tracer. +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import pytest + + from narwhals.testing.typing import FrameConstructor + + +_MIN_PANDAS_NULLABLE_VERSION: tuple[int, ...] = (2, 0, 0) +"""`pandas.convert_dtypes(dtype_backend=...)` requires pandas >= 2.0.0.""" + + +def _pandas_version() -> tuple[int, ...]: + try: + import pandas as pd + except ImportError: # pragma: no cover + return (0, 0, 0) + + from narwhals._utils import parse_version + + return parse_version(pd.__version__) + + +def _default_backend_ids() -> list[str]: + """Resolve the default `--nw-backends` value for the current environment. + + Honours `NARWHALS_DEFAULT_BACKENDS` if set, otherwise restricts + [`DEFAULT_BACKENDS`][] to backends whose libraries are importable. 
+    """
+    if env := os.environ.get("NARWHALS_DEFAULT_BACKENDS"):  # pragma: no cover
+        return env.split(",")
+    from narwhals.testing.constructors import DEFAULT_BACKENDS, frame_constructor
+
+    return [
+        name
+        for name, constructor in frame_constructor._registry.items()
+        if constructor.is_available and name in DEFAULT_BACKENDS
+    ]
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    from narwhals.testing.constructors import DEFAULT_BACKENDS
+
+    group = parser.getgroup("narwhals", "narwhals-testing")
+    defaults = ", ".join(f"'{c}'" for c in sorted(DEFAULT_BACKENDS))
+    group.addoption(
+        "--nw-backends",
+        action="store",
+        default=",".join(_default_backend_ids()),
+        type=str,
+        help=(
+            "Comma-separated list of (data|lazy) frame backend constructors to "
+            f"parametrise. Defaults to the installed subset of ({defaults})"
+        ),
+    )
+    group.addoption(
+        "--all-nw-backends",
+        action="store_true",
+        default=False,
+        help=("Run tests against every installed CPU backend (overrides --nw-backends)."),
+    )
+    # Escape hatch for downstream test suites that ship their own backend plugin.
+    # When set, this plugin still adds the CLI options but stops parametrising the fixtures.
+    group.addoption(
+        "--use-external-nw-backend",
+        action="store_true",
+        default=False,
+        help=(
+            "Skip narwhals-testing's parametrisation and let another plugin "
+            "provide the `nw_*frame_constructor` fixtures."
+        ),
+    )
+
+
+def _select_backends(config: pytest.Config) -> list[FrameConstructor]:  # pragma: no cover
+    from narwhals.testing.constructors import available_cpu_backends, prepare_backends
+
+    _all_cpu_exclusions = frozenset({"modin", "pyspark[connect]"})
+
+    if config.getoption("all_nw_backends"):
+        selected = prepare_backends(
+            include=available_cpu_backends(), exclude=_all_cpu_exclusions
+        )
+    else:
+        opt = cast("str", config.getoption("nw_backends"))
+        names = [c for c in opt.split(",") if c]
+        selected = prepare_backends(include=names)
+
+    if _pandas_version() < _MIN_PANDAS_NULLABLE_VERSION:
+        _pandas_nullables = {"pandas[nullable]", "pandas[pyarrow]"}
+        selected = [c for c in selected if c.name not in _pandas_nullables]
+    return selected
+
+
+def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
+    if metafunc.config.getoption("use_external_nw_backend"):  # pragma: no cover
+        return
+
+    fixturenames = set(metafunc.fixturenames)
+    if not fixturenames & {
+        "nw_frame",
+        "nw_dataframe",
+        "nw_lazyframe",
+        "nw_pandas_like_frame",
+    }:
+        return
+
+    selected = _select_backends(metafunc.config)
+
+    if "nw_dataframe" in fixturenames:
+        params = [c for c in selected if c.is_eager]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_dataframe", params, ids=ids)
+    elif "nw_lazyframe" in fixturenames:  # pragma: no cover
+        params = [c for c in selected if not c.is_eager]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_lazyframe", params, ids=ids)
+    elif "nw_frame" in fixturenames:
+        metafunc.parametrize("nw_frame", selected, ids=[c.name for c in selected])
+    elif "nw_pandas_like_frame" in fixturenames:
+        params = [c for c in selected if c.is_eager and c.is_pandas_like]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_pandas_like_frame", params, ids=ids)
+    else:  # pragma: no cover
+        ...
diff --git a/narwhals/testing/typing.py b/narwhals/testing/typing.py new file mode 100644 index 0000000000..f03e946887 --- /dev/null +++ b/narwhals/testing/typing.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + from narwhals.testing.constructors import frame_constructor + from narwhals.typing import IntoDataFrame, IntoFrame, IntoLazyFrame + + +__all__ = ("Data", "DataFrameConstructor", "FrameConstructor", "LazyFrameConstructor") + +FrameConstructor: TypeAlias = "frame_constructor[IntoFrame]" +"""Type alias for a constructor that returns a native eager or lazy frame.""" + +DataFrameConstructor: TypeAlias = "frame_constructor[IntoDataFrame]" +"""Type alias for a constructor that returns an eager native dataframe.""" + +LazyFrameConstructor: TypeAlias = "frame_constructor[IntoLazyFrame]" +"""Type alias for a constructor that returns a lazy native frame.""" + +Data: TypeAlias = dict[str, Any] # TODO(Unassined): This should have a better annotation +"""A column-oriented mapping used as input to a frame constructor.""" diff --git a/pyproject.toml b/pyproject.toml index ef43aebc23..e69bf49f2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,10 @@ Documentation = "https://narwhals-dev.github.io/narwhals/" Repository = "https://github.com/narwhals-dev/narwhals" "Bug Tracker" = "https://github.com/narwhals-dev/narwhals/issues" +[project.entry-points.pytest11] +narwhals_testing = "narwhals.testing.pytest_plugin" +# See: https://docs.pytest.org/en/stable/how-to/writing_plugins.html#making-your-plugin-installable-by-others + [project.optional-dependencies] # These should be aligned with MIN_VERSIONS in narwhals/utils.py # Exception: modin, because `modin.__version__` isn't aligned with @@ -63,7 +67,6 @@ core = [ tests = [ "covdefaults", "pytest", - "pytest-cov", "pytest-env", "pytest-randomly", "pytest-xdist", @@ -298,7 +301,12 @@ env = [ ] 
[tool.coverage.run] +# execv and fork patches are unsupported on Windows (coverage raises), so Windows +# CI jobs set these env vars to "subprocess" — coverage dedupes the final list. +patch = ["${COVERAGE_PATCH_EXECV-execv}", "${COVERAGE_PATCH_FORK-fork}", "subprocess"] plugins = ["covdefaults"] +source = ["narwhals", "tests"] +parallel = true [tool.coverage.report] fail_under = 80 # This is just for local development, in CI we set it to 100 diff --git a/tests/conftest.py b/tests/conftest.py index 3fc3e91fa9..ed94521264 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,80 +1,37 @@ from __future__ import annotations -import os -import uuid -from copy import deepcopy -from functools import lru_cache from importlib.util import find_spec -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, cast import pytest import narwhals as nw -from narwhals._utils import Implementation, generate_temporary_column_name -from tests.utils import ID_PANDAS_LIKE, PANDAS_VERSION, pyspark_session, sqlframe_session +from narwhals._utils import Implementation + +# `narwhals.testing.pytest_plugin` registers itself via the `pytest11` entry point (see pyproject.toml) +# so it auto-loads as soon as Narwhals is installed. +# That plugin is what owns the `--constructors`, `--all-cpu-constructors`, and `--use-external-constructor` +# CLI options as well as parametrising the `constructor*` fixtures. 
if TYPE_CHECKING: from collections.abc import Sequence + from types import ModuleType - import ibis - import pandas as pd - import polars as pl - import pyarrow as pa - from ibis.backends.duckdb import Backend as IbisDuckDBBackend - from typing_extensions import TypeAlias - - from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame from narwhals._typing import EagerAllowed - from narwhals.typing import IntoDataFrame, NonNestedDType - from tests.utils import ( - Constructor, - ConstructorEager, - ConstructorLazy, - NestedOrEnumDType, - ) - - Data: TypeAlias = "dict[str, list[Any]]" - + from narwhals.dataframe import DataFrame, LazyFrame + from narwhals.testing.constructors import frame_constructor + from narwhals.testing.typing import Data, DataFrameConstructor, FrameConstructor + from narwhals.typing import IntoFrame, NonNestedDType + from tests.utils import NestedOrEnumDType -MIN_PANDAS_NULLABLE_VERSION = (2,) -# When testing cudf.pandas in Kaggle, we get an error if we try to run -# python -m cudf.pandas -m pytest --constructors=pandas. This gives us -# a way to run `python -m cudf.pandas -m pytest` and control which constructors -# get tested. 
-if default_constructors := os.environ.get( - "NARWHALS_DEFAULT_CONSTRUCTORS", None -): # pragma: no cover - DEFAULT_CONSTRUCTORS = default_constructors -else: - DEFAULT_CONSTRUCTORS = ( - "pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe,ibis" - ) +# Narwhals-internal pytest options (not part of the public testing plugin) def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" ) - parser.addoption( - "--all-cpu-constructors", - action="store_true", - default=False, - help="run tests with all cpu constructors", - ) - parser.addoption( - "--use-external-constructor", - action="store_true", - default=False, - help="run tests with external constructor", - ) - parser.addoption( - "--constructors", - action="store", - default=DEFAULT_CONSTRUCTORS, - type=str, - help="libraries to test", - ) def pytest_configure(config: pytest.Config) -> None: @@ -85,7 +42,6 @@ def pytest_collection_modifyitems( config: pytest.Config, items: Sequence[pytest.Function] ) -> None: # pragma: no cover if config.getoption("--runslow"): - # --runslow given in cli: do not skip slow tests return skip_slow = pytest.mark.skip(reason="need --runslow option to run") for item in items: @@ -93,235 +49,6 @@ def pytest_collection_modifyitems( item.add_marker(skip_slow) -def pandas_constructor(obj: Data) -> pd.DataFrame: - import pandas as pd - - return pd.DataFrame(obj) - - -def pandas_nullable_constructor(obj: Data) -> pd.DataFrame: - import pandas as pd - - return pd.DataFrame(obj).convert_dtypes(dtype_backend="numpy_nullable") - - -def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: - pytest.importorskip("pyarrow") - import pandas as pd - - return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") - - -def modin_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import modin.pandas as mpd - import pandas as pd - - df = mpd.DataFrame(pd.DataFrame(obj)) - return 
cast("IntoDataFrame", df) - - -def modin_pyarrow_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import modin.pandas as mpd - import pandas as pd - - df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("IntoDataFrame", df) - - -def cudf_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import cudf - - df = cudf.DataFrame(obj) - return cast("IntoDataFrame", df) - - -def polars_eager_constructor(obj: Data) -> pl.DataFrame: - pytest.importorskip("polars") - import polars as pl - - return pl.DataFrame(obj) - - -def polars_lazy_constructor(obj: Data) -> pl.LazyFrame: - import polars as pl - - return pl.LazyFrame(obj) - - -def duckdb_lazy_constructor(obj: dict[str, Any]) -> NativeDuckDB: - pytest.importorskip("duckdb") - pytest.importorskip("pyarrow") - import duckdb - import pyarrow as pa - - duckdb.sql("""set timezone = 'UTC'""") - - _df = pa.table(obj) - return duckdb.sql("select * from _df") - - -def dask_lazy_p1_constructor(obj: Data) -> NativeDask: # pragma: no cover - import dask.dataframe as dd - - return cast("NativeDask", dd.from_dict(obj, npartitions=1)) - - -def dask_lazy_p2_constructor(obj: Data) -> NativeDask: # pragma: no cover - import dask.dataframe as dd - - return cast("NativeDask", dd.from_dict(obj, npartitions=2)) - - -def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: - pytest.importorskip("pyarrow") - import pyarrow as pa - - return pa.table(obj) - - -def pyspark_lazy_constructor() -> Callable[[Data], NativePySpark]: # pragma: no cover - pytest.importorskip("pyspark") - import warnings - from atexit import register - - with warnings.catch_warnings(): - # The spark session seems to trigger a polars warning. 
- # Polars is imported in the tests, but not used in the spark operations - warnings.filterwarnings( - "ignore", r"Using fork\(\) can cause Polars", category=RuntimeWarning - ) - session = pyspark_session() - - register(session.stop) - - def _constructor(obj: Data) -> NativePySpark: - _obj = deepcopy(obj) - index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) - _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) - result = ( - session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()]) - .repartition(2) - .orderBy(index_col_name) - .drop(index_col_name) - ) - return cast("NativePySpark", result) - - return _constructor - - -def sqlframe_pyspark_lazy_constructor(obj: Data) -> NativeSQLFrame: # pragma: no cover - pytest.importorskip("sqlframe") - pytest.importorskip("duckdb") - session = sqlframe_session() - return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()]) - - -@lru_cache(maxsize=1) -def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover - """Cached (singleton) in-memory backend to ensure all tables exist within the same in-memory database.""" - import ibis - - return ibis.duckdb.connect() - - -def ibis_lazy_constructor(obj: Data) -> ibis.Table: # pragma: no cover - pytest.importorskip("ibis") - pytest.importorskip("polars") - import polars as pl - - ldf = pl.LazyFrame(obj) - table_name = str(uuid.uuid4()) - return _ibis_backend().create_table(table_name, ldf) - - -EAGER_CONSTRUCTORS: dict[str, ConstructorEager] = { - "pandas": pandas_constructor, - "pandas[nullable]": pandas_nullable_constructor, - "pandas[pyarrow]": pandas_pyarrow_constructor, - "pyarrow": pyarrow_table_constructor, - "modin": modin_constructor, - "modin[pyarrow]": modin_pyarrow_constructor, - "cudf": cudf_constructor, - "polars[eager]": polars_eager_constructor, -} -LAZY_CONSTRUCTORS: dict[str, ConstructorLazy] = { # pyrefly: ignore[bad-assignment] - "dask": dask_lazy_p2_constructor, - "polars[lazy]": 
polars_lazy_constructor, - "duckdb": duckdb_lazy_constructor, - "pyspark": pyspark_lazy_constructor, # type: ignore[dict-item] - "sqlframe": sqlframe_pyspark_lazy_constructor, - "ibis": ibis_lazy_constructor, -} -GPU_CONSTRUCTORS: dict[str, ConstructorEager] = {"cudf": cudf_constructor} - - -def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: - if metafunc.config.getoption("use_external_constructor"): # pragma: no cover - return # let the plugin handle this - if metafunc.config.getoption("all_cpu_constructors"): # pragma: no cover - selected_constructors: list[str] = [ - *iter(EAGER_CONSTRUCTORS.keys()), - *iter(LAZY_CONSTRUCTORS.keys()), - ] - selected_constructors = [ - x - for x in selected_constructors - if x not in GPU_CONSTRUCTORS - and x - not in { - "modin", # too slow - "spark[connect]", # complex local setup; can't run together with local spark - } - ] - else: # pragma: no cover - opt = cast("str", metafunc.config.getoption("constructors")) - selected_constructors = opt.split(",") - - eager_constructors: list[ConstructorEager] = [] - eager_constructors_ids: list[str] = [] - constructors: list[Constructor] = [] - constructors_ids: list[str] = [] - - for constructor in selected_constructors: - if ( - constructor in {"pandas[nullable]", "pandas[pyarrow]"} - and MIN_PANDAS_NULLABLE_VERSION > PANDAS_VERSION - ): - continue # pragma: no cover - - if constructor in EAGER_CONSTRUCTORS: - eager_constructors.append(EAGER_CONSTRUCTORS[constructor]) - eager_constructors_ids.append(constructor) - constructors.append(EAGER_CONSTRUCTORS[constructor]) - elif constructor in {"pyspark", "pyspark[connect]"}: # pragma: no cover - constructors.append(pyspark_lazy_constructor()) - elif constructor in LAZY_CONSTRUCTORS: - constructors.append(LAZY_CONSTRUCTORS[constructor]) - else: # pragma: no cover - msg = f"Expected one of {EAGER_CONSTRUCTORS.keys()} or {LAZY_CONSTRUCTORS.keys()}, got {constructor}" - raise ValueError(msg) - constructors_ids.append(constructor) - - 
if "constructor_eager" in metafunc.fixturenames: - metafunc.parametrize( - "constructor_eager", eager_constructors, ids=eager_constructors_ids - ) - elif "constructor" in metafunc.fixturenames: - metafunc.parametrize("constructor", constructors, ids=constructors_ids) - elif "constructor_pandas_like" in metafunc.fixturenames: - pandas_like_constructors = [] - pandas_like_constructors_ids = [] - for fn, name in zip(eager_constructors, eager_constructors_ids): - if name in ID_PANDAS_LIKE: - pandas_like_constructors.append(fn) - pandas_like_constructors_ids.append(name) - metafunc.parametrize( - "constructor_pandas_like", - pandas_like_constructors, - ids=pandas_like_constructors_ids, - ) - - TEST_EAGER_BACKENDS: list[EagerAllowed] = [] TEST_EAGER_BACKENDS.extend( (Implementation.POLARS, "polars") if find_spec("polars") is not None else () @@ -390,3 +117,64 @@ def non_nested_type(request: pytest.FixtureRequest) -> type[NonNestedDType]: def nested_dtype(request: pytest.FixtureRequest) -> NestedOrEnumDType: dtype: NestedOrEnumDType = request.param return dtype + + +# The following fixtures are aliases of those registered in `narwhals/testing/pytest_plugin.py`, +# wrapped so that calling them without an explicit `namespace` defaults to the main +# `narwhals` namespace. Tests can still pass `nw_v1` / `nw_v2` explicitly to opt in +# to a stable namespace; the legacy pattern `nw.from_native(constructor(data))` keeps +# working because `nw.from_native` is idempotent on narwhals objects. +# TODO(FBruzzesi): Drop these aliases once every test calls `nw_frame` / `nw_dataframe` +# directly with an explicit namespace. + + +class _PatchedFrameConstructor: + """Proxy over a `frame_constructor` defaulting `namespace` to `narwhals`. + + Delegates attribute access, `str()`, and `repr()` to the wrapped instance + so that test helpers (e.g. `constructor.is_nullable`, `"pandas" in str(constructor)`) + keep working unchanged. 
+ """ + + __slots__ = ("_inner",) + + def __init__(self, inner: frame_constructor[IntoFrame]) -> None: + self._inner = inner + + def __call__( + self, obj: Data, /, namespace: ModuleType = nw, **kwds: Any + ) -> DataFrame[Any] | LazyFrame[Any]: + return self._inner(obj, namespace=namespace, **kwds) + + def __getattr__(self, name: str) -> Any: + return getattr(self._inner, name) + + def __str__(self) -> str: + return str(self._inner) + + def __repr__(self) -> str: + return repr(self._inner) + + +class _PatchedDataFrameConstructor(_PatchedFrameConstructor): + def __call__( + self, obj: Data, /, namespace: ModuleType = nw, **kwds: Any + ) -> DataFrame[Any]: + return cast("DataFrame[Any]", self._inner(obj, namespace=namespace, **kwds)) + + +@pytest.fixture +def constructor(nw_frame: FrameConstructor) -> _PatchedFrameConstructor: + return _PatchedFrameConstructor(nw_frame) + + +@pytest.fixture +def constructor_eager(nw_dataframe: DataFrameConstructor) -> _PatchedDataFrameConstructor: + return _PatchedDataFrameConstructor(nw_dataframe) + + +@pytest.fixture +def constructor_pandas_like( + nw_pandas_like_frame: DataFrameConstructor, +) -> _PatchedDataFrameConstructor: + return _PatchedDataFrameConstructor(nw_pandas_like_frame) diff --git a/tests/dependencies/is_narwhals_dataframe_test.py b/tests/dependencies/is_narwhals_dataframe_test.py index aeedf15981..0897e64cc8 100644 --- a/tests/dependencies/is_narwhals_dataframe_test.py +++ b/tests/dependencies/is_narwhals_dataframe_test.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -import narwhals as nw from narwhals.stable.v1.dependencies import is_narwhals_dataframe if TYPE_CHECKING: @@ -12,5 +11,5 @@ def test_is_narwhals_dataframe(constructor_eager: ConstructorEager) -> None: df = constructor_eager({"col1": [1, 2], "col2": [3, 4]}) - assert is_narwhals_dataframe(nw.from_native(df)) - assert not is_narwhals_dataframe(df) + assert is_narwhals_dataframe(df) + assert not is_narwhals_dataframe(df.to_native()) diff --git 
a/tests/dependencies/is_narwhals_lazyframe_test.py b/tests/dependencies/is_narwhals_lazyframe_test.py index 0e4c6e1bd9..113fd4a511 100644 --- a/tests/dependencies/is_narwhals_lazyframe_test.py +++ b/tests/dependencies/is_narwhals_lazyframe_test.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -import narwhals as nw from narwhals.stable.v1.dependencies import is_narwhals_lazyframe from tests.utils import Constructor @@ -13,5 +12,5 @@ def test_is_narwhals_lazyframe(constructor: Constructor) -> None: lf = constructor({"a": [1, 2, 3]}) - assert is_narwhals_lazyframe(nw.from_native(lf).lazy()) - assert not is_narwhals_lazyframe(lf) + assert is_narwhals_lazyframe(lf.lazy()) + assert not is_narwhals_lazyframe(lf.to_native()) diff --git a/tests/dtypes/dtypes_test.py b/tests/dtypes/dtypes_test.py index 33fa61ac08..a233f955f3 100644 --- a/tests/dtypes/dtypes_test.py +++ b/tests/dtypes/dtypes_test.py @@ -9,13 +9,8 @@ import narwhals as nw from narwhals.exceptions import InvalidOperationError, PerformanceWarning -from tests.utils import ( - PANDAS_VERSION, - POLARS_VERSION, - PYARROW_VERSION, - assert_equal_hash, - pyspark_session, -) +from narwhals.testing.constructors import pyspark_session +from tests.utils import PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, assert_equal_hash if TYPE_CHECKING: from collections.abc import Iterable diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index af0c464e5b..9755435871 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -45,7 +45,7 @@ def test_arithmetic_expr( request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 3.0]} - df = nw.from_native(constructor(data)) + df = constructor(data, nw) result = df.select(getattr(nw.col("a"), attr)(rhs)) assert_equal_data(result, {"a": expected}) @@ -76,7 +76,7 @@ def test_right_arithmetic_expr( ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = 
nw.from_native(constructor(data)) + df = constructor(data) result = df.select(getattr(nw.col("a"), attr)(rhs)) assert_equal_data(result, {"literal": expected}) @@ -98,16 +98,16 @@ def test_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = nw.from_native(constructor_eager(data), eager_only=True) + df = nw_dataframe(data, nw) result = df.select(getattr(df["a"], attr)(rhs)) assert_equal_data(result, {"a": expected}) @@ -128,29 +128,29 @@ def test_right_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__rmod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = nw.from_native(constructor_eager(data), eager_only=True) + df = nw_dataframe(data, nw) result_series = getattr(df["a"], attr)(rhs) assert result_series.name == "a" assert_equal_data({"a": result_series}, {"a": expected}) def test_truediv_same_dims( - constructor_eager: ConstructorEager, request: pytest.FixtureRequest + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest ) -> None: - if "polars" in str(constructor_eager): + if "polars" in str(nw_dataframe): # https://github.com/pola-rs/polars/issues/17760 request.applymarker(pytest.mark.xfail) - s_left = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] - s_right = nw.from_native(constructor_eager({"a": [2, 2, 1]}), eager_only=True)["a"] + s_left = 
nw_dataframe({"a": [1, 2, 3]}, nw)["a"] + s_right = nw_dataframe({"a": [2, 2, 1]}, nw)["a"] result = s_left / s_right assert_equal_data({"a": result}, {"a": [0.5, 1.0, 3.0]}) result = s_left.__rtruediv__(s_right) @@ -160,31 +160,27 @@ def test_truediv_same_dims( @given(left=st.integers(-100, 100), right=st.integers(-100, 100)) @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") @pytest.mark.slow -def test_floordiv(constructor_eager: ConstructorEager, *, left: int, right: int) -> None: - if any(x in str(constructor_eager) for x in ["modin", "cudf"]): +def test_floordiv(nw_dataframe: ConstructorEager, *, left: int, right: int) -> None: + if any(x in str(nw_dataframe) for x in ["modin", "cudf"]): # modin & cudf are too slow here pytest.skip() assume(right != 0) expected = {"a": [left // right]} - result = nw.from_native(constructor_eager({"a": [left]}), eager_only=True).select( - nw.col("a") // right - ) + result = nw_dataframe({"a": [left]}, nw).select(nw.col("a") // right) assert_equal_data(result, expected) @pytest.mark.slow @given(left=st.integers(-100, 100), right=st.integers(-100, 100)) @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") -def test_mod(constructor_eager: ConstructorEager, *, left: int, right: int) -> None: - if any(x in str(constructor_eager) for x in ["pandas_pyarrow", "modin", "cudf"]): +def test_mod(nw_dataframe: ConstructorEager, *, left: int, right: int) -> None: + if any(x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin", "cudf"]): # pandas[pyarrow] does not implement mod # modin & cudf are too slow here pytest.skip() assume(right != 0) expected = {"a": [left % right]} - result = nw.from_native(constructor_eager({"a": [left]}), eager_only=True).select( - nw.col("a") % right - ) + result = nw_dataframe({"a": [left]}, nw).select(nw.col("a") % right) assert_equal_data(result, expected) @@ -218,7 +214,7 @@ def test_arithmetic_expr_left_literal( 
request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 4.0]} - df = nw.from_native(constructor(data)) + df = constructor(data, nw) result = df.select(getattr(lhs, attr)(nw.col("a"))) assert_equal_data(result, {"literal": expected}) @@ -240,16 +236,16 @@ def test_arithmetic_series_left_literal( attr: str, lhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 4.0]} - df = nw.from_native(constructor_eager(data)) + df = nw_dataframe(data, nw) result = df.select(getattr(lhs, attr)(nw.col("a"))) assert_equal_data(result, {"literal": expected}) @@ -258,7 +254,7 @@ def test_std_broadcating(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): # `std(ddof=2)` fails for duckdb here pytest.skip() - df = nw.from_native(constructor({"a": [1, 2, 3]})) + df = constructor({"a": [1, 2, 3]}, nw) result = df.with_columns(b=nw.col("a").std()).sort("a") expected = {"a": [1, 2, 3], "b": [1.0, 1.0, 1.0]} assert_equal_data(result, expected) diff --git a/tests/expr_and_series/corr_test.py b/tests/expr_and_series/corr_test.py index 9152df69c8..15e371d62d 100644 --- a/tests/expr_and_series/corr_test.py +++ b/tests/expr_and_series/corr_test.py @@ -25,7 +25,7 @@ def test_corr_expr( output_name: str, a: str | nw.Expr, b: str | nw.Expr, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor) and expected_corr is None: request.applymarker( @@ -51,7 +51,7 @@ def test_corr_expr_spearman( output_name: str, a: str | nw.Expr, b: str | nw.Expr, - expected_corr: float, + expected_corr: float | None, ) -> None: context = ( does_not_raise() @@ -75,7 +75,7 @@ def test_corr_series( 
output_name: str, a: str, b: str, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor_eager) and expected_corr is None: request.applymarker( @@ -97,7 +97,7 @@ def test_corr_series_spearman( output_name: str, a: str, b: str, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor_eager) and expected_corr is None: request.applymarker( diff --git a/tests/expr_and_series/dt/convert_time_zone_test.py b/tests/expr_and_series/dt/convert_time_zone_test.py index 65d1a6e3b6..8fd654ad6d 100644 --- a/tests/expr_and_series/dt/convert_time_zone_test.py +++ b/tests/expr_and_series/dt/convert_time_zone_test.py @@ -7,13 +7,13 @@ import pytest import narwhals as nw +from narwhals.testing.constructors import pyspark_session from tests.utils import ( PANDAS_VERSION, POLARS_VERSION, Constructor, assert_equal_data, is_windows, - pyspark_session, ) if TYPE_CHECKING: diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index c7bf55e7c0..830666cac2 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -123,10 +123,10 @@ def test_to_date(request: pytest.FixtureRequest, constructor: Constructor) -> No request.applymarker(pytest.mark.xfail) dates = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} if "dask" in str(constructor): - df_dask = cast("dd.DataFrame", constructor(dates)) + df_dask = cast("dd.DataFrame", constructor(dates).to_native()) df_dask = cast("dd.DataFrame", df_dask.astype({"a": "timestamp[ns][pyarrow]"})) df = nw.from_native(df_dask) else: - df = nw.from_native(constructor(dates)) + df = constructor(dates) result = df.select(nw.col("a").dt.date()) assert result.collect_schema() == {"a": nw.Date} diff --git a/tests/expr_and_series/dt/datetime_duration_test.py b/tests/expr_and_series/dt/datetime_duration_test.py index b84ecfa66e..ac7d132bfc 
100644 --- a/tests/expr_and_series/dt/datetime_duration_test.py +++ b/tests/expr_and_series/dt/datetime_duration_test.py @@ -74,7 +74,7 @@ def test_duration_attributes_nano( import numpy as np data = {"c": np.array([None, 20], dtype="timedelta64[ns]")} - df = nw.from_native(constructor(data)) + df = constructor(data, nw) result_c = df.select(getattr(nw.col("c").dt, attribute)().fill_null(0)) assert_equal_data(result_c, {"c": expected_c}) diff --git a/tests/expr_and_series/dt/replace_time_zone_test.py b/tests/expr_and_series/dt/replace_time_zone_test.py index 1c9dff7d59..27bc394b69 100644 --- a/tests/expr_and_series/dt/replace_time_zone_test.py +++ b/tests/expr_and_series/dt/replace_time_zone_test.py @@ -7,13 +7,8 @@ import pytest import narwhals as nw -from tests.utils import ( - PANDAS_VERSION, - Constructor, - assert_equal_data, - is_windows, - pyspark_session, -) +from narwhals.testing.constructors import pyspark_session +from tests.utils import PANDAS_VERSION, Constructor, assert_equal_data, is_windows if TYPE_CHECKING: from tests.utils import ConstructorEager diff --git a/tests/expr_and_series/fill_nan_test.py b/tests/expr_and_series/fill_nan_test.py index 132b553c50..1835d6c1f1 100644 --- a/tests/expr_and_series/fill_nan_test.py +++ b/tests/expr_and_series/fill_nan_test.py @@ -3,21 +3,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - def test_fill_nan(request: pytest.FixtureRequest, constructor: Constructor) -> None: if "cudf" in str(constructor): @@ -36,7 +23,7 @@ def test_fill_nan(request: pytest.FixtureRequest, constructor: Constructor) -> N assert_equal_data(result, expected) assert 
result.lazy().collect()["float_na"].null_count() == 2 result = df.select(nw.all().fill_nan(3.0)) - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # no nan vs null distinction expected = {"float": [-1.0, 1.0, 3.0], "float_na": [3.0, 1.0, 3.0]} assert result.lazy().collect()["float_na"].null_count() == 0 @@ -55,7 +42,7 @@ def test_fill_nan_series(constructor_eager: ConstructorEager) -> None: "float_na" ] result = s.fill_nan(999) - if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor_eager.is_nullable: # no nan vs null distinction assert_equal_data({"a": result}, {"a": [999.0, 1.0, 999.0]}) elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index 16c59536ca..01579e3f3f 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -12,12 +12,6 @@ import narwhals as nw from narwhals.exceptions import ComputeError, InvalidOperationError -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import ( PANDAS_VERSION, PYARROW_VERSION, @@ -29,12 +23,6 @@ if TYPE_CHECKING: from narwhals.typing import NumericLiteral -NON_NULLABLE_CONSTRUCTORS = ( - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -) NULL_PLACEHOLDER, NAN_PLACEHOLDER = 9999.0, -1.0 INF_POS, INF_NEG = float("inf"), float("-inf") @@ -126,7 +114,7 @@ def test_is_close_series_with_series( y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = x.is_close(y, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) - if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + if not constructor_eager.is_nullable: expected = [v if v is not None else nans_equal for v in expected] elif "pandas" in str(constructor_eager) and 
PANDAS_VERSION >= (3,): expected = [ @@ -154,7 +142,7 @@ def test_is_close_series_with_scalar( y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) - if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + if not constructor_eager.is_nullable: expected = [v if v is not None else False for v in expected] elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): expected = [ @@ -199,7 +187,7 @@ def test_is_close_expr_with_expr( ) .sort("idx") ) - if constructor in NON_NULLABLE_CONSTRUCTORS: + if not constructor.is_nullable: expected = [v if v is not None else nans_equal for v in expected] elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): expected = [ @@ -240,7 +228,7 @@ def test_is_close_expr_with_scalar( ) .sort("idx") ) - if constructor in NON_NULLABLE_CONSTRUCTORS: + if not constructor.is_nullable: expected = [v if v is not None else False for v in expected] elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): expected = [ diff --git a/tests/expr_and_series/is_finite_test.py b/tests/expr_and_series/is_finite_test.py index eb07b2a41e..f55b106593 100644 --- a/tests/expr_and_series/is_finite_test.py +++ b/tests/expr_and_series/is_finite_test.py @@ -5,21 +5,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import POLARS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - data = {"a": [float("nan"), float("inf"), 2.0, None]} @@ -77,7 +64,7 @@ def test_is_finite_column_with_null(constructor: Constructor, data: list[float]) result = df.select(nw.col("a").is_finite()) expected: dict[str, list[Any]] - if any(constructor is c for c in 
NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = {"a": [True, True, False]} else: diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py index 27790e27b2..9dce78c535 100644 --- a/tests/expr_and_series/is_nan_test.py +++ b/tests/expr_and_series/is_nan_test.py @@ -5,21 +5,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - def test_nan(constructor: Constructor) -> None: data_na = {"int": [-1, 1, None]} @@ -33,7 +20,7 @@ def test_nan(constructor: Constructor) -> None: ) expected: dict[str, list[Any]] - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = { "int": [False, False, True], @@ -70,7 +57,7 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None: "float_na": df["float_na"].is_nan(), } expected: dict[str, list[Any]] - if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor_eager.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = { "int": [False, False, True], diff --git a/tests/expr_and_series/list/get_test.py b/tests/expr_and_series/list/get_test.py index 52ca3386ba..338ab6197a 100644 --- a/tests/expr_and_series/list/get_test.py +++ b/tests/expr_and_series/list/get_test.py @@ -45,9 +45,8 @@ def test_get_series( pytest.skip() pytest.importorskip("pyarrow") - if ( - constructor_eager.__name__.startswith("pandas") - and "pyarrow" not in constructor_eager.__name__ + if str(constructor_eager).startswith("pandas") and 
"pyarrow" not in str( + constructor_eager ): df = nw.from_native(constructor_eager(data), eager_only=True) msg = re.escape("Series must be of PyArrow List type to support list namespace.") diff --git a/tests/expr_and_series/nth_test.py b/tests/expr_and_series/nth_test.py index 1249f7f2e2..86f9bfe2eb 100644 --- a/tests/expr_and_series/nth_test.py +++ b/tests/expr_and_series/nth_test.py @@ -1,17 +1,14 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals as nw from tests.utils import POLARS_VERSION, Constructor, assert_equal_data -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} +data: dict[str, list[Any]] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} @pytest.mark.parametrize( diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py index ec9dea6104..7ea345af30 100644 --- a/tests/expr_and_series/over_test.py +++ b/tests/expr_and_series/over_test.py @@ -475,10 +475,10 @@ def test_over_quantile(constructor: Constructor, request: pytest.FixtureRequest) data = {"a": [1, 2, 3, 4, 5, 6], "b": ["x", "x", "x", "y", "y", "y"]} quantile_expr = nw.col("a").quantile(quantile=0.5, interpolation="linear") - native_frame = constructor(data) + native_frame = constructor(data).to_native() if "dask" in str(constructor): - native_frame = native_frame.repartition(npartitions=1) # type: ignore[union-attr] + native_frame = native_frame.repartition(npartitions=1) result = ( nw.from_native(native_frame) diff --git a/tests/expr_and_series/str/split_test.py b/tests/expr_and_series/str/split_test.py index b6b25cd024..f206b84e35 100644 --- a/tests/expr_and_series/str/split_test.py +++ b/tests/expr_and_series/str/split_test.py @@ -20,8 +20,7 @@ ) def test_str_split(constructor: Constructor, by: str, expected: Any) -> None: if "cudf" not in str(constructor) and ( - 
constructor.__name__.startswith("pandas") - and "pyarrow" not in constructor.__name__ + str(constructor).startswith("pandas") and "pyarrow" not in str(constructor) ): df = nw.from_native(constructor(data)) msg = re.escape("This operation requires a pyarrow-backed series. ") @@ -44,8 +43,8 @@ def test_str_split_series( constructor_eager: ConstructorEager, by: str, expected: Any ) -> None: if "cudf" not in str(constructor_eager) and ( - constructor_eager.__name__.startswith("pandas") - and "pyarrow" not in constructor_eager.__name__ + str(constructor_eager).startswith("pandas") + and "pyarrow" not in str(constructor_eager) ): df = nw.from_native(constructor_eager(data), eager_only=True) msg = re.escape("This operation requires a pyarrow-backed series. ") diff --git a/tests/expr_and_series/str/to_time_test.py b/tests/expr_and_series/str/to_time_test.py index fceed2688d..29af4c0225 100644 --- a/tests/expr_and_series/str/to_time_test.py +++ b/tests/expr_and_series/str/to_time_test.py @@ -21,7 +21,7 @@ def requires_time_support( Skip or mark tests as expected failures depending on backend capabilities, version, and pyarrow availability when testing Time dtype support. 
""" - if constructor.__name__.startswith(("pandas", "modin")): + if str(constructor).startswith(("pandas", "modin")): if PANDAS_VERSION < (2, 2, 0): pytest.skip( "pandas < 2.2.0 has no pyarrow dtype support (and therefore does not support the Time dtype)" diff --git a/tests/expr_and_series/struct_/field_test.py b/tests/expr_and_series/struct_/field_test.py index a351c31500..dd8dd26790 100644 --- a/tests/expr_and_series/struct_/field_test.py +++ b/tests/expr_and_series/struct_/field_test.py @@ -1,84 +1,53 @@ from __future__ import annotations -from typing import cast - import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data +from tests.utils import ( + DUCKDB_VERSION, + PANDAS_VERSION, + Constructor, + ConstructorEager, + assert_equal_data, +) def test_get_field_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: pytest.importorskip("pyarrow") - import pyarrow as pa - if any(backend in str(constructor) for backend in ("dask", "modin")): + if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) - if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2, 0): + if ("pandas" in str(constructor) and PANDAS_VERSION < (2, 2, 0)) or ( + "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3, 0) + ): pytest.skip() - data = {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]} - - df_native = constructor(data) - - if "pandas" in str(constructor): - import pandas as pd - df_native = cast("pd.DataFrame", df_native).assign( - user=pd.Series( - data["user"], - dtype=pd.ArrowDtype( - pa.struct([("id", pa.string()), ("name", pa.string())]) - ), - ) - ) - - df = nw.from_native(df_native) + data = {"id": ["0", "1"], "name": ["john", "jane"]} + expected = data.copy() + df = constructor(data, nw).select(user=nw.struct("id", "name")) result = nw.from_native(df).select( nw.col("user").struct.field("id"), 
nw.col("user").struct.field("name") ) - expected = {"id": ["0", "1"], "name": ["john", "jane"]} assert_equal_data(result, expected) result = nw.from_native(df).select(nw.col("user").struct.field("id").name.keep()) expected = {"user": ["0", "1"]} assert_equal_data(result, expected) -def test_get_field_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager -) -> None: +def test_get_field_series(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") - import pyarrow as pa - if any(backend in str(constructor_eager) for backend in ("modin",)): - request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager) and PANDAS_VERSION < (2, 2, 0): pytest.skip() - data = {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]} - expected = {"id": ["0", "1"], "name": ["john", "jane"]} - - _expected = expected.copy() - df_native = constructor_eager(data) - - if "pandas" in str(constructor_eager): - import pandas as pd - - df_native = cast("pd.DataFrame", df_native).assign( - user=pd.Series( - data["user"], - dtype=pd.ArrowDtype( - pa.struct([("id", pa.string()), ("name", pa.string())]) - ), - ) - ) - - df = nw.from_native(df_native, eager_only=True) + data = {"id": ["0", "1"], "name": ["john", "jane"]} + expected = data.copy() + df = constructor_eager(data, nw).select(user=nw.struct("id", "name")) result = nw.from_native(df).select( df["user"].struct.field("id"), df["user"].struct.field("name") ) - expected = {"id": ["0", "1"], "name": ["john", "jane"]} - assert_equal_data(result, _expected) + assert_equal_data(result, expected) def test_pandas_object_series() -> None: diff --git a/tests/frame/group_by_test.py b/tests/frame/group_by_test.py index 57aacae09b..788a5363a4 100644 --- a/tests/frame/group_by_test.py +++ b/tests/frame/group_by_test.py @@ -26,7 +26,7 @@ from narwhals.typing import NonNestedLiteral -data: Mapping[str, Any] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} +data: dict[str, 
list[Any]] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} POLARS_COLLECT_STREAMING_ENGINE = os.environ.get("NARWHALS_POLARS_NEW_STREAMING", None) diff --git a/tests/frame/interchange_native_namespace_test.py b/tests/frame/interchange_native_namespace_test.py index 79a92ef6c9..0face73928 100644 --- a/tests/frame/interchange_native_namespace_test.py +++ b/tests/frame/interchange_native_namespace_test.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest @@ -9,10 +9,7 @@ pytest.importorskip("polars") import polars as pl -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} def test_interchange() -> None: @@ -60,9 +57,9 @@ def test_duckdb() -> None: pytest.importorskip("duckdb") import duckdb - df_pl = pl.DataFrame(data) # noqa: F841 + _df_pl = pl.DataFrame(data) - rel = duckdb.sql("select * from df_pl") + rel = duckdb.sql("select * from _df_pl") df = nw_v1.from_native(rel, eager_or_interchange_only=True) series = df["a"] diff --git a/tests/frame/interchange_select_test.py b/tests/frame/interchange_select_test.py index a927ba18c6..90279f0296 100644 --- a/tests/frame/interchange_select_test.py +++ b/tests/frame/interchange_select_test.py @@ -1,16 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals as nw import narwhals.stable.v1 as nw_v1 -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} class InterchangeDataFrame: diff --git a/tests/frame/interchange_to_arrow_test.py b/tests/frame/interchange_to_arrow_test.py index 
2277d498ea..e8604f816d 100644 --- a/tests/frame/interchange_to_arrow_test.py +++ b/tests/frame/interchange_to_arrow_test.py @@ -1,15 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals.stable.v1 as nw_v1 -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} pytest.importorskip("polars") pytest.importorskip("pyarrow") diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 05543105a6..50ac30c6ed 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -16,19 +16,7 @@ ) if TYPE_CHECKING: - from narwhals.typing import IntoDataFrame, IntoLazyFrameT, JoinStrategy - - -def from_native_lazy( - native: IntoLazyFrameT | IntoDataFrame, -) -> nw.LazyFrame[IntoLazyFrameT] | nw.LazyFrame[Any]: - """Every join test [needs to use `.lazy()` for typing]*. - - *Unless both left/right frames are of the same concrete type. 
- - [needs to use `.lazy()` for typing]: https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2286264815 - """ - return nw.from_native(native).lazy() + from narwhals.typing import JoinStrategy @pytest.mark.parametrize( @@ -107,8 +95,8 @@ def test_full_join( right_on: None | str | list[str], constructor: Constructor, ) -> None: - df_left = from_native_lazy(constructor(df1)) - df_right = from_native_lazy(constructor(df2)) + df_left = constructor(df1).lazy() + df_right = constructor(df2).lazy() result = df_left.join( df_right, on=on, left_on=left_on, right_on=right_on, how="full" ).sort("id", nulls_last=True) @@ -123,8 +111,8 @@ def test_full_join_duplicate( df1 = {"foo": [1, 2, 3], "val1": [1, 2, 3]} df2 = {"foo": [1, 2, 3], "foo_right": [1, 2, 3]} - df_left = from_native_lazy(constructor(df1)) - df_right = from_native_lazy(constructor(df2)) + df_left = constructor(df1).lazy() + df_right = constructor(df2).lazy() exceptions: list[type[Exception]] = [nw.exceptions.NarwhalsError] if "pyspark" in str(constructor) and "sqlframe" not in str(constructor): @@ -146,7 +134,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() df_right = df result = df.join( df_right, @@ -175,7 +163,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() df_right = df result = df.join( df_right, left_on="antananarivo", right_on="antananarivo", how="inner" @@ -199,7 +187,7 @@ def test_cross_join(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") expected = { 
"antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], @@ -219,7 +207,7 @@ def test_suffix( constructor: Constructor, how: Literal["inner", "left"], suffix: str ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() df_right = df result = df.join( df_right, @@ -237,7 +225,7 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() result = df.join(df, how="cross", suffix=suffix).sort( "antananarivo", f"antananarivo{suffix}" ) @@ -287,7 +275,7 @@ def test_anti_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) assert_equal_data(result, expected) @@ -325,7 +313,7 @@ def test_semi_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() other = df.filter(filter_expr) result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( "antananarivo" @@ -336,7 +324,7 @@ def test_semi_join( @pytest.mark.parametrize("how", ["right"]) def test_join_not_implemented(constructor: Constructor, how: str) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( NotImplementedError, @@ -363,8 +351,8 @@ def test_left_join(constructor: Constructor) -> None: "co": [4.0, 5.0, 7.0], "idx": [0.0, 1.0, 2.0], } - df_left = 
from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join(df_right, left_on="bob", right_on="co", how="left") result = result.sort("idx") result = result.drop("idx_right") @@ -389,8 +377,8 @@ def test_left_join(constructor: Constructor) -> None: def test_left_join_multiple_column(constructor: Constructor) -> None: data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "idx": [0, 1, 2]} data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "idx": [0, 1, 2]} - df_left = from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join( df_right, left_on=["antananarivo", "bob"], @@ -416,8 +404,8 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d": [1.0, 4.0, 2.0], "idx": [0.0, 1.0, 2.0], } - df_left = from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") result = result.drop("idx_right") expected: dict[str, list[Any]] = { @@ -446,7 +434,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) def test_join_keys_exceptions(constructor: Constructor, how: JoinStrategy) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( ValueError, @@ -512,16 +500,27 @@ def test_joinasof_numeric( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( - 
constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = from_native_lazy( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") - result = df.join_asof( - df_right, left_on="antananarivo", right_on="antananarivo", strategy=strategy - ) - result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) + + data_left = {"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]} + data_right = {"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]} + left_lf = constructor(data_left).lazy().sort("antananarivo") + right_lf = constructor(data_right).lazy().sort("antananarivo") + + result: nw.DataFrame[Any] | nw.LazyFrame[Any] + result_on: nw.DataFrame[Any] | nw.LazyFrame[Any] + if constructor.is_lazy: + result = left_lf.join_asof( + right_lf, left_on="antananarivo", right_on="antananarivo", strategy=strategy + ) + result_on = left_lf.join_asof(right_lf, on="antananarivo", strategy=strategy) + + else: + left_df, right_df = left_lf.collect(), right_lf.collect() + result = left_df.join_asof( + right_df, left_on="antananarivo", right_on="antananarivo", strategy=strategy + ) + result_on = left_df.join_asof(right_df, on="antananarivo", strategy=strategy) + assert_equal_data(result.sort(by="antananarivo"), expected) assert_equal_data(result_on.sort(by="antananarivo"), expected) @@ -581,7 +580,7 @@ def test_joinasof_time( request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( + df = ( constructor( { "datetime": [ @@ -592,8 +591,10 @@ def test_joinasof_time( "population": [82.19, 82.66, 83.12], } ) - ).sort("datetime") - df_right = from_native_lazy( + .lazy() + .sort("datetime") + ) + df_right = ( constructor( { "datetime": [ @@ -606,7 +607,9 @@ def test_joinasof_time( "gdp": [4164, 4411, 4566, 4696, 4827], } ) - ).sort("datetime") + .lazy() + .sort("datetime") + ) 
result = df.join_asof( df_right, left_on="datetime", right_on="datetime", strategy=strategy ) @@ -622,7 +625,7 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( + df = ( constructor( { "antananarivo": [1, 5, 7, 10], @@ -630,12 +633,16 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - "c": [9, 2, 1, 1], } ) - ).sort("antananarivo") - df_right = from_native_lazy( + .lazy() + .sort("antananarivo") + ) + df_right = ( constructor( {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} ) - ).sort("antananarivo") + .lazy() + .sort("antananarivo") + ) result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") result_by = df.join_asof(df_right, on="antananarivo", by="bob") expected = { @@ -657,12 +664,16 @@ def test_joinasof_suffix( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( + df = ( constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = from_native_lazy( + .lazy() + .sort("antananarivo") + ) + df_right = ( constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") + .lazy() + .sort("antananarivo") + ) result = df.join_asof( df_right, left_on="antananarivo", right_on="antananarivo", suffix="_y" ) @@ -675,7 +686,7 @@ def test_joinasof_not_implemented( constructor: Constructor, strategy: Literal["backward", "forward"] ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( NotImplementedError, @@ -688,7 +699,7 @@ def test_joinasof_not_implemented( def test_joinasof_keys_exceptions(constructor: 
Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( ValueError, @@ -754,13 +765,16 @@ def test_joinasof_by_exceptions( message: str, ) -> None: data = {ON: [1, 3, 2], BY: [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) - if isinstance(df, nw.LazyFrame): + frame = constructor(data).lazy() + + if constructor.is_lazy: with pytest.raises(ValueError, match=message): - df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + frame.join_asof(frame, on=on, by_left=by_left, by_right=by_right, by=by) else: with pytest.raises(ValueError, match=message): - df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + frame.collect().join_asof( + frame.collect(), on=on, by_left=by_left, by_right=by_right, by=by + ) def test_join_duplicate_column_names( @@ -777,7 +791,7 @@ def test_join_duplicate_column_names( ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]} - df = nw.from_native(constructor(data)) + lf = constructor(data).lazy() if any( x in str(constructor) for x in ("pandas", "pandas[pyarrow]", "pandas[nullable]", "dask") @@ -796,10 +810,12 @@ def test_join_duplicate_column_names( request.applymarker(pytest.mark.xfail) else: exception = nw.exceptions.DuplicateError - if isinstance(df, nw.LazyFrame): + + if constructor.is_lazy: with pytest.raises(exception): # pyrefly: ignore[unbound-name] - df.join(df, on=["a"]).join(df, on=["a"]).collect() + lf.join(lf, on=["a"]).join(lf, on=["a"]).collect() else: + df = lf.collect() with pytest.raises(exception): # pyrefly: ignore[unbound-name] df.join(df, on=["a"]).join(df, on=["a"]) @@ -875,8 +891,8 @@ def test_join_on_null_values( data_left = {**keys, "x": [1, 2, 3, 4]} data_right = {**keys, "y": [1.2, 3.4, 5.6, 7.8]} - df_left = from_native_lazy(constructor(data_left)) - df_right = 
from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() on = None if how == "cross" else list(keys) sort_by = ["a", "x", "y"] if how in {"cross", "full"} else ["a", "x"] @@ -902,8 +918,8 @@ def test_full_join_with_overlapping_non_key_columns_and_nulls( "right_only": [100, 200, 300], } - df_left = from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join(df_right, on="id", how="full", suffix="_r").sort( "id", nulls_last=True @@ -929,7 +945,7 @@ def test_join_with_float_nan( data = {"a": [0, 0, 0], "b": [0, 0, 0], "c": [0.0, 0.0, float("nan")]} join_cols = ["a", "c"] - frame = from_native_lazy(constructor(data)) + frame = constructor(data).lazy() result = ( frame.join(frame, on=join_cols, how="inner").sort("c", nulls_last=True).collect() diff --git a/tests/frame/lazy_test.py b/tests/frame/lazy_test.py index 9e671c68d2..658a61c68b 100644 --- a/tests/frame/lazy_test.py +++ b/tests/frame/lazy_test.py @@ -9,13 +9,8 @@ import narwhals as nw from narwhals._utils import Implementation from narwhals.dependencies import get_cudf, get_modin -from tests.utils import ( - DUCKDB_VERSION, - PANDAS_VERSION, - assert_equal_data, - pyspark_session, - sqlframe_session, -) +from narwhals.testing.constructors import pyspark_session, sqlframe_session +from tests.utils import DUCKDB_VERSION, PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from narwhals._typing import LazyAllowed, SparkLike diff --git a/tests/frame/sample_test.py b/tests/frame/sample_test.py index b86ddaee1d..8db480e02c 100644 --- a/tests/frame/sample_test.py +++ b/tests/frame/sample_test.py @@ -19,9 +19,7 @@ def test_sample_n(constructor_eager: ConstructorEager) -> None: def test_sample_fraction(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3, 4], 
"b": ["x", "y", "x", "y"]}), eager_only=True - ) + df = constructor_eager({"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}) result_expr = df.sample(fraction=0.5).shape expected_expr = (2, 2) @@ -30,11 +28,11 @@ def test_sample_fraction(constructor_eager: ConstructorEager) -> None: def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: size, n = 100, 10 - df = nw.from_native(constructor_eager({"a": range(size)}), eager_only=True) + df = constructor_eager({"a": range(size)}) r1 = nw.to_native(df.sample(n=n, seed=123)) r2 = nw.to_native(df.sample(n=n, seed=123)) r3 = nw.to_native(df.sample(n=n, seed=42)) - assert r1.equals(r2) # type: ignore[attr-defined] - assert not r1.equals(r3) # type: ignore[attr-defined] + assert r1.equals(r2) + assert not r1.equals(r3) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index a4ee5d36a3..3d66c308b7 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -9,7 +9,7 @@ import narwhals as nw from narwhals.exceptions import PerformanceWarning -from tests.utils import PANDAS_VERSION, POLARS_VERSION, ConstructorPandasLike +from tests.utils import PANDAS_VERSION, POLARS_VERSION if TYPE_CHECKING: from collections.abc import Callable, Sequence @@ -23,7 +23,7 @@ IntoPandasSchema, IntoPolarsSchema, ) - from tests.utils import Constructor, ConstructorEager + from tests.utils import Constructor, ConstructorEager, ConstructorPandasLike TimeUnit: TypeAlias = Literal["ns", "us"] @@ -578,7 +578,7 @@ def origin_pandas_like( "d": [5.3, 4.99], "e": [datetime(2006, 1, 1), datetime(2001, 9, 3)], } - return constructor_pandas_like(data).dtypes.to_dict() + return constructor_pandas_like(data).to_native().dtypes.to_dict() # type: ignore[no-any-return] @pytest.fixture @@ -588,8 +588,8 @@ def origin_pandas_like_pyarrow( if PANDAS_VERSION < (1, 5): pytest.skip(reason="pandas too old for `pyarrow`") name_pandas_like = {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"} - if 
constructor_pandas_like.__name__ not in name_pandas_like: - pytest.skip(f"{constructor_pandas_like.__name__!r} is not pandas_like_pyarrow") + if str(constructor_pandas_like) not in name_pandas_like: + pytest.skip(f"{constructor_pandas_like!s} is not pandas_like_pyarrow") data = { "a": [2, 1], "b": ["hello", "hi"], @@ -603,7 +603,7 @@ def origin_pandas_like_pyarrow( df_nw = nw.from_native(df_pd).with_columns( nw.col("f").cast(nw.Date()), nw.col("g").cast(nw.Time()) ) - return df_nw.to_native().dtypes.to_dict() + return df_nw.to_native().dtypes.to_dict() # type: ignore[no-any-return] def test_schema_from_polars( diff --git a/tests/frame/to_native_test.py b/tests/frame/to_native_test.py index 0ef0ae885a..cdb03e2675 100644 --- a/tests/frame/to_native_test.py +++ b/tests/frame/to_native_test.py @@ -10,7 +10,7 @@ def test_to_native(constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} - df_raw = constructor(data) + df_raw = constructor(data).to_native() df = nw.from_native(df_raw) assert isinstance(df.to_native(), df_raw.__class__) diff --git a/tests/frame/to_pandas_test.py b/tests/frame/to_pandas_test.py index 473b685c19..bcdcc10fc3 100644 --- a/tests/frame/to_pandas_test.py +++ b/tests/frame/to_pandas_test.py @@ -7,7 +7,6 @@ pytest.importorskip("pandas") import pandas as pd -import narwhals as nw from tests.utils import PANDAS_VERSION if TYPE_CHECKING: @@ -19,11 +18,10 @@ def test_convert_pandas(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} - df_raw = constructor_eager(data) - result = nw.from_native(df_raw, eager_only=True).to_pandas() + result = constructor_eager(data).to_pandas() - if constructor_eager.__name__.startswith("pandas"): - expected = cast("pd.DataFrame", constructor_eager(data)) + if str(constructor_eager).startswith("pandas"): + expected = cast("pd.DataFrame", constructor_eager(data).to_native()) elif "modin_pyarrow" in 
str(constructor_eager): expected = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") else: diff --git a/tests/frame/to_polars_test.py b/tests/frame/to_polars_test.py index 60ca653f32..89d4a65b2a 100644 --- a/tests/frame/to_polars_test.py +++ b/tests/frame/to_polars_test.py @@ -1,14 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import pytest import narwhals as nw if TYPE_CHECKING: - from collections.abc import Mapping - + from narwhals.testing.typing import Data from tests.utils import ConstructorEager pytest.importorskip("polars") @@ -20,7 +19,7 @@ def test_convert_polars(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") from polars.testing import assert_frame_equal - data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} + data: Data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} df_raw = constructor_eager(data) result = nw.from_native(df_raw).to_polars() diff --git a/tests/hypothesis/getitem_test.py b/tests/hypothesis/getitem_test.py index 759a292f97..c18f860872 100644 --- a/tests/hypothesis/getitem_test.py +++ b/tests/hypothesis/getitem_test.py @@ -1,29 +1,30 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, cast import hypothesis.strategies as st import pytest from hypothesis import assume, given import narwhals as nw -from tests.conftest import pandas_constructor, pyarrow_table_constructor +from narwhals.testing.constructors import get_backend_constructor from tests.utils import assert_equal_data if TYPE_CHECKING: from collections.abc import Sequence - from narwhals.typing import IntoDataFrame + from narwhals.testing.typing import DataFrameConstructor pytest.importorskip("pandas") pytest.importorskip("polars") import polars as pl -@pytest.fixture(params=[pandas_constructor, pyarrow_table_constructor], scope="module") -def 
pandas_or_pyarrow_constructor( - request: pytest.FixtureRequest, -) -> Callable[[Any], IntoDataFrame]: +@pytest.fixture( + params=[get_backend_constructor("pandas"), get_backend_constructor("pyarrow")], + scope="module", +) +def pandas_or_pyarrow_constructor(request: pytest.FixtureRequest) -> DataFrameConstructor: return request.param # type: ignore[no-any-return] @@ -117,7 +118,9 @@ def tuple_selector(draw: st.DrawFn) -> tuple[Any, Any]: @given(selector=st.one_of(single_selector, tuple_selector())) @pytest.mark.slow -def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: +def test_getitem( + pandas_or_pyarrow_constructor: DataFrameConstructor, selector: Any +) -> None: """Compare __getitem__ against polars.""" # TODO(PR - clean up): documenting current differences # These assume(...) lines each filter out a known difference. @@ -125,7 +128,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # NotImplementedError: Slicing with step is not supported on PyArrow tables assume( not ( - pandas_or_pyarrow_constructor is pyarrow_table_constructor + pandas_or_pyarrow_constructor.is_pyarrow and isinstance(selector, slice) and selector.step is not None ) @@ -134,7 +137,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # NotImplementedError: Slicing with step is not supported on PyArrow tables assume( not ( - pandas_or_pyarrow_constructor is pyarrow_table_constructor + pandas_or_pyarrow_constructor.is_pyarrow and isinstance(selector, tuple) and ( (isinstance(selector[0], slice) and selector[0].step is not None) @@ -155,7 +158,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # rows/columns sides. 
return - df_other = nw.from_native(pandas_or_pyarrow_constructor(TEST_DATA)) + df_other = pandas_or_pyarrow_constructor(TEST_DATA, nw) result_other = df_other[cast("Any", selector)] if isinstance(result_polars, nw.Series): diff --git a/tests/ibis_test.py b/tests/ibis_test.py index 14a93c8ef8..a9a9dc413b 100644 --- a/tests/ibis_test.py +++ b/tests/ibis_test.py @@ -1,30 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any - import pytest import narwhals as nw - -if TYPE_CHECKING: - import ibis - import polars as pl - - from tests.utils import Constructor -else: - ibis = pytest.importorskip("ibis") - pl = pytest.importorskip("polars") - - -@pytest.fixture -def ibis_constructor() -> Constructor: - def func(data: dict[str, Any]) -> ibis.Table: - df = pl.DataFrame(data) - return ibis.memtable(df) - - return func +from narwhals.testing.constructors import get_backend_constructor -def test_from_native(ibis_constructor: Constructor) -> None: - df = nw.from_native(ibis_constructor({"a": [1, 2, 3], "b": [4, 5, 6]})) +def test_from_native() -> None: + ibis_constructor = get_backend_constructor("ibis") + if not ibis_constructor.is_available: + pytest.skip() + df = ibis_constructor({"a": [1, 2, 3], "b": [4, 5, 6]}, nw) assert df.columns == ["a", "b"] diff --git a/tests/modern_polars/method_chaining_test.py b/tests/modern_polars/method_chaining_test.py index 611f85973e..ba7a06b894 100644 --- a/tests/modern_polars/method_chaining_test.py +++ b/tests/modern_polars/method_chaining_test.py @@ -38,10 +38,7 @@ def test_split_list_get(request: pytest.FixtureRequest, constructor: Constructor if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - if ( - constructor.__name__.startswith("pandas") - and "pyarrow" not in constructor.__name__ - ): + if str(constructor).startswith("pandas") and "pyarrow" not in str(constructor): df = nw.from_native(constructor(data)) msg = re.escape("This operation requires a pyarrow-backed series. 
") with pytest.raises(TypeError, match=msg): diff --git a/tests/namespace_test.py b/tests/namespace_test.py index 34d0d60204..ce95c01e05 100644 --- a/tests/namespace_test.py +++ b/tests/namespace_test.py @@ -72,7 +72,7 @@ def test_namespace_from_backend_name(backend: BackendName) -> None: def test_namespace_from_native_object(constructor: Constructor) -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6]} - frame = constructor(data) + frame = constructor(data, nw).to_native() namespace = Namespace.from_native_object(frame) nw_frame = nw.from_native(frame) assert namespace.implementation == nw_frame.implementation diff --git a/tests/preserve_pandas_like_columns_name_attr_test.py b/tests/preserve_pandas_like_columns_name_attr_test.py index 3127040bee..546b388f67 100644 --- a/tests/preserve_pandas_like_columns_name_attr_test.py +++ b/tests/preserve_pandas_like_columns_name_attr_test.py @@ -1,17 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING import pytest import narwhals as nw if TYPE_CHECKING: - import pandas as pd + from tests.utils import Constructor def test_ops_preserve_column_index_name( - constructor: Callable[..., pd.DataFrame], request: pytest.FixtureRequest + constructor: Constructor, request: pytest.FixtureRequest ) -> None: if not any(x in str(constructor) for x in ("pandas", "modin", "cudf", "dask")): pytest.skip( @@ -22,7 +22,7 @@ def test_ops_preserve_column_index_name( request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} - df_native = constructor(data) + df_native = constructor(data).to_native() df_native.columns.name = "foo" df = nw.from_native(df_native) diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 4548f76a87..0e0c94d994 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -6,13 +6,8 @@ import pytest import narwhals as nw -from tests.utils import ( - PANDAS_VERSION, - Constructor, - 
assert_equal_data, - pyspark_session, - sqlframe_session, -) +from narwhals.testing.constructors import pyspark_session, sqlframe_session +from tests.utils import PANDAS_VERSION, Constructor, assert_equal_data pytest.importorskip("polars") pytest.importorskip("pyarrow") @@ -32,7 +27,7 @@ IOSourceKind: TypeAlias = Literal["str", "Path", "PathLike"] -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} skipif_pandas_lt_1_5 = pytest.mark.skipif( PANDAS_VERSION < (1, 5), reason="too old for pyarrow" ) diff --git a/tests/series_only/hist_test.py b/tests/series_only/hist_test.py index 183c0a13ff..7db42c31bc 100644 --- a/tests/series_only/hist_test.py +++ b/tests/series_only/hist_test.py @@ -11,11 +11,14 @@ import narwhals as nw from narwhals.exceptions import ComputeError -from tests.utils import POLARS_VERSION, ConstructorEager, assert_equal_data +from tests.utils import POLARS_VERSION, assert_equal_data if TYPE_CHECKING: from collections.abc import Sequence + from narwhals.testing.typing import DataFrameConstructor + + rnd = Random(0) # noqa: S311 data: dict[str, Any] = { @@ -43,7 +46,8 @@ param_include_breakpoint = pytest.mark.parametrize( "include_breakpoint", [True, False], ids=["breakpoint-True", "breakpoint-False"] ) -param_library = pytest.mark.parametrize("library", ["pandas", "polars", "pyarrow"]) +param_name = pytest.mark.parametrize("name", ["pandas", "polars[eager]", "pyarrow"]) + SHIFT_BINS_BY = 10 """shift bins property""" @@ -63,34 +67,14 @@ ], ids=str, ) -@param_library def test_hist_bin( - library: str, + nw_dataframe: DataFrameConstructor, bins: list[float], expected: Sequence[float], *, include_breakpoint: bool, ) -> None: - constructor_eager: ConstructorEager - pytest.importorskip(library) - if library == "pandas": - import pandas as pd - - constructor_eager = pd.DataFrame - elif library == "polars": - import polars as pl - - 
constructor_eager = pl.DataFrame - else: - import pyarrow as pa - - pytest.importorskip("numpy") - - constructor_eager = pa.table - - df = nw.from_native(constructor_eager(data)).with_columns( - float=nw.col("int").cast(nw.Float64) - ) + df = nw_dataframe(data, nw).with_columns(float=nw.col("int").cast(nw.Float64)) expected_full = {"count": expected} if include_breakpoint: expected_full = {"breakpoint": bins[1:], **expected_full} @@ -115,10 +99,8 @@ def test_hist_bin( assert_equal_data(result, expected_full) # missing/nan results - df = nw.from_native( - constructor_eager( - {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]} - ) + df = nw_dataframe( + {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]}, nw ) expected_full = {"count": expected} if include_breakpoint: @@ -130,25 +112,13 @@ def test_hist_bin( @pytest.mark.parametrize("params", counts_and_expected) @param_include_breakpoint -@param_library def test_hist_count( - library: str, *, params: dict[str, Any], include_breakpoint: bool + nw_dataframe: DataFrameConstructor, + *, + params: dict[str, Any], + include_breakpoint: bool, ) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - df = nw.from_native(constructor_eager(data)).with_columns( - float=nw.col("int").cast(nw.Float64) - ) + df = nw_dataframe(data, nw).with_columns(float=nw.col("int").cast(nw.Float64)) bin_count = params["bin_count"] expected_bins = params["expected_bins"] @@ -168,10 +138,8 @@ def test_hist_count( assert result["count"].sum() == df[col].count() # missing/nan results - df = nw.from_native( - constructor_eager( - {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]} - ) + df = 
nw_dataframe( + {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]}, nw ) for col in df.columns: @@ -186,22 +154,9 @@ def test_hist_count( ) -@param_library -def test_hist_count_no_spread(library: str) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table +def test_hist_count_no_spread(nw_dataframe: DataFrameConstructor) -> None: data = {"all_zero": [0, 0, 0], "all_non_zero": [5, 5, 5]} - df = nw.from_native(constructor_eager(data)) + df = nw_dataframe(data, nw) result = df["all_zero"].hist(bin_count=4, include_breakpoint=True) expected = {"breakpoint": [-0.25, 0.0, 0.25, 0.5], "count": [0, 3, 0, 0]} @@ -229,23 +184,12 @@ def test_hist_bin_and_bin_count() -> None: @param_include_breakpoint -@param_library -def test_hist_no_data(library: str, *, include_breakpoint: bool) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - s = nw.from_native(constructor_eager({"values": []})).select( - nw.col("values").cast(nw.Float64) - )["values"] +def test_hist_no_data( + nw_dataframe: DataFrameConstructor, *, include_breakpoint: bool +) -> None: + s = nw_dataframe({"values": []}, nw).select(nw.col("values").cast(nw.Float64))[ + "values" + ] for bin_count in [1, 10]: result = s.hist(bin_count=bin_count, include_breakpoint=include_breakpoint) assert len(result) == bin_count @@ -262,21 +206,8 @@ def test_hist_no_data(library: str, *, include_breakpoint: bool) -> None: assert result["count"].sum() 
== 0 -@param_library -def test_hist_small_bins(library: str) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - s = nw.from_native(constructor_eager({"values": [1, 2, 3]})) +def test_hist_small_bins(nw_dataframe: DataFrameConstructor) -> None: + s = nw_dataframe({"values": [1, 2, 3]}, nw) result = s["values"].hist(bins=None, bin_count=None) assert len(result) == 10 @@ -284,11 +215,11 @@ def test_hist_small_bins(library: str) -> None: s["values"].hist(bins=[1, 3], bin_count=4) -def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None: - if "cudf" in str(constructor_eager): +def test_hist_non_monotonic(nw_dataframe: DataFrameConstructor) -> None: + if "cudf" in str(nw_dataframe): # TODO(unassigned): too many spurious failures, report and revisit return - df = nw.from_native(constructor_eager({"int": [0, 1, 2, 3, 4, 5, 6]})) + df = nw_dataframe({"int": [0, 1, 2, 3, 4, 5, 6]}, nw) with pytest.raises(ComputeError, match="monotonic"): df["int"].hist(bins=[5, 0, 2]) @@ -323,33 +254,17 @@ def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None: POLARS_VERSION < (1, 27), reason="polars cannot be used for compatibility checks since narwhals aims to mimic polars>=1.27 behavior", ) -@param_library @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.slow def test_hist_bin_hypotheis( - library: str, data: list[float], bin_deltas: list[float] + nw_dataframe: DataFrameConstructor, data: list[float], bin_deltas: list[float] ) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") 
- constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table pytest.importorskip("polars") import polars as pl - df = nw.from_native(constructor_eager({"values": data})).select( - nw.col("values").cast(nw.Float64) - ) - df_bins_native = constructor_eager({"bins": bin_deltas}) + df = nw_dataframe({"values": data}, nw).select(nw.col("values").cast(nw.Float64)) bins = ( - nw.from_native(df_bins_native, eager_only=True) + nw_dataframe({"bins": bin_deltas}, nw) .get_column("bins") .cast(nw.Float64) .cum_sum() @@ -376,28 +291,17 @@ def test_hist_bin_hypotheis( reason="polars cannot be used for compatibility checks since narwhals aims to mimic polars>=1.27 behavior", ) @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") -@param_library @pytest.mark.slow def test_hist_count_hypothesis( - library: str, data: list[float], bin_count: int, request: pytest.FixtureRequest + nw_dataframe: DataFrameConstructor, + data: list[float], + bin_count: int, + request: pytest.FixtureRequest, ) -> None: pytest.importorskip("polars") import polars as pl - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - df = nw.from_native(constructor_eager({"values": data})).select( - nw.col("values").cast(nw.Float64) - ) + df = nw_dataframe({"values": data}, nw).select(nw.col("values").cast(nw.Float64)) try: result = df["values"].hist(bin_count=bin_count, include_breakpoint=True) @@ -418,9 +322,7 @@ def test_hist_count_hypothesis( if expected[ "count" - ].sum() != expected_data.is_not_nan().sum() and "polars" not in str( - constructor_eager - ): + ].sum() != expected_data.is_not_nan().sum() and "polars" not in str(nw_dataframe): 
request.applymarker(pytest.mark.xfail) assert_equal_data(result, expected.to_dict(as_series=False)) diff --git a/tests/series_only/is_sorted_test.py b/tests/series_only/is_sorted_test.py index 046669aac0..4efddf542f 100644 --- a/tests/series_only/is_sorted_test.py +++ b/tests/series_only/is_sorted_test.py @@ -16,7 +16,7 @@ ) def test_is_sorted( constructor_eager: ConstructorEager, - input_data: str, + input_data: list[int], descending: bool, # noqa: FBT001 expected: bool, # noqa: FBT001 ) -> None: diff --git a/tests/series_only/to_native_test.py b/tests/series_only/to_native_test.py index 350d81764d..c2a7ad5ecb 100644 --- a/tests/series_only/to_native_test.py +++ b/tests/series_only/to_native_test.py @@ -11,7 +11,7 @@ def test_to_native(constructor_eager: ConstructorEager) -> None: - orig_series = constructor_eager({"a": data})["a"] # type: ignore[index] + orig_series = constructor_eager({"a": data})["a"].to_native() nw_series = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] result = nw_series.to_native() assert isinstance(result, orig_series.__class__) diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py index c1b3b4e357..5d2f187267 100644 --- a/tests/testing/assert_frame_equal_test.py +++ b/tests/testing/assert_frame_equal_test.py @@ -12,8 +12,8 @@ from tests.utils import PANDAS_VERSION if TYPE_CHECKING: + from narwhals.testing.typing import Data from narwhals.typing import IntoSchema - from tests.conftest import Data from tests.utils import Constructor, ConstructorEager @@ -24,12 +24,12 @@ def _assertion_error(detail: str) -> pytest.RaisesExc: def test_check_narwhals_objects(constructor: Constructor) -> None: """Test that a type error is raised if the input is not a Narwhals object.""" - frame = constructor({"a": [1, 2, 3]}) + frame = constructor({"a": [1, 2, 3]}).to_native() msg = re.escape( "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found" ) with pytest.raises(TypeError, 
match=msg): - assert_frame_equal(frame, frame) # type: ignore[arg-type] + assert_frame_equal(frame, frame) def test_implementation_mismatch() -> None: @@ -42,8 +42,7 @@ def test_implementation_mismatch() -> None: with _assertion_error("implementation mismatch"): assert_frame_equal( - nw.from_native(pd.DataFrame({"a": [1]})), - nw.from_native(pa.table({"a": [1]})), # type: ignore[type-var] # pyright: ignore[reportArgumentType] + nw.from_native(pd.DataFrame({"a": [1]})), nw.from_native(pa.table({"a": [1]})) ) diff --git a/tests/testing/assert_series_equal_test.py b/tests/testing/assert_series_equal_test.py index c4826c695e..064cc546dd 100644 --- a/tests/testing/assert_series_equal_test.py +++ b/tests/testing/assert_series_equal_test.py @@ -13,8 +13,8 @@ if TYPE_CHECKING: from typing_extensions import TypeAlias + from narwhals.testing.typing import Data from narwhals.typing import IntoSchema, IntoSeriesT - from tests.conftest import Data from tests.utils import ConstructorEager SetupFn: TypeAlias = Callable[[nw.Series[Any]], tuple[nw.Series[Any], nw.Series[Any]]] @@ -406,7 +406,7 @@ def test_categorical_as_str( "left": ["beluga", "dolphin", "narwhal", "orca"], "right": ["unicorn", "orca", "narwhal", "orca"], } - frame = nw.from_native(constructor_eager(data), eager_only=True) + frame = constructor_eager(data, namespace=nw) left = frame["left"].cast(nw.Categorical())[2:] right = frame["right"].cast(nw.Categorical())[2:] diff --git a/tests/testing/conftest.py b/tests/testing/conftest.py index a41d4fdce4..0ff5e9935f 100644 --- a/tests/testing/conftest.py +++ b/tests/testing/conftest.py @@ -8,8 +8,8 @@ import narwhals as nw if TYPE_CHECKING: + from narwhals.testing.typing import Data from narwhals.typing import IntoSchema - from tests.conftest import Data @pytest.fixture(scope="module") diff --git a/tests/testing/constructors_test.py b/tests/testing/constructors_test.py new file mode 100644 index 0000000000..3520d492da --- /dev/null +++ 
b/tests/testing/constructors_test.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw +from narwhals._utils import Implementation +from narwhals.testing.constructors import ( + available_backends, + get_backend_constructor, + prepare_backends, +) + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + PropertyName: TypeAlias = str + TrueNames: TypeAlias = set[str] + FalseNames: TypeAlias = set[str] + + +def test_eager_returns_eager_frame() -> None: + c = get_backend_constructor("pandas") + if not c.is_available: + pytest.skip() + + df = c({"x": [1, 2, 3]}, nw) + assert isinstance(df, nw.DataFrame) + + +def test_lazy_returns_lazy_frame() -> None: + c = get_backend_constructor("polars[lazy]") + if not c.is_available: + pytest.skip() + + lf = c({"x": [1, 2, 3]}, nw) + assert isinstance(lf, nw.LazyFrame) + + +_IS_PROPERTY_CASES: list[tuple[PropertyName, TrueNames, FalseNames]] = [ + ("is_pandas", {"pandas", "pandas[nullable]", "pandas[pyarrow]"}, {"polars[eager]"}), + ("is_modin", {"modin", "modin[pyarrow]"}, {"pandas"}), + ("is_cudf", {"cudf"}, {"pandas"}), + ("is_pandas_like", {"pandas", "modin", "cudf"}, {"polars[eager]"}), + ("is_polars", {"polars[eager]", "polars[lazy]"}, {"pandas"}), + ("is_pyarrow", {"pyarrow"}, {"pandas"}), + ("is_dask", {"dask"}, {"pandas"}), + ("is_duckdb", {"duckdb"}, {"pandas"}), + ("is_pyspark", {"pyspark", "pyspark[connect]"}, {"pandas"}), + ("is_sqlframe", {"sqlframe"}, {"pandas"}), + ("is_ibis", {"ibis"}, {"pandas"}), + ("is_spark_like", {"pyspark", "sqlframe", "pyspark[connect]"}, {"pandas"}), + ("is_lazy", {"polars[lazy]", "dask", "duckdb"}, {"pandas"}), + ("needs_pyarrow", {"pyarrow", "duckdb", "ibis"}, {"pandas"}), + ("is_nullable", {"polars[eager]"}, {"pandas", "modin", "dask"}), +] + + +@pytest.mark.parametrize(("prop", "true_names", "false_names"), _IS_PROPERTY_CASES) +def test_constructor_is_properties( + prop: str, true_names: 
TrueNames, false_names: FalseNames +) -> None: + for name in true_names: + c = get_backend_constructor(name) + assert getattr(c, prop), f"{name}.{prop} should be True" + for name in false_names: + c = get_backend_constructor(name) + assert not getattr(c, prop), f"{name}.{prop} should be False" + + +def test_constructor_implementation() -> None: + assert get_backend_constructor("pandas").implementation is Implementation.PANDAS + assert ( + get_backend_constructor("pandas[pyarrow]").implementation is Implementation.PANDAS + ) + assert ( + get_backend_constructor("polars[eager]").implementation is Implementation.POLARS + ) + assert ( + get_backend_constructor("pyspark[connect]").implementation + is Implementation.PYSPARK_CONNECT + ) + + +def test_constructor_dunder() -> None: + c1 = get_backend_constructor("pandas") + c2 = get_backend_constructor("pandas") + assert c1.identifier == "pandas" + assert c1 == c2 + assert hash(c1) == hash(c2) + assert c1 != get_backend_constructor("polars[eager]") + assert c1 != "not a constructor" + + +def test_get_backend_constructor_invalid_name() -> None: + with pytest.raises(ValueError, match="Unknown constructor"): + get_backend_constructor("not_a_backend") + + +@pytest.mark.parametrize( + ("include", "exclude", "expected"), + [ + (None, None, available_backends()), + (None, ["pandas"], available_backends() - {"pandas"}), + (["pandas", "polars[eager]"], None, {"pandas", "polars[eager]"}), + (["pandas", "polars[eager]"], ["pandas"], {"polars[eager]"}), + ([], None, frozenset()), + ], +) +def test_prepare_backends( + include: list[str] | None, exclude: list[str] | None, expected: frozenset[str] +) -> None: + for name in (*(include or ()), *(exclude or ())): + if not get_backend_constructor(name).is_available: + pytest.skip(f"{name} not installed") + result = prepare_backends(include=include, exclude=exclude) + assert {c.name for c in result} == expected + + +@pytest.mark.parametrize("kwarg", ["include", "exclude"]) +def 
test_prepare_backends_unknown_name_raises(kwarg: str) -> None: + with pytest.raises(ValueError, match="not known constructors"): + prepare_backends(**{kwarg: ["not_a_backend"]}) diff --git a/tests/testing/plugin_test.py b/tests/testing/plugin_test.py new file mode 100644 index 0000000000..2c49818047 --- /dev/null +++ b/tests/testing/plugin_test.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import pytest + +pytest_plugins = ["pytester"] + + +def test_constructor_eager_fixture_runs_for_each_backend( + pytester: pytest.Pytester, +) -> None: + pytest.importorskip("pandas") + pytest.importorskip("polars") + pytest.importorskip("pyarrow") + + pytester.makeconftest("") + pytester.makepyfile(""" + import narwhals as nw + from narwhals.testing.typing import DataFrameConstructor + + def test_shape(nw_dataframe: DataFrameConstructor) -> None: + df = nw_dataframe({"x": [1, 2, 3]}, namespace=nw) + assert df.shape == (3, 1) + """) + result = pytester.runpytest_subprocess( + "-v", "-p", "no:randomly", "--nw-backends=pandas,polars[eager],pyarrow" + ) + result.assert_outcomes(passed=3) + result.stdout.fnmatch_lines( + [ + "*test_shape?pandas?*", + "*test_shape?polars[[]eager[]]?*", + "*test_shape?pyarrow?*", + ] + ) + + +def test_constructor_fixture_includes_lazy_backends(pytester: pytest.Pytester) -> None: + pytest.importorskip("pandas") + pytest.importorskip("polars") + pytest.importorskip("duckdb") + + pytester.makeconftest("") + pytester.makepyfile(""" + import narwhals as nw + from narwhals.testing.typing import FrameConstructor + + def test_columns(nw_frame: FrameConstructor) -> None: + df = nw_frame({"x": [1, 2, 3]}, namespace=nw) + assert df.collect_schema().names() == ["x"] + """) + result = pytester.runpytest_subprocess( + "-v", "--nw-backends=pandas,polars[lazy],duckdb" + ) + result.assert_outcomes(passed=3) + + +def test_external_constructor_disables_parametrisation(pytester: pytest.Pytester) -> None: + pytester.makeconftest("") + pytester.makepyfile(""" + 
from narwhals.testing.typing import DataFrameConstructor + + def test_unparam(nw_dataframe: DataFrameConstructor) -> None: + pass + """) + result = pytester.runpytest_subprocess("--use-external-nw-backend") + # Without external parametrisation in place, the fixture is missing. + result.assert_outcomes(errors=1) diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 9f87b5220b..d23cbfd39c 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -30,7 +30,7 @@ import narwhals as nw from narwhals._utils import Version -from tests.conftest import sqlframe_pyspark_lazy_constructor +from narwhals.testing.constructors import get_backend_constructor from tests.utils import Constructor, maybe_get_modin_df if TYPE_CHECKING: @@ -294,10 +294,10 @@ def test_eager_only_lazy_dask(eager_only: Any, context: Any) -> None: def test_series_only_sqlframe() -> None: # pragma: no cover pytest.importorskip("sqlframe") - df = sqlframe_pyspark_lazy_constructor(data) + df = get_backend_constructor("sqlframe")(data, nw).to_native() with pytest.raises(TypeError, match="Cannot only use `series_only`"): - nw.from_native(df, series_only=True) # pyright: ignore[reportArgumentType, reportCallIssue] # pyrefly: ignore[no-matching-overload] + nw.from_native(df, series_only=True) # type: ignore[call-overload] # pyrefly: ignore[no-matching-overload] @pytest.mark.parametrize( @@ -315,7 +315,7 @@ def test_series_only_sqlframe() -> None: # pragma: no cover ) def test_eager_only_sqlframe(eager_only: Any, context: Any) -> None: # pragma: no cover pytest.importorskip("sqlframe") - df = sqlframe_pyspark_lazy_constructor(data) + df = get_backend_constructor("sqlframe")(data, nw).to_native() with context: res = nw.from_native(df, eager_only=eager_only) @@ -528,7 +528,7 @@ def test_eager_only_pass_through_main(constructor: Constructor) -> None: if not any(s in str(constructor) for s in ("pyspark", "dask", "ibis", "duckdb")): 
pytest.skip(reason="Non lazy or polars") - df = constructor(data) + df = constructor(data).to_native() r1 = nw.from_native(df, eager_only=False, pass_through=False) r2 = nw.from_native(df, eager_only=False, pass_through=True) @@ -539,7 +539,7 @@ def test_eager_only_pass_through_main(constructor: Constructor) -> None: assert not isinstance(r3, nw.LazyFrame) with pytest.raises(TypeError, match=r"Cannot.+use.+eager_only"): - nw.from_native(df, eager_only=True, pass_through=False) # type: ignore[type-var] + nw.from_native(df, eager_only=True, pass_through=False) def test_from_native_lazyframe_exhaustive() -> None: # noqa: PLR0914, PLR0915 diff --git a/tests/translate/get_native_namespace_test.py b/tests/translate/get_native_namespace_test.py index 821443ea64..5a15069ed2 100644 --- a/tests/translate/get_native_namespace_test.py +++ b/tests/translate/get_native_namespace_test.py @@ -76,7 +76,7 @@ def test_native_namespace_frame(constructor: Constructor) -> None: def test_native_namespace_series(constructor_eager: ConstructorEager) -> None: - constructor_name = constructor_eager.__name__ + constructor_name = str(constructor_eager) expected_namespace = _get_expected_namespace(constructor_name=constructor_name) diff --git a/tests/utils.py b/tests/utils.py index b9fa613bca..5b53e3a49b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,7 +6,7 @@ import warnings from datetime import date, datetime from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any import pytest @@ -15,16 +15,25 @@ from narwhals.dependencies import get_pandas from narwhals.translate import from_native +# TODO(FBruzzesi): Replace these aliases once all the test suite migrates to *FrameConstructor's +from tests.conftest import ( + _PatchedDataFrameConstructor as ConstructorEager, + _PatchedDataFrameConstructor as ConstructorPandasLike, + _PatchedFrameConstructor as Constructor, +) + if TYPE_CHECKING: from collections.abc import Mapping, 
Sequence import pandas as pd - from pyspark.sql import SparkSession - from sqlframe.duckdb import DuckDBSession from typing_extensions import TypeAlias - from narwhals._native import NativeLazyFrame - from narwhals.typing import Frame, IntoDataFrame, TimeUnit + from narwhals.typing import Frame, TimeUnit + +# TODO(FBruzzesi): Remove these aliases once all the test suite migrates to *FrameConstructor's +# NOTE: Explicitly exported otherwise mypy will raise an [attr-defined] error for each file +# importing them from `tests.utils` rather than `narwhals.testing.typing` directly. +__all__ = ("Constructor", "ConstructorEager", "ConstructorPandasLike") def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -44,11 +53,6 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] = get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | IntoDataFrame"] -ConstructorEager: TypeAlias = Callable[[Any], "IntoDataFrame"] -ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] -ConstructorPandasLike: TypeAlias = Callable[[Any], "pd.DataFrame"] - NestedOrEnumDType: TypeAlias = "nw.List | nw.Array | nw.Struct | nw.Enum" """`DType`s which **cannot** be used as bare types.""" @@ -174,34 +178,6 @@ def assert_equal_hash(left: Any, right: Any) -> None: ) -def sqlframe_session() -> DuckDBSession: - from sqlframe.duckdb import DuckDBSession - - # NOTE: `__new__` override inferred by `pyright` only - # https://github.com/eakmanrq/sqlframe/blob/772b3a6bfe5a1ffd569b7749d84bea2f3a314510/sqlframe/base/session.py#L181-L184 - return cast("DuckDBSession", DuckDBSession()) # type: ignore[redundant-cast] - - -def pyspark_session() -> SparkSession: # pragma: no cover - if is_spark_connect := os.environ.get("SPARK_CONNECT", None): - from pyspark.sql.connect.session import SparkSession - else: - from pyspark.sql import 
SparkSession - builder = cast("SparkSession.Builder", SparkSession.builder).appName("unit-tests") - builder = ( - builder.remote(f"sc://localhost:{os.environ.get('SPARK_PORT', '15002')}") - if is_spark_connect - else builder.master("local[1]").config("spark.ui.enabled", "false") - ) - return ( - # Don't remove pyrefly-ignore, needed in CI when pyspark is installed. - builder.config("spark.default.parallelism", "1") # pyrefly: ignore[bad-return] - .config("spark.sql.shuffle.partitions", "2") - .config("spark.sql.session.timeZone", "UTC") - .getOrCreate() - ) - - def maybe_get_modin_df(df_pandas: pd.DataFrame) -> Any: # pragma: no cover """Convert a pandas DataFrame to a Modin DataFrame if Modin is available.""" try: @@ -231,10 +207,7 @@ def is_pyarrow_windows_no_tzdata(constructor: Constructor, /) -> bool: def uses_pyarrow_backend(constructor: Constructor | ConstructorEager) -> bool: """Checks if the pandas-like constructor uses pyarrow backend.""" - return constructor.__name__ in { - "pandas_pyarrow_constructor", - "modin_pyarrow_constructor", - } + return str(constructor) in {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"} def maybe_collect(df: Frame) -> Frame: diff --git a/tests/v1_test.py b/tests/v1_test.py index 9882c4ed15..8ddb64a118 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -318,11 +318,11 @@ def test_cast_to_enum_v1( ): request.applymarker(pytest.mark.xfail) - df_native = constructor({"a": ["a", "b"]}) + df = constructor({"a": ["a", "b"]}, nw_v1) msg = re.escape("Converting to Enum is not supported in narwhals.stable.v1") with pytest.raises(NotImplementedError, match=msg): - nw_v1.from_native(df_native).select(nw_v1.col("a").cast(nw_v1.Enum)) # type: ignore[arg-type] + df.select(nw_v1.col("a").cast(nw_v1.Enum)) # type: ignore[arg-type] def test_v1_ordered_categorical_pandas() -> None: @@ -459,7 +459,7 @@ def test_with_row_index(constructor: Constructor) -> None: pytest.skip() data = {"abc": ["foo", "bars"], "xyz": [100, 200], 
"const": [42, 42]} - frame = nw_v1.from_native(constructor(data)) + frame = constructor(data, nw_v1) msg = "Cannot pass `order_by`" context = ( @@ -469,7 +469,7 @@ def test_with_row_index(constructor: Constructor) -> None: ) with context: - result = frame.with_row_index() + result = frame.with_row_index() # type: ignore[call-arg] expected = {"index": [0, 1], **data} assert_equal_data(result, expected) @@ -887,7 +887,7 @@ def test_is_frame() -> None: def test_with_version(constructor: Constructor) -> None: - lf = nw_v1.from_native(constructor({"a": [1, 2]})).lazy() + lf = constructor({"a": [1, 2]}, nw_v1).lazy() assert isinstance(lf, nw_v1.LazyFrame) assert lf._compliant_frame._with_version(Version.MAIN)._version is Version.MAIN @@ -896,7 +896,7 @@ def test_with_version(constructor: Constructor) -> None: @pytest.mark.parametrize("offset", [1, 2]) def test_gather_every(constructor_eager: ConstructorEager, n: int, offset: int) -> None: data = {"a": list(range(10))} - df_v1 = nw_v1.from_native(constructor_eager(data)) + df_v1 = constructor_eager(data, nw_v1) result = df_v1.gather_every(n=n, offset=offset) expected = {"a": data["a"][offset::n]} assert_equal_data(result, expected) @@ -1156,7 +1156,7 @@ def test_series_from_iterable( def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - df = nw_v1.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v1) result = df.select(nw_v1.col("a").mode()).sort("a") expected = {"a": [1, 2]} assert_equal_data(result, expected) @@ -1164,7 +1164,7 @@ def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: def test_mode_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - series = nw_v1.from_native(constructor_eager(data), eager_only=True)["a"] + series = constructor_eager(data, nw_v1)["a"] result = series.mode().sort() expected = {"a": [1, 2]} assert_equal_data({"a": result}, 
expected) @@ -1173,7 +1173,7 @@ def test_mode_series(constructor_eager: ConstructorEager) -> None: def test_mode_different_lengths(constructor_eager: ConstructorEager) -> None: if "polars" in str(constructor_eager) and POLARS_VERSION < (1, 10): pytest.skip() - df = nw_v1.from_native(constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]})) + df = constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]}, nw_v1) with pytest.raises(ShapeError): df.select(nw_v1.col("a", "b").mode()) @@ -1196,7 +1196,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest "b": [1, 2, 3, 4, 5, 6], "c": [None, None, 1, None, 2, None], } - df = nw_v1.from_native(constructor(data)) + df = constructor(data, nw_v1) with pytest.warns(NarwhalsUnstableWarning): df.select(nw_v1.col("a", "b").any_value()) @@ -1204,7 +1204,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest def test_any_value_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 1, 2, 2, 3]} - df = nw_v1.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v1) with pytest.warns(NarwhalsUnstableWarning): df["a"].any_value() diff --git a/tests/v2_test.py b/tests/v2_test.py index 7a1903425c..d33ae97edb 100644 --- a/tests/v2_test.py +++ b/tests/v2_test.py @@ -347,7 +347,7 @@ def fun2(self, df: Any) -> Any: # pragma: no cover def test_with_version(constructor: Constructor) -> None: - lf = nw_v2.from_native(constructor({"a": [1, 2]})).lazy() + lf = constructor({"a": [1, 2]}, nw_v2).lazy() assert isinstance(lf, nw_v2.LazyFrame) assert lf._compliant_frame._with_version(Version.MAIN)._version is Version.MAIN @@ -503,7 +503,7 @@ def test_series_from_iterable( def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - df = nw_v2.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v2) result = df.select(nw_v2.col("a").mode()).sort("a") expected = {"a": [1, 2]} 
assert_equal_data(result, expected) @@ -511,7 +511,7 @@ def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: def test_mode_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - series = nw_v2.from_native(constructor_eager(data), eager_only=True)["a"] + series = constructor_eager(data, nw_v2)["a"] result = series.mode().sort() expected = {"a": [1, 2]} assert_equal_data({"a": result}, expected) @@ -520,7 +520,7 @@ def test_mode_series(constructor_eager: ConstructorEager) -> None: def test_mode_different_lengths(constructor_eager: ConstructorEager) -> None: if "polars" in str(constructor_eager) and POLARS_VERSION < (1, 10): pytest.skip() - df = nw_v2.from_native(constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]})) + df = constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]}, nw_v2) with pytest.raises(ShapeError): df.select(nw_v2.col("a", "b").mode()) @@ -535,7 +535,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest "b": [1, 2, 3, 4, 5, 6], "c": [None, None, 1, None, 2, None], } - df = nw_v2.from_native(constructor(data)) + df = constructor(data, nw_v2) with pytest.warns(NarwhalsUnstableWarning): df.select(nw_v2.col("a", "b").any_value()) @@ -543,7 +543,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest def test_any_value_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 1, 2, 2, 3]} - df = nw_v2.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v2) with pytest.warns(NarwhalsUnstableWarning): df["a"].any_value() diff --git a/tpch/tests/conftest.py b/tpch/tests/conftest.py index d98c4b401a..499571a567 100644 --- a/tpch/tests/conftest.py +++ b/tpch/tests/conftest.py @@ -36,13 +36,6 @@ def pytest_configure(config: pytest.Config) -> None: def pytest_addoption(parser: pytest.Parser) -> None: - from tests.conftest import DEFAULT_CONSTRUCTORS - - parser.addoption( - "--constructors", - 
default=DEFAULT_CONSTRUCTORS, - help="", - ) parser.addoption( "--scale-factor", default=constants.SCALE_FACTOR_DEFAULT, diff --git a/utils/import_check.py b/utils/import_check.py index d292b40790..d97b488509 100644 --- a/utils/import_check.py +++ b/utils/import_check.py @@ -27,6 +27,20 @@ "_polars": {"polars"}, "_duckdb": {"duckdb"}, "_ibis": {"ibis", "ibis._", "ibis.expr.types"}, + # narwhals.testing constructors deliberately lazy-import every supported + # backend inside `__call__` so test fixtures can build native frames. + "testing": { + "cudf", + "dask", + "dask.dataframe", + "duckdb", + "ibis", + "modin", + "pandas", + "polars", + "pyarrow", + "pyspark", + }, } diff --git a/utils/sort_api_reference.py b/utils/sort_api_reference.py index 1b417ed63a..243ccbcd6d 100644 --- a/utils/sort_api_reference.py +++ b/utils/sort_api_reference.py @@ -42,7 +42,7 @@ def sort_list(match: re.Match[str]) -> str: PATH = Path("docs") / "api-reference" -FILES_TO_SKIP = {"dtypes", "typing"} +FILES_TO_SKIP = {"dtypes", "typing", "testing"} ret = max( sort_members_in_markdown(file_path=file_path)