diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index 7f23be921b..b694785cd5 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -48,7 +49,10 @@ jobs: echo "$DEPS" | grep 'scikit-learn==1.1.0' echo "$DEPS" | grep 'duckdb==1.1' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],duckdb + coverage combine + coverage report --fail-under=50 pretty_old_versions: strategy: @@ -86,7 +90,10 @@ jobs: echo "$DEPS" | grep 'scikit-learn==1.1.0' echo "$DEPS" | grep 'duckdb==1.2' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],duckdb + coverage combine + coverage report --fail-under=50 not_so_old_versions: strategy: @@ -123,7 +130,10 @@ jobs: echo "$DEPS" | grep 'dask==2024.10' echo "$DEPS" | grep 'duckdb==1.3' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage combine + coverage report --fail-under=50 nightlies: strategy: @@ -179,5 +189,6 @@ jobs: echo "$DEPS" | grep 'dask.*@' - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow \ - --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage run -m pytest 
tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb + coverage combine + coverage report --fail-under=50 diff --git a/.github/workflows/pytest-ibis.yml b/.github/workflows/pytest-ibis.yml index 61ca076a3d..74132f8828 100644 --- a/.github/workflows/pytest-ibis.yml +++ b/.github/workflows/pytest-ibis.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -40,4 +41,4 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors ibis + run: pytest tests --nw-backends ibis diff --git a/.github/workflows/pytest-modin.yml b/.github/workflows/pytest-modin.yml index e20aff8539..923ce920e6 100644 --- a/.github/workflows/pytest-modin.yml +++ b/.github/workflows/pytest-modin.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -38,4 +39,4 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors modin[pyarrow] + run: pytest tests --nw-backends modin[pyarrow] diff --git a/.github/workflows/pytest-pyspark.yml b/.github/workflows/pytest-pyspark.yml index 7aae7c599d..bf4e412e4a 100644 --- a/.github/workflows/pytest-pyspark.yml +++ b/.github/workflows/pytest-pyspark.yml @@ -14,6 +14,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -44,7 +45,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals/_spark_like --cov-fail-under=95 --runslow --constructors pyspark + run: | + coverage run -m pytest tests --runslow --nw-backends pyspark + coverage combine + coverage report --fail-under=95 --include "narwhals/_spark_like/*" pytest-pyspark-min-version-constructor: @@ -71,7 +75,7 @@ jobs: - name: 
show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --constructors pyspark + run: pytest tests --nw-backends pyspark pytest-pyspark-connect-constructor: strategy: @@ -137,7 +141,10 @@ jobs: echo "Spark Connect server started" - name: Run pytest - run: pytest tests --cov=narwhals/_spark_like --cov-fail-under=95 --runslow --constructors "pyspark[connect]" + run: | + coverage run -m pytest tests --runslow --nw-backends "pyspark[connect]" + coverage combine + coverage report --fail-under=95 --include="narwhals/_spark_like/*" - name: Stop Spark Connect server if: always() diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9da8bf293b..b308015a51 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml UV_SYSTEM_PYTHON: 1 permissions: @@ -34,7 +35,15 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 --constructors=pandas,pyarrow,polars[eager],polars[lazy] + env: + # coverage's execv/fork patches raise on Windows; collapse to `subprocess` + # there (coverage dedupes) and keep the default values on Linux. + COVERAGE_PATCH_EXECV: ${{ matrix.os == 'windows-latest' && 'subprocess' || 'execv' }} + COVERAGE_PATCH_FORK: ${{ matrix.os == 'windows-latest' && 'subprocess' || 'fork' }} + run: | + coverage run -m pytest tests --nw-backends=pandas,pyarrow,polars[eager],polars[lazy] + coverage combine + coverage report --fail-under=75 - name: install-test-plugin run: uv pip install -e test-plugin/. @@ -44,6 +53,11 @@ jobs: python-version: ["3.10", "3.12"] os: [windows-latest] runs-on: ${{ matrix.os }} + env: + # coverage's execv/fork patches raise on Windows; collapse them to `subprocess` + # in the pyproject `patch` list (coverage dedupes). 
+ COVERAGE_PATCH_EXECV: subprocess + COVERAGE_PATCH_FORK: subprocess steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -64,7 +78,9 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=95 pytest-full-coverage: strategy: @@ -95,7 +111,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=100 - name: Run doctests # reprs differ between versions, so we only run doctests on the latest Python if: matrix.python-version == '3.13' @@ -124,20 +143,20 @@ jobs: uv pip install -e ".[pandas]" --group tests uv pip freeze - name: Run pytest (pandas and pandas[nullable]) - run: pytest tests --runslow --constructors=pandas,pandas[nullable] + run: pytest tests --runslow --nw-backends=pandas,pandas[nullable] - name: install-more-reqs run: | uv pip install -U pyarrow uv pip freeze - name: Run pytest (pandas[pyarrow] and pyarrow) - run: pytest tests --runslow --constructors=pandas[pyarrow],pyarrow + run: pytest tests --runslow --nw-backends=pandas[pyarrow],pyarrow - name: install-polars run: | uv pip uninstall pandas pyarrow uv pip install 
polars uv pip freeze - name: Run pytest (polars) - run: pytest tests --runslow --constructors=polars[eager],polars[lazy] + run: pytest tests --runslow --nw-backends=polars[eager],polars[lazy] python-314: strategy: @@ -161,7 +180,10 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --runslow --durations=30 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --cov-fail-under=50 + run: | + coverage run -m pytest tests --runslow --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],duckdb,sqlframe --durations=30 + coverage combine + coverage report --fail-under=50 python-314t: strategy: @@ -187,4 +209,7 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --runslow --durations=30 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow --cov-fail-under=50 + run: | + coverage run -m pytest tests --runslow --durations=30 --nw-backends=pandas,pandas[nullable],pandas[pyarrow],pyarrow + coverage combine + coverage report --fail-under=50 diff --git a/.github/workflows/random_ci_pytest.yml b/.github/workflows/random_ci_pytest.yml index e48909d3f6..74ae375309 100644 --- a/.github/workflows/random_ci_pytest.yml +++ b/.github/workflows/random_ci_pytest.yml @@ -6,6 +6,7 @@ on: env: PY_COLORS: 1 PYTEST_ADDOPTS: "--numprocesses=logical" + COVERAGE_PROCESS_START: pyproject.toml permissions: contents: read @@ -39,5 +40,6 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 \ - --constructors=pandas,pyarrow,polars[eager],polars[lazy] + coverage run -m pytest tests --nw-backends=pandas,pyarrow,polars[eager],polars[lazy] + coverage combine + coverage report --fail-under=75 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98a274e4ed..3dc3129c66 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -149,7 
+149,7 @@ If you add code that should be tested, please add tests. - To run tests, run `pytest`. To check coverage: `pytest --cov=narwhals` - To run tests on the doctests, use `pytest narwhals --doctest-modules` -- To run unit tests and doctests at the same time, run `pytest tests narwhals --cov=narwhals --doctest-modules` +- To run unit tests and doctests at the same time, run `pytest tests narwhals --doctest-modules` - To run tests multiprocessed, you may also want to use [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) (optional) -- To choose which backends to run tests with you, you can use the `--constructors` flag: -- To only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars` +- To choose which backends to run tests with, you can use the `--nw-backends` flag: +- To only run tests for pandas, Polars, and PyArrow, use `pytest --nw-backends=pandas,pyarrow,polars` diff --git a/Makefile b/Makefile index 90538ea1a0..cdc85888f3 100644 --- a/Makefile +++ b/Makefile @@ -41,3 +41,15 @@ docs-serve: # Build and serve the docs locally $(VENV_BIN)/uv run --no-sync utils/generate_backend_completeness.py $(VENV_BIN)/uv run --no-sync utils/generate_zen_content.py $(VENV_BIN)/uv run --no-sync zensical serve + +.PHONY: test +test: ## Run unit tests + $(VENV_BIN)/uv pip install \ + --upgrade \ + --editable test-plugin/. 
\ + --editable .[ibis,modin,pyspark] \ + --group core \ + --group tests + $(VENV_BIN)/uv run --no-sync coverage run -m pytest tests --all-nw-backends --numprocesses=logical + $(VENV_BIN)/uv run --no-sync coverage combine + $(VENV_BIN)/uv run --no-sync coverage report --fail-under=95 diff --git a/docs/api-reference/testing.md b/docs/api-reference/testing.md index db83c6930e..0ee8ec5f36 100644 --- a/docs/api-reference/testing.md +++ b/docs/api-reference/testing.md @@ -1,8 +1,87 @@ # `narwhals.testing` +## Assertions + ::: narwhals.testing handler: python options: + show_root_heading: false + heading_level: 3 members: - assert_frame_equal - assert_series_equal + +## `pytest` plugin + +Narwhals registers a pytest plugin that exposes parametrized fixtures with callables +to build Narwhals frames from a column-oriented Python `dict`. + +### Available fixtures + +| Fixture | Backends | +|---|---| +| `nw_frame` | every selected backend (eager + lazy) | +| `nw_lazyframe` | only lazy backends | +| `nw_dataframe` | only eager backends | +| `nw_pandas_like_frame` | pandas-like backends | + +### Pytest options + +The backend selection is controlled by the following CLI options: + +* `--nw-backends=pandas,polars[lazy],duckdb`: comma-separated list. + Defaults to the following list: `pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe,ibis` + intersected with the backends installed in the current environment. +* `--all-nw-backends`: shortcut for "every **CPU** backend that is installed". +* `--use-nw-external-constructor`: Skip narwhals.testing's parametrisation and let + another plugin provide the `constructor*` fixtures. + +Set the `NARWHALS_DEFAULT_BACKENDS` environment variable to override the default +list (useful e.g. when running under `cudf.pandas`). + +### Quick start + +The plugin auto-loads as soon as you `pip install narwhals`. 
Just write a test: + +```python +from typing import TYPE_CHECKING + +import narwhals as nw +import narwhals.stable.v2 as nw_v2 + +if TYPE_CHECKING: + from narwhals.testing.typing import Data, DataFrameConstructor, LazyFrameConstructor + + +def test_shape(nw_dataframe: DataFrameConstructor) -> None: + data: Data = {"x": [1, 2, 3]} + df = nw_dataframe(data, namespace=nw) + assert df.shape == (3, 1) + + +def test_laziness(nw_lazyframe: LazyFrameConstructor) -> None: + data: Data = {"x": [1, 2, 3]} + lf = nw_lazyframe(data, namespace=nw_v2) + assert isinstance(lf, nw_v2.LazyFrame) +``` + +The fixtures are parametrised against every supported backend that is installed +in the current environment. Filter the matrix on the command line: + +```bash +pytest --nw-backends="pandas,polars[lazy]" +pytest --all-nw-backends +``` + +## Type aliases + +::: narwhals.testing.typing + handler: python + options: + show_root_heading: false + heading_level: 3 + members: + - Data + - FrameConstructor + - DataFrameConstructor + - LazyFrameConstructor diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index a154a1a3f8..bc0a1ca26a 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -8,7 +8,12 @@ if TYPE_CHECKING: from typing_extensions import TypeAlias - from narwhals._native import NativeDataFrame, NativeDuckDB, NativeLazyFrame + from narwhals._native import ( + NativeDataFrame, + NativeDuckDB, + NativeIbis, + NativeLazyFrame, + ) from narwhals.stable.v1 import DataFrame, Expr, LazyFrame, Series class DataFrameLike(Protocol): @@ -25,7 +30,9 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike", "NativeDuckDB"] +IntoDataFrame: TypeAlias = Union[ + "NativeDataFrame", "DataFrameLike", "NativeDuckDB", "NativeIbis" +] """Anything which can be converted to a Narwhals DataFrame. 
Use this if your function accepts a narwhalifiable object but doesn't care about its backend. diff --git a/narwhals/testing/__init__.py b/narwhals/testing/__init__.py index 649463383f..6eb8c0b0d0 100644 --- a/narwhals/testing/__init__.py +++ b/narwhals/testing/__init__.py @@ -2,5 +2,6 @@ from narwhals.testing.asserts.frame import assert_frame_equal from narwhals.testing.asserts.series import assert_series_equal +from narwhals.testing.constructors import frame_constructor -__all__ = ("assert_frame_equal", "assert_series_equal") +__all__ = ("assert_frame_equal", "assert_series_equal", "frame_constructor") diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py index 64eec42abc..9386abad13 100644 --- a/narwhals/testing/asserts/frame.py +++ b/narwhals/testing/asserts/frame.py @@ -13,7 +13,6 @@ if TYPE_CHECKING: from narwhals._typing import Arrow, IntoBackend, Pandas, Polars - from narwhals.typing import DataFrameT, LazyFrameT GUARANTEES_ROW_ORDER = { Implementation.PANDAS, @@ -26,8 +25,8 @@ def assert_frame_equal( - left: DataFrameT | LazyFrameT, - right: DataFrameT | LazyFrameT, + left: DataFrame[Any] | LazyFrame[Any], + right: DataFrame[Any] | LazyFrame[Any], *, check_row_order: bool = True, check_column_order: bool = True, @@ -145,8 +144,8 @@ def assert_frame_equal( def _check_correct_input_type( # noqa: RET503 - left: DataFrameT | LazyFrameT, - right: DataFrameT | LazyFrameT, + left: DataFrame[Any] | LazyFrame[Any], + right: DataFrame[Any] | LazyFrame[Any], backend: IntoBackend[Polars | Pandas | Arrow] | None, ) -> tuple[DataFrame[Any], DataFrame[Any]]: # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/py-polars/src/polars/testing/asserts/frame.py#L15-L17 @@ -165,8 +164,8 @@ def _check_correct_input_type( # noqa: RET503 def _assert_dataframe_equal( - left: DataFrameT, - right: DataFrameT, + left: DataFrame[Any], + right: DataFrame[Any], impl: Implementation, *, check_row_order: bool, @@ -232,7 
+231,11 @@ def _assert_dataframe_equal( def _check_schema_equal( - left: DataFrameT, right: DataFrameT, *, check_dtypes: bool, check_column_order: bool + left: DataFrame[Any], + right: DataFrame[Any], + *, + check_dtypes: bool, + check_column_order: bool, ) -> None: """Compares DataFrame schema based on specified criteria. diff --git a/narwhals/testing/constructors.py b/narwhals/testing/constructors.py new file mode 100644 index 0000000000..1ea6737d29 --- /dev/null +++ b/narwhals/testing/constructors.py @@ -0,0 +1,701 @@ +"""Constructor registry for `narwhals.testing`. + +Each constructor wraps one backend library (pandas, Polars, DuckDB, ...) and +knows how to turn a column-oriented `dict` into a native frame. + +Registration is explicit: wrap a plain builder function with `@frame_constructor.register(...)`. +The decorator instantiates a [`narwhals.testing.frame_constructor`][] with the +declared metadata and stores it in the shared `_registry`. + +## Adding a new constructor + +```py +from narwhals.testing import frame_constructor + + +@frame_constructor.register( + name="my_backend", + implementation=Implementation.MY_BACKEND, + requirements=("my_backend",), +) +def my_backend_lazy_constructor(obj: Data, /, **kwds: Any) -> IntoLazyFrame: + import my_backend + + return my_backend.from_dict(obj) +``` +""" + +from __future__ import annotations + +import os +import uuid +import warnings +from copy import deepcopy +from functools import lru_cache +from importlib.util import find_spec +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Generic, + Literal, + TypeVar, + cast, + overload, +) + +from narwhals._utils import Implementation, generate_temporary_column_name + +if TYPE_CHECKING: + from collections.abc import Iterable + from types import ModuleType + + import ibis + import pandas as pd + import polars as pl + import pyarrow as pa + from ibis.backends.duckdb import Backend as IbisDuckDBBackend + from pyspark.sql import SparkSession + from 
sqlframe.duckdb import DuckDBSession + from typing_extensions import Concatenate, TypeAlias + + from narwhals import DataFrame, LazyFrame + from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame + from narwhals.testing.typing import Data + from narwhals.typing import ( + IntoDataFrame, + IntoDataFrameT, + IntoFrame, + IntoLazyFrame, + IntoLazyFrameT, + ) + + +__all__ = ( + "available_backends", + "available_cpu_backends", + "frame_constructor", + "get_backend_constructor", + "is_backend_available", + "prepare_backends", + "pyspark_session", + "sqlframe_session", +) + +T_co = TypeVar("T_co", covariant=True, bound="IntoFrame") +R = TypeVar("R", bound="IntoFrame") + + +class frame_constructor(Generic[T_co]): # noqa: N801 + """Callable wrapper around a backend frame builder. + + Turns a column-oriented `dict` (typed as [`Data`][narwhals.testing.typing.Data]) + into a native frame. Metadata (implementation, requirements, eager/lazy, + nullability, GPU need) lives on the instance, alongside the wrapped + `func`. Equality and hashing are keyed on `(type, name)`, so two lookups + of the same registered constructor compare equal. + + Warning: + Instances should be created via [`narwhals.testing.constructors.frame_constructor.register`][], + which is the only supported entry point. + + Direct instantiation is allowed but **does not** register the instance. + """ + + _registry: ClassVar[dict[str, frame_constructor[IntoFrame]]] = {} + + func: Callable[Concatenate[Data, ...], T_co] + + def __init__( + self, + func: Callable[Concatenate[Data, ...], T_co], + /, + *, + name: str, + implementation: Implementation, + requirements: tuple[str, ...] 
= (), + is_eager: bool = False, + is_nullable: bool = True, + needs_gpu: bool = False, + ) -> None: + self.func = func + self.name = name + self.implementation = implementation + self.requirements = requirements + self.is_eager = is_eager + self.is_nullable = is_nullable + self.needs_gpu = needs_gpu + + @classmethod + def register( + cls, + *, + name: str, + implementation: Implementation, + requirements: tuple[str, ...] = (), + is_eager: bool = False, + is_nullable: bool = True, + needs_gpu: bool = False, + ) -> Callable[[Callable[Concatenate[Data, ...], R]], frame_constructor[R]]: + """Decorator: register `func` as the constructor named `name`. + + Arguments: + name: The string identifier of the constructor (e.g. `"pandas[pyarrow]"`). + implementation: The [`Implementation`][] this constructor belongs to. + requirements: Package names that must be importable for this constructor + to be available (checked via `importlib.util.find_spec`). + is_eager: Whether the backend returns an eager dataframe. + is_nullable: Whether the backend has native null support. + needs_gpu: Whether the backend requires GPU hardware. + + Returns: + A decorator that replaces `func` with a `frame_constructor` + instance registered into the shared `_registry`. + """ + + def decorator(func: Callable[Concatenate[Data, ...], R]) -> frame_constructor[R]: + inst: frame_constructor[R] = frame_constructor( + func, + name=name, + implementation=implementation, + requirements=requirements, + is_eager=is_eager, + is_nullable=is_nullable, + needs_gpu=needs_gpu, + ) + cls._registry[name] = inst + return inst + + return decorator + + @overload + def __call__( + self: frame_constructor[IntoDataFrameT], + obj: Data, + /, + namespace: ModuleType, + **kwds: Any, + ) -> DataFrame[IntoDataFrameT]: ... + @overload + def __call__( + self: frame_constructor[IntoLazyFrameT], + obj: Data, + /, + namespace: ModuleType, + **kwds: Any, + ) -> LazyFrame[IntoLazyFrameT]: ... 
+ @overload + def __call__( + self: frame_constructor[IntoFrame], + obj: Data, + /, + namespace: ModuleType, + **kwds: Any, + ) -> DataFrame[Any] | LazyFrame[Any]: ... + + def __call__( + self, obj: Data, /, namespace: ModuleType, **kwds: Any + ) -> DataFrame[Any] | LazyFrame[Any]: + """Build a native frame and wrap it with `namespace.from_native`. + + Arguments: + obj: Column-oriented mapping passed to the wrapped builder. + namespace: A narwhals namespace (e.g. `narwhals`, `narwhals.stable.v1`) + whose `from_native` performs the wrapping. + **kwds: Forwarded to the wrapped builder. + """ + native = self.func(obj, **kwds) + return namespace.from_native(native) # type: ignore[no-any-return] + + @property + def identifier(self) -> str: + """Instance-level string identifier for test IDs.""" + return self.name + + @property + def is_lazy(self) -> bool: + """Whether this constructor produces a lazy native frame.""" + return not self.is_eager + + @property + def is_pandas(self) -> bool: + """Whether this is one of the pandas constructors.""" + return self.implementation.is_pandas() + + @property + def is_modin(self) -> bool: + """Whether this is one of the modin constructors.""" + return self.implementation.is_modin() + + @property + def is_cudf(self) -> bool: + """Whether this is the cudf constructor.""" + return self.implementation.is_cudf() + + @property + def is_pandas_like(self) -> bool: + """Whether this constructor produces a pandas-like dataframe (pandas, modin, cudf).""" + return self.implementation.is_pandas_like() + + @property + def is_polars(self) -> bool: + """Whether this is one of the polars constructors.""" + return self.implementation.is_polars() + + @property + def is_pyarrow(self) -> bool: + """Whether this is the pyarrow table constructor.""" + return self.implementation.is_pyarrow() + + @property + def is_dask(self) -> bool: + """Whether this is the dask constructor.""" + return self.implementation.is_dask() + + @property + def is_duckdb(self) -> 
bool: + """Whether this is the duckdb constructor.""" + return self.implementation.is_duckdb() + + @property + def is_pyspark(self) -> bool: + """Whether this is one of the pyspark constructors.""" + impl = self.implementation + return impl.is_pyspark() or impl.is_pyspark_connect() + + @property + def is_sqlframe(self) -> bool: + """Whether this is the sqlframe constructor.""" + return self.implementation.is_sqlframe() + + @property + def is_ibis(self) -> bool: + """Whether this is the ibis constructor.""" + return self.implementation.is_ibis() + + @property + def is_spark_like(self) -> bool: + """Whether this constructor uses a spark-like backend (pyspark, sqlframe).""" + return self.implementation.is_spark_like() + + @property + def needs_pyarrow(self) -> bool: + """Whether this constructor requires `pyarrow` to be installed.""" + return "pyarrow" in self.requirements + + @property + def is_available(self) -> bool: + """Whether every package this constructor needs is importable.""" + return is_backend_available(*self.requirements) + + def __str__(self) -> str: + # NOTE: This is a temporary hack + # TODO(FBruzzesi): Remove once all the `"backend" in str(constructor)` + # statements in the test suite are properly replaced + return self.func.__name__ + + def __repr__(self) -> str: + return f"{type(self).__name__}(name={self.name!r})" + + def __hash__(self) -> int: + return hash((type(self), self.name)) + + def __eq__(self, other: object) -> bool: + return isinstance(other, frame_constructor) and self.name == other.name + + +# Eager constructors + + +@frame_constructor.register( + name="pandas", + implementation=Implementation.PANDAS, + requirements=("pandas",), + is_eager=True, + is_nullable=False, +) +def pandas_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds) + + +@frame_constructor.register( + name="pandas[nullable]", + implementation=Implementation.PANDAS, + requirements=("pandas",), + 
is_eager=True, +) +def pandas_nullable_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds).convert_dtypes(dtype_backend="numpy_nullable") + + +@frame_constructor.register( + name="pandas[pyarrow]", + implementation=Implementation.PANDAS, + requirements=("pandas", "pyarrow"), + is_eager=True, +) +def pandas_pyarrow_constructor(obj: Data, /, **kwds: Any) -> pd.DataFrame: + import pandas as pd + + return pd.DataFrame(obj, **kwds).convert_dtypes(dtype_backend="pyarrow") + + +@frame_constructor.register( + name="pyarrow", + implementation=Implementation.PYARROW, + requirements=("pyarrow",), + is_eager=True, +) +def pyarrow_table_constructor(obj: Data, /, **kwds: Any) -> pa.Table: + import pyarrow as pa + + return pa.table(obj, **kwds) + + +@frame_constructor.register( + name="modin", + implementation=Implementation.MODIN, + requirements=("modin",), + is_eager=True, + is_nullable=False, +) +def modin_constructor(obj: Data, /, **kwds: Any) -> IntoDataFrame: # pragma: no cover + import modin.pandas as mpd + import pandas as pd + + return cast("IntoDataFrame", mpd.DataFrame(pd.DataFrame(obj, **kwds))) + + +@frame_constructor.register( + name="modin[pyarrow]", + implementation=Implementation.MODIN, + requirements=("modin", "pyarrow"), + is_eager=True, +) +def modin_pyarrow_constructor( + obj: Data, /, **kwds: Any +) -> IntoDataFrame: # pragma: no cover + import modin.pandas as mpd + import pandas as pd + + df = mpd.DataFrame(pd.DataFrame(obj, **kwds)).convert_dtypes(dtype_backend="pyarrow") + return cast("IntoDataFrame", df) + + +@frame_constructor.register( + name="cudf", + implementation=Implementation.CUDF, + requirements=("cudf",), + is_eager=True, + needs_gpu=True, +) +def cudf_constructor(obj: Data, /, **kwds: Any) -> IntoDataFrame: # pragma: no cover + import cudf + + return cast("IntoDataFrame", cudf.DataFrame(obj, **kwds)) + + +@frame_constructor.register( + name="polars[eager]", + 
implementation=Implementation.POLARS, + requirements=("polars",), + is_eager=True, +) +def polars_eager_constructor(obj: Data, /, **kwds: Any) -> pl.DataFrame: + import polars as pl + + return pl.DataFrame(obj, **kwds) + + +# Lazy constructors + + +@frame_constructor.register( + name="polars[lazy]", implementation=Implementation.POLARS, requirements=("polars",) +) +def polars_lazy_constructor(obj: Data, /, **kwds: Any) -> pl.LazyFrame: + import polars as pl + + return pl.LazyFrame(obj, **kwds) + + +@frame_constructor.register( + name="dask", + implementation=Implementation.DASK, + requirements=("dask",), + is_nullable=False, +) +def dask_lazy_p2_constructor( + obj: Data, /, npartitions: int = 2, **kwds: Any +) -> NativeDask: # pragma: no cover + import dask.dataframe as dd + + return cast("NativeDask", dd.from_dict(obj, npartitions=npartitions, **kwds)) + + +@frame_constructor.register( + name="duckdb", + implementation=Implementation.DUCKDB, + requirements=("duckdb", "pyarrow"), +) +def duckdb_lazy_constructor(obj: Data, /, **kwds: Any) -> NativeDuckDB: + import duckdb + import pyarrow as pa + + duckdb.sql("""set timezone = 'UTC'""") + _df = pa.table(obj, **kwds) + return duckdb.sql("select * from _df") + + +def _pyspark_build(obj: Data, /, **kwds: Any) -> NativePySpark: # pragma: no cover + session = _pyspark_session_lazy() + _obj = deepcopy(obj) + index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) + _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) + result = ( + session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()], **kwds) + .repartition(2) + .orderBy(index_col_name) + .drop(index_col_name) + ) + return cast("NativePySpark", result) + + +@frame_constructor.register( + name="pyspark", implementation=Implementation.PYSPARK, requirements=("pyspark",) +) +def pyspark_lazy_constructor( + obj: Data, /, **kwds: Any +) -> NativePySpark: # pragma: no cover + return _pyspark_build(obj, **kwds) + + 
+@frame_constructor.register( + name="pyspark[connect]", + implementation=Implementation.PYSPARK_CONNECT, + requirements=("pyspark",), +) +def pyspark_connect_lazy_constructor( + obj: Data, /, **kwds: Any +) -> NativePySpark: # pragma: no cover + return _pyspark_build(obj, **kwds) + + +@frame_constructor.register( + name="sqlframe", + implementation=Implementation.SQLFRAME, + requirements=("sqlframe", "duckdb"), +) +def sqlframe_pyspark_lazy_constructor(obj: Data, /, **kwds: Any) -> NativeSQLFrame: + session = sqlframe_session() + return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()], **kwds) + + +@frame_constructor.register( + name="ibis", + implementation=Implementation.IBIS, + requirements=("ibis", "duckdb", "pyarrow"), +) +def ibis_lazy_constructor(obj: Data, /, **kwds: Any) -> ibis.Table: # pragma: no cover + import pyarrow as pa + + table = pa.table(obj) + table_name = str(uuid.uuid4()) + return _ibis_backend().create_table(table_name, table, **kwds) + + +DEFAULT_BACKENDS: frozenset[str] = frozenset( + { + "pandas", + "pandas[pyarrow]", + "polars[eager]", + "pyarrow", + "duckdb", + "sqlframe", + "ibis", + } +) +"""Subset of backends enabled by default for parametrised tests when the +user does not pass `--nw-backends` (mirrors the historical Narwhals defaults). +""" + + +def available_backends() -> frozenset[str]: + """Return the names of every constructor whose backend is importable. + + Examples: + >>> from narwhals.testing.constructors import available_backends + >>> "pandas" in available_backends() + True + """ + return frozenset( + name for name, c in frame_constructor._registry.items() if c.is_available + ) + + +def available_cpu_backends() -> frozenset[str]: # pragma: no cover + """Return the names of every CPU constructor whose backend is importable. 
+ + Examples: + >>> from narwhals.testing.constructors import available_cpu_backends + >>> "pandas" in available_cpu_backends() + True + """ + return frozenset( + name + for name, c in frame_constructor._registry.items() + if c.is_available and not c.needs_gpu + ) + + +EagerName: TypeAlias = Literal[ + "pandas", + "pandas[nullable]", + "pandas[pyarrow]", + "modin", + "modin[pyarrow]", + "cudf", + "polars[eager]", + "pyarrow", +] +LazyName: TypeAlias = Literal[ + "polars[lazy]", "dask", "duckdb", "pyspark", "pyspark[connect]", "sqlframe", "ibis" +] + + +@overload +def get_backend_constructor(name: EagerName) -> frame_constructor[IntoDataFrame]: ... +@overload +def get_backend_constructor(name: LazyName) -> frame_constructor[IntoLazyFrame]: ... +@overload +def get_backend_constructor(name: str) -> frame_constructor[IntoFrame]: ... + + +def get_backend_constructor(name: str) -> frame_constructor[IntoFrame]: + """Return the registered constructor for `name`. + + Arguments: + name: The string identifier of a registered constructor + (e.g. `"pandas[pyarrow]"`). + + Raises: + ValueError: If `name` is not a registered constructor identifier. + + Examples: + >>> from narwhals.testing.constructors import get_backend_constructor + >>> get_backend_constructor("pandas") + frame_constructor(name='pandas') + """ + try: + return frame_constructor._registry[name] + except KeyError as exc: + valid = sorted(frame_constructor._registry) + msg = f"Unknown constructor {name!r}. Expected one of: {valid}." + raise ValueError(msg) from exc + + +def prepare_backends( + *, include: Iterable[str] | None = None, exclude: Iterable[str] | None = None +) -> list[frame_constructor[IntoFrame]]: + """Return available constructors, optionally filtered. + + Note: + `exclude` is given precedence in the selection. + + Arguments: + include: If given, only return backends whose name is in this set. + exclude: If given, remove backends whose name is in this set. 
+ + Examples: + >>> from narwhals.testing.constructors import prepare_backends + >>> backends = prepare_backends(include=["pandas", "polars[eager]"]) + """ + available = available_backends() + candidates: list[frame_constructor[Any]] = [ + c for name, c in frame_constructor._registry.items() if name in available + ] + + include_set: frozenset[str] = ( + frozenset(include) if include is not None else frozenset() + ) + exclude_set: frozenset[str] = ( + frozenset(exclude) if exclude is not None else frozenset() + ) + + if unknown := (include_set.union(exclude_set).difference(available)): + msg = f"The following names are not known constructors: {sorted(unknown)}" + raise ValueError(msg) + + if include is not None: + candidates = [c for c in candidates if c.name in include_set] + if exclude is not None: + candidates = [c for c in candidates if c.name not in exclude_set] + return sorted(candidates, key=lambda c: c.name) + + +def is_backend_available(*packages: str) -> bool: + """Whether every package in `packages` can be imported in this environment. 
+ + Examples: + >>> from narwhals.testing.constructors import is_backend_available + >>> is_backend_available("pandas") + True + """ + return all(find_spec(pkg) is not None for pkg in packages) + + +def sqlframe_session() -> DuckDBSession: + """Return a fresh in-memory `sqlframe` DuckDB session.""" + from sqlframe.duckdb import DuckDBSession + + # NOTE: `__new__` override inferred by `pyright` only + # https://github.com/eakmanrq/sqlframe/blob/772b3a6bfe5a1ffd569b7749d84bea2f3a314510/sqlframe/base/session.py#L181-L184 + return cast("DuckDBSession", DuckDBSession()) # type: ignore[redundant-cast] + + +def pyspark_session() -> SparkSession: # pragma: no cover + """Return a singleton local `pyspark` (or pyspark[connect]) session.""" + if is_spark_connect := os.environ.get("SPARK_CONNECT", None): + from pyspark.sql.connect.session import SparkSession + else: + from pyspark.sql import SparkSession + builder = cast("SparkSession.Builder", SparkSession.builder).appName("unit-tests") + builder = ( + builder.remote(f"sc://localhost:{os.environ.get('SPARK_PORT', '15002')}") + if is_spark_connect + else builder.master("local[1]").config("spark.ui.enabled", "false") + ) + return ( + builder.config("spark.default.parallelism", "1") + .config("spark.sql.shuffle.partitions", "2") + .config("spark.sql.session.timeZone", "UTC") + .getOrCreate() + ) + + +@lru_cache(maxsize=1) +def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover + """Cached singleton in-memory ibis backend, so all tables share one database.""" + import ibis + + return ibis.duckdb.connect() + + +@lru_cache(maxsize=1) +def _pyspark_session_lazy() -> SparkSession: # pragma: no cover + """Cached pyspark session; created on first use, stopped at interpreter exit.""" + from atexit import register + + with warnings.catch_warnings(): + # The spark session seems to trigger a polars warning. 
+ warnings.filterwarnings( + "ignore", r"Using fork\(\) can cause Polars", category=RuntimeWarning + ) + session = pyspark_session() + register(session.stop) + return session diff --git a/narwhals/testing/pytest_plugin.py b/narwhals/testing/pytest_plugin.py new file mode 100644 index 0000000000..28e9701742 --- /dev/null +++ b/narwhals/testing/pytest_plugin.py @@ -0,0 +1,138 @@ +"""Narwhals pytest plugin - auto-parametrises fixtures. + +NOTE: All imports from `narwhals.*` are deferred inside the hook functions so that +the entry-point module can be loaded by pytest without pulling in the narwhals package tree. + +This is critical because entry-point plugins are loaded *before* `coveragepy` starts +coverage measurement; any narwhals module imported at that stage would have its +module-level code (class definitions, constants, etc.) executed outside the coverage tracer. +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import pytest + + from narwhals.testing.typing import FrameConstructor + + +_MIN_PANDAS_NULLABLE_VERSION: tuple[int, ...] = (2, 0, 0) +"""`pandas.convert_dtypes(dtype_backend=...)` requires pandas >= 2.0.0.""" + + +def _pandas_version() -> tuple[int, ...]: + try: + import pandas as pd + except ImportError: # pragma: no cover + return (0, 0, 0) + + from narwhals._utils import parse_version + + return parse_version(pd.__version__) + + +def _default_backend_ids() -> list[str]: + """Resolve the default `--nw-backends` value for the current environment. + + Honours `NARWHALS_DEFAULT_BACKENDS` if set, otherwise restricts + [`DEFAULT_BACKENDS`][] to backends whose libraries are importable. 
+    """
+    if env := os.environ.get("NARWHALS_DEFAULT_BACKENDS"):  # pragma: no cover
+        return env.split(",")
+    from narwhals.testing.constructors import DEFAULT_BACKENDS, frame_constructor
+
+    return [
+        name
+        for name, constructor in frame_constructor._registry.items()
+        if constructor.is_available and name in DEFAULT_BACKENDS
+    ]
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    from narwhals.testing.constructors import DEFAULT_BACKENDS
+
+    group = parser.getgroup("narwhals", "narwhals-testing")
+    defaults = ", ".join(f"'{c}'" for c in sorted(DEFAULT_BACKENDS))
+    group.addoption(
+        "--nw-backends",
+        action="store",
+        default=",".join(_default_backend_ids()),
+        type=str,
+        help=(
+            "Comma-separated list of (data|lazy) frame backend constructors to "
+            f"parametrise. Defaults to the installed subset of ({defaults})"
+        ),
+    )
+    group.addoption(
+        "--all-nw-backends",
+        action="store_true",
+        default=False,
+        help=("Run tests against every installed CPU backend (overrides --nw-backends)."),
+    )
+    # Escape hatch for downstream test suites that ship their own backend plugin.
+    # When set, this plugin still adds the CLI options but stops parametrising the fixtures.
+    group.addoption(
+        "--use-external-nw-backend",
+        action="store_true",
+        default=False,
+        help=(
+            "Skip narwhals-testing's parametrisation and let another plugin "
+            "provide the `nw_*frame_constructor` fixtures."
+        ),
+    )
+
+
+def _select_backends(config: pytest.Config) -> list[FrameConstructor]:  # pragma: no cover
+    from narwhals.testing.constructors import available_cpu_backends, prepare_backends
+
+    _all_cpu_exclusions = frozenset({"modin", "pyspark[connect]"})
+
+    if config.getoption("all_nw_backends"):
+        selected = prepare_backends(
+            include=available_cpu_backends(), exclude=_all_cpu_exclusions
+        )
+    else:
+        opt = cast("str", config.getoption("nw_backends"))
+        names = [c for c in opt.split(",") if c]
+        selected = prepare_backends(include=names)
+
+    if _pandas_version() < _MIN_PANDAS_NULLABLE_VERSION:
+        _pandas_nullables = {"pandas[nullable]", "pandas[pyarrow]"}
+        selected = [c for c in selected if c.name not in _pandas_nullables]
+    return selected
+
+
+def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
+    if metafunc.config.getoption("use_external_nw_backend"):  # pragma: no cover
+        return
+
+    fixturenames = set(metafunc.fixturenames)
+    if not fixturenames & {
+        "nw_frame",
+        "nw_dataframe",
+        "nw_lazyframe",
+        "nw_pandas_like_frame",
+    }:
+        return
+
+    selected = _select_backends(metafunc.config)
+
+    if "nw_dataframe" in fixturenames:
+        params = [c for c in selected if c.is_eager]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_dataframe", params, ids=ids)
+    elif "nw_lazyframe" in fixturenames:  # pragma: no cover
+        params = [c for c in selected if not c.is_eager]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_lazyframe", params, ids=ids)
+    elif "nw_frame" in fixturenames:
+        metafunc.parametrize("nw_frame", selected, ids=[c.name for c in selected])
+    elif "nw_pandas_like_frame" in fixturenames:
+        params = [c for c in selected if c.is_eager and c.is_pandas_like]
+        ids = [c.name for c in params]
+        metafunc.parametrize("nw_pandas_like_frame", params, ids=ids)
+    else:  # pragma: no cover
+        ...
diff --git a/narwhals/testing/typing.py b/narwhals/testing/typing.py new file mode 100644 index 0000000000..f03e946887 --- /dev/null +++ b/narwhals/testing/typing.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + from narwhals.testing.constructors import frame_constructor + from narwhals.typing import IntoDataFrame, IntoFrame, IntoLazyFrame + + +__all__ = ("Data", "DataFrameConstructor", "FrameConstructor", "LazyFrameConstructor") + +FrameConstructor: TypeAlias = "frame_constructor[IntoFrame]" +"""Type alias for a constructor that returns a native eager or lazy frame.""" + +DataFrameConstructor: TypeAlias = "frame_constructor[IntoDataFrame]" +"""Type alias for a constructor that returns an eager native dataframe.""" + +LazyFrameConstructor: TypeAlias = "frame_constructor[IntoLazyFrame]" +"""Type alias for a constructor that returns a lazy native frame.""" + +Data: TypeAlias = dict[str, Any] # TODO(Unassined): This should have a better annotation +"""A column-oriented mapping used as input to a frame constructor.""" diff --git a/pyproject.toml b/pyproject.toml index ef43aebc23..e69bf49f2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,10 @@ Documentation = "https://narwhals-dev.github.io/narwhals/" Repository = "https://github.com/narwhals-dev/narwhals" "Bug Tracker" = "https://github.com/narwhals-dev/narwhals/issues" +[project.entry-points.pytest11] +narwhals_testing = "narwhals.testing.pytest_plugin" +# See: https://docs.pytest.org/en/stable/how-to/writing_plugins.html#making-your-plugin-installable-by-others + [project.optional-dependencies] # These should be aligned with MIN_VERSIONS in narwhals/utils.py # Exception: modin, because `modin.__version__` isn't aligned with @@ -63,7 +67,6 @@ core = [ tests = [ "covdefaults", "pytest", - "pytest-cov", "pytest-env", "pytest-randomly", "pytest-xdist", @@ -298,7 +301,12 @@ env = [ ] 
[tool.coverage.run] +# execv and fork patches are unsupported on Windows (coverage raises), so Windows +# CI jobs set these env vars to "subprocess" — coverage dedupes the final list. +patch = ["${COVERAGE_PATCH_EXECV-execv}", "${COVERAGE_PATCH_FORK-fork}", "subprocess"] plugins = ["covdefaults"] +source = ["narwhals", "tests"] +parallel = true [tool.coverage.report] fail_under = 80 # This is just for local development, in CI we set it to 100 diff --git a/tests/conftest.py b/tests/conftest.py index 3fc3e91fa9..ed94521264 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,80 +1,37 @@ from __future__ import annotations -import os -import uuid -from copy import deepcopy -from functools import lru_cache from importlib.util import find_spec -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, cast import pytest import narwhals as nw -from narwhals._utils import Implementation, generate_temporary_column_name -from tests.utils import ID_PANDAS_LIKE, PANDAS_VERSION, pyspark_session, sqlframe_session +from narwhals._utils import Implementation + +# `narwhals.testing.pytest_plugin` registers itself via the `pytest11` entry point (see pyproject.toml) +# so it auto-loads as soon as Narwhals is installed. +# That plugin is what owns the `--constructors`, `--all-cpu-constructors`, and `--use-external-constructor` +# CLI options as well as parametrising the `constructor*` fixtures. 
if TYPE_CHECKING: from collections.abc import Sequence + from types import ModuleType - import ibis - import pandas as pd - import polars as pl - import pyarrow as pa - from ibis.backends.duckdb import Backend as IbisDuckDBBackend - from typing_extensions import TypeAlias - - from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame from narwhals._typing import EagerAllowed - from narwhals.typing import IntoDataFrame, NonNestedDType - from tests.utils import ( - Constructor, - ConstructorEager, - ConstructorLazy, - NestedOrEnumDType, - ) - - Data: TypeAlias = "dict[str, list[Any]]" - + from narwhals.dataframe import DataFrame, LazyFrame + from narwhals.testing.constructors import frame_constructor + from narwhals.testing.typing import Data, DataFrameConstructor, FrameConstructor + from narwhals.typing import IntoFrame, NonNestedDType + from tests.utils import NestedOrEnumDType -MIN_PANDAS_NULLABLE_VERSION = (2,) -# When testing cudf.pandas in Kaggle, we get an error if we try to run -# python -m cudf.pandas -m pytest --constructors=pandas. This gives us -# a way to run `python -m cudf.pandas -m pytest` and control which constructors -# get tested. 
-if default_constructors := os.environ.get( - "NARWHALS_DEFAULT_CONSTRUCTORS", None -): # pragma: no cover - DEFAULT_CONSTRUCTORS = default_constructors -else: - DEFAULT_CONSTRUCTORS = ( - "pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe,ibis" - ) +# Narwhals-internal pytest options (not part of the public testing plugin) def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" ) - parser.addoption( - "--all-cpu-constructors", - action="store_true", - default=False, - help="run tests with all cpu constructors", - ) - parser.addoption( - "--use-external-constructor", - action="store_true", - default=False, - help="run tests with external constructor", - ) - parser.addoption( - "--constructors", - action="store", - default=DEFAULT_CONSTRUCTORS, - type=str, - help="libraries to test", - ) def pytest_configure(config: pytest.Config) -> None: @@ -85,7 +42,6 @@ def pytest_collection_modifyitems( config: pytest.Config, items: Sequence[pytest.Function] ) -> None: # pragma: no cover if config.getoption("--runslow"): - # --runslow given in cli: do not skip slow tests return skip_slow = pytest.mark.skip(reason="need --runslow option to run") for item in items: @@ -93,235 +49,6 @@ def pytest_collection_modifyitems( item.add_marker(skip_slow) -def pandas_constructor(obj: Data) -> pd.DataFrame: - import pandas as pd - - return pd.DataFrame(obj) - - -def pandas_nullable_constructor(obj: Data) -> pd.DataFrame: - import pandas as pd - - return pd.DataFrame(obj).convert_dtypes(dtype_backend="numpy_nullable") - - -def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: - pytest.importorskip("pyarrow") - import pandas as pd - - return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") - - -def modin_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import modin.pandas as mpd - import pandas as pd - - df = mpd.DataFrame(pd.DataFrame(obj)) - return 
cast("IntoDataFrame", df) - - -def modin_pyarrow_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import modin.pandas as mpd - import pandas as pd - - df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("IntoDataFrame", df) - - -def cudf_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover - import cudf - - df = cudf.DataFrame(obj) - return cast("IntoDataFrame", df) - - -def polars_eager_constructor(obj: Data) -> pl.DataFrame: - pytest.importorskip("polars") - import polars as pl - - return pl.DataFrame(obj) - - -def polars_lazy_constructor(obj: Data) -> pl.LazyFrame: - import polars as pl - - return pl.LazyFrame(obj) - - -def duckdb_lazy_constructor(obj: dict[str, Any]) -> NativeDuckDB: - pytest.importorskip("duckdb") - pytest.importorskip("pyarrow") - import duckdb - import pyarrow as pa - - duckdb.sql("""set timezone = 'UTC'""") - - _df = pa.table(obj) - return duckdb.sql("select * from _df") - - -def dask_lazy_p1_constructor(obj: Data) -> NativeDask: # pragma: no cover - import dask.dataframe as dd - - return cast("NativeDask", dd.from_dict(obj, npartitions=1)) - - -def dask_lazy_p2_constructor(obj: Data) -> NativeDask: # pragma: no cover - import dask.dataframe as dd - - return cast("NativeDask", dd.from_dict(obj, npartitions=2)) - - -def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: - pytest.importorskip("pyarrow") - import pyarrow as pa - - return pa.table(obj) - - -def pyspark_lazy_constructor() -> Callable[[Data], NativePySpark]: # pragma: no cover - pytest.importorskip("pyspark") - import warnings - from atexit import register - - with warnings.catch_warnings(): - # The spark session seems to trigger a polars warning. 
- # Polars is imported in the tests, but not used in the spark operations - warnings.filterwarnings( - "ignore", r"Using fork\(\) can cause Polars", category=RuntimeWarning - ) - session = pyspark_session() - - register(session.stop) - - def _constructor(obj: Data) -> NativePySpark: - _obj = deepcopy(obj) - index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) - _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) - result = ( - session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()]) - .repartition(2) - .orderBy(index_col_name) - .drop(index_col_name) - ) - return cast("NativePySpark", result) - - return _constructor - - -def sqlframe_pyspark_lazy_constructor(obj: Data) -> NativeSQLFrame: # pragma: no cover - pytest.importorskip("sqlframe") - pytest.importorskip("duckdb") - session = sqlframe_session() - return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()]) - - -@lru_cache(maxsize=1) -def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover - """Cached (singleton) in-memory backend to ensure all tables exist within the same in-memory database.""" - import ibis - - return ibis.duckdb.connect() - - -def ibis_lazy_constructor(obj: Data) -> ibis.Table: # pragma: no cover - pytest.importorskip("ibis") - pytest.importorskip("polars") - import polars as pl - - ldf = pl.LazyFrame(obj) - table_name = str(uuid.uuid4()) - return _ibis_backend().create_table(table_name, ldf) - - -EAGER_CONSTRUCTORS: dict[str, ConstructorEager] = { - "pandas": pandas_constructor, - "pandas[nullable]": pandas_nullable_constructor, - "pandas[pyarrow]": pandas_pyarrow_constructor, - "pyarrow": pyarrow_table_constructor, - "modin": modin_constructor, - "modin[pyarrow]": modin_pyarrow_constructor, - "cudf": cudf_constructor, - "polars[eager]": polars_eager_constructor, -} -LAZY_CONSTRUCTORS: dict[str, ConstructorLazy] = { # pyrefly: ignore[bad-assignment] - "dask": dask_lazy_p2_constructor, - "polars[lazy]": 
polars_lazy_constructor, - "duckdb": duckdb_lazy_constructor, - "pyspark": pyspark_lazy_constructor, # type: ignore[dict-item] - "sqlframe": sqlframe_pyspark_lazy_constructor, - "ibis": ibis_lazy_constructor, -} -GPU_CONSTRUCTORS: dict[str, ConstructorEager] = {"cudf": cudf_constructor} - - -def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: - if metafunc.config.getoption("use_external_constructor"): # pragma: no cover - return # let the plugin handle this - if metafunc.config.getoption("all_cpu_constructors"): # pragma: no cover - selected_constructors: list[str] = [ - *iter(EAGER_CONSTRUCTORS.keys()), - *iter(LAZY_CONSTRUCTORS.keys()), - ] - selected_constructors = [ - x - for x in selected_constructors - if x not in GPU_CONSTRUCTORS - and x - not in { - "modin", # too slow - "spark[connect]", # complex local setup; can't run together with local spark - } - ] - else: # pragma: no cover - opt = cast("str", metafunc.config.getoption("constructors")) - selected_constructors = opt.split(",") - - eager_constructors: list[ConstructorEager] = [] - eager_constructors_ids: list[str] = [] - constructors: list[Constructor] = [] - constructors_ids: list[str] = [] - - for constructor in selected_constructors: - if ( - constructor in {"pandas[nullable]", "pandas[pyarrow]"} - and MIN_PANDAS_NULLABLE_VERSION > PANDAS_VERSION - ): - continue # pragma: no cover - - if constructor in EAGER_CONSTRUCTORS: - eager_constructors.append(EAGER_CONSTRUCTORS[constructor]) - eager_constructors_ids.append(constructor) - constructors.append(EAGER_CONSTRUCTORS[constructor]) - elif constructor in {"pyspark", "pyspark[connect]"}: # pragma: no cover - constructors.append(pyspark_lazy_constructor()) - elif constructor in LAZY_CONSTRUCTORS: - constructors.append(LAZY_CONSTRUCTORS[constructor]) - else: # pragma: no cover - msg = f"Expected one of {EAGER_CONSTRUCTORS.keys()} or {LAZY_CONSTRUCTORS.keys()}, got {constructor}" - raise ValueError(msg) - constructors_ids.append(constructor) - - 
if "constructor_eager" in metafunc.fixturenames: - metafunc.parametrize( - "constructor_eager", eager_constructors, ids=eager_constructors_ids - ) - elif "constructor" in metafunc.fixturenames: - metafunc.parametrize("constructor", constructors, ids=constructors_ids) - elif "constructor_pandas_like" in metafunc.fixturenames: - pandas_like_constructors = [] - pandas_like_constructors_ids = [] - for fn, name in zip(eager_constructors, eager_constructors_ids): - if name in ID_PANDAS_LIKE: - pandas_like_constructors.append(fn) - pandas_like_constructors_ids.append(name) - metafunc.parametrize( - "constructor_pandas_like", - pandas_like_constructors, - ids=pandas_like_constructors_ids, - ) - - TEST_EAGER_BACKENDS: list[EagerAllowed] = [] TEST_EAGER_BACKENDS.extend( (Implementation.POLARS, "polars") if find_spec("polars") is not None else () @@ -390,3 +117,64 @@ def non_nested_type(request: pytest.FixtureRequest) -> type[NonNestedDType]: def nested_dtype(request: pytest.FixtureRequest) -> NestedOrEnumDType: dtype: NestedOrEnumDType = request.param return dtype + + +# The following fixtures are aliases of those registered in `narwhals/testing/pytest_plugin.py`, +# wrapped so that calling them without an explicit `namespace` defaults to the main +# `narwhals` namespace. Tests can still pass `nw_v1` / `nw_v2` explicitly to opt in +# to a stable namespace; the legacy pattern `nw.from_native(constructor(data))` keeps +# working because `nw.from_native` is idempotent on narwhals objects. +# TODO(FBruzzesi): Drop these aliases once every test calls `nw_frame` / `nw_dataframe` +# directly with an explicit namespace. + + +class _PatchedFrameConstructor: + """Proxy over a `frame_constructor` defaulting `namespace` to `narwhals`. + + Delegates attribute access, `str()`, and `repr()` to the wrapped instance + so that test helpers (e.g. `constructor.is_nullable`, `"pandas" in str(constructor)`) + keep working unchanged. 
+ """ + + __slots__ = ("_inner",) + + def __init__(self, inner: frame_constructor[IntoFrame]) -> None: + self._inner = inner + + def __call__( + self, obj: Data, /, namespace: ModuleType = nw, **kwds: Any + ) -> DataFrame[Any] | LazyFrame[Any]: + return self._inner(obj, namespace=namespace, **kwds) + + def __getattr__(self, name: str) -> Any: + return getattr(self._inner, name) + + def __str__(self) -> str: + return str(self._inner) + + def __repr__(self) -> str: + return repr(self._inner) + + +class _PatchedDataFrameConstructor(_PatchedFrameConstructor): + def __call__( + self, obj: Data, /, namespace: ModuleType = nw, **kwds: Any + ) -> DataFrame[Any]: + return cast("DataFrame[Any]", self._inner(obj, namespace=namespace, **kwds)) + + +@pytest.fixture +def constructor(nw_frame: FrameConstructor) -> _PatchedFrameConstructor: + return _PatchedFrameConstructor(nw_frame) + + +@pytest.fixture +def constructor_eager(nw_dataframe: DataFrameConstructor) -> _PatchedDataFrameConstructor: + return _PatchedDataFrameConstructor(nw_dataframe) + + +@pytest.fixture +def constructor_pandas_like( + nw_pandas_like_frame: DataFrameConstructor, +) -> _PatchedDataFrameConstructor: + return _PatchedDataFrameConstructor(nw_pandas_like_frame) diff --git a/tests/dependencies/is_narwhals_dataframe_test.py b/tests/dependencies/is_narwhals_dataframe_test.py index aeedf15981..0897e64cc8 100644 --- a/tests/dependencies/is_narwhals_dataframe_test.py +++ b/tests/dependencies/is_narwhals_dataframe_test.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -import narwhals as nw from narwhals.stable.v1.dependencies import is_narwhals_dataframe if TYPE_CHECKING: @@ -12,5 +11,5 @@ def test_is_narwhals_dataframe(constructor_eager: ConstructorEager) -> None: df = constructor_eager({"col1": [1, 2], "col2": [3, 4]}) - assert is_narwhals_dataframe(nw.from_native(df)) - assert not is_narwhals_dataframe(df) + assert is_narwhals_dataframe(df) + assert not is_narwhals_dataframe(df.to_native()) diff --git 
a/tests/dependencies/is_narwhals_lazyframe_test.py b/tests/dependencies/is_narwhals_lazyframe_test.py index 0e4c6e1bd9..113fd4a511 100644 --- a/tests/dependencies/is_narwhals_lazyframe_test.py +++ b/tests/dependencies/is_narwhals_lazyframe_test.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -import narwhals as nw from narwhals.stable.v1.dependencies import is_narwhals_lazyframe from tests.utils import Constructor @@ -13,5 +12,5 @@ def test_is_narwhals_lazyframe(constructor: Constructor) -> None: lf = constructor({"a": [1, 2, 3]}) - assert is_narwhals_lazyframe(nw.from_native(lf).lazy()) - assert not is_narwhals_lazyframe(lf) + assert is_narwhals_lazyframe(lf.lazy()) + assert not is_narwhals_lazyframe(lf.to_native()) diff --git a/tests/dtypes/dtypes_test.py b/tests/dtypes/dtypes_test.py index 33fa61ac08..a233f955f3 100644 --- a/tests/dtypes/dtypes_test.py +++ b/tests/dtypes/dtypes_test.py @@ -9,13 +9,8 @@ import narwhals as nw from narwhals.exceptions import InvalidOperationError, PerformanceWarning -from tests.utils import ( - PANDAS_VERSION, - POLARS_VERSION, - PYARROW_VERSION, - assert_equal_hash, - pyspark_session, -) +from narwhals.testing.constructors import pyspark_session +from tests.utils import PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, assert_equal_hash if TYPE_CHECKING: from collections.abc import Iterable diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index af0c464e5b..9755435871 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -45,7 +45,7 @@ def test_arithmetic_expr( request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 3.0]} - df = nw.from_native(constructor(data)) + df = constructor(data, nw) result = df.select(getattr(nw.col("a"), attr)(rhs)) assert_equal_data(result, {"a": expected}) @@ -76,7 +76,7 @@ def test_right_arithmetic_expr( ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = 
nw.from_native(constructor(data)) + df = constructor(data) result = df.select(getattr(nw.col("a"), attr)(rhs)) assert_equal_data(result, {"literal": expected}) @@ -98,16 +98,16 @@ def test_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = nw.from_native(constructor_eager(data), eager_only=True) + df = nw_dataframe(data, nw) result = df.select(getattr(df["a"], attr)(rhs)) assert_equal_data(result, {"a": expected}) @@ -128,29 +128,29 @@ def test_right_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__rmod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3]} - df = nw.from_native(constructor_eager(data), eager_only=True) + df = nw_dataframe(data, nw) result_series = getattr(df["a"], attr)(rhs) assert result_series.name == "a" assert_equal_data({"a": result_series}, {"a": expected}) def test_truediv_same_dims( - constructor_eager: ConstructorEager, request: pytest.FixtureRequest + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest ) -> None: - if "polars" in str(constructor_eager): + if "polars" in str(nw_dataframe): # https://github.com/pola-rs/polars/issues/17760 request.applymarker(pytest.mark.xfail) - s_left = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] - s_right = nw.from_native(constructor_eager({"a": [2, 2, 1]}), eager_only=True)["a"] + s_left = 
nw_dataframe({"a": [1, 2, 3]}, nw)["a"] + s_right = nw_dataframe({"a": [2, 2, 1]}, nw)["a"] result = s_left / s_right assert_equal_data({"a": result}, {"a": [0.5, 1.0, 3.0]}) result = s_left.__rtruediv__(s_right) @@ -160,31 +160,27 @@ def test_truediv_same_dims( @given(left=st.integers(-100, 100), right=st.integers(-100, 100)) @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") @pytest.mark.slow -def test_floordiv(constructor_eager: ConstructorEager, *, left: int, right: int) -> None: - if any(x in str(constructor_eager) for x in ["modin", "cudf"]): +def test_floordiv(nw_dataframe: ConstructorEager, *, left: int, right: int) -> None: + if any(x in str(nw_dataframe) for x in ["modin", "cudf"]): # modin & cudf are too slow here pytest.skip() assume(right != 0) expected = {"a": [left // right]} - result = nw.from_native(constructor_eager({"a": [left]}), eager_only=True).select( - nw.col("a") // right - ) + result = nw_dataframe({"a": [left]}, nw).select(nw.col("a") // right) assert_equal_data(result, expected) @pytest.mark.slow @given(left=st.integers(-100, 100), right=st.integers(-100, 100)) @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") -def test_mod(constructor_eager: ConstructorEager, *, left: int, right: int) -> None: - if any(x in str(constructor_eager) for x in ["pandas_pyarrow", "modin", "cudf"]): +def test_mod(nw_dataframe: ConstructorEager, *, left: int, right: int) -> None: + if any(x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin", "cudf"]): # pandas[pyarrow] does not implement mod # modin & cudf are too slow here pytest.skip() assume(right != 0) expected = {"a": [left % right]} - result = nw.from_native(constructor_eager({"a": [left]}), eager_only=True).select( - nw.col("a") % right - ) + result = nw_dataframe({"a": [left]}, nw).select(nw.col("a") % right) assert_equal_data(result, expected) @@ -218,7 +214,7 @@ def test_arithmetic_expr_left_literal( 
request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 4.0]} - df = nw.from_native(constructor(data)) + df = constructor(data, nw) result = df.select(getattr(lhs, attr)(nw.col("a"))) assert_equal_data(result, {"literal": expected}) @@ -240,16 +236,16 @@ def test_arithmetic_series_left_literal( attr: str, lhs: Any, expected: list[Any], - constructor_eager: ConstructorEager, + nw_dataframe: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "modin_pyarrow"] + x in str(nw_dataframe) for x in ["pandas_pyarrow", "modin_pyarrow"] ): request.applymarker(pytest.mark.xfail) data = {"a": [1.0, 2.0, 4.0]} - df = nw.from_native(constructor_eager(data)) + df = nw_dataframe(data, nw) result = df.select(getattr(lhs, attr)(nw.col("a"))) assert_equal_data(result, {"literal": expected}) @@ -258,7 +254,7 @@ def test_std_broadcating(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): # `std(ddof=2)` fails for duckdb here pytest.skip() - df = nw.from_native(constructor({"a": [1, 2, 3]})) + df = constructor({"a": [1, 2, 3]}, nw) result = df.with_columns(b=nw.col("a").std()).sort("a") expected = {"a": [1, 2, 3], "b": [1.0, 1.0, 1.0]} assert_equal_data(result, expected) diff --git a/tests/expr_and_series/corr_test.py b/tests/expr_and_series/corr_test.py index 9152df69c8..15e371d62d 100644 --- a/tests/expr_and_series/corr_test.py +++ b/tests/expr_and_series/corr_test.py @@ -25,7 +25,7 @@ def test_corr_expr( output_name: str, a: str | nw.Expr, b: str | nw.Expr, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor) and expected_corr is None: request.applymarker( @@ -51,7 +51,7 @@ def test_corr_expr_spearman( output_name: str, a: str | nw.Expr, b: str | nw.Expr, - expected_corr: float, + expected_corr: float | None, ) -> None: context = ( does_not_raise() @@ -75,7 +75,7 @@ def test_corr_series( 
output_name: str, a: str, b: str, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor_eager) and expected_corr is None: request.applymarker( @@ -97,7 +97,7 @@ def test_corr_series_spearman( output_name: str, a: str, b: str, - expected_corr: float, + expected_corr: float | None, ) -> None: if "pyspark" in str(constructor_eager) and expected_corr is None: request.applymarker( diff --git a/tests/expr_and_series/dt/convert_time_zone_test.py b/tests/expr_and_series/dt/convert_time_zone_test.py index 65d1a6e3b6..8fd654ad6d 100644 --- a/tests/expr_and_series/dt/convert_time_zone_test.py +++ b/tests/expr_and_series/dt/convert_time_zone_test.py @@ -7,13 +7,13 @@ import pytest import narwhals as nw +from narwhals.testing.constructors import pyspark_session from tests.utils import ( PANDAS_VERSION, POLARS_VERSION, Constructor, assert_equal_data, is_windows, - pyspark_session, ) if TYPE_CHECKING: diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index c7bf55e7c0..830666cac2 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -123,10 +123,10 @@ def test_to_date(request: pytest.FixtureRequest, constructor: Constructor) -> No request.applymarker(pytest.mark.xfail) dates = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} if "dask" in str(constructor): - df_dask = cast("dd.DataFrame", constructor(dates)) + df_dask = cast("dd.DataFrame", constructor(dates).to_native()) df_dask = cast("dd.DataFrame", df_dask.astype({"a": "timestamp[ns][pyarrow]"})) df = nw.from_native(df_dask) else: - df = nw.from_native(constructor(dates)) + df = constructor(dates) result = df.select(nw.col("a").dt.date()) assert result.collect_schema() == {"a": nw.Date} diff --git a/tests/expr_and_series/dt/datetime_duration_test.py b/tests/expr_and_series/dt/datetime_duration_test.py index b84ecfa66e..ac7d132bfc 
100644 --- a/tests/expr_and_series/dt/datetime_duration_test.py +++ b/tests/expr_and_series/dt/datetime_duration_test.py @@ -74,7 +74,7 @@ def test_duration_attributes_nano( import numpy as np data = {"c": np.array([None, 20], dtype="timedelta64[ns]")} - df = nw.from_native(constructor(data)) + df = constructor(data, nw) result_c = df.select(getattr(nw.col("c").dt, attribute)().fill_null(0)) assert_equal_data(result_c, {"c": expected_c}) diff --git a/tests/expr_and_series/dt/replace_time_zone_test.py b/tests/expr_and_series/dt/replace_time_zone_test.py index 1c9dff7d59..27bc394b69 100644 --- a/tests/expr_and_series/dt/replace_time_zone_test.py +++ b/tests/expr_and_series/dt/replace_time_zone_test.py @@ -7,13 +7,8 @@ import pytest import narwhals as nw -from tests.utils import ( - PANDAS_VERSION, - Constructor, - assert_equal_data, - is_windows, - pyspark_session, -) +from narwhals.testing.constructors import pyspark_session +from tests.utils import PANDAS_VERSION, Constructor, assert_equal_data, is_windows if TYPE_CHECKING: from tests.utils import ConstructorEager diff --git a/tests/expr_and_series/fill_nan_test.py b/tests/expr_and_series/fill_nan_test.py index 132b553c50..1835d6c1f1 100644 --- a/tests/expr_and_series/fill_nan_test.py +++ b/tests/expr_and_series/fill_nan_test.py @@ -3,21 +3,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - def test_fill_nan(request: pytest.FixtureRequest, constructor: Constructor) -> None: if "cudf" in str(constructor): @@ -36,7 +23,7 @@ def test_fill_nan(request: pytest.FixtureRequest, constructor: Constructor) -> N assert_equal_data(result, expected) assert 
result.lazy().collect()["float_na"].null_count() == 2 result = df.select(nw.all().fill_nan(3.0)) - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # no nan vs null distinction expected = {"float": [-1.0, 1.0, 3.0], "float_na": [3.0, 1.0, 3.0]} assert result.lazy().collect()["float_na"].null_count() == 0 @@ -55,7 +42,7 @@ def test_fill_nan_series(constructor_eager: ConstructorEager) -> None: "float_na" ] result = s.fill_nan(999) - if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor_eager.is_nullable: # no nan vs null distinction assert_equal_data({"a": result}, {"a": [999.0, 1.0, 999.0]}) elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index 16c59536ca..01579e3f3f 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -12,12 +12,6 @@ import narwhals as nw from narwhals.exceptions import ComputeError, InvalidOperationError -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import ( PANDAS_VERSION, PYARROW_VERSION, @@ -29,12 +23,6 @@ if TYPE_CHECKING: from narwhals.typing import NumericLiteral -NON_NULLABLE_CONSTRUCTORS = ( - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -) NULL_PLACEHOLDER, NAN_PLACEHOLDER = 9999.0, -1.0 INF_POS, INF_NEG = float("inf"), float("-inf") @@ -126,7 +114,7 @@ def test_is_close_series_with_series( y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = x.is_close(y, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) - if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + if not constructor_eager.is_nullable: expected = [v if v is not None else nans_equal for v in expected] elif "pandas" in str(constructor_eager) and 
PANDAS_VERSION >= (3,): expected = [ @@ -154,7 +142,7 @@ def test_is_close_series_with_scalar( y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) - if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + if not constructor_eager.is_nullable: expected = [v if v is not None else False for v in expected] elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): expected = [ @@ -199,7 +187,7 @@ def test_is_close_expr_with_expr( ) .sort("idx") ) - if constructor in NON_NULLABLE_CONSTRUCTORS: + if not constructor.is_nullable: expected = [v if v is not None else nans_equal for v in expected] elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): expected = [ @@ -240,7 +228,7 @@ def test_is_close_expr_with_scalar( ) .sort("idx") ) - if constructor in NON_NULLABLE_CONSTRUCTORS: + if not constructor.is_nullable: expected = [v if v is not None else False for v in expected] elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): expected = [ diff --git a/tests/expr_and_series/is_finite_test.py b/tests/expr_and_series/is_finite_test.py index eb07b2a41e..f55b106593 100644 --- a/tests/expr_and_series/is_finite_test.py +++ b/tests/expr_and_series/is_finite_test.py @@ -5,21 +5,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import POLARS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - data = {"a": [float("nan"), float("inf"), 2.0, None]} @@ -77,7 +64,7 @@ def test_is_finite_column_with_null(constructor: Constructor, data: list[float]) result = df.select(nw.col("a").is_finite()) expected: dict[str, list[Any]] - if any(constructor is c for c in 
NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = {"a": [True, True, False]} else: diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py index 27790e27b2..9dce78c535 100644 --- a/tests/expr_and_series/is_nan_test.py +++ b/tests/expr_and_series/is_nan_test.py @@ -5,21 +5,8 @@ import pytest import narwhals as nw -from tests.conftest import ( - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, - pandas_constructor, -) from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = [ - pandas_constructor, - dask_lazy_p1_constructor, - dask_lazy_p2_constructor, - modin_constructor, -] - def test_nan(constructor: Constructor) -> None: data_na = {"int": [-1, 1, None]} @@ -33,7 +20,7 @@ def test_nan(constructor: Constructor) -> None: ) expected: dict[str, list[Any]] - if any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = { "int": [False, False, True], @@ -70,7 +57,7 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None: "float_na": df["float_na"].is_nan(), } expected: dict[str, list[Any]] - if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): + if not constructor_eager.is_nullable: # Null values are coerced to NaN for non-nullable datatypes expected = { "int": [False, False, True], diff --git a/tests/expr_and_series/list/get_test.py b/tests/expr_and_series/list/get_test.py index 52ca3386ba..338ab6197a 100644 --- a/tests/expr_and_series/list/get_test.py +++ b/tests/expr_and_series/list/get_test.py @@ -45,9 +45,8 @@ def test_get_series( pytest.skip() pytest.importorskip("pyarrow") - if ( - constructor_eager.__name__.startswith("pandas") - and "pyarrow" not in constructor_eager.__name__ + if str(constructor_eager).startswith("pandas") and 
"pyarrow" not in str( + constructor_eager ): df = nw.from_native(constructor_eager(data), eager_only=True) msg = re.escape("Series must be of PyArrow List type to support list namespace.") diff --git a/tests/expr_and_series/nth_test.py b/tests/expr_and_series/nth_test.py index 1249f7f2e2..86f9bfe2eb 100644 --- a/tests/expr_and_series/nth_test.py +++ b/tests/expr_and_series/nth_test.py @@ -1,17 +1,14 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals as nw from tests.utils import POLARS_VERSION, Constructor, assert_equal_data -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} +data: dict[str, list[Any]] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} @pytest.mark.parametrize( diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py index ec9dea6104..7ea345af30 100644 --- a/tests/expr_and_series/over_test.py +++ b/tests/expr_and_series/over_test.py @@ -475,10 +475,10 @@ def test_over_quantile(constructor: Constructor, request: pytest.FixtureRequest) data = {"a": [1, 2, 3, 4, 5, 6], "b": ["x", "x", "x", "y", "y", "y"]} quantile_expr = nw.col("a").quantile(quantile=0.5, interpolation="linear") - native_frame = constructor(data) + native_frame = constructor(data).to_native() if "dask" in str(constructor): - native_frame = native_frame.repartition(npartitions=1) # type: ignore[union-attr] + native_frame = native_frame.repartition(npartitions=1) result = ( nw.from_native(native_frame) diff --git a/tests/expr_and_series/str/split_test.py b/tests/expr_and_series/str/split_test.py index b6b25cd024..f206b84e35 100644 --- a/tests/expr_and_series/str/split_test.py +++ b/tests/expr_and_series/str/split_test.py @@ -20,8 +20,7 @@ ) def test_str_split(constructor: Constructor, by: str, expected: Any) -> None: if "cudf" not in str(constructor) and ( - 
constructor.__name__.startswith("pandas") - and "pyarrow" not in constructor.__name__ + str(constructor).startswith("pandas") and "pyarrow" not in str(constructor) ): df = nw.from_native(constructor(data)) msg = re.escape("This operation requires a pyarrow-backed series. ") @@ -44,8 +43,8 @@ def test_str_split_series( constructor_eager: ConstructorEager, by: str, expected: Any ) -> None: if "cudf" not in str(constructor_eager) and ( - constructor_eager.__name__.startswith("pandas") - and "pyarrow" not in constructor_eager.__name__ + str(constructor_eager).startswith("pandas") + and "pyarrow" not in str(constructor_eager) ): df = nw.from_native(constructor_eager(data), eager_only=True) msg = re.escape("This operation requires a pyarrow-backed series. ") diff --git a/tests/expr_and_series/str/to_time_test.py b/tests/expr_and_series/str/to_time_test.py index fceed2688d..29af4c0225 100644 --- a/tests/expr_and_series/str/to_time_test.py +++ b/tests/expr_and_series/str/to_time_test.py @@ -21,7 +21,7 @@ def requires_time_support( Skip or mark tests as expected failures depending on backend capabilities, version, and pyarrow availability when testing Time dtype support. 
""" - if constructor.__name__.startswith(("pandas", "modin")): + if str(constructor).startswith(("pandas", "modin")): if PANDAS_VERSION < (2, 2, 0): pytest.skip( "pandas < 2.2.0 has no pyarrow dtype support (and therefore does not support the Time dtype)" diff --git a/tests/expr_and_series/struct_/field_test.py b/tests/expr_and_series/struct_/field_test.py index a351c31500..dd8dd26790 100644 --- a/tests/expr_and_series/struct_/field_test.py +++ b/tests/expr_and_series/struct_/field_test.py @@ -1,84 +1,53 @@ from __future__ import annotations -from typing import cast - import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data +from tests.utils import ( + DUCKDB_VERSION, + PANDAS_VERSION, + Constructor, + ConstructorEager, + assert_equal_data, +) def test_get_field_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: pytest.importorskip("pyarrow") - import pyarrow as pa - if any(backend in str(constructor) for backend in ("dask", "modin")): + if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) - if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2, 0): + if ("pandas" in str(constructor) and PANDAS_VERSION < (2, 2, 0)) or ( + "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3, 0) + ): pytest.skip() - data = {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]} - - df_native = constructor(data) - - if "pandas" in str(constructor): - import pandas as pd - df_native = cast("pd.DataFrame", df_native).assign( - user=pd.Series( - data["user"], - dtype=pd.ArrowDtype( - pa.struct([("id", pa.string()), ("name", pa.string())]) - ), - ) - ) - - df = nw.from_native(df_native) + data = {"id": ["0", "1"], "name": ["john", "jane"]} + expected = data.copy() + df = constructor(data, nw).select(user=nw.struct("id", "name")) result = nw.from_native(df).select( nw.col("user").struct.field("id"), 
nw.col("user").struct.field("name") ) - expected = {"id": ["0", "1"], "name": ["john", "jane"]} assert_equal_data(result, expected) result = nw.from_native(df).select(nw.col("user").struct.field("id").name.keep()) expected = {"user": ["0", "1"]} assert_equal_data(result, expected) -def test_get_field_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager -) -> None: +def test_get_field_series(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") - import pyarrow as pa - if any(backend in str(constructor_eager) for backend in ("modin",)): - request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager) and PANDAS_VERSION < (2, 2, 0): pytest.skip() - data = {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]} - expected = {"id": ["0", "1"], "name": ["john", "jane"]} - - _expected = expected.copy() - df_native = constructor_eager(data) - - if "pandas" in str(constructor_eager): - import pandas as pd - - df_native = cast("pd.DataFrame", df_native).assign( - user=pd.Series( - data["user"], - dtype=pd.ArrowDtype( - pa.struct([("id", pa.string()), ("name", pa.string())]) - ), - ) - ) - - df = nw.from_native(df_native, eager_only=True) + data = {"id": ["0", "1"], "name": ["john", "jane"]} + expected = data.copy() + df = constructor_eager(data, nw).select(user=nw.struct("id", "name")) result = nw.from_native(df).select( df["user"].struct.field("id"), df["user"].struct.field("name") ) - expected = {"id": ["0", "1"], "name": ["john", "jane"]} - assert_equal_data(result, _expected) + assert_equal_data(result, expected) def test_pandas_object_series() -> None: diff --git a/tests/frame/group_by_test.py b/tests/frame/group_by_test.py index 57aacae09b..788a5363a4 100644 --- a/tests/frame/group_by_test.py +++ b/tests/frame/group_by_test.py @@ -26,7 +26,7 @@ from narwhals.typing import NonNestedLiteral -data: Mapping[str, Any] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} +data: dict[str, 
list[Any]] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} POLARS_COLLECT_STREAMING_ENGINE = os.environ.get("NARWHALS_POLARS_NEW_STREAMING", None) diff --git a/tests/frame/interchange_native_namespace_test.py b/tests/frame/interchange_native_namespace_test.py index 79a92ef6c9..0face73928 100644 --- a/tests/frame/interchange_native_namespace_test.py +++ b/tests/frame/interchange_native_namespace_test.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest @@ -9,10 +9,7 @@ pytest.importorskip("polars") import polars as pl -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} def test_interchange() -> None: @@ -60,9 +57,9 @@ def test_duckdb() -> None: pytest.importorskip("duckdb") import duckdb - df_pl = pl.DataFrame(data) # noqa: F841 + _df_pl = pl.DataFrame(data) - rel = duckdb.sql("select * from df_pl") + rel = duckdb.sql("select * from _df_pl") df = nw_v1.from_native(rel, eager_or_interchange_only=True) series = df["a"] diff --git a/tests/frame/interchange_select_test.py b/tests/frame/interchange_select_test.py index a927ba18c6..90279f0296 100644 --- a/tests/frame/interchange_select_test.py +++ b/tests/frame/interchange_select_test.py @@ -1,16 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals as nw import narwhals.stable.v1 as nw_v1 -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} class InterchangeDataFrame: diff --git a/tests/frame/interchange_to_arrow_test.py b/tests/frame/interchange_to_arrow_test.py index 
2277d498ea..e8604f816d 100644 --- a/tests/frame/interchange_to_arrow_test.py +++ b/tests/frame/interchange_to_arrow_test.py @@ -1,15 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import pytest import narwhals.stable.v1 as nw_v1 -if TYPE_CHECKING: - from collections.abc import Mapping - -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} pytest.importorskip("polars") pytest.importorskip("pyarrow") diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 05543105a6..50ac30c6ed 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -16,19 +16,7 @@ ) if TYPE_CHECKING: - from narwhals.typing import IntoDataFrame, IntoLazyFrameT, JoinStrategy - - -def from_native_lazy( - native: IntoLazyFrameT | IntoDataFrame, -) -> nw.LazyFrame[IntoLazyFrameT] | nw.LazyFrame[Any]: - """Every join test [needs to use `.lazy()` for typing]*. - - *Unless both left/right frames are of the same concrete type. 
- - [needs to use `.lazy()` for typing]: https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2286264815 - """ - return nw.from_native(native).lazy() + from narwhals.typing import JoinStrategy @pytest.mark.parametrize( @@ -107,8 +95,8 @@ def test_full_join( right_on: None | str | list[str], constructor: Constructor, ) -> None: - df_left = from_native_lazy(constructor(df1)) - df_right = from_native_lazy(constructor(df2)) + df_left = constructor(df1).lazy() + df_right = constructor(df2).lazy() result = df_left.join( df_right, on=on, left_on=left_on, right_on=right_on, how="full" ).sort("id", nulls_last=True) @@ -123,8 +111,8 @@ def test_full_join_duplicate( df1 = {"foo": [1, 2, 3], "val1": [1, 2, 3]} df2 = {"foo": [1, 2, 3], "foo_right": [1, 2, 3]} - df_left = from_native_lazy(constructor(df1)) - df_right = from_native_lazy(constructor(df2)) + df_left = constructor(df1).lazy() + df_right = constructor(df2).lazy() exceptions: list[type[Exception]] = [nw.exceptions.NarwhalsError] if "pyspark" in str(constructor) and "sqlframe" not in str(constructor): @@ -146,7 +134,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() df_right = df result = df.join( df_right, @@ -175,7 +163,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() df_right = df result = df.join( df_right, left_on="antananarivo", right_on="antananarivo", how="inner" @@ -199,7 +187,7 @@ def test_cross_join(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") expected = { 
"antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], @@ -219,7 +207,7 @@ def test_suffix( constructor: Constructor, how: Literal["inner", "left"], suffix: str ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() df_right = df result = df.join( df_right, @@ -237,7 +225,7 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() result = df.join(df, how="cross", suffix=suffix).sort( "antananarivo", f"antananarivo{suffix}" ) @@ -287,7 +275,7 @@ def test_anti_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) assert_equal_data(result, expected) @@ -325,7 +313,7 @@ def test_semi_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() other = df.filter(filter_expr) result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( "antananarivo" @@ -336,7 +324,7 @@ def test_semi_join( @pytest.mark.parametrize("how", ["right"]) def test_join_not_implemented(constructor: Constructor, how: str) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( NotImplementedError, @@ -363,8 +351,8 @@ def test_left_join(constructor: Constructor) -> None: "co": [4.0, 5.0, 7.0], "idx": [0.0, 1.0, 2.0], } - df_left = 
from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join(df_right, left_on="bob", right_on="co", how="left") result = result.sort("idx") result = result.drop("idx_right") @@ -389,8 +377,8 @@ def test_left_join(constructor: Constructor) -> None: def test_left_join_multiple_column(constructor: Constructor) -> None: data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "idx": [0, 1, 2]} data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "idx": [0, 1, 2]} - df_left = from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join( df_right, left_on=["antananarivo", "bob"], @@ -416,8 +404,8 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d": [1.0, 4.0, 2.0], "idx": [0.0, 1.0, 2.0], } - df_left = from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") result = result.drop("idx_right") expected: dict[str, list[Any]] = { @@ -446,7 +434,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) def test_join_keys_exceptions(constructor: Constructor, how: JoinStrategy) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( ValueError, @@ -512,16 +500,27 @@ def test_joinasof_numeric( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( - 
constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = from_native_lazy( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") - result = df.join_asof( - df_right, left_on="antananarivo", right_on="antananarivo", strategy=strategy - ) - result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) + + data_left = {"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]} + data_right = {"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]} + left_lf = constructor(data_left).lazy().sort("antananarivo") + right_lf = constructor(data_right).lazy().sort("antananarivo") + + result: nw.DataFrame[Any] | nw.LazyFrame[Any] + result_on: nw.DataFrame[Any] | nw.LazyFrame[Any] + if constructor.is_lazy: + result = left_lf.join_asof( + right_lf, left_on="antananarivo", right_on="antananarivo", strategy=strategy + ) + result_on = left_lf.join_asof(right_lf, on="antananarivo", strategy=strategy) + + else: + left_df, right_df = left_lf.collect(), right_lf.collect() + result = left_df.join_asof( + right_df, left_on="antananarivo", right_on="antananarivo", strategy=strategy + ) + result_on = left_df.join_asof(right_df, on="antananarivo", strategy=strategy) + assert_equal_data(result.sort(by="antananarivo"), expected) assert_equal_data(result_on.sort(by="antananarivo"), expected) @@ -581,7 +580,7 @@ def test_joinasof_time( request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( + df = ( constructor( { "datetime": [ @@ -592,8 +591,10 @@ def test_joinasof_time( "population": [82.19, 82.66, 83.12], } ) - ).sort("datetime") - df_right = from_native_lazy( + .lazy() + .sort("datetime") + ) + df_right = ( constructor( { "datetime": [ @@ -606,7 +607,9 @@ def test_joinasof_time( "gdp": [4164, 4411, 4566, 4696, 4827], } ) - ).sort("datetime") + .lazy() + .sort("datetime") + ) 
result = df.join_asof( df_right, left_on="datetime", right_on="datetime", strategy=strategy ) @@ -622,7 +625,7 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( + df = ( constructor( { "antananarivo": [1, 5, 7, 10], @@ -630,12 +633,16 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - "c": [9, 2, 1, 1], } ) - ).sort("antananarivo") - df_right = from_native_lazy( + .lazy() + .sort("antananarivo") + ) + df_right = ( constructor( {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} ) - ).sort("antananarivo") + .lazy() + .sort("antananarivo") + ) result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") result_by = df.join_asof(df_right, on="antananarivo", by="bob") expected = { @@ -657,12 +664,16 @@ def test_joinasof_suffix( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = from_native_lazy( + df = ( constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = from_native_lazy( + .lazy() + .sort("antananarivo") + ) + df_right = ( constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") + .lazy() + .sort("antananarivo") + ) result = df.join_asof( df_right, left_on="antananarivo", right_on="antananarivo", suffix="_y" ) @@ -675,7 +686,7 @@ def test_joinasof_not_implemented( constructor: Constructor, strategy: Literal["backward", "forward"] ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( NotImplementedError, @@ -688,7 +699,7 @@ def test_joinasof_not_implemented( def test_joinasof_keys_exceptions(constructor: 
Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) + df = constructor(data).lazy() with pytest.raises( ValueError, @@ -754,13 +765,16 @@ def test_joinasof_by_exceptions( message: str, ) -> None: data = {ON: [1, 3, 2], BY: [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) - if isinstance(df, nw.LazyFrame): + frame = constructor(data).lazy() + + if constructor.is_lazy: with pytest.raises(ValueError, match=message): - df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + frame.join_asof(frame, on=on, by_left=by_left, by_right=by_right, by=by) else: with pytest.raises(ValueError, match=message): - df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + frame.collect().join_asof( + frame.collect(), on=on, by_left=by_left, by_right=by_right, by=by + ) def test_join_duplicate_column_names( @@ -777,7 +791,7 @@ def test_join_duplicate_column_names( ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]} - df = nw.from_native(constructor(data)) + lf = constructor(data).lazy() if any( x in str(constructor) for x in ("pandas", "pandas[pyarrow]", "pandas[nullable]", "dask") @@ -796,10 +810,12 @@ def test_join_duplicate_column_names( request.applymarker(pytest.mark.xfail) else: exception = nw.exceptions.DuplicateError - if isinstance(df, nw.LazyFrame): + + if constructor.is_lazy: with pytest.raises(exception): # pyrefly: ignore[unbound-name] - df.join(df, on=["a"]).join(df, on=["a"]).collect() + lf.join(lf, on=["a"]).join(lf, on=["a"]).collect() else: + df = lf.collect() with pytest.raises(exception): # pyrefly: ignore[unbound-name] df.join(df, on=["a"]).join(df, on=["a"]) @@ -875,8 +891,8 @@ def test_join_on_null_values( data_left = {**keys, "x": [1, 2, 3, 4]} data_right = {**keys, "y": [1.2, 3.4, 5.6, 7.8]} - df_left = from_native_lazy(constructor(data_left)) - df_right = 
from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() on = None if how == "cross" else list(keys) sort_by = ["a", "x", "y"] if how in {"cross", "full"} else ["a", "x"] @@ -902,8 +918,8 @@ def test_full_join_with_overlapping_non_key_columns_and_nulls( "right_only": [100, 200, 300], } - df_left = from_native_lazy(constructor(data_left)) - df_right = from_native_lazy(constructor(data_right)) + df_left = constructor(data_left).lazy() + df_right = constructor(data_right).lazy() result = df_left.join(df_right, on="id", how="full", suffix="_r").sort( "id", nulls_last=True @@ -929,7 +945,7 @@ def test_join_with_float_nan( data = {"a": [0, 0, 0], "b": [0, 0, 0], "c": [0.0, 0.0, float("nan")]} join_cols = ["a", "c"] - frame = from_native_lazy(constructor(data)) + frame = constructor(data).lazy() result = ( frame.join(frame, on=join_cols, how="inner").sort("c", nulls_last=True).collect() diff --git a/tests/frame/lazy_test.py b/tests/frame/lazy_test.py index 9e671c68d2..658a61c68b 100644 --- a/tests/frame/lazy_test.py +++ b/tests/frame/lazy_test.py @@ -9,13 +9,8 @@ import narwhals as nw from narwhals._utils import Implementation from narwhals.dependencies import get_cudf, get_modin -from tests.utils import ( - DUCKDB_VERSION, - PANDAS_VERSION, - assert_equal_data, - pyspark_session, - sqlframe_session, -) +from narwhals.testing.constructors import pyspark_session, sqlframe_session +from tests.utils import DUCKDB_VERSION, PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from narwhals._typing import LazyAllowed, SparkLike diff --git a/tests/frame/sample_test.py b/tests/frame/sample_test.py index b86ddaee1d..8db480e02c 100644 --- a/tests/frame/sample_test.py +++ b/tests/frame/sample_test.py @@ -19,9 +19,7 @@ def test_sample_n(constructor_eager: ConstructorEager) -> None: def test_sample_fraction(constructor_eager: ConstructorEager) -> None: - df = nw.from_native( - constructor_eager({"a": [1, 2, 3, 4], 
"b": ["x", "y", "x", "y"]}), eager_only=True - ) + df = constructor_eager({"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}) result_expr = df.sample(fraction=0.5).shape expected_expr = (2, 2) @@ -30,11 +28,11 @@ def test_sample_fraction(constructor_eager: ConstructorEager) -> None: def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: size, n = 100, 10 - df = nw.from_native(constructor_eager({"a": range(size)}), eager_only=True) + df = constructor_eager({"a": range(size)}) r1 = nw.to_native(df.sample(n=n, seed=123)) r2 = nw.to_native(df.sample(n=n, seed=123)) r3 = nw.to_native(df.sample(n=n, seed=42)) - assert r1.equals(r2) # type: ignore[attr-defined] - assert not r1.equals(r3) # type: ignore[attr-defined] + assert r1.equals(r2) + assert not r1.equals(r3) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index a4ee5d36a3..3d66c308b7 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -9,7 +9,7 @@ import narwhals as nw from narwhals.exceptions import PerformanceWarning -from tests.utils import PANDAS_VERSION, POLARS_VERSION, ConstructorPandasLike +from tests.utils import PANDAS_VERSION, POLARS_VERSION if TYPE_CHECKING: from collections.abc import Callable, Sequence @@ -23,7 +23,7 @@ IntoPandasSchema, IntoPolarsSchema, ) - from tests.utils import Constructor, ConstructorEager + from tests.utils import Constructor, ConstructorEager, ConstructorPandasLike TimeUnit: TypeAlias = Literal["ns", "us"] @@ -578,7 +578,7 @@ def origin_pandas_like( "d": [5.3, 4.99], "e": [datetime(2006, 1, 1), datetime(2001, 9, 3)], } - return constructor_pandas_like(data).dtypes.to_dict() + return constructor_pandas_like(data).to_native().dtypes.to_dict() # type: ignore[no-any-return] @pytest.fixture @@ -588,8 +588,8 @@ def origin_pandas_like_pyarrow( if PANDAS_VERSION < (1, 5): pytest.skip(reason="pandas too old for `pyarrow`") name_pandas_like = {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"} - if 
constructor_pandas_like.__name__ not in name_pandas_like: - pytest.skip(f"{constructor_pandas_like.__name__!r} is not pandas_like_pyarrow") + if str(constructor_pandas_like) not in name_pandas_like: + pytest.skip(f"{constructor_pandas_like!s} is not pandas_like_pyarrow") data = { "a": [2, 1], "b": ["hello", "hi"], @@ -603,7 +603,7 @@ def origin_pandas_like_pyarrow( df_nw = nw.from_native(df_pd).with_columns( nw.col("f").cast(nw.Date()), nw.col("g").cast(nw.Time()) ) - return df_nw.to_native().dtypes.to_dict() + return df_nw.to_native().dtypes.to_dict() # type: ignore[no-any-return] def test_schema_from_polars( diff --git a/tests/frame/to_native_test.py b/tests/frame/to_native_test.py index 0ef0ae885a..cdb03e2675 100644 --- a/tests/frame/to_native_test.py +++ b/tests/frame/to_native_test.py @@ -10,7 +10,7 @@ def test_to_native(constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} - df_raw = constructor(data) + df_raw = constructor(data).to_native() df = nw.from_native(df_raw) assert isinstance(df.to_native(), df_raw.__class__) diff --git a/tests/frame/to_pandas_test.py b/tests/frame/to_pandas_test.py index 473b685c19..bcdcc10fc3 100644 --- a/tests/frame/to_pandas_test.py +++ b/tests/frame/to_pandas_test.py @@ -7,7 +7,6 @@ pytest.importorskip("pandas") import pandas as pd -import narwhals as nw from tests.utils import PANDAS_VERSION if TYPE_CHECKING: @@ -19,11 +18,10 @@ def test_convert_pandas(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} - df_raw = constructor_eager(data) - result = nw.from_native(df_raw, eager_only=True).to_pandas() + result = constructor_eager(data).to_pandas() - if constructor_eager.__name__.startswith("pandas"): - expected = cast("pd.DataFrame", constructor_eager(data)) + if str(constructor_eager).startswith("pandas"): + expected = cast("pd.DataFrame", constructor_eager(data).to_native()) elif "modin_pyarrow" in 
str(constructor_eager): expected = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") else: diff --git a/tests/frame/to_polars_test.py b/tests/frame/to_polars_test.py index 60ca653f32..89d4a65b2a 100644 --- a/tests/frame/to_polars_test.py +++ b/tests/frame/to_polars_test.py @@ -1,14 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import pytest import narwhals as nw if TYPE_CHECKING: - from collections.abc import Mapping - + from narwhals.testing.typing import Data from tests.utils import ConstructorEager pytest.importorskip("polars") @@ -20,7 +19,7 @@ def test_convert_polars(constructor_eager: ConstructorEager) -> None: pytest.importorskip("pyarrow") from polars.testing import assert_frame_equal - data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} + data: Data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} df_raw = constructor_eager(data) result = nw.from_native(df_raw).to_polars() diff --git a/tests/hypothesis/getitem_test.py b/tests/hypothesis/getitem_test.py index 759a292f97..c18f860872 100644 --- a/tests/hypothesis/getitem_test.py +++ b/tests/hypothesis/getitem_test.py @@ -1,29 +1,30 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, cast import hypothesis.strategies as st import pytest from hypothesis import assume, given import narwhals as nw -from tests.conftest import pandas_constructor, pyarrow_table_constructor +from narwhals.testing.constructors import get_backend_constructor from tests.utils import assert_equal_data if TYPE_CHECKING: from collections.abc import Sequence - from narwhals.typing import IntoDataFrame + from narwhals.testing.typing import DataFrameConstructor pytest.importorskip("pandas") pytest.importorskip("polars") import polars as pl -@pytest.fixture(params=[pandas_constructor, pyarrow_table_constructor], scope="module") -def 
pandas_or_pyarrow_constructor( - request: pytest.FixtureRequest, -) -> Callable[[Any], IntoDataFrame]: +@pytest.fixture( + params=[get_backend_constructor("pandas"), get_backend_constructor("pyarrow")], + scope="module", +) +def pandas_or_pyarrow_constructor(request: pytest.FixtureRequest) -> DataFrameConstructor: return request.param # type: ignore[no-any-return] @@ -117,7 +118,9 @@ def tuple_selector(draw: st.DrawFn) -> tuple[Any, Any]: @given(selector=st.one_of(single_selector, tuple_selector())) @pytest.mark.slow -def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: +def test_getitem( + pandas_or_pyarrow_constructor: DataFrameConstructor, selector: Any +) -> None: """Compare __getitem__ against polars.""" # TODO(PR - clean up): documenting current differences # These assume(...) lines each filter out a known difference. @@ -125,7 +128,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # NotImplementedError: Slicing with step is not supported on PyArrow tables assume( not ( - pandas_or_pyarrow_constructor is pyarrow_table_constructor + pandas_or_pyarrow_constructor.is_pyarrow and isinstance(selector, slice) and selector.step is not None ) @@ -134,7 +137,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # NotImplementedError: Slicing with step is not supported on PyArrow tables assume( not ( - pandas_or_pyarrow_constructor is pyarrow_table_constructor + pandas_or_pyarrow_constructor.is_pyarrow and isinstance(selector, tuple) and ( (isinstance(selector[0], slice) and selector[0].step is not None) @@ -155,7 +158,7 @@ def test_getitem(pandas_or_pyarrow_constructor: Any, selector: Any) -> None: # rows/columns sides. 
return - df_other = nw.from_native(pandas_or_pyarrow_constructor(TEST_DATA)) + df_other = pandas_or_pyarrow_constructor(TEST_DATA, nw) result_other = df_other[cast("Any", selector)] if isinstance(result_polars, nw.Series): diff --git a/tests/ibis_test.py b/tests/ibis_test.py index 14a93c8ef8..a9a9dc413b 100644 --- a/tests/ibis_test.py +++ b/tests/ibis_test.py @@ -1,30 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any - import pytest import narwhals as nw - -if TYPE_CHECKING: - import ibis - import polars as pl - - from tests.utils import Constructor -else: - ibis = pytest.importorskip("ibis") - pl = pytest.importorskip("polars") - - -@pytest.fixture -def ibis_constructor() -> Constructor: - def func(data: dict[str, Any]) -> ibis.Table: - df = pl.DataFrame(data) - return ibis.memtable(df) - - return func +from narwhals.testing.constructors import get_backend_constructor -def test_from_native(ibis_constructor: Constructor) -> None: - df = nw.from_native(ibis_constructor({"a": [1, 2, 3], "b": [4, 5, 6]})) +def test_from_native() -> None: + ibis_constructor = get_backend_constructor("ibis") + if not ibis_constructor.is_available: + pytest.skip() + df = ibis_constructor({"a": [1, 2, 3], "b": [4, 5, 6]}, nw) assert df.columns == ["a", "b"] diff --git a/tests/modern_polars/method_chaining_test.py b/tests/modern_polars/method_chaining_test.py index 611f85973e..ba7a06b894 100644 --- a/tests/modern_polars/method_chaining_test.py +++ b/tests/modern_polars/method_chaining_test.py @@ -38,10 +38,7 @@ def test_split_list_get(request: pytest.FixtureRequest, constructor: Constructor if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - if ( - constructor.__name__.startswith("pandas") - and "pyarrow" not in constructor.__name__ - ): + if str(constructor).startswith("pandas") and "pyarrow" not in str(constructor): df = nw.from_native(constructor(data)) msg = re.escape("This operation requires a pyarrow-backed series. 
") with pytest.raises(TypeError, match=msg): diff --git a/tests/namespace_test.py b/tests/namespace_test.py index 34d0d60204..ce95c01e05 100644 --- a/tests/namespace_test.py +++ b/tests/namespace_test.py @@ -72,7 +72,7 @@ def test_namespace_from_backend_name(backend: BackendName) -> None: def test_namespace_from_native_object(constructor: Constructor) -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6]} - frame = constructor(data) + frame = constructor(data, nw).to_native() namespace = Namespace.from_native_object(frame) nw_frame = nw.from_native(frame) assert namespace.implementation == nw_frame.implementation diff --git a/tests/preserve_pandas_like_columns_name_attr_test.py b/tests/preserve_pandas_like_columns_name_attr_test.py index 3127040bee..546b388f67 100644 --- a/tests/preserve_pandas_like_columns_name_attr_test.py +++ b/tests/preserve_pandas_like_columns_name_attr_test.py @@ -1,17 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING import pytest import narwhals as nw if TYPE_CHECKING: - import pandas as pd + from tests.utils import Constructor def test_ops_preserve_column_index_name( - constructor: Callable[..., pd.DataFrame], request: pytest.FixtureRequest + constructor: Constructor, request: pytest.FixtureRequest ) -> None: if not any(x in str(constructor) for x in ("pandas", "modin", "cudf", "dask")): pytest.skip( @@ -22,7 +22,7 @@ def test_ops_preserve_column_index_name( request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} - df_native = constructor(data) + df_native = constructor(data).to_native() df_native.columns.name = "foo" df = nw.from_native(df_native) diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 4548f76a87..0e0c94d994 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -6,13 +6,8 @@ import pytest import narwhals as nw -from tests.utils import ( - PANDAS_VERSION, - Constructor, - 
assert_equal_data, - pyspark_session, - sqlframe_session, -) +from narwhals.testing.constructors import pyspark_session, sqlframe_session +from tests.utils import PANDAS_VERSION, Constructor, assert_equal_data pytest.importorskip("polars") pytest.importorskip("pyarrow") @@ -32,7 +27,7 @@ IOSourceKind: TypeAlias = Literal["str", "Path", "PathLike"] -data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: dict[str, list[Any]] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} skipif_pandas_lt_1_5 = pytest.mark.skipif( PANDAS_VERSION < (1, 5), reason="too old for pyarrow" ) diff --git a/tests/series_only/hist_test.py b/tests/series_only/hist_test.py index 183c0a13ff..7db42c31bc 100644 --- a/tests/series_only/hist_test.py +++ b/tests/series_only/hist_test.py @@ -11,11 +11,14 @@ import narwhals as nw from narwhals.exceptions import ComputeError -from tests.utils import POLARS_VERSION, ConstructorEager, assert_equal_data +from tests.utils import POLARS_VERSION, assert_equal_data if TYPE_CHECKING: from collections.abc import Sequence + from narwhals.testing.typing import DataFrameConstructor + + rnd = Random(0) # noqa: S311 data: dict[str, Any] = { @@ -43,7 +46,8 @@ param_include_breakpoint = pytest.mark.parametrize( "include_breakpoint", [True, False], ids=["breakpoint-True", "breakpoint-False"] ) -param_library = pytest.mark.parametrize("library", ["pandas", "polars", "pyarrow"]) +param_name = pytest.mark.parametrize("name", ["pandas", "polars[eager]", "pyarrow"]) + SHIFT_BINS_BY = 10 """shift bins property""" @@ -63,34 +67,14 @@ ], ids=str, ) -@param_library def test_hist_bin( - library: str, + nw_dataframe: DataFrameConstructor, bins: list[float], expected: Sequence[float], *, include_breakpoint: bool, ) -> None: - constructor_eager: ConstructorEager - pytest.importorskip(library) - if library == "pandas": - import pandas as pd - - constructor_eager = pd.DataFrame - elif library == "polars": - import polars as pl - - 
constructor_eager = pl.DataFrame - else: - import pyarrow as pa - - pytest.importorskip("numpy") - - constructor_eager = pa.table - - df = nw.from_native(constructor_eager(data)).with_columns( - float=nw.col("int").cast(nw.Float64) - ) + df = nw_dataframe(data, nw).with_columns(float=nw.col("int").cast(nw.Float64)) expected_full = {"count": expected} if include_breakpoint: expected_full = {"breakpoint": bins[1:], **expected_full} @@ -115,10 +99,8 @@ def test_hist_bin( assert_equal_data(result, expected_full) # missing/nan results - df = nw.from_native( - constructor_eager( - {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]} - ) + df = nw_dataframe( + {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]}, nw ) expected_full = {"count": expected} if include_breakpoint: @@ -130,25 +112,13 @@ def test_hist_bin( @pytest.mark.parametrize("params", counts_and_expected) @param_include_breakpoint -@param_library def test_hist_count( - library: str, *, params: dict[str, Any], include_breakpoint: bool + nw_dataframe: DataFrameConstructor, + *, + params: dict[str, Any], + include_breakpoint: bool, ) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - df = nw.from_native(constructor_eager(data)).with_columns( - float=nw.col("int").cast(nw.Float64) - ) + df = nw_dataframe(data, nw).with_columns(float=nw.col("int").cast(nw.Float64)) bin_count = params["bin_count"] expected_bins = params["expected_bins"] @@ -168,10 +138,8 @@ def test_hist_count( assert result["count"].sum() == df[col].count() # missing/nan results - df = nw.from_native( - constructor_eager( - {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]} - ) + df = 
nw_dataframe( + {"has_nan": [float("nan"), *data["int"]], "has_null": [None, *data["int"]]}, nw ) for col in df.columns: @@ -186,22 +154,9 @@ def test_hist_count( ) -@param_library -def test_hist_count_no_spread(library: str) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table +def test_hist_count_no_spread(nw_dataframe: DataFrameConstructor) -> None: data = {"all_zero": [0, 0, 0], "all_non_zero": [5, 5, 5]} - df = nw.from_native(constructor_eager(data)) + df = nw_dataframe(data, nw) result = df["all_zero"].hist(bin_count=4, include_breakpoint=True) expected = {"breakpoint": [-0.25, 0.0, 0.25, 0.5], "count": [0, 3, 0, 0]} @@ -229,23 +184,12 @@ def test_hist_bin_and_bin_count() -> None: @param_include_breakpoint -@param_library -def test_hist_no_data(library: str, *, include_breakpoint: bool) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - s = nw.from_native(constructor_eager({"values": []})).select( - nw.col("values").cast(nw.Float64) - )["values"] +def test_hist_no_data( + nw_dataframe: DataFrameConstructor, *, include_breakpoint: bool +) -> None: + s = nw_dataframe({"values": []}, nw).select(nw.col("values").cast(nw.Float64))[ + "values" + ] for bin_count in [1, 10]: result = s.hist(bin_count=bin_count, include_breakpoint=include_breakpoint) assert len(result) == bin_count @@ -262,21 +206,8 @@ def test_hist_no_data(library: str, *, include_breakpoint: bool) -> None: assert result["count"].sum() 
== 0 -@param_library -def test_hist_small_bins(library: str) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - s = nw.from_native(constructor_eager({"values": [1, 2, 3]})) +def test_hist_small_bins(nw_dataframe: DataFrameConstructor) -> None: + s = nw_dataframe({"values": [1, 2, 3]}, nw) result = s["values"].hist(bins=None, bin_count=None) assert len(result) == 10 @@ -284,11 +215,11 @@ def test_hist_small_bins(library: str) -> None: s["values"].hist(bins=[1, 3], bin_count=4) -def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None: - if "cudf" in str(constructor_eager): +def test_hist_non_monotonic(nw_dataframe: DataFrameConstructor) -> None: + if "cudf" in str(nw_dataframe): # TODO(unassigned): too many spurious failures, report and revisit return - df = nw.from_native(constructor_eager({"int": [0, 1, 2, 3, 4, 5, 6]})) + df = nw_dataframe({"int": [0, 1, 2, 3, 4, 5, 6]}, nw) with pytest.raises(ComputeError, match="monotonic"): df["int"].hist(bins=[5, 0, 2]) @@ -323,33 +254,17 @@ def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None: POLARS_VERSION < (1, 27), reason="polars cannot be used for compatibility checks since narwhals aims to mimic polars>=1.27 behavior", ) -@param_library @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.slow def test_hist_bin_hypotheis( - library: str, data: list[float], bin_deltas: list[float] + nw_dataframe: DataFrameConstructor, data: list[float], bin_deltas: list[float] ) -> None: - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - pl = pytest.importorskip("polars") 
- constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table pytest.importorskip("polars") import polars as pl - df = nw.from_native(constructor_eager({"values": data})).select( - nw.col("values").cast(nw.Float64) - ) - df_bins_native = constructor_eager({"bins": bin_deltas}) + df = nw_dataframe({"values": data}, nw).select(nw.col("values").cast(nw.Float64)) bins = ( - nw.from_native(df_bins_native, eager_only=True) + nw_dataframe({"bins": bin_deltas}, nw) .get_column("bins") .cast(nw.Float64) .cum_sum() @@ -376,28 +291,17 @@ def test_hist_bin_hypotheis( reason="polars cannot be used for compatibility checks since narwhals aims to mimic polars>=1.27 behavior", ) @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") -@param_library @pytest.mark.slow def test_hist_count_hypothesis( - library: str, data: list[float], bin_count: int, request: pytest.FixtureRequest + nw_dataframe: DataFrameConstructor, + data: list[float], + bin_count: int, + request: pytest.FixtureRequest, ) -> None: pytest.importorskip("polars") import polars as pl - if library == "pandas": - pytest.importorskip("pandas") - import pandas as pd - - constructor_eager: Any = pd.DataFrame - elif library == "polars": - constructor_eager = pl.DataFrame - else: - pa = pytest.importorskip("pyarrow") - pytest.importorskip("numpy") - constructor_eager = pa.table - df = nw.from_native(constructor_eager({"values": data})).select( - nw.col("values").cast(nw.Float64) - ) + df = nw_dataframe({"values": data}, nw).select(nw.col("values").cast(nw.Float64)) try: result = df["values"].hist(bin_count=bin_count, include_breakpoint=True) @@ -418,9 +322,7 @@ def test_hist_count_hypothesis( if expected[ "count" - ].sum() != expected_data.is_not_nan().sum() and "polars" not in str( - constructor_eager - ): + ].sum() != expected_data.is_not_nan().sum() and "polars" not in str(nw_dataframe): 
request.applymarker(pytest.mark.xfail) assert_equal_data(result, expected.to_dict(as_series=False)) diff --git a/tests/series_only/is_sorted_test.py b/tests/series_only/is_sorted_test.py index 046669aac0..4efddf542f 100644 --- a/tests/series_only/is_sorted_test.py +++ b/tests/series_only/is_sorted_test.py @@ -16,7 +16,7 @@ ) def test_is_sorted( constructor_eager: ConstructorEager, - input_data: str, + input_data: list[int], descending: bool, # noqa: FBT001 expected: bool, # noqa: FBT001 ) -> None: diff --git a/tests/series_only/to_native_test.py b/tests/series_only/to_native_test.py index 350d81764d..c2a7ad5ecb 100644 --- a/tests/series_only/to_native_test.py +++ b/tests/series_only/to_native_test.py @@ -11,7 +11,7 @@ def test_to_native(constructor_eager: ConstructorEager) -> None: - orig_series = constructor_eager({"a": data})["a"] # type: ignore[index] + orig_series = constructor_eager({"a": data})["a"].to_native() nw_series = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] result = nw_series.to_native() assert isinstance(result, orig_series.__class__) diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py index c1b3b4e357..5d2f187267 100644 --- a/tests/testing/assert_frame_equal_test.py +++ b/tests/testing/assert_frame_equal_test.py @@ -12,8 +12,8 @@ from tests.utils import PANDAS_VERSION if TYPE_CHECKING: + from narwhals.testing.typing import Data from narwhals.typing import IntoSchema - from tests.conftest import Data from tests.utils import Constructor, ConstructorEager @@ -24,12 +24,12 @@ def _assertion_error(detail: str) -> pytest.RaisesExc: def test_check_narwhals_objects(constructor: Constructor) -> None: """Test that a type error is raised if the input is not a Narwhals object.""" - frame = constructor({"a": [1, 2, 3]}) + frame = constructor({"a": [1, 2, 3]}).to_native() msg = re.escape( "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found" ) with pytest.raises(TypeError, 
match=msg): - assert_frame_equal(frame, frame) # type: ignore[arg-type] + assert_frame_equal(frame, frame) def test_implementation_mismatch() -> None: @@ -42,8 +42,7 @@ def test_implementation_mismatch() -> None: with _assertion_error("implementation mismatch"): assert_frame_equal( - nw.from_native(pd.DataFrame({"a": [1]})), - nw.from_native(pa.table({"a": [1]})), # type: ignore[type-var] # pyright: ignore[reportArgumentType] + nw.from_native(pd.DataFrame({"a": [1]})), nw.from_native(pa.table({"a": [1]})) ) diff --git a/tests/testing/assert_series_equal_test.py b/tests/testing/assert_series_equal_test.py index c4826c695e..064cc546dd 100644 --- a/tests/testing/assert_series_equal_test.py +++ b/tests/testing/assert_series_equal_test.py @@ -13,8 +13,8 @@ if TYPE_CHECKING: from typing_extensions import TypeAlias + from narwhals.testing.typing import Data from narwhals.typing import IntoSchema, IntoSeriesT - from tests.conftest import Data from tests.utils import ConstructorEager SetupFn: TypeAlias = Callable[[nw.Series[Any]], tuple[nw.Series[Any], nw.Series[Any]]] @@ -406,7 +406,7 @@ def test_categorical_as_str( "left": ["beluga", "dolphin", "narwhal", "orca"], "right": ["unicorn", "orca", "narwhal", "orca"], } - frame = nw.from_native(constructor_eager(data), eager_only=True) + frame = constructor_eager(data, namespace=nw) left = frame["left"].cast(nw.Categorical())[2:] right = frame["right"].cast(nw.Categorical())[2:] diff --git a/tests/testing/conftest.py b/tests/testing/conftest.py index a41d4fdce4..0ff5e9935f 100644 --- a/tests/testing/conftest.py +++ b/tests/testing/conftest.py @@ -8,8 +8,8 @@ import narwhals as nw if TYPE_CHECKING: + from narwhals.testing.typing import Data from narwhals.typing import IntoSchema - from tests.conftest import Data @pytest.fixture(scope="module") diff --git a/tests/testing/constructors_test.py b/tests/testing/constructors_test.py new file mode 100644 index 0000000000..3520d492da --- /dev/null +++ 
b/tests/testing/constructors_test.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw +from narwhals._utils import Implementation +from narwhals.testing.constructors import ( + available_backends, + get_backend_constructor, + prepare_backends, +) + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + PropertyName: TypeAlias = str + TrueNames: TypeAlias = set[str] + FalseNames: TypeAlias = set[str] + + +def test_eager_returns_eager_frame() -> None: + c = get_backend_constructor("pandas") + if not c.is_available: + pytest.skip() + + df = c({"x": [1, 2, 3]}, nw) + assert isinstance(df, nw.DataFrame) + + +def test_lazy_returns_lazy_frame() -> None: + c = get_backend_constructor("polars[lazy]") + if not c.is_available: + pytest.skip() + + lf = c({"x": [1, 2, 3]}, nw) + assert isinstance(lf, nw.LazyFrame) + + +_IS_PROPERTY_CASES: list[tuple[PropertyName, TrueNames, FalseNames]] = [ + ("is_pandas", {"pandas", "pandas[nullable]", "pandas[pyarrow]"}, {"polars[eager]"}), + ("is_modin", {"modin", "modin[pyarrow]"}, {"pandas"}), + ("is_cudf", {"cudf"}, {"pandas"}), + ("is_pandas_like", {"pandas", "modin", "cudf"}, {"polars[eager]"}), + ("is_polars", {"polars[eager]", "polars[lazy]"}, {"pandas"}), + ("is_pyarrow", {"pyarrow"}, {"pandas"}), + ("is_dask", {"dask"}, {"pandas"}), + ("is_duckdb", {"duckdb"}, {"pandas"}), + ("is_pyspark", {"pyspark", "pyspark[connect]"}, {"pandas"}), + ("is_sqlframe", {"sqlframe"}, {"pandas"}), + ("is_ibis", {"ibis"}, {"pandas"}), + ("is_spark_like", {"pyspark", "sqlframe", "pyspark[connect]"}, {"pandas"}), + ("is_lazy", {"polars[lazy]", "dask", "duckdb"}, {"pandas"}), + ("needs_pyarrow", {"pyarrow", "duckdb", "ibis"}, {"pandas"}), + ("is_nullable", {"polars[eager]"}, {"pandas", "modin", "dask"}), +] + + +@pytest.mark.parametrize(("prop", "true_names", "false_names"), _IS_PROPERTY_CASES) +def test_constructor_is_properties( + prop: str, true_names: 
TrueNames, false_names: FalseNames +) -> None: + for name in true_names: + c = get_backend_constructor(name) + assert getattr(c, prop), f"{name}.{prop} should be True" + for name in false_names: + c = get_backend_constructor(name) + assert not getattr(c, prop), f"{name}.{prop} should be False" + + +def test_constructor_implementation() -> None: + assert get_backend_constructor("pandas").implementation is Implementation.PANDAS + assert ( + get_backend_constructor("pandas[pyarrow]").implementation is Implementation.PANDAS + ) + assert ( + get_backend_constructor("polars[eager]").implementation is Implementation.POLARS + ) + assert ( + get_backend_constructor("pyspark[connect]").implementation + is Implementation.PYSPARK_CONNECT + ) + + +def test_constructor_dunder() -> None: + c1 = get_backend_constructor("pandas") + c2 = get_backend_constructor("pandas") + assert c1.identifier == "pandas" + assert c1 == c2 + assert hash(c1) == hash(c2) + assert c1 != get_backend_constructor("polars[eager]") + assert c1 != "not a constructor" + + +def test_get_backend_constructor_invalid_name() -> None: + with pytest.raises(ValueError, match="Unknown constructor"): + get_backend_constructor("not_a_backend") + + +@pytest.mark.parametrize( + ("include", "exclude", "expected"), + [ + (None, None, available_backends()), + (None, ["pandas"], available_backends() - {"pandas"}), + (["pandas", "polars[eager]"], None, {"pandas", "polars[eager]"}), + (["pandas", "polars[eager]"], ["pandas"], {"polars[eager]"}), + ([], None, frozenset()), + ], +) +def test_prepare_backends( + include: list[str] | None, exclude: list[str] | None, expected: frozenset[str] +) -> None: + for name in (*(include or ()), *(exclude or ())): + if not get_backend_constructor(name).is_available: + pytest.skip(f"{name} not installed") + result = prepare_backends(include=include, exclude=exclude) + assert {c.name for c in result} == expected + + +@pytest.mark.parametrize("kwarg", ["include", "exclude"]) +def 
test_prepare_backends_unknown_name_raises(kwarg: str) -> None: + with pytest.raises(ValueError, match="not known constructors"): + prepare_backends(**{kwarg: ["not_a_backend"]}) diff --git a/tests/testing/plugin_test.py b/tests/testing/plugin_test.py new file mode 100644 index 0000000000..2c49818047 --- /dev/null +++ b/tests/testing/plugin_test.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import pytest + +pytest_plugins = ["pytester"] + + +def test_constructor_eager_fixture_runs_for_each_backend( + pytester: pytest.Pytester, +) -> None: + pytest.importorskip("pandas") + pytest.importorskip("polars") + pytest.importorskip("pyarrow") + + pytester.makeconftest("") + pytester.makepyfile(""" + import narwhals as nw + from narwhals.testing.typing import DataFrameConstructor + + def test_shape(nw_dataframe: DataFrameConstructor) -> None: + df = nw_dataframe({"x": [1, 2, 3]}, namespace=nw) + assert df.shape == (3, 1) + """) + result = pytester.runpytest_subprocess( + "-v", "-p", "no:randomly", "--nw-backends=pandas,polars[eager],pyarrow" + ) + result.assert_outcomes(passed=3) + result.stdout.fnmatch_lines( + [ + "*test_shape?pandas?*", + "*test_shape?polars[[]eager[]]?*", + "*test_shape?pyarrow?*", + ] + ) + + +def test_constructor_fixture_includes_lazy_backends(pytester: pytest.Pytester) -> None: + pytest.importorskip("pandas") + pytest.importorskip("polars") + pytest.importorskip("duckdb") + + pytester.makeconftest("") + pytester.makepyfile(""" + import narwhals as nw + from narwhals.testing.typing import FrameConstructor + + def test_columns(nw_frame: FrameConstructor) -> None: + df = nw_frame({"x": [1, 2, 3]}, namespace=nw) + assert df.collect_schema().names() == ["x"] + """) + result = pytester.runpytest_subprocess( + "-v", "--nw-backends=pandas,polars[lazy],duckdb" + ) + result.assert_outcomes(passed=3) + + +def test_external_constructor_disables_parametrisation(pytester: pytest.Pytester) -> None: + pytester.makeconftest("") + pytester.makepyfile(""" + 
from narwhals.testing.typing import DataFrameConstructor + + def test_unparam(nw_dataframe: DataFrameConstructor) -> None: + pass + """) + result = pytester.runpytest_subprocess("--use-external-nw-backend") + # Without external parametrisation in place, the fixture is missing. + result.assert_outcomes(errors=1) diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 9f87b5220b..d23cbfd39c 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -30,7 +30,7 @@ import narwhals as nw from narwhals._utils import Version -from tests.conftest import sqlframe_pyspark_lazy_constructor +from narwhals.testing.constructors import get_backend_constructor from tests.utils import Constructor, maybe_get_modin_df if TYPE_CHECKING: @@ -294,10 +294,10 @@ def test_eager_only_lazy_dask(eager_only: Any, context: Any) -> None: def test_series_only_sqlframe() -> None: # pragma: no cover pytest.importorskip("sqlframe") - df = sqlframe_pyspark_lazy_constructor(data) + df = get_backend_constructor("sqlframe")(data, nw).to_native() with pytest.raises(TypeError, match="Cannot only use `series_only`"): - nw.from_native(df, series_only=True) # pyright: ignore[reportArgumentType, reportCallIssue] # pyrefly: ignore[no-matching-overload] + nw.from_native(df, series_only=True) # type: ignore[call-overload] # pyrefly: ignore[no-matching-overload] @pytest.mark.parametrize( @@ -315,7 +315,7 @@ def test_series_only_sqlframe() -> None: # pragma: no cover ) def test_eager_only_sqlframe(eager_only: Any, context: Any) -> None: # pragma: no cover pytest.importorskip("sqlframe") - df = sqlframe_pyspark_lazy_constructor(data) + df = get_backend_constructor("sqlframe")(data, nw).to_native() with context: res = nw.from_native(df, eager_only=eager_only) @@ -528,7 +528,7 @@ def test_eager_only_pass_through_main(constructor: Constructor) -> None: if not any(s in str(constructor) for s in ("pyspark", "dask", "ibis", "duckdb")): 
pytest.skip(reason="Non lazy or polars") - df = constructor(data) + df = constructor(data).to_native() r1 = nw.from_native(df, eager_only=False, pass_through=False) r2 = nw.from_native(df, eager_only=False, pass_through=True) @@ -539,7 +539,7 @@ def test_eager_only_pass_through_main(constructor: Constructor) -> None: assert not isinstance(r3, nw.LazyFrame) with pytest.raises(TypeError, match=r"Cannot.+use.+eager_only"): - nw.from_native(df, eager_only=True, pass_through=False) # type: ignore[type-var] + nw.from_native(df, eager_only=True, pass_through=False) def test_from_native_lazyframe_exhaustive() -> None: # noqa: PLR0914, PLR0915 diff --git a/tests/translate/get_native_namespace_test.py b/tests/translate/get_native_namespace_test.py index 821443ea64..5a15069ed2 100644 --- a/tests/translate/get_native_namespace_test.py +++ b/tests/translate/get_native_namespace_test.py @@ -76,7 +76,7 @@ def test_native_namespace_frame(constructor: Constructor) -> None: def test_native_namespace_series(constructor_eager: ConstructorEager) -> None: - constructor_name = constructor_eager.__name__ + constructor_name = str(constructor_eager) expected_namespace = _get_expected_namespace(constructor_name=constructor_name) diff --git a/tests/utils.py b/tests/utils.py index b9fa613bca..5b53e3a49b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,7 +6,7 @@ import warnings from datetime import date, datetime from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any import pytest @@ -15,16 +15,25 @@ from narwhals.dependencies import get_pandas from narwhals.translate import from_native +# TODO(FBruzzesi): Replace these aliases once all the test suite migrates to *FrameConstructor's +from tests.conftest import ( + _PatchedDataFrameConstructor as ConstructorEager, + _PatchedDataFrameConstructor as ConstructorPandasLike, + _PatchedFrameConstructor as Constructor, +) + if TYPE_CHECKING: from collections.abc import Mapping, 
Sequence import pandas as pd - from pyspark.sql import SparkSession - from sqlframe.duckdb import DuckDBSession from typing_extensions import TypeAlias - from narwhals._native import NativeLazyFrame - from narwhals.typing import Frame, IntoDataFrame, TimeUnit + from narwhals.typing import Frame, TimeUnit + +# TODO(FBruzzesi): Remove these aliases once all the test suite migrates to *FrameConstructor's +# NOTE: Explicitly exported otherwise mypy will raise an [attr-defined] error for each file +# importing them from `tests.utils` rather than `narwhals.testing.typing` directly. +__all__ = ("Constructor", "ConstructorEager", "ConstructorPandasLike") def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -44,11 +53,6 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] = get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | IntoDataFrame"] -ConstructorEager: TypeAlias = Callable[[Any], "IntoDataFrame"] -ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] -ConstructorPandasLike: TypeAlias = Callable[[Any], "pd.DataFrame"] - NestedOrEnumDType: TypeAlias = "nw.List | nw.Array | nw.Struct | nw.Enum" """`DType`s which **cannot** be used as bare types.""" @@ -174,34 +178,6 @@ def assert_equal_hash(left: Any, right: Any) -> None: ) -def sqlframe_session() -> DuckDBSession: - from sqlframe.duckdb import DuckDBSession - - # NOTE: `__new__` override inferred by `pyright` only - # https://github.com/eakmanrq/sqlframe/blob/772b3a6bfe5a1ffd569b7749d84bea2f3a314510/sqlframe/base/session.py#L181-L184 - return cast("DuckDBSession", DuckDBSession()) # type: ignore[redundant-cast] - - -def pyspark_session() -> SparkSession: # pragma: no cover - if is_spark_connect := os.environ.get("SPARK_CONNECT", None): - from pyspark.sql.connect.session import SparkSession - else: - from pyspark.sql import 
SparkSession - builder = cast("SparkSession.Builder", SparkSession.builder).appName("unit-tests") - builder = ( - builder.remote(f"sc://localhost:{os.environ.get('SPARK_PORT', '15002')}") - if is_spark_connect - else builder.master("local[1]").config("spark.ui.enabled", "false") - ) - return ( - # Don't remove pyrefly-ignore, needed in CI when pyspark is installed. - builder.config("spark.default.parallelism", "1") # pyrefly: ignore[bad-return] - .config("spark.sql.shuffle.partitions", "2") - .config("spark.sql.session.timeZone", "UTC") - .getOrCreate() - ) - - def maybe_get_modin_df(df_pandas: pd.DataFrame) -> Any: # pragma: no cover """Convert a pandas DataFrame to a Modin DataFrame if Modin is available.""" try: @@ -231,10 +207,7 @@ def is_pyarrow_windows_no_tzdata(constructor: Constructor, /) -> bool: def uses_pyarrow_backend(constructor: Constructor | ConstructorEager) -> bool: """Checks if the pandas-like constructor uses pyarrow backend.""" - return constructor.__name__ in { - "pandas_pyarrow_constructor", - "modin_pyarrow_constructor", - } + return str(constructor) in {"pandas_pyarrow_constructor", "modin_pyarrow_constructor"} def maybe_collect(df: Frame) -> Frame: diff --git a/tests/v1_test.py b/tests/v1_test.py index 9882c4ed15..8ddb64a118 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -318,11 +318,11 @@ def test_cast_to_enum_v1( ): request.applymarker(pytest.mark.xfail) - df_native = constructor({"a": ["a", "b"]}) + df = constructor({"a": ["a", "b"]}, nw_v1) msg = re.escape("Converting to Enum is not supported in narwhals.stable.v1") with pytest.raises(NotImplementedError, match=msg): - nw_v1.from_native(df_native).select(nw_v1.col("a").cast(nw_v1.Enum)) # type: ignore[arg-type] + df.select(nw_v1.col("a").cast(nw_v1.Enum)) # type: ignore[arg-type] def test_v1_ordered_categorical_pandas() -> None: @@ -459,7 +459,7 @@ def test_with_row_index(constructor: Constructor) -> None: pytest.skip() data = {"abc": ["foo", "bars"], "xyz": [100, 200], 
"const": [42, 42]} - frame = nw_v1.from_native(constructor(data)) + frame = constructor(data, nw_v1) msg = "Cannot pass `order_by`" context = ( @@ -469,7 +469,7 @@ def test_with_row_index(constructor: Constructor) -> None: ) with context: - result = frame.with_row_index() + result = frame.with_row_index() # type: ignore[call-arg] expected = {"index": [0, 1], **data} assert_equal_data(result, expected) @@ -887,7 +887,7 @@ def test_is_frame() -> None: def test_with_version(constructor: Constructor) -> None: - lf = nw_v1.from_native(constructor({"a": [1, 2]})).lazy() + lf = constructor({"a": [1, 2]}, nw_v1).lazy() assert isinstance(lf, nw_v1.LazyFrame) assert lf._compliant_frame._with_version(Version.MAIN)._version is Version.MAIN @@ -896,7 +896,7 @@ def test_with_version(constructor: Constructor) -> None: @pytest.mark.parametrize("offset", [1, 2]) def test_gather_every(constructor_eager: ConstructorEager, n: int, offset: int) -> None: data = {"a": list(range(10))} - df_v1 = nw_v1.from_native(constructor_eager(data)) + df_v1 = constructor_eager(data, nw_v1) result = df_v1.gather_every(n=n, offset=offset) expected = {"a": data["a"][offset::n]} assert_equal_data(result, expected) @@ -1156,7 +1156,7 @@ def test_series_from_iterable( def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - df = nw_v1.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v1) result = df.select(nw_v1.col("a").mode()).sort("a") expected = {"a": [1, 2]} assert_equal_data(result, expected) @@ -1164,7 +1164,7 @@ def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: def test_mode_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - series = nw_v1.from_native(constructor_eager(data), eager_only=True)["a"] + series = constructor_eager(data, nw_v1)["a"] result = series.mode().sort() expected = {"a": [1, 2]} assert_equal_data({"a": result}, 
expected) @@ -1173,7 +1173,7 @@ def test_mode_series(constructor_eager: ConstructorEager) -> None: def test_mode_different_lengths(constructor_eager: ConstructorEager) -> None: if "polars" in str(constructor_eager) and POLARS_VERSION < (1, 10): pytest.skip() - df = nw_v1.from_native(constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]})) + df = constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]}, nw_v1) with pytest.raises(ShapeError): df.select(nw_v1.col("a", "b").mode()) @@ -1196,7 +1196,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest "b": [1, 2, 3, 4, 5, 6], "c": [None, None, 1, None, 2, None], } - df = nw_v1.from_native(constructor(data)) + df = constructor(data, nw_v1) with pytest.warns(NarwhalsUnstableWarning): df.select(nw_v1.col("a", "b").any_value()) @@ -1204,7 +1204,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest def test_any_value_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 1, 2, 2, 3]} - df = nw_v1.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v1) with pytest.warns(NarwhalsUnstableWarning): df["a"].any_value() diff --git a/tests/v2_test.py b/tests/v2_test.py index 7a1903425c..d33ae97edb 100644 --- a/tests/v2_test.py +++ b/tests/v2_test.py @@ -347,7 +347,7 @@ def fun2(self, df: Any) -> Any: # pragma: no cover def test_with_version(constructor: Constructor) -> None: - lf = nw_v2.from_native(constructor({"a": [1, 2]})).lazy() + lf = constructor({"a": [1, 2]}, nw_v2).lazy() assert isinstance(lf, nw_v2.LazyFrame) assert lf._compliant_frame._with_version(Version.MAIN)._version is Version.MAIN @@ -503,7 +503,7 @@ def test_series_from_iterable( def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - df = nw_v2.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v2) result = df.select(nw_v2.col("a").mode()).sort("a") expected = {"a": [1, 2]} 
assert_equal_data(result, expected) @@ -511,7 +511,7 @@ def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: def test_mode_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 2, 2, 3], "b": [1, 2, 3, 3, 4]} - series = nw_v2.from_native(constructor_eager(data), eager_only=True)["a"] + series = constructor_eager(data, nw_v2)["a"] result = series.mode().sort() expected = {"a": [1, 2]} assert_equal_data({"a": result}, expected) @@ -520,7 +520,7 @@ def test_mode_series(constructor_eager: ConstructorEager) -> None: def test_mode_different_lengths(constructor_eager: ConstructorEager) -> None: if "polars" in str(constructor_eager) and POLARS_VERSION < (1, 10): pytest.skip() - df = nw_v2.from_native(constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]})) + df = constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]}, nw_v2) with pytest.raises(ShapeError): df.select(nw_v2.col("a", "b").mode()) @@ -535,7 +535,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest "b": [1, 2, 3, 4, 5, 6], "c": [None, None, 1, None, 2, None], } - df = nw_v2.from_native(constructor(data)) + df = constructor(data, nw_v2) with pytest.warns(NarwhalsUnstableWarning): df.select(nw_v2.col("a", "b").any_value()) @@ -543,7 +543,7 @@ def test_any_value_expr(constructor: Constructor, request: pytest.FixtureRequest def test_any_value_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 1, 1, 2, 2, 3]} - df = nw_v2.from_native(constructor_eager(data)) + df = constructor_eager(data, nw_v2) with pytest.warns(NarwhalsUnstableWarning): df["a"].any_value() diff --git a/tpch/tests/conftest.py b/tpch/tests/conftest.py index d98c4b401a..499571a567 100644 --- a/tpch/tests/conftest.py +++ b/tpch/tests/conftest.py @@ -36,13 +36,6 @@ def pytest_configure(config: pytest.Config) -> None: def pytest_addoption(parser: pytest.Parser) -> None: - from tests.conftest import DEFAULT_CONSTRUCTORS - - parser.addoption( - "--constructors", - 
default=DEFAULT_CONSTRUCTORS, - help="", - ) parser.addoption( "--scale-factor", default=constants.SCALE_FACTOR_DEFAULT, diff --git a/utils/import_check.py b/utils/import_check.py index d292b40790..d97b488509 100644 --- a/utils/import_check.py +++ b/utils/import_check.py @@ -27,6 +27,20 @@ "_polars": {"polars"}, "_duckdb": {"duckdb"}, "_ibis": {"ibis", "ibis._", "ibis.expr.types"}, + # narwhals.testing constructors deliberately lazy-import every supported + # backend inside `__call__` so test fixtures can build native frames. + "testing": { + "cudf", + "dask", + "dask.dataframe", + "duckdb", + "ibis", + "modin", + "pandas", + "polars", + "pyarrow", + "pyspark", + }, } diff --git a/utils/sort_api_reference.py b/utils/sort_api_reference.py index 1b417ed63a..243ccbcd6d 100644 --- a/utils/sort_api_reference.py +++ b/utils/sort_api_reference.py @@ -42,7 +42,7 @@ def sort_list(match: re.Match[str]) -> str: PATH = Path("docs") / "api-reference" -FILES_TO_SKIP = {"dtypes", "typing"} +FILES_TO_SKIP = {"dtypes", "typing", "testing"} ret = max( sort_members_in_markdown(file_path=file_path)