Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from narwhals._arrow.typing import ChunkedArrayAny, Incomplete, ScalarAny
from narwhals._utils import Version
from narwhals.typing import IntoDType, NonNestedLiteral
from narwhals.typing import IntoDType, PythonLiteral


class ArrowNamespace(
Expand Down Expand Up @@ -64,7 +64,7 @@ def len(self) -> ArrowExpr:
version=self._version,
)

def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> ArrowExpr:
def lit(self, value: PythonLiteral, dtype: IntoDType | None) -> ArrowExpr:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, isn't this a nice diff 😄

def _lit_arrow_series(_: ArrowDataFrame) -> ArrowSeries:
arrow_series = ArrowSeries.from_iterable(
data=[value], name="literal", context=self
Expand Down
6 changes: 5 additions & 1 deletion narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._utils import Implementation, zip_strict
from narwhals._utils import Implementation, is_nested_literal, zip_strict

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator
Expand Down Expand Up @@ -55,6 +55,10 @@ def __init__(self, *, version: Version) -> None:
self._version = version

def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> DaskExpr:
if is_nested_literal(value):
msg = f"Nested structures are not supported for Dask backend, found {type(value).__name__}"
raise NotImplementedError(msg)

def func(df: DaskLazyFrame) -> list[dx.Series]:
if dtype is not None:
native_dtype = narwhals_to_native_dtype(dtype, self._version)
Expand Down
8 changes: 6 additions & 2 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

from narwhals._compliant.window import WindowInputs
from narwhals._utils import Version
from narwhals.typing import ConcatMethod, IntoDType, NonNestedLiteral
from narwhals.typing import ConcatMethod, IntoDType, PythonLiteral

VARCHAR = duckdb_dtypes.VARCHAR

Expand Down Expand Up @@ -130,8 +130,12 @@ def func(cols: Iterable[Expression]) -> Expression:

return self._expr._from_elementwise_horizontal_op(func, *exprs)

def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> DuckDBExpr:
def lit(self, value: PythonLiteral, dtype: IntoDType | None) -> DuckDBExpr:
def func(df: DuckDBLazyFrame) -> list[Expression]:
if isinstance(value, dict) and len(value) == 0:
Comment thread
FBruzzesi marked this conversation as resolved.
Outdated
msg = "Cannot create an empty struct type for DuckDB backend"
raise NotImplementedError(msg)

tz = DeferredTimeZone(df.native)
if dtype is not None:
target = narwhals_to_native_dtype(dtype, self._version, tz)
Expand Down
8 changes: 7 additions & 1 deletion narwhals/_ibis/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,15 @@ def func(cols: Iterable[ir.Value]) -> ir.Value:

return self._expr._from_elementwise_horizontal_op(func, *exprs)

def lit(self, value: Any, dtype: IntoDType | None) -> IbisExpr:
def lit(self, value: PythonLiteral, dtype: IntoDType | None) -> IbisExpr:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh nice, no more Any too!

def func(_df: IbisLazyFrame) -> Sequence[ir.Value]:
if isinstance(value, dict) and len(value) == 0:
msg = "Cannot create an empty struct type for Ibis backend"
raise NotImplementedError(msg)

ibis_dtype = narwhals_to_native_dtype(dtype, self._version) if dtype else None
if isinstance(value, dict):
return [ibis.struct(value, type=ibis_dtype)]
return [lit(value, ibis_dtype)]
Comment thread
FBruzzesi marked this conversation as resolved.
Outdated

return self._expr(
Expand Down
9 changes: 7 additions & 2 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING, PandasLikeSeries
from narwhals._pandas_like.utils import (
align_and_extract_native,
broadcast_series_to_index,
get_dtype_backend,
import_array_module,
iter_dtype_backends,
Expand Down Expand Up @@ -307,8 +308,12 @@ def _with_native(self, df: Any, *, validate_column_names: bool = True) -> Self:
def _extract_comparand(self, other: PandasLikeSeries) -> pd.Series[Any]:
index = self.native.index
if other._broadcast:
s = other.native
return type(s)(s.iloc[0], index=index, dtype=s.dtype, name=s.name)
native = other.native
is_nested = other.dtype.is_nested()
return broadcast_series_to_index(
native, index, is_nested=is_nested, series_class=type(native)
)

if (len_other := len(other)) != (len_idx := len(index)):
msg = f"Expected object of length {len_idx}, got: {len_other}."
raise ShapeError(msg)
Expand Down
39 changes: 34 additions & 5 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from typing_extensions import TypeAlias

from narwhals._utils import Implementation, Version
from narwhals.typing import IntoDType, NonNestedLiteral
from narwhals.typing import IntoDType, PythonLiteral


Incomplete: TypeAlias = Any
Expand Down Expand Up @@ -83,17 +83,46 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
context=self,
)

def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> PandasLikeExpr:
def lit(self, value: PythonLiteral, dtype: IntoDType | None) -> PandasLikeExpr:
def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
pandas_series = self._series.from_iterable(
if isinstance(value, (list, tuple, dict)):
try:
import pandas as pd # ignore-banned-import
import pyarrow as pa # ignore-banned-import
except ImportError as exc: # pragma: no cover
msg = (
"Nested structures require pyarrow to be installed for pandas backend. "
"Please install pyarrow: pip install pyarrow"
)
raise ImportError(msg) from exc

from narwhals._arrow.utils import (
narwhals_to_native_dtype as _to_arrow_dtype,
)

array_value = list(value) if isinstance(value, tuple) else value
pa_dtype = _to_arrow_dtype(dtype, self._version) if dtype else None
pa_array = pa.array([array_value], type=pa_dtype) # type: ignore[arg-type, list-item]

# Use ArrowExtensionArray to avoid pandas unpacking the nested structure
ns = self._implementation.to_native_namespace()
pandas_series_native = ns.Series(
pd.arrays.ArrowExtensionArray(pa_array), # type: ignore[attr-defined]
name="literal",
index=df._native_frame.index[0:1],
)
Comment on lines +107 to +113
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the wrapping part of (#3424 (comment)) should be reused here.

Maybe broadcast_series_to_index is too specific of a function?

The two more useful parts IMO are:

  • repeat
  • something related to reconstruction?

Copy link
Copy Markdown
Member

@dangotbanned dangotbanned Jan 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(#3424 (comment))

Okay yeah so what I'm thinking is a new constructor (or two?) on PandasLikeSeries might work?

All of these are parts of a constructor for both native & compliant, and they're spread across 4 modules.

def _extract_comparand(self, other: PandasLikeSeries) -> pd.Series[Any]:

def lit(self, value: PythonLiteral, dtype: IntoDType | None) -> PandasLikeExpr:
def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:

@classmethod
def _align_full_broadcast(cls, *series: Self) -> Sequence[Self]:
Series = series[0].__native_namespace__().Series

def broadcast_series_to_index(


return self._series.from_native(pandas_series_native, context=self)

pandas_like_series = self._series.from_iterable(
data=[value],
name="literal",
index=df._native_frame.index[0:1],
context=self,
)
if dtype:
return pandas_series.cast(dtype)
return pandas_series
return pandas_like_series.cast(dtype)
return pandas_like_series

return PandasLikeExpr(
lambda df: [_lit_pandas_series(df)],
Expand Down
5 changes: 3 additions & 2 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace
from narwhals._pandas_like.utils import (
align_and_extract_native,
broadcast_series_to_index,
get_dtype_backend,
import_array_module,
narwhals_to_native_dtype,
Expand Down Expand Up @@ -211,8 +212,8 @@ def _align_full_broadcast(cls, *series: Self) -> Sequence[Self]:
reindexed = []
for s in series:
if s._broadcast:
native = Series(
s.native.iloc[0], index=idx, name=s.name, dtype=s.native.dtype
native = broadcast_series_to_index(
s.native, idx, is_nested=s.dtype.is_nested(), series_class=Series
)
compliant = s._with_native(native)
elif s.native.index is not idx:
Expand Down
34 changes: 34 additions & 0 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,3 +663,37 @@ class PandasLikeSeriesNamespace(EagerSeriesNamespace["PandasLikeSeries", Any]):

def make_group_by_kwargs(*, drop_null_keys: bool) -> dict[str, bool]:
return {"sort": False, "as_index": True, "dropna": drop_null_keys, "observed": True}


def broadcast_series_to_index(
    native: pd.Series[Any],
    index: Any,
    *,
    is_nested: bool,
    series_class: type[pd.Series[Any]],
) -> pd.Series[Any]:
    """Broadcast a scalar value from a (one element) Series to match a target index.

    Only the first element of `native` is read; it is repeated once per entry
    of `index`. The result keeps the dtype and name of `native`.

    For nested (arrow-backed) types, we rely on
    [`pandas.array`](https://pandas.pydata.org/docs/reference/api/pandas.array.html).

    Arguments:
        native: The native pandas-like Series containing the scalar value to broadcast.
            It must hold at least one element, since the value is read with `.iloc[0]`.
        index: The target index to broadcast to.
        is_nested: Whether the Series has a nested (arrow-backed) dtype.
        series_class: Series class to use for constructing the result.

    Returns:
        A new Series with the scalar value broadcast to match the target index.
    """
    value = native.iloc[0]
    if is_nested:
        # Imported lazily so the arrow helpers are only needed on the nested path.
        from narwhals._arrow.utils import repeat

        # The nested scalar is materialised as an explicit array of
        # `len(index)` copies with the source dtype, instead of being handed
        # to the Series constructor as a bare value like the non-nested path.
        # NOTE: Ignore typing because `pandas-stubs` are wrong
        # TODO(FBruzzesi): Should we pass the `copy=False` flag?
        pa_array = pd.array(repeat(value, len(index)), dtype=native.dtype)  # type: ignore[arg-type]

        return series_class(pa_array, index=index, name=native.name)

    # Non-nested scalars are broadcast by the Series constructor itself.
    return series_class(value, index=index, dtype=native.dtype, name=native.name)
19 changes: 16 additions & 3 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from narwhals._compliant.window import WindowInputs
from narwhals._spark_like.dataframe import SQLFrameDataFrame # noqa: F401
from narwhals._utils import Implementation, Version
from narwhals.typing import ConcatMethod, IntoDType, NonNestedLiteral, PythonLiteral
from narwhals.typing import ConcatMethod, IntoDType, PythonLiteral

# Adjust slight SQL vs PySpark differences
FUNCTION_REMAPPINGS = {
Expand Down Expand Up @@ -91,9 +91,22 @@ def _when(
def _coalesce(self, *exprs: Column) -> Column:
return self._F.coalesce(*exprs)

def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> SparkLikeExpr:
def lit(self, value: PythonLiteral, dtype: IntoDType | None) -> SparkLikeExpr:
def func(df: SparkLikeLazyFrame) -> list[Column]:
column = df._F.lit(value)
F = df._F

if isinstance(value, (list, tuple)):
lit_values = [F.lit(v) for v in value]
column = F.lit(F.array(lit_values))
elif isinstance(value, dict):
if (not self._implementation.is_pyspark()) and (len(value) == 0):
msg = f"Cannot create an empty struct type for {self._implementation} backend"
raise NotImplementedError(msg)
lit_values = [F.lit(v).alias(k) for k, v in value.items()]
column = F.struct(*lit_values)
else:
column = F.lit(value)

if dtype:
native_dtype = narwhals_to_native_dtype(
dtype, self._version, df._native_dtypes, df.native.sparkSession
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
FileSource,
IntoSeriesT,
MultiIndexSelector,
NestedLiteral,
SingleIndexSelector,
SizedMultiBoolSelector,
SizedMultiIndexSelector,
Expand Down Expand Up @@ -1371,6 +1372,10 @@ def is_sequence_of(obj: Any, tp: type[_T]) -> TypeIs[Sequence[_T]]:
)


def is_nested_literal(obj: Any) -> TypeIs[NestedLiteral]:
    """Return whether `obj` is a `list`, `tuple` or `dict` (including subclasses).

    Other values, including strings, are not treated as nested. Only the
    container itself is checked; its elements are not inspected.
    """
    return isinstance(obj, (list, tuple, dict))


def validate_strict_and_pass_though(
strict: bool | None, # noqa: FBT001
pass_through: bool | None, # noqa: FBT001
Expand Down
80 changes: 66 additions & 14 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
deprecate_native_namespace,
flatten,
is_eager_allowed,
is_nested_literal,
is_sequence_but_not_str,
normalize_path,
supports_arrow_c_stream,
Expand Down Expand Up @@ -46,6 +47,7 @@
IntoExpr,
IntoSchema,
NonNestedLiteral,
PythonLiteral,
_2DArray,
)

Expand Down Expand Up @@ -1422,39 +1424,89 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr], ignore_nulls: bool) ->
)


def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr:
def lit(value: PythonLiteral, dtype: IntoDType | None = None) -> Expr:
"""Return an expression representing a literal value.

Arguments:
value: The value to use as literal.
value: The value to use as literal. Can be a scalar value, list, tuple, or dict.
Lists and tuples are converted to `List` dtype, dicts to `Struct` dtype.
dtype: The data type of the literal value. If not provided, the data type will
be inferred by the native library.
be inferred by the native library. For empty lists/dicts, dtype must be
specified explicitly.

Examples:
>>> import pandas as pd
Scalar literals:

>>> import pyarrow as pa
>>> import narwhals as nw
>>>
>>> df_native = pd.DataFrame({"a": [1, 2]})
>>> nw.from_native(df_native).with_columns(nw.lit(3))
>>> df_nw = nw.from_native(pa.table({"a": [1, 2]}))
>>> df_nw.with_columns(nw.lit(3))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a literal |
| 0 1 3 |
| 1 2 3 |
| pyarrow.Table |
| a: int64 |
| literal: int64 |
| ---- |
| a: [[1,2]] |
| literal: [[3,3]] |
└──────────────────┘

List literals (creates a List column):

>>> df_nw.with_columns(nw.lit([1, 2, 3]).alias("list_col"))
┌─────────────────────────────┐
| Narwhals DataFrame |
|-----------------------------|
|pyarrow.Table |
|a: int64 |
|list_col: list<item: int64> |
| child 0, item: int64 |
|---- |
|a: [[1,2]] |
|list_col: [[[1,2,3],[1,2,3]]]|
└─────────────────────────────┘

Dict literals (creates a Struct column):

>>> df_nw.with_columns(nw.lit({"x": 1, "y": 2}).alias("struct_col"))
┌──────────────────────────────────────┐
| Narwhals DataFrame |
|--------------------------------------|
|pyarrow.Table |
|a: int64 |
|struct_col: struct<x: int64, y: int64>|
| child 0, x: int64 |
| child 1, y: int64 |
|---- |
|a: [[1,2]] |
|struct_col: [ |
| -- is_valid: all not null |
| -- child 0 type: int64 |
|[1,1] |
| -- child 1 type: int64 |
|[2,2]] |
└──────────────────────────────────────┘
"""
if is_numpy_array(value):
msg = (
"numpy arrays are not supported as literal values. "
"Consider using `with_columns` to create a new column from the array."
)
raise ValueError(msg)

if isinstance(value, (list, tuple)):
msg = f"Nested datatypes are not supported yet. Got {value}"
raise NotImplementedError(msg)

if is_nested_literal(value):
if not value:
if not dtype:
msg = "Cannot infer dtype for empty nested structure. Please provide an explicit dtype parameter."
raise ValueError(msg)
elif isinstance(value, dict):
if any(is_nested_literal(v) for v in value.values()):
msg = "Nested structures with nested values are not supported."
raise NotImplementedError(msg)
elif is_nested_literal(value[0]):
msg = "Nested structures with nested values are not supported."
raise NotImplementedError(msg)
return Expr(ExprNode(ExprKind.LITERAL, "lit", value=value, dtype=dtype))


Expand Down
Loading
Loading