Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/pyspark/pandas/data_type_ops/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
extension_float_dtypes_available,
extension_object_dtypes_available,
handle_dtype_as_extension_dtype,
is_str_dtype,
spark_type_to_pandas_dtype,
)

Expand Down Expand Up @@ -193,7 +194,7 @@ def _as_string_type(
representing null Spark column. Note that `null_str` is for non-extension dtypes only.
"""
spark_type = StringType()
if handle_dtype_as_extension_dtype(dtype):
if handle_dtype_as_extension_dtype(dtype) or is_str_dtype(dtype):
scol = index_ops.spark.column.cast(spark_type)
else:
casted = index_ops.spark.column.cast(spark_type)
Expand Down
5 changes: 3 additions & 2 deletions python/pyspark/pandas/data_type_ops/boolean_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from pyspark.pandas.typedef.typehints import (
as_spark_type,
handle_dtype_as_extension_dtype,
is_str_dtype,
pandas_on_spark_type,
)
from pyspark.pandas.utils import is_ansi_mode_enabled
Expand Down Expand Up @@ -326,12 +327,12 @@ def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> Ind
elif isinstance(spark_type, BooleanType):
return _as_bool_type(index_ops, dtype)
elif isinstance(spark_type, StringType):
if handle_dtype_as_extension_dtype(dtype):
if handle_dtype_as_extension_dtype(dtype) or is_str_dtype(dtype):
scol = F.when(
index_ops.spark.column.isNotNull(),
F.when(index_ops.spark.column, "True").otherwise("False"),
)
nullable = index_ops.spark.nullable
nullable = index_ops.spark.nullable or is_str_dtype(dtype)
else:
null_str = str(pd.NA) if isinstance(self, BooleanExtensionOps) else str(None)
casted = F.when(index_ops.spark.column, "True").otherwise("False")
Expand Down
11 changes: 9 additions & 2 deletions python/pyspark/pandas/data_type_ops/string_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
_as_string_type,
_sanitize_list_like,
)
from pyspark.pandas.typedef import handle_dtype_as_extension_dtype, pandas_on_spark_type
from pyspark.pandas.typedef import (
handle_dtype_as_extension_dtype,
is_str_dtype,
pandas_on_spark_type,
)
from pyspark.sql.types import BooleanType


Expand Down Expand Up @@ -128,7 +132,10 @@ def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> Ind
if handle_dtype_as_extension_dtype(dtype):
scol = index_ops.spark.column.cast(spark_type)
else:
scol = F.when(index_ops.spark.column.isNull(), F.lit(False)).otherwise(
# pandas 3 maps `str` to StringDtype, where astype(bool)
# treats missing values as True.
null_value = F.lit(True) if is_str_dtype(self.dtype) else F.lit(False)
scol = F.when(index_ops.spark.column.isNull(), null_value).otherwise(
F.length(index_ops.spark.column) > 0
)
return index_ops._with_new_scol(
Expand Down