apache · ueshin · Mar 23, 2026
diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py
@@ -51,6 +51,7 @@
     extension_float_dtypes_available,
     extension_object_dtypes_available,
     handle_dtype_as_extension_dtype,
+    is_str_dtype,
     spark_type_to_pandas_dtype,
 )
 
@@ -193,7 +194,7 @@ def _as_string_type(
     representing null Spark column. Note that `null_str` is for non-extension dtypes only.
     """
     spark_type = StringType()
-    if handle_dtype_as_extension_dtype(dtype):
+    if handle_dtype_as_extension_dtype(dtype) or is_str_dtype(dtype):
         scol = index_ops.spark.column.cast(spark_type)
     else:
         casted = index_ops.spark.column.cast(spark_type)

diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -39,6 +39,7 @@
 from pyspark.pandas.typedef.typehints import (
     as_spark_type,
     handle_dtype_as_extension_dtype,
+    is_str_dtype,
     pandas_on_spark_type,
 )
 from pyspark.pandas.utils import is_ansi_mode_enabled
@@ -326,12 +327,12 @@ def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> Ind
         elif isinstance(spark_type, BooleanType):
             return _as_bool_type(index_ops, dtype)
         elif isinstance(spark_type, StringType):
-            if handle_dtype_as_extension_dtype(dtype):
+            if handle_dtype_as_extension_dtype(dtype) or is_str_dtype(dtype):
                 scol = F.when(
                     index_ops.spark.column.isNotNull(),
                     F.when(index_ops.spark.column, "True").otherwise("False"),
                 )
-                nullable = index_ops.spark.nullable
+                nullable = index_ops.spark.nullable or is_str_dtype(dtype)
             else:
                 null_str = str(pd.NA) if isinstance(self, BooleanExtensionOps) else str(None)
                 casted = F.when(index_ops.spark.column, "True").otherwise("False")

diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py
@@ -33,7 +33,11 @@
     _as_string_type,
     _sanitize_list_like,
 )
-from pyspark.pandas.typedef import handle_dtype_as_extension_dtype, pandas_on_spark_type
+from pyspark.pandas.typedef import (
+    handle_dtype_as_extension_dtype,
+    is_str_dtype,
+    pandas_on_spark_type,
+)
 from pyspark.sql.types import BooleanType
 
 
@@ -128,7 +132,10 @@ def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> Ind
             if handle_dtype_as_extension_dtype(dtype):
                 scol = index_ops.spark.column.cast(spark_type)
             else:
-                scol = F.when(index_ops.spark.column.isNull(), F.lit(False)).otherwise(
+                # pandas 3 maps `str` to StringDtype, where astype(bool)
+                # treats missing values as True.
+                null_value = F.lit(True) if is_str_dtype(self.dtype) else F.lit(False)
+                scol = F.when(index_ops.spark.column.isNull(), null_value).otherwise(
                     F.length(index_ops.spark.column) > 0
                 )
             return index_ops._with_new_scol(