Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions tiledb/dataframe_.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class ColumnInfo:

@classmethod
def from_values(cls, array_like, varlen_types=()):
from pandas import CategoricalDtype
from pandas import CategoricalDtype, StringDtype
from pandas.api import types as pd_types

if pd_types.is_object_dtype(array_like):
Expand All @@ -171,6 +171,16 @@ def from_values(cls, array_like, varlen_types=()):
raise NotImplementedError(
f"{inferred_dtype} inferred dtype not supported (column {array_like.name})"
)
elif hasattr(array_like, "dtype") and isinstance(array_like.dtype, StringDtype):
# Explicit pd.StringDtype() (name="string") is always nullable;
# auto-inferred str (name="str") depends on data
explicit = array_like.dtype.name == "string"
return cls(
np.dtype(np.str_),
repr="string" if explicit else None,
var=True,
nullable=explicit or bool(array_like.isna().any()),
)
elif hasattr(array_like, "dtype") and isinstance(
array_like.dtype, CategoricalDtype
):
Expand Down Expand Up @@ -211,6 +221,14 @@ def from_dtype(cls, dtype, column_name, varlen_types=()):
dtype = pd_types.pandas_dtype(dtype)
# Note: be careful if you rearrange the order of the following checks

# pandas StringDtype (auto-inferred 'str' and explicit 'string')
from pandas import StringDtype

if isinstance(dtype, StringDtype):
repr_val = "string" if dtype.name == "string" else None
nullable = dtype.name == "string"
return cls(np.dtype(np.str_), repr=repr_val, var=True, nullable=nullable)

# extension types
if pd_types.is_extension_array_dtype(dtype):
if libtiledb_version() < (2, 10) and pd_types.is_bool_dtype(dtype):
Expand Down Expand Up @@ -255,12 +273,7 @@ def from_dtype(cls, dtype, column_name, varlen_types=()):

# datetime types
if pd_types.is_datetime64_any_dtype(dtype):
if dtype == "datetime64[ns]":
return cls(dtype)
else:
raise NotImplementedError(
f"Only 'datetime64[ns]' datetime dtype is supported (column {column_name})"
)
return cls(dtype)

# string types
# don't use pd_types.is_string_dtype() because it includes object types too
Expand Down Expand Up @@ -517,8 +530,8 @@ def _df_to_np_arrays(df, column_infos, fillna):
if not column_info.var:
to_numpy_kwargs.update(dtype=column_info.dtype)

if column_info.nullable:
# use default 0/empty for the dtype
if column_info.nullable and column.isna().any():
# Only create nullmap if data actually has nulls
to_numpy_kwargs.update(na_value=column_info.dtype.type())
nullmaps[name] = (~column.isna()).to_numpy(dtype=np.uint8)

Expand Down
8 changes: 8 additions & 0 deletions tiledb/dense_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,14 @@ def _setitem_impl(self, selection, val, nullmaps: dict):

try:
if attr.isvar:
# Capture null mask before np.asarray() loses pandas NA info
if (
attr.isnullable
and name not in nullmaps
and hasattr(attr_val, "isna")
):
nullmaps[name] = (~attr_val.isna()).to_numpy(dtype=np.uint8)

# ensure that the value is array-convertible, for example: pandas.Series
attr_val = np.asarray(attr_val)
if attr.isnullable and name not in nullmaps:
Expand Down
6 changes: 5 additions & 1 deletion tiledb/multirange_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,11 @@ def _update_df_from_meta(
col_dtypes[name] = dtype

if col_dtypes:
df = df.astype(col_dtypes, copy=False)
# Use str instead of '<U0' so pandas uses its native string type
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is our type-mapping wrong now? I don't quite follow why we need this change

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment in code.

col_dtypes = {
name: str if dtype == "<U0" else dtype for name, dtype in col_dtypes.items()
}
df = df.astype(col_dtypes)

if index_col:
if index_col is not True:
Expand Down
9 changes: 9 additions & 0 deletions tiledb/sparse_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ def _setitem_impl_sparse(self, selection, val, nullmaps: dict):
attr_val = val[name]

try:
# Capture null mask before np.asarray() loses pandas NA info
if (
attr.isvar
and attr.isnullable
and name not in nullmaps
and hasattr(attr_val, "isna")
):
nullmaps[name] = (~attr_val.isna()).to_numpy(dtype=np.uint8)

# ensure that the value is array-convertible, for example: pandas.Series
attr_val = np.asarray(attr_val)

Expand Down
8 changes: 7 additions & 1 deletion tiledb/tests/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,17 @@ def __len__(self):
return len(self._flat_arrays)

def __getitem__(self, i):
return self._flat_arrays[i]
if isinstance(i, (int, np.integer)):
return self._flat_arrays[i]
return type(self)(self._flat_arrays[i], self._dtype)

@property
def dtype(self):
    """The dtype object describing this array's element type."""
    return self._dtype

def copy(self):
    """Return a new instance of the same class sharing this array's
    flat arrays and dtype (a shallow copy)."""
    cls = type(self)
    return cls(self._flat_arrays, self._dtype)

@property
def ndim(self):
    """Number of array dimensions; this container is always 1-D."""
    return 1