Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions examples/incomplete_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,14 @@


def check_dataframe_deps():
    """Raise if pandas is not importable.

    The example needs pandas for its dataframe functionality; no version
    pinning is enforced — any installed pandas is accepted.

    Raises:
        Exception: with an installation hint when pandas is missing.
    """
    pd_error = """Pandas is required for dataframe functionality.
    Please `pip install pandas` to proceed."""

    try:
        # Import is used purely as an availability probe.
        import pandas  # noqa: F401
    except ImportError:
        raise Exception(pd_error)


# Name of the array to create.
array_name = "incomplete_iteration"
Expand Down
13 changes: 3 additions & 10 deletions examples/parallel_csv_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,14 @@


def check_dataframe_deps():
    """Raise if pandas is not importable.

    The example needs pandas for its dataframe functionality; no version
    pinning is enforced — any installed pandas is accepted.

    Raises:
        Exception: with an installation hint when pandas is missing.
    """
    pd_error = """Pandas is required for dataframe functionality.
    Please `pip install pandas` to proceed."""

    try:
        # Import is used purely as an availability probe.
        import pandas  # noqa: F401
    except ImportError:
        raise Exception(pd_error)


def generate_csvs(csv_folder, count=9, min_length=1, max_length=109):
def make_dataframe(col_size):
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ test = [
"hypothesis",
"psutil",
"pyarrow",
"pandas<3",
"pandas",
"dask[distributed]",
]

Expand Down Expand Up @@ -118,6 +118,6 @@ test-requires = [
"hypothesis",
"psutil",
"pyarrow",
"pandas<3",
"pandas",
]
test-command = "pytest {project}"
54 changes: 25 additions & 29 deletions tiledb/dataframe_.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,14 @@


def check_dataframe_deps():
    """Raise if pandas is not importable.

    Pandas is a hard requirement for dataframe functionality; no version
    bound is enforced (the former >=1.0,<3.0 gate and the pyarrow version
    warning were dropped along with the new, unpinned install message).

    Raises:
        Exception: with an installation hint when pandas is missing.
    """
    pd_error = """Pandas is required for dataframe functionality.
    Please `pip install pandas` to proceed."""

    try:
        # Import is used purely as an availability probe.
        import pandas  # noqa: F401
    except ImportError:
        raise Exception(pd_error)


# Note: 'None' is used to indicate optionality for many of these options
# For example, if the `sparse` argument is unspecified we will default
Expand Down Expand Up @@ -154,7 +137,7 @@ class ColumnInfo:

@classmethod
def from_values(cls, array_like, varlen_types=()):
from pandas import CategoricalDtype
from pandas import CategoricalDtype, StringDtype
from pandas.api import types as pd_types

if pd_types.is_object_dtype(array_like):
Expand All @@ -171,6 +154,16 @@ def from_values(cls, array_like, varlen_types=()):
raise NotImplementedError(
f"{inferred_dtype} inferred dtype not supported (column {array_like.name})"
)
elif hasattr(array_like, "dtype") and isinstance(array_like.dtype, StringDtype):
# Explicit pd.StringDtype() (name="string") is always nullable;
# auto-inferred str (name="str") depends on data
explicit = array_like.dtype.name == "string"
return cls(
np.dtype(np.str_),
repr="string" if explicit else None,
var=True,
nullable=explicit or bool(array_like.isna().any()),
)
elif hasattr(array_like, "dtype") and isinstance(
array_like.dtype, CategoricalDtype
):
Expand Down Expand Up @@ -211,6 +204,14 @@ def from_dtype(cls, dtype, column_name, varlen_types=()):
dtype = pd_types.pandas_dtype(dtype)
# Note: be careful if you rearrange the order of the following checks

# pandas StringDtype (auto-inferred 'str' and explicit 'string')
from pandas import StringDtype

if isinstance(dtype, StringDtype):
repr_val = "string" if dtype.name == "string" else None
nullable = dtype.name == "string"
return cls(np.dtype(np.str_), repr=repr_val, var=True, nullable=nullable)

# extension types
if pd_types.is_extension_array_dtype(dtype):
if libtiledb_version() < (2, 10) and pd_types.is_bool_dtype(dtype):
Expand Down Expand Up @@ -255,12 +256,7 @@ def from_dtype(cls, dtype, column_name, varlen_types=()):

# datetime types
if pd_types.is_datetime64_any_dtype(dtype):
if dtype == "datetime64[ns]":
return cls(dtype)
else:
raise NotImplementedError(
f"Only 'datetime64[ns]' datetime dtype is supported (column {column_name})"
)
return cls(dtype)

# string types
# don't use pd_types.is_string_dtype() because it includes object types too
Expand Down Expand Up @@ -517,8 +513,8 @@ def _df_to_np_arrays(df, column_infos, fillna):
if not column_info.var:
to_numpy_kwargs.update(dtype=column_info.dtype)

if column_info.nullable:
# use default 0/empty for the dtype
if column_info.nullable and column.isna().any():
# Only create nullmap if data actually has nulls
to_numpy_kwargs.update(na_value=column_info.dtype.type())
nullmaps[name] = (~column.isna()).to_numpy(dtype=np.uint8)

Expand Down
8 changes: 8 additions & 0 deletions tiledb/dense_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,14 @@ def _setitem_impl(self, selection, val, nullmaps: dict):

try:
if attr.isvar:
# Capture null mask before np.asarray() loses pandas NA info
if (
attr.isnullable
and name not in nullmaps
and hasattr(attr_val, "isna")
):
nullmaps[name] = (~attr_val.isna()).to_numpy(dtype=np.uint8)

# ensure that the value is array-convertible, for example: pandas.Series
attr_val = np.asarray(attr_val)
if attr.isnullable and name not in nullmaps:
Expand Down
6 changes: 5 additions & 1 deletion tiledb/multirange_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,11 @@ def _update_df_from_meta(
col_dtypes[name] = dtype

if col_dtypes:
df = df.astype(col_dtypes, copy=False)
# Use str instead of '<U0' so pandas uses its native string type
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is our type-mapping wrong now? I don't quite follow why we need this change

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment in code.

col_dtypes = {
name: str if dtype == "<U0" else dtype for name, dtype in col_dtypes.items()
}
df = df.astype(col_dtypes)

if index_col:
if index_col is not True:
Expand Down
9 changes: 9 additions & 0 deletions tiledb/sparse_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ def _setitem_impl_sparse(self, selection, val, nullmaps: dict):
attr_val = val[name]

try:
# Capture null mask before np.asarray() loses pandas NA info
if (
attr.isvar
and attr.isnullable
and name not in nullmaps
and hasattr(attr_val, "isna")
):
nullmaps[name] = (~attr_val.isna()).to_numpy(dtype=np.uint8)

# ensure that the value is array-convertible, for example: pandas.Series
attr_val = np.asarray(attr_val)

Expand Down
11 changes: 3 additions & 8 deletions tiledb/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,12 @@

def has_pandas():
    """Return True if pandas can be imported, False otherwise.

    No version check is performed — mere importability is enough.
    """
    try:
        import pandas  # noqa: F401

        return True
    except ImportError:
        return False


def has_pyarrow():
try:
Expand Down
8 changes: 7 additions & 1 deletion tiledb/tests/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,17 @@ def __len__(self):
return len(self._flat_arrays)

def __getitem__(self, i):
    # Scalar (Python int or numpy integer) index returns the single
    # underlying element; any other index (slice, mask, fancy index) is
    # delegated to the flat storage and rewrapped as a new array of the
    # same extension type so pandas keeps the dtype through selections.
    # (The stale unconditional `return self._flat_arrays[i]` that made
    # the dispatch unreachable has been removed.)
    if isinstance(i, (int, np.integer)):
        return self._flat_arrays[i]
    return type(self)(self._flat_arrays[i], self._dtype)

@property
def dtype(self):
    # The ExtensionDtype instance this array was constructed with.
    return self._dtype

def copy(self):
    # Return a new array of the same type; the underlying flat arrays
    # object is passed through as-is (shallow copy), only the wrapper
    # is new.
    return type(self)(self._flat_arrays, self._dtype)

@property
def ndim(self):
    # This array type is one-dimensional by construction.
    return 1
57 changes: 25 additions & 32 deletions tiledb/tests/test_pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,32 +204,34 @@ def test_implemented(self, type_specs, info_dtype, info_repr, info_nullable):

def test_object_dtype(self):
self.assertColumnInfo(
ColumnInfo.from_values(pd.Series(["hello", "world"])), np.dtype("<U")
ColumnInfo.from_values(pd.Series(["hello", "world"], dtype=object)),
np.dtype("<U"),
)
self.assertColumnInfo(
ColumnInfo.from_values(pd.Series([b"hello", b"world"])), np.dtype("S")
ColumnInfo.from_values(pd.Series([b"hello", b"world"], dtype=object)),
np.dtype("S"),
)
for s in ["hello", b"world"], ["hello", 1], [b"hello", 1]:
pytest.raises(NotImplementedError, ColumnInfo.from_values, pd.Series(s))

def test_string_dtype(self):
    """ColumnInfo mapping for pandas string-typed Series."""
    # Auto-inferred str type: non-nullable when data has no nulls
    info = ColumnInfo.from_values(pd.Series(["hello", "world"]))
    assert info.dtype == np.dtype("<U")
    assert info.var is True
    assert info.nullable is False
    # With nulls: pandas 3+ auto-infers StringDtype which preserves null info;
    # pandas 2 uses object dtype where null detection happens in the write path
    s = pd.Series(["hello", None])
    info = ColumnInfo.from_values(s)
    assert info.dtype == np.dtype("<U")
    assert info.var is True
    # nullable flag must track whether the dtype itself carries null info
    assert info.nullable is isinstance(s.dtype, pd.StringDtype)

unsupported_type_specs = [
[np.float16, "f2"],
[np.complex64, "c8"],
[np.complex128, "c16"],
[np.datetime64, "<M8", "datetime64"],
[
"<M8[Y]",
"<M8[M]",
"<M8[W]",
"<M8[h]",
"<M8[m]",
"<M8[s]",
"<M8[ms]",
"<M8[us]",
"<M8[ps]",
"<M8[fs]",
"<M8[as]",
],
]
if hasattr(np, "float128"):
unsupported_type_specs.append([np.float128, "f16"])
Expand Down Expand Up @@ -443,7 +445,7 @@ def test_dataframe_basic_rt1_manual(self):
times = df["time"]
cccc = df["cccc"]

df = df.drop(columns=["time", "cccc"], axis=1)
df = df.drop(columns=["time", "cccc"])
A[s_ichars, times, cccc] = df.to_dict(orient="series")

with tiledb.SparseArray(uri) as A:
Expand Down Expand Up @@ -603,12 +605,12 @@ def test_dataframe_index_to_sparse_dims(self):

# ensure that all column which will be used as string dim index
# is sorted, because that is how it will be returned
if df.dtypes[col] == "O":
if pd.api.types.is_string_dtype(df.dtypes[col]):
df.sort_values(col, inplace=True)

# also ensure that string columns are converted to bytes
# b/c only TILEDB_ASCII supported for string dimension
if isinstance(df[col][0], str):
if isinstance(df[col].iloc[0], str):
df[col] = [x.encode("UTF-8") for x in df[col]]

new_df = df.drop_duplicates(subset=col)
Expand Down Expand Up @@ -1446,13 +1448,7 @@ def try_rt(name, df, pq_args={}):
tdb_uri = os.path.join(uri, f"{name}.tdb")
pq_uri = os.path.join(uri, f"{name}.pq")

df.to_parquet(
pq_uri,
# this is required to losslessly serialize timestamps
# until Parquet 2.0 is default.
use_deprecated_int96_timestamps=True,
**pq_args,
)
df.to_parquet(pq_uri, **pq_args)

tiledb.from_parquet(str(tdb_uri), str(pq_uri))
df_bk = tiledb.open_dataframe(tdb_uri)
Expand Down Expand Up @@ -1995,9 +1991,8 @@ def test_datetime64_days_dtype_read_sc25572(checked_path):
assert_dict_arrays_equal(array[:], data)
df_received = array.df[:]
df_received = df_received.set_index("d1")
tm.assert_frame_equal(
original_df, df_received, check_datetimelike_compat=True, check_dtype=False
)
# TileDB returns datetime.date objects for datetime64[D], convert both to strings
tm.assert_frame_equal(original_df.astype(str), df_received.astype(str))


def test_datetime64_days_dtype_write_sc25572(checked_path):
Expand All @@ -2024,9 +2019,7 @@ def test_datetime64_days_dtype_write_sc25572(checked_path):
with tiledb.open(uri, "r") as array:
assert_dict_arrays_equal(array[:], data)
df_received = array.df[:]
tm.assert_frame_equal(
original_df, df_received, check_datetimelike_compat=True, check_dtype=False
)
tm.assert_frame_equal(original_df, df_received, check_dtype=False)


def test_datetime64_days_dtype_read_out_of_range_sc25572(checked_path):
Expand Down