Skip to content
Draft

Tmp #6974

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
041af2d
Initial tests.
pp-mo Oct 24, 2025
65bd9dd
Get 'create_cf_data_variable' to call 'create_generic_cf_array_var': …
pp-mo Oct 25, 2025
d75a7a7
Reinstate decode on load, now in-Iris coded.
pp-mo Oct 28, 2025
07efc06
Revert and amend.
pp-mo Dec 7, 2025
2321077
Hack to preserve the existing order of attributes on saved Coords and…
pp-mo Oct 29, 2025
0174e53
Fix for dataless; avoid FUTURE global state change from temporary tests.
pp-mo Oct 29, 2025
035e28b
Further fix to attribute ordering.
pp-mo Oct 29, 2025
80c4776
Fixes for data packing.
pp-mo Oct 29, 2025
d4d3ebd
Latest test-chararrays.
pp-mo Dec 7, 2025
3f10cc1
Fix search+replace error.
pp-mo Dec 7, 2025
ee2fe4c
Tiny fix in crucial place! (merge error?).
pp-mo Jan 14, 2026
744826d
Extra mock property prevents weird test crashes.
pp-mo Jan 14, 2026
a3e1217
Fix another mock problem.
pp-mo Jan 14, 2026
1a4f2f2
Initial dataset wrappers.
pp-mo Oct 31, 2025
0148f43
Various notes, choices + changes: Beginnings of encoded-dataset testing.
pp-mo Dec 3, 2025
20a5be2
Replace use of encoding functions with test-specific function: Test f…
pp-mo Dec 5, 2025
9b621bf
Radically simplify 'make_bytesarray', by using a known specified byte…
pp-mo Dec 5, 2025
b366fd2
Add read tests.
pp-mo Dec 5, 2025
cf048b2
Remove iris width control (not in this layer).
pp-mo Dec 5, 2025
e684d1d
more notes
pp-mo Dec 5, 2025
28b124c
Merge branch 'encoded_datasets' into chardata_plus_encoded_datasets
pp-mo Jan 19, 2026
a20cc45
Remove temporary test code.
pp-mo Jan 19, 2026
c995a8d
Use iris categorised warnings for unknown encodings.
pp-mo Jan 19, 2026
f118c18
Clarify the temporary load/save exercising tests (a bit).
pp-mo Jan 19, 2026
c8a27df
Use bytecoded_datasets in nc load+save, begin fixes.
pp-mo Jan 17, 2026
c4a31a4
Further attempt to satisfy warning category checker.
pp-mo Jan 19, 2026
10831d7
Fix overlength error tests.
pp-mo Jan 19, 2026
042028e
Get temporary iris load/save exercises working (todo: proper tests).
pp-mo Jan 19, 2026
94b2b21
Put encoding information into separate converter class, for use in pr…
pp-mo Jan 21, 2026
c4b7936
First proper testing (reads working).
pp-mo Jan 21, 2026
ac3e687
Encoded reading ~working; new ideas for switching (untested).
pp-mo Jan 23, 2026
9ec31fb
Check loads when coords do/not share a string dim with data.
pp-mo Jan 27, 2026
9bdeb5d
Fix nondecoded reference loads in test_bytecoded_datasets.
pp-mo Jan 27, 2026
54d7743
Test writing of string data: various encodings, from strings or bytes.
pp-mo Jan 27, 2026
6a37f62
Fix write proxy; tmp_path in stringdata tests; tidy stringdata tests.
pp-mo Jan 28, 2026
cf9594b
Fix for non-string data.
pp-mo Jan 28, 2026
ef11375
Pre-clear load problems.
pp-mo Jan 28, 2026
2dbdcba
Fix mock patches.
pp-mo Feb 27, 2026
a34ea09
Fix patches in test_CFReader.
pp-mo Feb 27, 2026
aa1fe03
Fix variable creation in odd cases.
pp-mo Feb 27, 2026
f5d50ee
Ignore attribute reordering in scaling-packed saves.
pp-mo Feb 27, 2026
b2c6d51
Fix test for refactored proxy constructor.
pp-mo Feb 27, 2026
dfd4d91
Fix get_cf_var_data to support vlen-string.
pp-mo Feb 27, 2026
274fae4
Add back new test results, folder removed in error.
pp-mo Feb 27, 2026
09137c3
Merge branch 'latest' into chardata_plus_encoded_datasets
pp-mo Mar 6, 2026
122dc92
Fix string-type check in cf to suit any of the new dtypes.
pp-mo Mar 6, 2026
0bb70e1
Remove non-working no-unit for label variables.
pp-mo Mar 6, 2026
3c44c8b
Separate asserts for ruff PT018.
pp-mo Mar 6, 2026
6e0b34a
Make encoding controls public API.
pp-mo Mar 9, 2026
2ca9f6e
Fix old label-loading tests for new chardata handling.
pp-mo Mar 9, 2026
b81f4b5
Review changes, stylistic only.
pp-mo Mar 9, 2026
2adf6ab
Fix test for new dataset type.
pp-mo Mar 9, 2026
7e58f7d
Remove obsolete not-really-a-test.
pp-mo Mar 9, 2026
0907fe8
Odd pre-commit fixes, and autoupdate.
pp-mo Mar 9, 2026
bcd3371
Replace all use of 'CFLabelVariable.cf_label_data' with standard '_ge…
pp-mo Mar 10, 2026
1612e1d
Nobble any lazy string WriteProxy creation.
pp-mo Mar 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ repos:
- id: no-commit-to-branch

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.15.4"
rev: "v0.15.5"
hooks:
- id: ruff
types: [file, python]
Expand All @@ -44,7 +44,7 @@ repos:
types: [file, python]

- repo: https://github.com/codespell-project/codespell
rev: "v2.4.1"
rev: "v2.4.2"
hooks:
- id: codespell
types_or: [asciidoc, python, markdown, rst]
Expand Down
27 changes: 12 additions & 15 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
),
)
if problem is not None:
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Skipping disallowed global attribute '{attr_name}' (see above error)"
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]


################################################################################
Expand Down Expand Up @@ -1209,9 +1209,6 @@ def get_attr_units(cf_var, attributes, capture_invalid=False):
attributes["invalid_units"] = attr_units
attr_units = UNKNOWN_UNIT_STRING

if np.issubdtype(cf_var.dtype, np.str_):
attr_units = NO_UNIT_STRING

if any(
hasattr(cf_var.cf_data, name)
for name in ("flag_values", "flag_masks", "flag_meanings")
Expand Down Expand Up @@ -1536,14 +1533,14 @@ def build_and_add_dimension_coordinate(
)
if problem is not None:
coord_var_name = str(cf_coord_var.cf_name)
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Failed to create {coord_var_name} dimension coordinate:\n"
f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]
problem.handled = True

_ = _add_or_capture(
Expand Down Expand Up @@ -1574,11 +1571,7 @@ def _build_auxiliary_coordinate(
# Get units
attr_units = get_attr_units(cf_coord_var, attributes)

# Get any coordinate point data.
if isinstance(cf_coord_var, cf.CFLabelVariable):
points_data = cf_coord_var.cf_label_data(engine.cf_var)
else:
points_data = _get_cf_var_data(cf_coord_var)
points_data = _get_cf_var_data(cf_coord_var)

# Get any coordinate bounds.
cf_bounds_var, climatological = get_cf_bounds_var(cf_coord_var)
Expand Down Expand Up @@ -1643,9 +1636,13 @@ def _add_auxiliary_coordinate(

# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
common_dims = [
dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions
]
coord_dims = cf_coord_var.dimensions
# if cf._is_str_dtype(cf_coord_var):
# coord_dims = coord_dims[:-1]
datavar_dims = engine.cf_var.dimensions
# if cf._is_str_dtype(engine.cf_var):
# datavar_dims = datavar_dims[:-1]
common_dims = [dim for dim in coord_dims if dim in datavar_dims]
data_dims = None
if common_dims:
# Calculate the offset of each common dimension.
Expand Down
79 changes: 9 additions & 70 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

import iris.exceptions
import iris.fileformats._nc_load_rules.helpers as hh
from iris.fileformats.netcdf import _thread_safe_nc
from iris.fileformats.netcdf import _bytecoding_datasets, _thread_safe_nc
from iris.mesh.components import Connectivity
import iris.util
import iris.warnings
Expand Down Expand Up @@ -72,7 +72,9 @@

# NetCDF returns a different type for strings depending on Python version.
def _is_str_dtype(var):
return np.issubdtype(var.dtype, np.bytes_)
# N.B. use 'datatype' not 'dtype', to "look inside" variable wrappers which
# represent 'S1' type data as 'U<xx>'.
return np.dtype(var.dtype).kind in "SU"


################################################################################
Expand Down Expand Up @@ -773,73 +775,6 @@ def identify(cls, variables, ignore=None, target=None, warn=True):

return result

def cf_label_data(self, cf_data_var):
"""Return the associated CF-netCDF label variable strings.

Parameters
----------
cf_data_var : :class:`iris.fileformats.cf.CFDataVariable`
The CF-netCDF data variable which the CF-netCDF label variable
describes.

Returns
-------
str labels

"""
if not isinstance(cf_data_var, CFDataVariable):
raise TypeError(
"cf_data_var argument should be of type CFDataVariable. Got %r."
% type(cf_data_var)
)

# Determine the name of the label string (or length) dimension by
# finding the dimension name that doesn't exist within the data dimensions.
str_dim_name = list(set(self.dimensions) - set(cf_data_var.dimensions))

if len(str_dim_name) != 1:
raise ValueError(
"Invalid string dimensions for CF-netCDF label variable %r"
% self.cf_name
)

str_dim_name = str_dim_name[0]
label_data = self[:]

if ma.isMaskedArray(label_data):
label_data = label_data.filled()

# Determine whether we have a string-valued scalar label
# i.e. a character variable that only has one dimension (the length of the string).
if self.ndim == 1:
label_string = b"".join(label_data).strip()
label_string = label_string.decode("utf8")
data = np.array([label_string])
else:
# Determine the index of the string dimension.
str_dim = self.dimensions.index(str_dim_name)

# Calculate new label data shape (without string dimension) and create payload array.
new_shape = tuple(
dim_len for i, dim_len in enumerate(self.shape) if i != str_dim
)
string_basetype = "|U%d"
string_dtype = string_basetype % self.shape[str_dim]
data = np.empty(new_shape, dtype=string_dtype)

for index in np.ndindex(new_shape):
# Create the slice for the label data.
if str_dim == 0:
label_index = (slice(None, None),) + index
else:
label_index = index + (slice(None, None),)

label_string = b"".join(label_data[label_index]).strip()
label_string = label_string.decode("utf8")
data[index] = label_string

return data

def cf_label_dimensions(self, cf_data_var):
"""Return the name of the associated CF-netCDF label variable data dimensions.

Expand Down Expand Up @@ -1366,7 +1301,11 @@ def __init__(self, file_source, warn=False, monotonic=False):
if isinstance(file_source, str):
# Create from filepath : open it + own it (=close when we die).
self._filename = os.path.expanduser(file_source)
self._dataset = _thread_safe_nc.DatasetWrapper(self._filename, mode="r")
if _bytecoding_datasets.DECODE_TO_STRINGS_ON_READ:
ds_type = _bytecoding_datasets.EncodedDataset
else:
ds_type = _thread_safe_nc.DatasetWrapper
self._dataset = ds_type(self._filename, mode="r")
self._own_file = True
else:
# We have been passed an open dataset.
Expand Down
8 changes: 8 additions & 0 deletions lib/iris/fileformats/netcdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@

# Note: these probably shouldn't be public, but for now they are.
from .._nc_load_rules.helpers import UnknownCellMethodWarning, parse_cell_methods
from ._bytecoding_datasets import (
DECODE_TO_STRINGS_ON_READ,
DEFAULT_READ_ENCODING,
DEFAULT_WRITE_ENCODING,
)
from .loader import DEBUG, NetCDFDataProxy, load_cubes
from .saver import (
CF_CONVENTIONS_VERSION,
Expand All @@ -42,6 +47,9 @@
"CFNameCoordMap",
"CF_CONVENTIONS_VERSION",
"DEBUG",
"DECODE_TO_STRINGS_ON_READ",
"DEFAULT_READ_ENCODING",
"DEFAULT_WRITE_ENCODING",
"MESH_ELEMENTS",
"NetCDFDataProxy",
"SPATIO_TEMPORAL_AXES",
Expand Down
Loading
Loading