Skip to content
Draft

Tmp #6974

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
041af2d
Initial tests.
pp-mo Oct 24, 2025
65bd9dd
Get 'create_cf_data_variable' to call 'create_generic_cf_array_var': …
pp-mo Oct 25, 2025
d75a7a7
Reinstate decode on load, now in-Iris coded.
pp-mo Oct 28, 2025
07efc06
Revert and amend.
pp-mo Dec 7, 2025
2321077
Hack to preserve the existing order of attributes on saved Coords and…
pp-mo Oct 29, 2025
0174e53
Fix for dataless; avoid FUTURE global state change from temporary tests.
pp-mo Oct 29, 2025
035e28b
Further fix to attribute ordering.
pp-mo Oct 29, 2025
80c4776
Fixes for data packing.
pp-mo Oct 29, 2025
d4d3ebd
Latest test-chararrays.
pp-mo Dec 7, 2025
3f10cc1
Fix search+replace error.
pp-mo Dec 7, 2025
ee2fe4c
Tiny fix in crucial place! (merge error?).
pp-mo Jan 14, 2026
744826d
Extra mock property prevents weird test crashes.
pp-mo Jan 14, 2026
a3e1217
Fix another mock problem.
pp-mo Jan 14, 2026
1a4f2f2
Initial dataset wrappers.
pp-mo Oct 31, 2025
0148f43
Various notes, choices + changes: Beginnings of encoded-dataset testing.
pp-mo Dec 3, 2025
20a5be2
Replace use of encoding functions with test-specific function: Test f…
pp-mo Dec 5, 2025
9b621bf
Radically simplify 'make_bytesarray', by using a known specified byte…
pp-mo Dec 5, 2025
b366fd2
Add read tests.
pp-mo Dec 5, 2025
cf048b2
Remove iris width control (not in this layer).
pp-mo Dec 5, 2025
e684d1d
more notes
pp-mo Dec 5, 2025
28b124c
Merge branch 'encoded_datasets' into chardata_plus_encoded_datasets
pp-mo Jan 19, 2026
a20cc45
Remove temporary test code.
pp-mo Jan 19, 2026
c995a8d
Use iris categorised warnings for unknown encodings.
pp-mo Jan 19, 2026
f118c18
Clarify the temporary load/save exercising tests (a bit).
pp-mo Jan 19, 2026
c8a27df
Use bytecoded_datasets in nc load+save, begin fixes.
pp-mo Jan 17, 2026
c4a31a4
Further attempt to satisfy warning category checker.
pp-mo Jan 19, 2026
10831d7
Fix overlength error tests.
pp-mo Jan 19, 2026
042028e
Get temporary iris load/save exercises working (todo: proper tests).
pp-mo Jan 19, 2026
94b2b21
Put encoding information into separate converter class, for use in pr…
pp-mo Jan 21, 2026
c4b7936
First proper testing (reads working).
pp-mo Jan 21, 2026
ac3e687
Encoded reading ~working; new ideas for switching (untested).
pp-mo Jan 23, 2026
9ec31fb
Check loads when coords do/not share a string dim with data.
pp-mo Jan 27, 2026
9bdeb5d
Fix nondecoded reference loads in test_bytecoded_datasets.
pp-mo Jan 27, 2026
54d7743
Test writing of string data: various encodings, from strings or bytes.
pp-mo Jan 27, 2026
6a37f62
Fix write proxy; tmp_path in stringdata tests; tidy stringdata tests.
pp-mo Jan 28, 2026
cf9594b
Fix for non-string data.
pp-mo Jan 28, 2026
ef11375
Pre-clear load problems.
pp-mo Jan 28, 2026
2dbdcba
Fix mock patches.
pp-mo Feb 27, 2026
a34ea09
Fix patches in test_CFReader.
pp-mo Feb 27, 2026
aa1fe03
Fix variable creation in odd cases.
pp-mo Feb 27, 2026
f5d50ee
Ignore attribute reordering in scaling-packed saves.
pp-mo Feb 27, 2026
b2c6d51
Fix test for refactored proxy constructor.
pp-mo Feb 27, 2026
dfd4d91
Fix get_cf_var_data to support vlen-string.
pp-mo Feb 27, 2026
274fae4
Add back new test results, folder removed in error.
pp-mo Feb 27, 2026
09137c3
Merge branch 'latest' into chardata_plus_encoded_datasets
pp-mo Mar 6, 2026
122dc92
Fix string-type check in cf to suit any of the new dtypes.
pp-mo Mar 6, 2026
0bb70e1
Remove non-working no-unit for label variables.
pp-mo Mar 6, 2026
3c44c8b
Separate asserts for ruff PT018.
pp-mo Mar 6, 2026
6e0b34a
Make encoding controls public API.
pp-mo Mar 9, 2026
2ca9f6e
Fix old label-loading tests for new chardata handling.
pp-mo Mar 9, 2026
b81f4b5
Review changes, stylistic only.
pp-mo Mar 9, 2026
2adf6ab
Fix test for new dataset type.
pp-mo Mar 9, 2026
7e58f7d
Remove obsolete not-really-a-test.
pp-mo Mar 9, 2026
0907fe8
Odd pre-commit fixes, and autoupdate.
pp-mo Mar 9, 2026
bcd3371
Replace all use of 'CFLabelVariable.cf_label_data' with standard '_ge…
pp-mo Mar 10, 2026
1612e1d
Nobble any lazy string WriteProxy creation.
pp-mo Mar 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ repos:
- id: no-commit-to-branch

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.15.4"
rev: "v0.15.5"
hooks:
- id: ruff
types: [file, python]
Expand All @@ -44,7 +44,7 @@ repos:
types: [file, python]

- repo: https://github.com/codespell-project/codespell
rev: "v2.4.1"
rev: "v2.4.2"
hooks:
- id: codespell
types_or: [asciidoc, python, markdown, rst]
Expand Down
27 changes: 12 additions & 15 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
),
)
if problem is not None:
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Skipping disallowed global attribute '{attr_name}' (see above error)"
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]


################################################################################
Expand Down Expand Up @@ -1209,9 +1209,6 @@ def get_attr_units(cf_var, attributes, capture_invalid=False):
attributes["invalid_units"] = attr_units
attr_units = UNKNOWN_UNIT_STRING

if np.issubdtype(cf_var.dtype, np.str_):
attr_units = NO_UNIT_STRING

if any(
hasattr(cf_var.cf_data, name)
for name in ("flag_values", "flag_masks", "flag_meanings")
Expand Down Expand Up @@ -1536,14 +1533,14 @@ def build_and_add_dimension_coordinate(
)
if problem is not None:
coord_var_name = str(cf_coord_var.cf_name)
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Failed to create {coord_var_name} dimension coordinate:\n"
f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]
problem.handled = True

_ = _add_or_capture(
Expand Down Expand Up @@ -1574,11 +1571,7 @@ def _build_auxiliary_coordinate(
# Get units
attr_units = get_attr_units(cf_coord_var, attributes)

# Get any coordinate point data.
if isinstance(cf_coord_var, cf.CFLabelVariable):
points_data = cf_coord_var.cf_label_data(engine.cf_var)
else:
points_data = _get_cf_var_data(cf_coord_var)
points_data = _get_cf_var_data(cf_coord_var)

# Get any coordinate bounds.
cf_bounds_var, climatological = get_cf_bounds_var(cf_coord_var)
Expand Down Expand Up @@ -1643,9 +1636,13 @@ def _add_auxiliary_coordinate(

# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
common_dims = [
dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions
]
coord_dims = cf_coord_var.dimensions
# if cf._is_str_dtype(cf_coord_var):
# coord_dims = coord_dims[:-1]
datavar_dims = engine.cf_var.dimensions
# if cf._is_str_dtype(engine.cf_var):
# datavar_dims = datavar_dims[:-1]
common_dims = [dim for dim in coord_dims if dim in datavar_dims]
data_dims = None
if common_dims:
# Calculate the offset of each common dimension.
Expand Down
79 changes: 9 additions & 70 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

import iris.exceptions
import iris.fileformats._nc_load_rules.helpers as hh
from iris.fileformats.netcdf import _thread_safe_nc
from iris.fileformats.netcdf import _bytecoding_datasets, _thread_safe_nc
from iris.mesh.components import Connectivity
import iris.util
import iris.warnings
Expand Down Expand Up @@ -72,7 +72,9 @@

# NetCDF returns a different type for strings depending on Python version.
def _is_str_dtype(var):
return np.issubdtype(var.dtype, np.bytes_)
# N.B. use 'datatype' not 'dtype', to "look inside" variable wrappers which
# represent 'S1' type data as 'U<xx>'.
return np.dtype(var.dtype).kind in "SU"


################################################################################
Expand Down Expand Up @@ -773,73 +775,6 @@ def identify(cls, variables, ignore=None, target=None, warn=True):

return result

def cf_label_data(self, cf_data_var):
"""Return the associated CF-netCDF label variable strings.

Parameters
----------
cf_data_var : :class:`iris.fileformats.cf.CFDataVariable`
The CF-netCDF data variable which the CF-netCDF label variable
describes.

Returns
-------
str labels

"""
if not isinstance(cf_data_var, CFDataVariable):
raise TypeError(
"cf_data_var argument should be of type CFDataVariable. Got %r."
% type(cf_data_var)
)

# Determine the name of the label string (or length) dimension by
# finding the dimension name that doesn't exist within the data dimensions.
str_dim_name = list(set(self.dimensions) - set(cf_data_var.dimensions))

if len(str_dim_name) != 1:
raise ValueError(
"Invalid string dimensions for CF-netCDF label variable %r"
% self.cf_name
)

str_dim_name = str_dim_name[0]
label_data = self[:]

if ma.isMaskedArray(label_data):
label_data = label_data.filled()

# Determine whether we have a string-valued scalar label
# i.e. a character variable that only has one dimension (the length of the string).
if self.ndim == 1:
label_string = b"".join(label_data).strip()
label_string = label_string.decode("utf8")
data = np.array([label_string])
else:
# Determine the index of the string dimension.
str_dim = self.dimensions.index(str_dim_name)

# Calculate new label data shape (without string dimension) and create payload array.
new_shape = tuple(
dim_len for i, dim_len in enumerate(self.shape) if i != str_dim
)
string_basetype = "|U%d"
string_dtype = string_basetype % self.shape[str_dim]
data = np.empty(new_shape, dtype=string_dtype)

for index in np.ndindex(new_shape):
# Create the slice for the label data.
if str_dim == 0:
label_index = (slice(None, None),) + index
else:
label_index = index + (slice(None, None),)

label_string = b"".join(label_data[label_index]).strip()
label_string = label_string.decode("utf8")
data[index] = label_string

return data

def cf_label_dimensions(self, cf_data_var):
"""Return the name of the associated CF-netCDF label variable data dimensions.

Expand Down Expand Up @@ -1366,7 +1301,11 @@ def __init__(self, file_source, warn=False, monotonic=False):
if isinstance(file_source, str):
# Create from filepath : open it + own it (=close when we die).
self._filename = os.path.expanduser(file_source)
self._dataset = _thread_safe_nc.DatasetWrapper(self._filename, mode="r")
if _bytecoding_datasets.DECODE_TO_STRINGS_ON_READ:
ds_type = _bytecoding_datasets.EncodedDataset
else:
ds_type = _thread_safe_nc.DatasetWrapper
self._dataset = ds_type(self._filename, mode="r")
self._own_file = True
else:
# We have been passed an open dataset.
Expand Down
8 changes: 8 additions & 0 deletions lib/iris/fileformats/netcdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@

# Note: these probably shouldn't be public, but for now they are.
from .._nc_load_rules.helpers import UnknownCellMethodWarning, parse_cell_methods
from ._bytecoding_datasets import (
DECODE_TO_STRINGS_ON_READ,
DEFAULT_READ_ENCODING,
DEFAULT_WRITE_ENCODING,
)
from .loader import DEBUG, NetCDFDataProxy, load_cubes
from .saver import (
CF_CONVENTIONS_VERSION,
Expand All @@ -42,6 +47,9 @@
"CFNameCoordMap",
"CF_CONVENTIONS_VERSION",
"DEBUG",
"DECODE_TO_STRINGS_ON_READ",
"DEFAULT_READ_ENCODING",
"DEFAULT_WRITE_ENCODING",
"MESH_ELEMENTS",
"NetCDFDataProxy",
"SPATIO_TEMPORAL_AXES",
Expand Down
Loading
Loading