diff --git a/docs/history.md b/docs/history.md index 536cf982..e2cc9112 100644 --- a/docs/history.md +++ b/docs/history.md @@ -2,6 +2,7 @@ ## Development +* FIX: Preserve CfRadial metadata groups during CfRadial1/CfRadial2 transforms, allow ``to_cfradial2`` to reopen root-only ``engine="cfradial1"`` datasets, and normalize sweep metadata after ``map_over_sweeps`` operations so filtered volumes can be exported to CfRadial1 again ({issue}`254`, {issue}`322`) by [@syedhamidali](https://github.com/syedhamidali) * ENH: Expose ``target_crs`` on ``.xradar.georeference()`` accessor for DataArray, Dataset and DataTree, enabling reprojection via e.g. ``radar.xradar.georeference(target_crs=4326)`` ({issue}`243`) by [@syedhamidali](https://github.com/syedhamidali) ## 0.12.0 (2026-04-21) diff --git a/tests/io/test_cfradial1.py b/tests/io/test_cfradial1.py index fad28442..28ec2b09 100644 --- a/tests/io/test_cfradial1.py +++ b/tests/io/test_cfradial1.py @@ -3,10 +3,12 @@ # Distributed under the MIT License. See LICENSE for more info. +import numpy as np import xarray as xr from open_radar_data import DATASETS import xradar as xd +from xradar.io.export import cfradial1 as cf1_export def test_cfradial1_open_mfdataset_context_manager(cfradial1_file): @@ -47,3 +49,99 @@ def test_compare_sweeps(temp_file): xr.testing.assert_equal( dtree[f"sweep_{sweep_num}"].ds, dtree1[f"sweep_{sweep_num}"].ds ) + + +def test_cfradial1_export_helper_scalar_normalization(): + assert cf1_export._first_valid_scalar(xr.DataArray(np.array([np.nan, 3.0]))) == 3.0 + + masked = xr.DataArray(np.ma.array([1.0, 2.0], mask=[True, False])) + assert cf1_export._first_valid_scalar(masked) == 2.0 + + nat = xr.DataArray(np.array(["NaT", "2025-01-01T00:00:00"], dtype="datetime64[ns]")) + assert np.datetime64(cf1_export._first_valid_scalar(nat), "ns") == np.datetime64( + "2025-01-01T00:00:00", "ns" + ) + + text = xr.DataArray(np.array(["azimuth_surveillance"], dtype=object)) + assert cf1_export._first_valid_scalar(text) == "azimuth_surveillance" + + missing = xr.DataArray(np.array([np.nan, np.nan])) + assert np.isnan(cf1_export._first_valid_scalar(missing)) + + +def test_cfradial1_export_helper_metadata_and_indices(): + sweep = xr.Dataset( + data_vars={ + "sweep_number": ( + ("azimuth", "range"), + np.array([[0.0, np.nan], [0.0, np.nan]]), + ), + "sweep_mode": ( + ("azimuth", "range"), + np.array( + [ + ["azimuth_surveillance", None], + ["azimuth_surveillance", None], + ], + dtype=object, + ), + ), + "DBZ": (("azimuth", "range"), np.ones((2, 2), dtype="float32")), + }, + coords={ + "azimuth": ("azimuth", np.array([0.0, 1.0], dtype="float32")), + "range": ("range", np.array([100.0, 200.0], dtype="float32")), + "time": ( + "azimuth", + np.array(["2025-01-01", "2025-01-01"], dtype="datetime64[ns]"), + ), + "elevation": ("azimuth", np.array([0.5, 0.5], dtype="float32")), + }, + ) + + normalized = cf1_export._normalize_sweep_metadata(sweep) + assert normalized["sweep_number"].dims == () + assert normalized["sweep_number"].item() == 0.0 + assert normalized["sweep_mode"].dims == () + assert normalized["sweep_mode"].item() == "azimuth_surveillance" + + valid = xr.DataTree.from_dict( + { + "/": xr.Dataset(), + "/sweep_0": xr.Dataset( + coords={"elevation": ("azimuth", np.array([0.5, 0.5], dtype="float32"))} + ), + } + ) + out = cf1_export.calculate_sweep_indices(valid) + assert out["sweep_start_ray_index"].dims == ("sweep",) + assert out["sweep_end_ray_index"].dims == ("sweep",) + + +def test_cfradial1_export_helper_empty_sweep_info_and_time_fallback(): + empty = xr.DataTree.from_dict({"/": xr.Dataset()}) + sweep_info = cf1_export._sweep_info_mapper(empty) + assert "sweep_number" in sweep_info + assert np.isnan(sweep_info["sweep_number"].values[0]) + + sweep = xr.Dataset( + data_vars={ + "DBZ": (("time", "range"), np.ones((2, 2), dtype="float32")), + "sweep_mode": ((), "manual"), + "sweep_number": ((), 0), + "sweep_fixed_angle": ((), 0.5), + }, + coords={ + "time": ( + "time", + np.array(["2025-01-01", "2025-01-01T00:00:01"], dtype="datetime64[ns]"), + ), + "range": ("range", np.array([100.0, 200.0], dtype="float32")), + "azimuth": ("time", np.array([0.0, 1.0], dtype="float32")), + "elevation": ("time", np.array([0.5, 0.5], dtype="float32")), + }, + ) + dtree = xr.DataTree.from_dict({"/": xr.Dataset(), "/sweep_0": sweep}) + mapped = cf1_export._variable_mapper(dtree) + assert "DBZ" in mapped + assert mapped["DBZ"].dims == ("time", "range") diff --git a/tests/transform/test_cfradial.py b/tests/transform/test_cfradial.py index f1824094..daea2e91 100644 --- a/tests/transform/test_cfradial.py +++ b/tests/transform/test_cfradial.py @@ -4,6 +4,7 @@ import pytest import xarray as xr +from open_radar_data import DATASETS from xarray import MergeError import xradar as xd @@ -12,7 +13,6 @@ def test_to_cfradial1(cfradial1_file): """Test the conversion from DataTree to CfRadial1 format.""" with xd.io.open_cfradial1_datatree(cfradial1_file) as dtree: - # Call the conversion function ds_cf1 = xd.transform.to_cfradial1(dtree) @@ -30,7 +30,6 @@ def test_to_cfradial1(cfradial1_file): def test_to_cfradial2(cfradial1_file): """Test the conversion from CfRadial1 to CfRadial2 DataTree format.""" with xd.io.open_cfradial1_datatree(cfradial1_file, optional_groups=True) as dtree: - # Convert to CfRadial1 dataset first ds_cf1 = xd.transform.to_cfradial1(dtree) @@ -40,12 +39,33 @@ def test_to_cfradial2(cfradial1_file): # Verify key attributes and data structures in the resulting datatree assert isinstance(dtree_cf2, xr.DataTree), "Output is not a valid DataTree" assert "radar_parameters" in dtree_cf2, "Missing radar_parameters in DataTree" + assert ( + "georeferencing_correction" in dtree_cf2 + ), "Missing georeferencing_correction in DataTree" # Round-tripped attrs should be a subset of the original — backend-specific # attrs (e.g. NEXRAD ICD metadata) are not preserved by the generic reader. for key, val in dtree_cf2.attrs.items(): assert ds_cf1.attrs[key] == val, f"Attr {key!r} mismatch after round-trip" +def test_to_cfradial2_root_only_dataset_reopens_source(cfradial1_file): + ds = xr.open_dataset(cfradial1_file, engine="cfradial1") + + dtree = xd.transform.to_cfradial2(ds) + + assert isinstance(dtree, xr.DataTree) + assert "georeferencing_correction" in dtree + assert "sweep_0" in dtree + + +def test_to_cfradial2_root_only_dataset_without_source_errors(cfradial1_file): + ds = xr.open_dataset(cfradial1_file, engine="cfradial1").copy() + ds.encoding = {} + + with pytest.raises(ValueError, match="does not retain a readable"): + xd.transform.to_cfradial2(ds) + + def test_to_cfradial1_with_different_range_shapes(nexradlevel2_bzfile): with xd.io.open_nexradlevel2_datatree(nexradlevel2_bzfile) as dtree: ds_cf1 = xd.transform.to_cfradial1(dtree) @@ -76,3 +96,31 @@ def test_to_cfradial1_error_with_different_range_bin_sizes(gamic_file): with xd.io.open_gamic_datatree(gamic_file) as dtree: with pytest.raises(MergeError): xd.transform.to_cfradial1(dtree) + + +def test_to_cfradial1_after_map_over_sweeps_where(tmp_path): + file = DATASETS.fetch("swx_20120520_0641.nc") + dtree = xd.io.open_cfradial1_datatree(file) + + def filter_radar( + ds, + vel_name="mean_doppler_velocity", + ref_name="corrected_reflectivity_horizontal", + ): + vel_texture = ds[vel_name].rolling(range=30, min_periods=2, center=True).std() + ds = ds.assign(velocity_texture=vel_texture) + return ds.where( + (ds.velocity_texture < 10) & ((ds[ref_name] >= -10) & (ds[ref_name] <= 75)) + ) + + filtered = dtree.xradar.map_over_sweeps(filter_radar) + ds_cf1 = xd.transform.to_cfradial1(filtered) + + assert ds_cf1["sweep_number"].dims == ("sweep",) + assert ds_cf1["fixed_angle"].dims == ("sweep",) + assert ds_cf1["sweep_mode"].dims == ("sweep",) + + outfile = tmp_path / "filtered_cfradial1.nc" + xd.io.to_cfradial1(filtered, outfile) + reopened = xd.io.open_cfradial1_datatree(outfile) + assert "sweep_0" in reopened diff --git a/xradar/io/export/cfradial1.py b/xradar/io/export/cfradial1.py index 7dbb5f66..daf5bafc 100644 --- a/xradar/io/export/cfradial1.py +++ b/xradar/io/export/cfradial1.py @@ -33,6 +33,47 @@ import xarray as xr +def _first_valid_scalar(data_array): + """Collapse a metadata variable to its first non-missing scalar value.""" + if data_array.ndim == 0: + if data_array.notnull().item(): + return data_array.to_numpy()[()] + return np.nan + + flat = data_array.stack(_flat=data_array.dims) + valid = flat.where(flat.notnull(), drop=True) + if valid.size: + return valid.isel(_flat=0).to_numpy()[()] + return np.nan + + +def _normalize_sweep_metadata(data): + """Restore sweep metadata variables to scalar form before CfRadial1 export.""" + metadata_vars = [ + "sweep_number", + "sweep_mode", + "polarization_mode", + "prt_mode", + "follow_mode", + "sweep_fixed_angle", + "sweep_start_ray_index", + "sweep_end_ray_index", + ] + + data = data.copy() + for name in metadata_vars: + if name not in data: + continue + if data[name].ndim == 0: + continue + data[name] = xr.DataArray( + _first_valid_scalar(data[name]), + attrs=data[name].attrs, + ) + + return data + + def _calib_mapper(calib_params): """ Map calibration parameters to a new dataset format. @@ -109,13 +150,15 @@ def _variable_mapper(dtree, dim0=None): sweep_datasets = [] for grp in dtree.groups: if "sweep" in grp: - data = dtree[grp].to_dataset(inherit="all_coords") + data = _normalize_sweep_metadata( + dtree[grp].to_dataset(inherit="all_coords") + ) # handling first dimension if dim0 is None: dim0 = ( "elevation" - if data.sweep_mode.load().astype(str) == "rhi" + if str(data.sweep_mode.load().values) == "rhi" else "azimuth" ) if dim0 not in data.dims: @@ -200,7 +243,7 @@ def _sweep_info_mapper(dtree): for var_name in sweep_vars: var_data_list = [ ( - np.unique(dtree[s][var_name].values[np.newaxis, ...]) + np.asarray([_first_valid_scalar(dtree[s][var_name])]) if var_name in dtree[s] else np.array([np.nan]) ) diff --git a/xradar/transform/cfradial.py b/xradar/transform/cfradial.py index 6a3d30df..ff8978d6 100644 --- a/xradar/transform/cfradial.py +++ b/xradar/transform/cfradial.py @@ -66,6 +66,7 @@ _get_required_root_dataset, _get_subgroup, _get_sweep_groups, + open_cfradial1_datatree, ) from ..io.backends.common import _attach_sweep_groups from ..io.export.cfradial1 import ( @@ -133,24 +134,46 @@ def to_cfradial2(ds, **kwargs): kwargs.pop("site_as_coords", True) sweep = kwargs.pop("sweep", None) + # Datasets opened via ``engine="cfradial1"`` without a group only expose the + # normalized root metadata view. If the original source path is available, + # reopen the full volume and delegate to the standard datatree reader. + if "sweep_number" not in ds and "fixed_angle" not in ds: + if "sweep_group_name" in ds and "sweep_fixed_angle" in ds: + source = ds.encoding.get("source") + if source: + return open_cfradial1_datatree( + source, + sweep=sweep, + first_dim=first_dim, + optional=optional, + optional_groups=True, + ) + raise ValueError( + "to_cfradial2 requires a full CfRadial1 volume dataset containing " + "`sweep_number` and `fixed_angle`. " + "The provided dataset looks like the normalized root metadata view " + 'returned by `xarray.open_dataset(..., engine="cfradial1")`, but ' + "it does not retain a readable `encoding['source']` path to reopen " + "the full volume." + ) + # Create DataTree root node with required data root_data = _get_required_root_dataset(ds, optional=optional) dtree = DataTree(dataset=root_data, name="root") # Attach additional root metadata groups as child nodes radar_parameters = _get_subgroup(ds, radar_parameters_subgroup) - if radar_parameters: + if radar_parameters.data_vars: dtree["radar_parameters"] = DataTree(radar_parameters, name="radar_parameters") calib = _get_radar_calibration(ds) - if calib: + if calib.data_vars: dtree["radar_calibration"] = DataTree(calib, name="radar_calibration") georeferencing = _get_subgroup(ds, georeferencing_correction_subgroup) - if georeferencing: - dtree["georeferencing_correction"] = DataTree( - georeferencing, name="georeferencing_correction" - ) + dtree["georeferencing_correction"] = DataTree( + georeferencing, name="georeferencing_correction" + ) # Attach sweep child nodes sweep_groups = list(