Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 201 additions & 6 deletions pyresample/future/geometry/_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,61 @@
from typing import TYPE_CHECKING, Any

import numpy as np
from pyproj import Proj

# this caching module imports the geometries so this subset module
# must be imported inside functions in the geometry modules if needed
# to avoid circular dependencies
from pyresample._caching import cache_to_json_if
from pyresample.boundary import Boundary
from pyresample.geometry import get_geostationary_bounding_box_in_lonlats, logger
from pyresample.geometry import (
DEFAULT_AREA_SLICE_SAMPLE_STEPS,
get_geostationary_bounding_box_in_lonlats,
logger,
)
from pyresample.utils import check_slice_orientation

if TYPE_CHECKING:
from pyresample import AreaDefinition


MAX_POINTS_PER_CHUNK = 600_000


@cache_to_json_if("cache_geometry_slices")
def get_area_slices(
src_area: AreaDefinition,
area_to_cover: AreaDefinition,
shape_divisible_by: int | None,
sample_steps: int | None = DEFAULT_AREA_SLICE_SAMPLE_STEPS,
sample_grid: bool = False,
) -> tuple[slice, slice]:
"""Compute the slice to read based on an `area_to_cover`."""
"""Compute the slice to read based on an `area_to_cover`.

For geostationary source areas in cross-projection mode:
- ``sample_steps`` >= 2 and ``sample_grid=False`` samples edge points only.
- ``sample_steps`` >= 2 and ``sample_grid=True`` samples an interior grid.
- ``sample_steps`` <= 0 or ``None`` and ``sample_grid=False`` samples all edge points.
- ``sample_steps`` <= 0 or ``None`` and ``sample_grid=True`` samples all destination points.
- ``sample_steps`` == 1 raises ``ValueError``.
"""
if not _is_area_like(src_area):
raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(src_area)}")
if not _is_area_like(area_to_cover):
raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(area_to_cover)}")

# Intersection only required for two different projections
proj_def_to_cover = area_to_cover.crs
proj_def = src_area.crs
if proj_def_to_cover == proj_def:
normalized_sample_steps = _normalize_sample_steps(sample_steps)

# Intersection is only required for two different projections.
src_crs_wkt = getattr(src_area, "crs_wkt", None)
dst_crs_wkt = getattr(area_to_cover, "crs_wkt", None)
if src_crs_wkt is not None and src_crs_wkt == dst_crs_wkt:
proj_def_to_cover = src_crs_wkt
else:
proj_def_to_cover = area_to_cover.crs
if proj_def_to_cover != src_area.crs:
proj_def_to_cover = None
if proj_def_to_cover is not None:
logger.debug('Projections for data and slice areas are identical: %s',
proj_def_to_cover)
# Get slice parameters
Expand All @@ -45,6 +71,16 @@ def get_area_slices(
y_slice = _ensure_integer_slice(y_slice)
return x_slice, y_slice

if src_area.is_geostationary:
coverage_slices = _get_covered_source_slices(
src_area,
area_to_cover,
sample_steps=normalized_sample_steps,
sample_grid=sample_grid,
)
if coverage_slices is not None:
return _finalize_slices(src_area, coverage_slices[0], coverage_slices[1], shape_divisible_by)

data_boundary = _get_area_boundary(src_area)
area_boundary = _get_area_boundary(area_to_cover)
intersection = data_boundary.contour_poly.intersection(
Expand All @@ -57,6 +93,33 @@ def get_area_slices(
np.rad2deg(intersection.lon), np.rad2deg(intersection.lat))
x_slice = slice(np.ma.min(x), np.ma.max(x) + 1)
y_slice = slice(np.ma.min(y), np.ma.max(y) + 1)

return _finalize_slices(src_area, x_slice, y_slice, shape_divisible_by)


def _normalize_sample_steps(sample_steps: int | None):
"""Normalize sampling config to sampled mode or ALL-mode fallback.

``None`` and values ``<= 0`` map to ``ALL`` mode, whose exact behavior is
determined later by ``sample_grid``.
A value of ``1`` is rejected because it does not provide meaningful sampled
coverage for edge or grid modes.
"""
if sample_steps is None:
return None
try:
normalized_sample_steps = int(sample_steps)
except (TypeError, ValueError) as err:
raise ValueError(f"sample_steps must be an integer or None, got {sample_steps!r}") from err
if normalized_sample_steps <= 0:
return None
if normalized_sample_steps == 1:
raise ValueError("sample_steps=1 is not supported; use <= 0/None for ALL mode or >= 2 for sampled modes.")
return normalized_sample_steps


def _finalize_slices(src_area: AreaDefinition, x_slice: slice, y_slice: slice, shape_divisible_by: int | None):
"""Normalize slice bounds and apply orientation/divisibility rules."""
x_slice = _ensure_integer_slice(x_slice)
y_slice = _ensure_integer_slice(y_slice)
if shape_divisible_by is not None:
Expand Down Expand Up @@ -105,6 +168,137 @@ def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary:
raise NotImplementedError("Can't determine boundary of area to cover") from err


def _get_covered_source_slices(
src_area: AreaDefinition,
area_to_cover: AreaDefinition,
sample_steps: int | None,
sample_grid: bool,
):
"""Estimate covering source slices from sampled destination points.

Returns ``None`` when sampled points do not produce any valid source
coverage, allowing the caller to fall back to boundary intersection.
"""
min_col = None
max_col = None
min_row = None
max_row = None
try:
src_proj = Proj(src_area.crs)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pyproj Proj class is less preferred these days (deprecated even?) in favor of Transformer. This is an even stronger argument for maybe looking at not using lons/lats as the intermediate value. By that I mean since you have two define the src and dst CRS in a Transformer you might as well go from source area CRS to area_to_cover CRS. At least that's my gut reaction as I look at this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Transformer.from_proj is deprecated, but Proj is not. Sample points are in lon/lat units, so Proj is the cleanest API to use in this case: we do lon/lat->CRS, not general CRS->CRS. get_projection_coordinates_from_lonlat uses a similar technique.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm I could have sworn there was an explicit deprecation, but can't find it now. However, Proj is limited datum-wise:

https://pyproj4.github.io/pyproj/stable/gotchas.html#proj-not-a-generic-latitude-longitude-to-projection-converter

for destination_lons, destination_lats in _iter_destination_lonlat_samples(
area_to_cover=area_to_cover,
sample_steps=sample_steps,
sample_grid=sample_grid,
):
source_xs, source_ys = src_proj(
destination_lons,
destination_lats,
)
source_cols, source_rows = src_area.get_array_indices_from_projection_coordinates(
source_xs,
source_ys,
)
valid = ~np.ma.getmaskarray(source_cols) & ~np.ma.getmaskarray(source_rows)
if not valid.any():
continue
chunk_cols = np.ma.getdata(source_cols)[valid]
chunk_rows = np.ma.getdata(source_rows)[valid]
chunk_min_col = int(chunk_cols.min())
chunk_max_col = int(chunk_cols.max())
chunk_min_row = int(chunk_rows.min())
chunk_max_row = int(chunk_rows.max())
min_col = chunk_min_col if min_col is None else min(min_col, chunk_min_col)
max_col = chunk_max_col if max_col is None else max(max_col, chunk_max_col)
min_row = chunk_min_row if min_row is None else min(min_row, chunk_min_row)
max_row = chunk_max_row if max_row is None else max(max_row, chunk_max_row)
except (RuntimeError, TypeError, ValueError):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This worries me that there isn't a clear "this is why this error happens" definition and is instead "catch any problems". I'd like if it was clear why some of these errors are happening and to try to handle them before they happen. Or at the very least comment on why they would happen.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I tried to follow project patterns with that one. Such a pattern is very common in the codebase. Generic except catches.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Such a pattern is very common in the codebase

Yes, this project has never had the strictest of coding rules.

logger.debug("Failed to estimate covered source slices from sampled destination points.", exc_info=True)
return None
if min_col is None or min_row is None or max_col is None or max_row is None:
return None
col_start = max(0, min_col)
col_stop = min(src_area.width, max_col + 1)
row_start = max(0, min_row)
row_stop = min(src_area.height, max_row + 1)
if col_start >= col_stop or row_start >= row_stop:
return None
return slice(col_start, col_stop), slice(row_start, row_stop)


def _iter_destination_lonlat_samples(
area_to_cover: AreaDefinition,
sample_steps: int | None,
sample_grid: bool,
):
"""Yield destination lon/lat samples for ALL, grid, or edge mode."""
if sample_steps is None:
if sample_grid:
yield from _iter_dense_lonlat_samples(area_to_cover)
return
yield _get_edge_lonlat_samples(area_to_cover, sample_steps=None)
return
if sample_grid:
yield _get_grid_lonlat_samples(area_to_cover, sample_steps)
return
yield _get_edge_lonlat_samples(area_to_cover, sample_steps)
Comment on lines +234 to +243
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could be wrong but I don't think this logic matches GDAL. I read the GDAL docs as sample_grid=False and sample_steps being ALL (0/None in our case) that that would do all the pixels along the edges. When sample_grid=True then sample_steps=ALL/None is the full dense grid (edges and all internal points).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed that



def _iter_dense_lonlat_samples(area_to_cover: AreaDefinition):
"""Yield full destination lon/lat coverage in row chunks."""
row_block_size = max(1, MAX_POINTS_PER_CHUNK // area_to_cover.width)
for row_start in range(0, area_to_cover.height, row_block_size):
row_stop = min(area_to_cover.height, row_start + row_block_size)
yield area_to_cover.get_lonlats(
data_slice=(slice(row_start, row_stop), slice(None)),
dtype=np.float32,
)


def _get_grid_lonlat_samples(area_to_cover: AreaDefinition, sample_steps: int):
"""Return one evenly spaced interior destination sample grid."""
sample_rows = _get_sample_indices(area_to_cover.height, sample_steps)
sample_cols = _get_sample_indices(area_to_cover.width, sample_steps)
return area_to_cover.get_lonlats(
data_slice=(sample_rows[:, None], sample_cols[None, :]),
dtype=np.float32,
)


def _get_edge_lonlat_samples(area_to_cover: AreaDefinition, sample_steps: int | None):
"""Return perimeter destination samples for edge-only sampling mode."""
if area_to_cover.is_geostationary:
# Use limb-aware geostationary boundary sampling instead of raw array
# corners/edges. Projection corners may be off-earth and can under-cover.
if sample_steps is None:
nb_points = max(4, area_to_cover.width * 2 + area_to_cover.height * 2 - 4)
else:
nb_points = max(4, sample_steps * 4)
return get_geostationary_bounding_box_in_lonlats(
area_to_cover,
nb_points=nb_points,
)
sample_rows = _get_sample_indices(area_to_cover.height, sample_steps)
sample_cols = _get_sample_indices(area_to_cover.width, sample_steps)
top_rows = np.zeros(sample_cols.size, dtype=np.int64)
top_cols = sample_cols
right_rows = sample_rows[1:]
right_cols = np.full(right_rows.size, area_to_cover.width - 1, dtype=np.int64)
bottom_cols = sample_cols[-2::-1]
bottom_rows = np.full(bottom_cols.size, area_to_cover.height - 1, dtype=np.int64)
left_rows = sample_rows[-2:0:-1]
left_cols = np.zeros(left_rows.size, dtype=np.int64)
edge_rows = np.concatenate((top_rows, right_rows, bottom_rows, left_rows))
edge_cols = np.concatenate((top_cols, right_cols, bottom_cols, left_cols))
return area_to_cover.get_lonlats(data_slice=(edge_rows, edge_cols), dtype=np.float32)


def _get_sample_indices(axis_size: int, sample_steps: int | None):
"""Return evenly spaced integer sample indices including both endpoints."""
if sample_steps is None or sample_steps >= axis_size:
return np.arange(axis_size, dtype=np.int64)
return (np.arange(sample_steps, dtype=np.int64) * (axis_size - 1)) // (sample_steps - 1)


def _make_slice_divisible(sli, max_size, factor=2):
"""Make the given slice even in size."""
rem = (sli.stop - sli.start) % factor
Expand All @@ -121,6 +315,7 @@ def _make_slice_divisible(sli, max_size, factor=2):


def _ensure_integer_slice(sli):
"""Round slice bounds outward to conservatively preserve target coverage."""
start = sli.start
stop = sli.stop
step = sli.step
Expand Down
38 changes: 34 additions & 4 deletions pyresample/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@

logger = getLogger(__name__)

DEFAULT_AREA_SLICE_SAMPLE_STEPS = 21


class DimensionError(ValueError):
"""Wrap ValueError."""
Expand Down Expand Up @@ -619,7 +621,13 @@ def overlap_rate(self, other):
inter_area = get_polygon_area(self.intersection(other))
return inter_area / other_area

def get_area_slices(self, area_to_cover):
def get_area_slices(
self,
area_to_cover,
shape_divisible_by=None,
sample_steps=DEFAULT_AREA_SLICE_SAMPLE_STEPS,
sample_grid=False,
):
"""Compute the slice to read based on an `area_to_cover`."""
raise NotImplementedError

Expand Down Expand Up @@ -2651,10 +2659,32 @@ def proj4_string(self):
"instead.", DeprecationWarning, stacklevel=2)
return proj4_dict_to_str(self.proj_dict)

def get_area_slices(self, area_to_cover, shape_divisible_by=None):
"""Compute the slice to read based on an `area_to_cover`."""
def get_area_slices(
self,
area_to_cover,
shape_divisible_by=None,
sample_steps=DEFAULT_AREA_SLICE_SAMPLE_STEPS,
sample_grid=False,
):
"""Compute the slice to read based on an `area_to_cover`.

Args:
area_to_cover: Destination area to crop around.
shape_divisible_by: Optional factor to make the resulting shape divisible by.
sample_steps: Number of edge/grid samples used for geostationary cross-projection coverage checks.
Use ``None`` or ``<= 0`` for ``ALL`` mode: all edge points when ``sample_grid=False``
and all destination pixels when ``sample_grid=True``.
sample_grid: If ``True``, sample an interior grid with ``sample_steps`` density.
If ``False``, sample edge points only.
"""
from .future.geometry._subset import get_area_slices
return get_area_slices(self, area_to_cover, shape_divisible_by)
return get_area_slices(
self,
area_to_cover,
shape_divisible_by,
sample_steps,
sample_grid,
)

def crop_around(self, other_area):
"""Crop this area around `other_area`."""
Expand Down
Loading
Loading