diff --git a/DESCRIPTION b/DESCRIPTION index 8a2f9a9..04bfff9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,16 @@ Package: hydroloom Title: Utilities to Weave Hydrologic Fabrics Version: 1.2.0 -Authors@R: +Authors@R: c( person(given = "David", family = "Blodgett", role = c("aut", "cre"), email = "dblodgett@usgs.gov", - comment = c(ORCID = "0000-0001-9489-1710")) + comment = c(ORCID = "0000-0001-9489-1710")), + person(given = "Andrew", + family = "Psoras", + role = "ctb", + email = "apsoras@usgs.gov")) Description: A collection of utilities that support creation of network attributes for hydrologic networks. Methods and algorithms implemented are documented in Moore et al. (2019) ), Cormen and Leiserson (2022) and Verdin and Verdin (1999) . Depends: R (>= 4.1.0) Imports: dplyr, data.table, sf, units, stats, methods, utils, pbapply, tidyr, RANN, rlang, fastmap diff --git a/R/navigate_connected_paths.R b/R/navigate_connected_paths.R index 73aa9f1..40a08a6 100644 --- a/R/navigate_connected_paths.R +++ b/R/navigate_connected_paths.R @@ -3,11 +3,16 @@ #' lengths between all identified flowpath outlets. This algorithm finds #' paths between outlets regardless of flow direction. #' @param x data.frame network compatible with \link{hydroloom_names}. -#' @param outlets vector of ids from data.frame +#' @param outlets vector or list of length 2 of ids from data.frame #' @param status logical print status and progress bars? #' @details #' #' Required attributes: `id`, `toid`, `length_km` +#' +#' If `outlets` is a vector, all combinations (\link{combn}) of the given `outlets` will be produced. +#' If `outlets` is a list, it must be of length 2 (froms, tos). +#' Recycling is performed via \link{expand.grid} if child vector lengths do not match. +#' `outlets` may also be a 2-column dataframe. #' #' @returns data.frame containing the distance between pairs of network outlets #' and a list column containing flowpath identifiers along path that connect outlets. @@ -30,9 +35,15 @@ navigate_connected_paths <- function(x, outlets, status = FALSE) { on.exit(pboptions(pbopts), add = TRUE) } - stopifnot(is.vector(outlets)) + outlet_ids <- if (is.list(outlets)) { + stopifnot(length(outlets) == 2) + unique(unlist(outlets)) + } else { + stopifnot(is.vector(outlets)) + unique(outlets) + } - if (!all(outlets %in% x$id)) + if (!all(outlet_ids %in% x$id)) stop("All outlets must be in x.") x <- st_drop_geometry(x) @@ -45,19 +56,25 @@ navigate_connected_paths <- function(x, outlets, status = FALSE) { if (dim(index$to)[1] != 1) stop("network must be dendritic") get_dwn <- function(indid, toindid) { - next_dn <- toindid[1, indid] - if (next_dn == 0) { - indid - } else { - c(indid, get_dwn(next_dn, toindid)) + next_dn <- toindid[indid] + out <- list(indid) + i <- 2 + while (next_dn != 0) { + indid <- next_dn + next_dn <- toindid[indid] + out[[i]] <- indid + i <- i + 1 } + unlist(out) } - id_match <- match(outlets, index$to_list$id) - + id_match <- match(outlet_ids, index$to_list$id) + if (status) message("Finding all downstream paths.") + # we could in theory be storing these as we go and looking up + # subsequences but that gets pretty complicated all_dn <- pblapply(id_match, function(indid, toindid) { out <- get_dwn(indid, toindid) if ((lo <- length(out)) > 1) { @@ -68,70 +85,60 @@ navigate_connected_paths <- function(x, outlets, status = FALSE) { }, toindid = index$to) if (status) - message("Finding all connected pairs.") + message("Finding all connected pairs.") get_path <- function(p, all_dn) { x <- all_dn[[p[1]]] y <- all_dn[[p[2]]] - - if (length(x) == 1) # if one end is a terminal - return(list(x = integer(0), y = y)) - - if (length(y) == 1) - return(list(x = x, y = integer(0))) - - if (tail(x, 1) == tail(y, 1)) - return(list(x = x[!x %in% y], y = y[!y %in% x])) - - list() + x_len <- length(x) + y_len <- length(y) + + if (x_len == 1) # if one end is a terminal + return(y) + + if (y_len == 1) + return(x) + + if (x[x_len] == y[y_len]) { + return(c(x[match(x, y, nomatch = 0) == 0], y[match(y, x, nomatch = 0) == 0])) + } + + numeric(0) } - pairs <- t(combn(length(id_match), 2)) - - paths <- pbapply(pairs, 1, get_path, all_dn = all_dn, cl = future_available()) - - connected_paths <- paths[lengths(paths) > 0] - - length_km <- select(left_join(index$to_list, - select(x, id, "length_km"), - by = id), - id, "length_km") - - if (status) - message("Summing length of all connected pairs.") - - get_length <- function(p, length_km) { - sum(length_km$length_km[p[[1]]], length_km$length_km[p[[2]]]) + # outlet_ids and id_match are interchangeable in combn + # but for outlet list we need to get to an index + pairs <- if(is.list(outlets)) { + if (is.data.frame(outlets) || diff(lengths(outlets)) == 0) { + # if the lengths are equal no recycling should be applied + sapply(outlets, \(x) match(x, outlet_ids)) + } else { + expand.grid(lapply(outlets, \(x) match(x, outlet_ids))) + } + } else { + t(combn(length(id_match), 2)) } - path_lengths <- pblapply(connected_paths, get_length, length_km = length_km) - - path_lengths <- cbind(as.data.frame(matrix(id_match[pairs[lengths(paths) > 0, ]], - ncol = 2)), - data.frame(length = as.numeric(path_lengths))) - - names(path_lengths) <- c("indid_1", "indid_2", "network_distance_km") - - paths <- cbind(as.data.frame(matrix(id_match[pairs[lengths(paths) > 0, ]], - ncol = 2))) - - names(paths) <- c("indid_1", "indid_2") - - paths[["path"]] <- lapply(connected_paths, function(x) { - c(x$x, x$y) - }) - - path_lengths <- left_join(path_lengths, paths, by = c("indid_1", "indid_2")) + length_km <- x$length_km + + paths <- pbapply(pairs, 1, \(p) { + path <- get_path(p, all_dn) + if (length(path) != 0) { + list( + id_1 = p[1], + id_2 = p[2], + network_distance_km = sum(length_km[path]), + path = list(path) + ) + } + }, simplify = FALSE) - path_lengths <- left_join(path_lengths, - select(index$to_list, id_1 = id, indid), - by = c("indid_1" = "indid")) |> - left_join(select(index$to_list, id_2 = id, indid), - by = c("indid_2" = "indid")) |> - select(-"indid_1", -"indid_2") + df <- rbindlist(paths) - select(path_lengths, all_of(c("id_1", "id_2", "network_distance_km", "path"))) + df$id_1 <- outlet_ids[match(df$id_1, seq_along(id_match))] + df$id_2 <- outlet_ids[match(df$id_2, seq_along(id_match))] + as.data.frame(df) } #' @title Get Partial Flowpath Length diff --git a/man/navigate_connected_paths.Rd b/man/navigate_connected_paths.Rd index c382af0..092d08d 100644 --- a/man/navigate_connected_paths.Rd +++ b/man/navigate_connected_paths.Rd @@ -9,7 +9,7 @@ navigate_connected_paths(x, outlets, status = FALSE) \arguments{ \item{x}{data.frame network compatible with \link{hydroloom_names}.} -\item{outlets}{vector of ids from data.frame} +\item{outlets}{vector or list of length 2 of ids from data.frame} \item{status}{logical print status and progress bars?} } @@ -25,6 +25,11 @@ paths between outlets regardless of flow direction. } \details{ Required attributes: \code{id}, \code{toid}, \code{length_km} + +If \code{outlets} is a vector, all combinations (\link{combn}) of the given \code{outlets} will be produced. +If \code{outlets} is a list, it must be of length 2 (froms, tos). +Recycling is performed via \link{expand.grid} if child vector lengths do not match. +\code{outlets} may also be a 2-column dataframe. } \examples{ x <- sf::read_sf(system.file("extdata", "walker.gpkg", package = "hydroloom")) diff --git a/tests/testthat/test_navigate_connected_paths.R b/tests/testthat/test_navigate_connected_paths.R index f020a26..93dbef2 100644 --- a/tests/testthat/test_navigate_connected_paths.R +++ b/tests/testthat/test_navigate_connected_paths.R @@ -33,5 +33,41 @@ test_that("navigate connected paths", { pbopts <- pbapply::pboptions(type = "none") on.exit(pbapply::pboptions(pbopts), add = TRUE) - expect_equal(length(mess), 3) + expect_equal(length(mess), 2) }) + +# List input/output, same case as above +test_that("navigate_connected_paths", { + fline <- sf::read_sf(system.file("extdata", "walker.gpkg", package = "hydroloom")) + outlets_vec <- c(5329357, 5329317, 5329365, 5329435, 5329817) + fline <- add_toids(fline) + + pl <- navigate_connected_paths(fline, outlets_vec) + + # Checking recycling behavior + outlets <- list( + outlets_vec[1], + outlets_vec[-1] + ) + + exp_len <- expand.grid(outlets) |> nrow() + pl_recycle <- navigate_connected_paths(fline, outlets) + expect_equal(nrow(pl_recycle), exp_len) + + # Checking if we can suppy pairs as-is with no recycling + outlets <- list( + outlets_vec[1:2], + outlets_vec[3:4] + ) + + pl_asis <- navigate_connected_paths(fline, outlets) + expect_equal(nrow(pl_asis), 2) + + # Checking for bad input (also names don't matter) + expect_error(navigate_connected_paths(fline, list(1))) + expect_error(navigate_connected_paths(fline, list(1, 2, 3))) + + # Checking for correctness with list input + expect_equal(pl_asis$network_distance_km[pl_asis$id_1 == 5329357 & pl_asis$id_2 == 5329365], + 3.6, tolerance = 0.01) +}) \ No newline at end of file