Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
Package: hydroloom
Title: Utilities to Weave Hydrologic Fabrics
Version: 1.2.0
Authors@R:
Authors@R: c(
person(given = "David",
family = "Blodgett",
role = c("aut", "cre"),
email = "dblodgett@usgs.gov",
comment = c(ORCID = "0000-0001-9489-1710"))
comment = c(ORCID = "0000-0001-9489-1710")),
person(given = "Andrew",
family = "Psoras",
role = "ctb",
email = "apsoras@usgs.gov"))
Description: A collection of utilities that support creation of network attributes for hydrologic networks. Methods and algorithms implemented are documented in Moore et al. (2019) <doi:10.3133/ofr20191096>), Cormen and Leiserson (2022) <ISBN:9780262046305> and Verdin and Verdin (1999) <doi:10.1016/S0022-1694(99)00011-6>.
Depends: R (>= 4.1.0)
Imports: dplyr, data.table, sf, units, stats, methods, utils, pbapply, tidyr, RANN, rlang, fastmap
Expand Down
133 changes: 70 additions & 63 deletions R/navigate_connected_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
#' lengths between all identified flowpath outlets. This algorithm finds
#' paths between outlets regardless of flow direction.
#' @param x data.frame network compatible with \link{hydroloom_names}.
#' @param outlets vector of ids from data.frame
#' @param outlets vector or list of length 2 of ids from data.frame
#' @param status logical print status and progress bars?
#' @details
#'
#' Required attributes: `id`, `toid`, `length_km`
#'
#' If `outlets` is a vector, all combinations (\link{combn}) of the given `outlets` will be produced.
#' If `outlets` is a list, it must be of length 2 (froms, tos).
#' Recycling is performed via \link{expand.grid} if child vector lengths do not match.
#' `outlets` may also be a 2-column dataframe.
#'
#' @returns data.frame containing the distance between pairs of network outlets
#' and a list column containing flowpath identifiers along path that connect outlets.
Expand All @@ -30,9 +35,15 @@ navigate_connected_paths <- function(x, outlets, status = FALSE) {
on.exit(pboptions(pbopts), add = TRUE)
}

stopifnot(is.vector(outlets))
outlet_ids <- if (is.list(outlets)) {
stopifnot(length(outlets) == 2)
unique(unlist(outlets))
} else {
stopifnot(is.vector(outlets))
unique(outlets)
}

if (!all(outlets %in% x$id))
if (!all(outlet_ids %in% x$id))
stop("All outlets must be in x.")

x <- st_drop_geometry(x)
Expand All @@ -45,19 +56,25 @@ navigate_connected_paths <- function(x, outlets, status = FALSE) {
if (dim(index$to)[1] != 1) stop("network must be dendritic")

get_dwn <- function(indid, toindid) {
next_dn <- toindid[1, indid]
if (next_dn == 0) {
indid
} else {
c(indid, get_dwn(next_dn, toindid))
next_dn <- toindid[indid]
out <- list(indid)
i <- 2
while (next_dn != 0) {
indid <- next_dn
next_dn <- toindid[indid]
out[[i]] <- indid
i <- i + 1
}
unlist(out)
}

id_match <- match(outlets, index$to_list$id)

id_match <- match(outlet_ids, index$to_list$id)
if (status)
message("Finding all downstream paths.")

# we could in theory be storing these as we go and looking up
# subsequences but that gets pretty complicated
all_dn <- pblapply(id_match, function(indid, toindid) {
out <- get_dwn(indid, toindid)
if ((lo <- length(out)) > 1) {
Expand All @@ -68,70 +85,60 @@ navigate_connected_paths <- function(x, outlets, status = FALSE) {
}, toindid = index$to)

if (status)
message("Finding all connected pairs.")
message("Finding all connected pairs.")

get_path <- function(p, all_dn) {
x <- all_dn[[p[1]]]
y <- all_dn[[p[2]]]

if (length(x) == 1) # if one end is a terminal
return(list(x = integer(0), y = y))

if (length(y) == 1)
return(list(x = x, y = integer(0)))

if (tail(x, 1) == tail(y, 1))
return(list(x = x[!x %in% y], y = y[!y %in% x]))

list()
x_len <- length(x)
y_len <- length(y)

if (x_len == 1) # if one end is a terminal
return(y)

if (y_len == 1)
return(x)

if (x[x_len] == y[y_len]) {
return(c(x[match(x, y, nomatch = 0) == 0], y[match(y, x, nomatch = 0) == 0]))
}

numeric(0)
}

pairs <- t(combn(length(id_match), 2))

paths <- pbapply(pairs, 1, get_path, all_dn = all_dn, cl = future_available())

connected_paths <- paths[lengths(paths) > 0]

length_km <- select(left_join(index$to_list,
select(x, id, "length_km"),
by = id),
id, "length_km")

if (status)
message("Summing length of all connected pairs.")

get_length <- function(p, length_km) {
sum(length_km$length_km[p[[1]]], length_km$length_km[p[[2]]])
# outlet_ids and id_match are interchangeable in combn
# but for outlet list we need to get to an index
pairs <- if(is.list(outlets)) {
if (is.data.frame(outlets) || diff(lengths(outlets)) == 0) {
# if the lengths are equal no recycling should be applied
sapply(outlets, \(x) match(x, outlet_ids))
} else {
expand.grid(lapply(outlets, \(x) match(x, outlet_ids)))
}
} else {
t(combn(length(id_match), 2))
}

path_lengths <- pblapply(connected_paths, get_length, length_km = length_km)

path_lengths <- cbind(as.data.frame(matrix(id_match[pairs[lengths(paths) > 0, ]],
ncol = 2)),
data.frame(length = as.numeric(path_lengths)))

names(path_lengths) <- c("indid_1", "indid_2", "network_distance_km")

paths <- cbind(as.data.frame(matrix(id_match[pairs[lengths(paths) > 0, ]],
ncol = 2)))

names(paths) <- c("indid_1", "indid_2")

paths[["path"]] <- lapply(connected_paths, function(x) {
c(x$x, x$y)
})

path_lengths <- left_join(path_lengths, paths, by = c("indid_1", "indid_2"))
length_km <- x$length_km

paths <- pbapply(pairs, 1, \(p) {
path <- get_path(p, all_dn)
if (length(path) != 0) {
list(
id_1 = p[1],
id_2 = p[2],
network_distance_km = sum(length_km[path]),
path = list(path)
)
}
}, simplify = FALSE)

path_lengths <- left_join(path_lengths,
select(index$to_list, id_1 = id, indid),
by = c("indid_1" = "indid")) |>
left_join(select(index$to_list, id_2 = id, indid),
by = c("indid_2" = "indid")) |>
select(-"indid_1", -"indid_2")
df <- rbindlist(paths)

select(path_lengths, all_of(c("id_1", "id_2", "network_distance_km", "path")))
df$id_1 <- outlet_ids[match(df$id_1, seq_along(id_match))]
df$id_2 <- outlet_ids[match(df$id_2, seq_along(id_match))]

as.data.frame(df)
}

#' @title Get Partial Flowpath Length
Expand Down
7 changes: 6 additions & 1 deletion man/navigate_connected_paths.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 37 additions & 1 deletion tests/testthat/test_navigate_connected_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,41 @@ test_that("navigate connected paths", {
pbopts <- pbapply::pboptions(type = "none")
on.exit(pbapply::pboptions(pbopts), add = TRUE)

expect_equal(length(mess), 3)
expect_equal(length(mess), 2)
})

# List input/output, same case as above
test_that("navigate_connected_paths", {
fline <- sf::read_sf(system.file("extdata", "walker.gpkg", package = "hydroloom"))
outlets_vec <- c(5329357, 5329317, 5329365, 5329435, 5329817)
fline <- add_toids(fline)

pl <- navigate_connected_paths(fline, outlets_vec)

# Checking recycling behavior
outlets <- list(
outlets_vec[1],
outlets_vec[-1]
)

exp_len <- expand.grid(outlets) |> nrow()
pl_recycle <- navigate_connected_paths(fline, outlets)
expect_equal(nrow(pl_recycle), exp_len)

# Checking if we can suppy pairs as-is with no recycling
outlets <- list(
outlets_vec[1:2],
outlets_vec[3:4]
)

pl_asis <- navigate_connected_paths(fline, outlets)
expect_equal(nrow(pl_asis), 2)

# Checking for bad input (also names don't matter)
expect_error(navigate_connected_paths(fline, list(1)))
expect_error(navigate_connected_paths(fline, list(1, 2, 3)))

# Checking for correctness with list input
expect_equal(pl_asis$network_distance_km[pl_asis$id_1 == 5329357 & pl_asis$id_2 == 5329365],
3.6, tolerance = 0.01)
})