From 759da8ad73d882a31b8ae2b6d8ee10b9c166cc78 Mon Sep 17 00:00:00 2001 From: Samuel Jenness Date: Sat, 25 Apr 2026 13:51:20 -0400 Subject: [PATCH] Validate init.hiv.prev length matches the race flag (#59) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #59 reported confusion under `race = FALSE`: the docstring said init.hiv.prev had to be length 3 (one per race category), but the underlying flow only used the first element when race = FALSE so a length-1 input ought to work. The previous active validation accepted any length silently, which led to: - length 1 + race = FALSE: actually worked (user's request). - length 1 + race = TRUE: failed downstream with the cryptic message "vector size cannot be NA/NaN" because init.hiv.prev[2:3] returns NA. - length 2 + race = TRUE: same cryptic failure. Fix: - Update the @param doc on build_epistats to spell out: length 1 is correct under race = FALSE; length must equal length(race.level) (default 3) under race = TRUE. - Replace the commented-out length check with an active one: + race = TRUE requires length(init.hiv.prev) == length(race.level). Clear error message including the expected and observed lengths. + race = FALSE accepts any length >= 1; only init.hiv.prev[1] is used downstream. Length > 1 is permitted (extra elements ignored) to preserve backward compat with existing callers — notably the atlanta_no_race scenario in inst/validation/. Out-of-range elements (>= 1 or <= 0) still raise the existing "between 0 and 1 non-inclusive" error. Validation: - Backward-compat snapshot harness still matches 3/3 (atlanta_no_race uses length-3 + race = FALSE; that combination still works under the new validation, just no longer documented as the recommended shape). - New tests in test-init-hiv-prev.R (5 blocks, 8 assertions): valid length-1 + race=FALSE; valid length-3 + race=TRUE; clear errors for length-1 + race=TRUE and length-2 + race=TRUE; out-of-range value still rejected. - R CMD check: 0/0/0. Closes #59. Co-Authored-By: Claude Opus 4.7 (1M context) --- R/EpiStats.R | 25 +++++++++---- man/build_epistats.Rd | 11 +++--- tests/testthat/test-init-hiv-prev.R | 57 +++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 13 deletions(-) create mode 100644 tests/testthat/test-init-hiv-prev.R diff --git a/R/EpiStats.R b/R/EpiStats.R index 694ba17..44dd588 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -23,11 +23,12 @@ #' with `age.limits = c(15, 65)`. #' @param age.sexual.cessation Age of cessation of sexual activity, while aging process continues #' through the upper age limit. Maximum allowed value of 66. -#' @param init.hiv.prev Initial HIV prevalence to be used in epidemic model estimated model, with a -#' numerical vector of size 3 corresponding to starting prevalence in three race/ethnic -#' groups (Black, Hispanic, and White/Other, respectively). If `init.hiv.prev = NULL`, -#' `build_epistats` will estimate a logistic regression model to predict starting prevalence -#' as a function of estimated prevalence in ARTnet as a function of race/ethnicity and age. +#' @param init.hiv.prev Initial HIV prevalence to be used in epidemic model estimated model. When +#' `race = TRUE`, must be a numerical vector with one element per race category in +#' `race.level` (default 3, corresponding to Black, Hispanic, and White/Other). When +#' `race = FALSE`, must be a single overall-population prevalence (length 1). If +#' `init.hiv.prev = NULL`, `build_epistats` will estimate a logistic regression model to +#' predict starting prevalence as a function of race/ethnicity and age in ARTnet. #' @param time.unit Specifies time unit for time-dependent ARTnet statistics. Default is 7, #' corresponding to a weekly time unit. Allowed inputs range from 1 for a daily time unit to #' 30 for a monthly time unit. @@ -536,9 +537,17 @@ build_epistats <- function(geog.lvl = NULL, # Output out$hiv.mod <- hiv.mod } else { - #if (length(init.hiv.prev) != 3) { - # stop("Input parameter init.prev.hiv must be a vector of size three") - #} + if (race == TRUE && length(init.hiv.prev) != length(race.level)) { + stop("init.hiv.prev must have length ", length(race.level), + " (one starting prevalence per race category in race.level) when ", + "race = TRUE; got length ", length(init.hiv.prev), ".") + } + if (race == FALSE && length(init.hiv.prev) < 1) { + stop("init.hiv.prev must have length >= 1 when race = FALSE.") + } + # Note: under race = FALSE the downstream sampler uses only + # init.hiv.prev[1] (the overall-population prevalence). Longer vectors + # are accepted for backward compatibility but extra elements are ignored. if (prod(init.hiv.prev < 1) == 0 || prod(init.hiv.prev > 0) == 0) { stop("All elements of init.hiv.prev must be between 0 and 1 non-inclusive") } diff --git a/man/build_epistats.Rd b/man/build_epistats.Rd index d6ecac0..24a098e 100644 --- a/man/build_epistats.Rd +++ b/man/build_epistats.Rd @@ -43,11 +43,12 @@ with \code{age.limits = c(15, 65)}.} \item{age.sexual.cessation}{Age of cessation of sexual activity, while aging process continues through the upper age limit. Maximum allowed value of 66.} -\item{init.hiv.prev}{Initial HIV prevalence to be used in epidemic model estimated model, with a -numerical vector of size 3 corresponding to starting prevalence in three race/ethnic -groups (Black, Hispanic, and White/Other, respectively). If \code{init.hiv.prev = NULL}, -\code{build_epistats} will estimate a logistic regression model to predict starting prevalence -as a function of estimated prevalence in ARTnet as a function of race/ethnicity and age.} +\item{init.hiv.prev}{Initial HIV prevalence to be used in epidemic model estimated model. When +\code{race = TRUE}, must be a numerical vector with one element per race category in +\code{race.level} (default 3, corresponding to Black, Hispanic, and White/Other). When +\code{race = FALSE}, must be a single overall-population prevalence (length 1). If +\code{init.hiv.prev = NULL}, \code{build_epistats} will estimate a logistic regression model to +predict starting prevalence as a function of race/ethnicity and age in ARTnet.} \item{time.unit}{Specifies time unit for time-dependent ARTnet statistics. Default is 7, corresponding to a weekly time unit. Allowed inputs range from 1 for a daily time unit to diff --git a/tests/testthat/test-init-hiv-prev.R b/tests/testthat/test-init-hiv-prev.R new file mode 100644 index 0000000..9a6ef92 --- /dev/null +++ b/tests/testthat/test-init-hiv-prev.R @@ -0,0 +1,57 @@ +# Tests for the init.hiv.prev length-validation contract (#59). +# Length 1 is fine when race = FALSE (the user's request). +# Length matching race.level is required when race = TRUE. + +skip_without_artnetdata <- function() { + testthat::skip_if(system.file(package = "ARTnetData") == "", + "ARTnetData not installed") +} + +test_that("length-1 init.hiv.prev works when race = FALSE (#59)", { + skip_without_artnetdata() + expect_silent( + ep <- build_epistats(geog.lvl = "city", geog.cat = "Atlanta", + init.hiv.prev = 0.33, race = FALSE, time.unit = 7) + ) + expect_equal(ep$init.hiv.prev, 0.33) + expect_false(ep$race) +}) + +test_that("length-3 init.hiv.prev still works when race = TRUE", { + skip_without_artnetdata() + expect_silent( + ep <- build_epistats(geog.lvl = "city", geog.cat = "Atlanta", + init.hiv.prev = c(0.33, 0.137, 0.084), + race = TRUE, time.unit = 7) + ) + expect_equal(ep$init.hiv.prev, c(0.33, 0.137, 0.084)) +}) + +test_that("length-1 init.hiv.prev with race = TRUE raises a clear error", { + skip_without_artnetdata() + expect_error( + build_epistats(geog.lvl = "city", geog.cat = "Atlanta", + init.hiv.prev = 0.33, race = TRUE, time.unit = 7), + regexp = "init.hiv.prev must have length 3" + ) +}) + +test_that("length-2 init.hiv.prev with race = TRUE raises a clear error", { + skip_without_artnetdata() + expect_error( + build_epistats(geog.lvl = "city", geog.cat = "Atlanta", + init.hiv.prev = c(0.33, 0.1), + race = TRUE, time.unit = 7), + regexp = "init.hiv.prev must have length 3" + ) +}) + +test_that("out-of-range init.hiv.prev still rejected with original message", { + skip_without_artnetdata() + expect_error( + build_epistats(geog.lvl = "city", geog.cat = "Atlanta", + init.hiv.prev = c(0.33, 1.5, 0.084), + race = TRUE, time.unit = 7), + regexp = "between 0 and 1 non-inclusive" + ) +})