diff --git a/R/print.R b/R/print.R index 026f0bd..ae4b791 100755 --- a/R/print.R +++ b/R/print.R @@ -7,7 +7,7 @@ #' @param bins number of bins to use for summary #' @importFrom stats chisq.test integrate #' @export -rank_summary <- function(ranks, par, thin, bins = 20){ +rank_summary <- function(results, par, bins = 20){ pval <- function(bin_count){ return(chisq.test(bin_count)$p.value) } @@ -25,14 +25,21 @@ rank_summary <- function(ranks, par, thin, bins = 20){ val <- integrate(tempf,1,bins, rel.tol=.Machine$double.eps^.05)$value return(val) } - S <- dim(ranks)[1] - bin_size <- S / bins - bin_count <- rep(0, bins) - for (s in 1:S) { - bin <- ceiling(ranks[s, par] / bins) - bin_count[bin] <- bin_count[bin] + 1 + for (p in par){ + ranks <- results$stats$rank[names(results$stats$rank) %in% p] + ranks <- array(ranks, dim=c(length(ranks), 1)) + colnames(ranks) <- c(p) + S <- results$stats$max_rank[1] + bin_size <- S / bins + bin_count <- rep(0, bins) + for (n in 1:dim(ranks)[1]) { + bin <- ceiling(ranks[n, p] / bin_size) + bin_count[bin] <- bin_count[bin] + 1 + } + rank_list <- list() + rank_list[[p]] <- list(pval = round(pval(bin_count),10), max_diff = round(max_diff(bin_count),3), wasserstein = round(wasserstein(bin_count),3)) } - print(paste0("pval: ", round(pval(bin_count),10), " max_diff: ", round(max_diff(bin_count),3), " wasserstein: ", round(wasserstein(bin_count),3))) + return(list(par = par, rank_list = rank_list)) } #' Summarize relational property of overall prior and posterior samples @@ -48,3 +55,88 @@ dist_summary <- function(prior, post, par, bins = 20){ h2 <- hist(post[[p]], breaks = breaks, plot = FALSE) return(list("KLD"= HistogramTools::kl.divergence(h1, h2), "JeffDivg" = HistogramTools::jeffrey.divergence(h1, h2), "Minkowski_2" = HistogramTools::minkowski.dist(h1, h2, 2))) } + +##' Cumulative Jensen-Shannon divergence +##' +##' Computes the cumulative Jensen-Shannon distance between two +##' samples. +##' +##' The Cumulative Jensen-Shannon distance is a symmetric metric based +##' on the cumulative Jensen-Shannon divergence. The divergence CJS(P || Q) between +##' two cumulative distribution functions P and Q is defined as: +##' +##' \deqn{CJS(P || Q) = \sum P(x) \log \frac{P(x)}{0.5 (P(x) + Q(x))} + \frac{1}{2 \ln 2} \sum (Q(x) - P(x))} +##' +##' The symmetric metric is defined as: +##' +##' \deqn{CJS_{dist}(P || Q) = \sqrt{CJS(P || Q) + CJS(Q || P)}} +##' +##' This has an upper bound of \eqn{\sqrt \sum (P(x) + Q(x))} +##' +##' @param x numeric vector of samples from first distribution +##' @param y numeric vector of samples from second distribution +##' @param x_weights numeric vector of weights of first distribution +##' @param y_weights numeric vector of weights of second distribution +##' @param ... unused +##' @return distance value based on CJS computation. +##' @references Nguyen H-V., Vreeken J. (2015). Non-parametric +##' Jensen-Shannon Divergence. In: Appice A., Rodrigues P., Santos +##' Costa V., Gama J., Jorge A., Soares C. (eds) Machine Learning +##' and Knowledge Discovery in Databases. ECML PKDD 2015. Lecture +##' Notes in Computer Science, vol 9285. Springer, Cham. +##' \code{doi:10.1007/978-3-319-23525-7_11} +##' @export +cjs_dist <- function(x, y, x_weights, y_weights, ...) { + + # sort draws and weights + x_idx <- order(x) + x <- x[x_idx] + wp <- x_weights[x_idx] + + y_idx <- order(y) + y <- y[y_idx] + wq <- y_weights[y_idx] + + # add end point of final step + x_v <- x[length(x)] + x[length(x)] - x[length(x) - 1] + y_v <- y[length(y)] + y[length(y)] - y[length(y) - 1] + + # calculate widths of each step + x_diff <- diff(c(x, x_v)) + y_diff <- diff(c(y, y_v)) + + # calculate required weighted ecdfs + Px <- spatstat.geom::ewcdf(x, weights = wp)(x) + Qx <- spatstat.geom::ewcdf(y, weights = wq)(x) + + Py <- spatstat.geom::ewcdf(x, weights = wp)(y) + Qy <- spatstat.geom::ewcdf(y, weights = wq)(y) + + # calculate integral of ecdfs + Px_int <- drop(Px %*% x_diff) + Qx_int <- drop(Qx %*% x_diff) + + Py_int <- drop(Py %*% y_diff) + Qy_int <- drop(Qy %*% y_diff) + + # calculate cjs + cjs_PQ <- x_diff %*% ( + Px * (log(Px, base = 2) - + log(0.5 * Px + 0.5 * Qx, base = 2) + ) + ) + 0.5 / log(2) * (Qx_int - Px_int) + + cjs_QP <- y_diff %*% ( + Qy * (log(Qy, base = 2) - + log(0.5 * Qy + 0.5 * Py, base = 2) + ) + ) + 0.5 / log(2) * (Py_int - Qy_int) + + # calculate upper bound + bound <- Px_int + Qy_int + + # normalise with respect to upper bound + out <- (sqrt(cjs_PQ + cjs_QP)) / sqrt(bound) + + return(c(cjs = out)) +} diff --git a/vignettes/model_interaction.Rmd b/vignettes/model_interaction.Rmd new file mode 100644 index 0000000..d66951c --- /dev/null +++ b/vignettes/model_interaction.Rmd @@ -0,0 +1,196 @@ +--- +title: "Model interaction" +output: html_document +--- + +```{r setup} +devtools::install_github("hyunjimoon/SBC",ref="api-variant") +library(SBC) + +# use_cmdstanr <- TRUE # Set to false to use rstan instead +# +# if(use_cmdstanr) { +# library(cmdstanr) +# } else { +# library(rstan) +# } +library(cmdstanr) +library(bayesplot) +library(posterior) + +library(future) +plan(multisession) + +options(SBC.min_chunk_size = 5) +``` + +Three models of simple.stan from "ryanbe.me", indexed in order of choice from the tree graph. To define model correlation score the following should be inspected: symmetry, triangular inequality, +```{r simple} +backend_1_1 <- cmdstan_sample_SBC_backend(cmdstan_model("model_interaction/simple1_1.stan")) +backend_1_2 <- cmdstan_sample_SBC_backend(cmdstan_model("model_interaction/simple1_2.stan")) +backend_2_1 <- cmdstan_sample_SBC_backend(cmdstan_model("model_interaction/simple2_1.stan")) +# backend_2_2 <- cmdstan_sample_SBC_backend(cmdstan_model("model_interaction/simple2_2.stan")) doesn't have any parameter +``` + +```{r } +generator_func_1_1 <- function(N) { + sigma <- rlnorm(1); + mu <- rnorm(1); + y <- rnorm(N, mu, sigma); + list( + parameters = list( + mu = mu, + sigma = sigma + ), + generated = list( + N = N, + y = y + ) + ) +} +generator_func_1_1 <- function_SBC_generator(generator_func_1_1, N = 50) +dataset_1_1 <- generate_datasets(generator_func_1_1, n_datasets = 100) +``` + +```{r } +generator_func_1_2 <- function(N) { + sigma <- rexp(1) + mu <- rnorm(1) + y <- rnorm(N, mu, sigma) + list( + parameters = list( + mu = mu, + sigma = sigma + ), + generated = list( + N = N, + y = y + ) + ) +} +generator_func_1_2 <- function_SBC_generator(generator_func_1_2, N = 50) +dataset_1_2 <- generate_datasets(generator_func_1_2, n_datasets = 100) +``` + +```{r } +generator_func_2_1 <- function(N) { + sigma <- rlnorm(1) # from parameter block + mu <- 0 # from transformed parameter block + y <- rnorm(N, mu, sigma) #not sure between rnorm(N, 0, sigma) + list( + parameters = list( + mu = mu, + sigma = sigma + ), + generated = list( + N = N, + y = y + ) + ) +} +generator_func_2_1 <- function_SBC_generator(generator_func_2_1, N = 50) +dataset_2_1 <- generate_datasets(generator_func_2_1, 100) +``` + +```{r } +# How would the degree of neighbor affect the correlation score between two models? Following questions are in order +# 1. Would there exist any stats s.t. results_i_j_i_j+1$stat < results_i_j_!(i_j) for all i_j paris? +# 2. Would symmetric? triangular inequality? +results_1_1_1_1 <- compute_results(dataset_1_1, backend_1_1) +results_1_1_1_2 <- compute_results(dataset_1_1, backend_1_2) +results_1_1_2_1 <- compute_results(dataset_1_1, backend_2_1) + +results_1_2_1_2 <- compute_results(dataset_1_2, backend_1_2) +results_1_2_1_1 <- compute_results(dataset_1_2, backend_1_1) +results_1_2_2_1 <- compute_results(dataset_1_2, backend_2_1) + +results_2_1_2_1 <- compute_results(dataset_2_1, backend_2_1) +results_2_1_1_1 <- compute_results(dataset_2_1, backend_1_1) +results_2_1_1_2 <- compute_results(dataset_2_1, backend_1_2) +``` + +```{R} +rank_summary(results_1_1_1_1, par = c("mu", "sigma")) +rank_summary(results_1_1_1_2, par = c("mu", "sigma")) +rank_summary(results_1_1_2_1, par = c("mu", "sigma")) + +rank_summary(results_1_2_1_2, par = c("mu", "sigma")) +rank_summary(results_1_2_1_1, par = c("mu", "sigma")) +rank_summary(results_1_2_2_1, par = c("mu", "sigma")) + +rank_summary(results_2_1_2_1, par = c("mu", "sigma")) +rank_summary(results_2_1_1_1, par = c("mu", "sigma")) +rank_summary(results_2_1_1_2, par = c("mu", "sigma")) +``` + +> rank_summary(results_1_1_1_1, par = c("mu", "sigma")) +[1] " max_diff: 1.6 wasserstein: 0.315" +[1] " max_diff: 0.6 wasserstein: 0.241" + +> rank_summary(results_1_1_1_2, par = c("mu", "sigma")) +[1] " max_diff: 1.4 wasserstein: 0.318" +[1] " max_diff: 1 wasserstein: 0.291" + +> rank_summary(results_1_1_2_1, par = c("mu", "sigma")) +[1] " max_diff: 19 wasserstein: 0.95" +[1] " max_diff: 3.179 wasserstein: 0.411" + +> rank_summary(results_1_2_1_2, par = c("mu", "sigma")) +[1] " max_diff: 1 wasserstein: 0.249" +[1] " max_diff: 0.8 wasserstein: 0.291" + +> rank_summary(results_1_2_1_1, par = c("mu", "sigma")) +[1] " max_diff: 0.6 wasserstein: 0.255" +[1] " max_diff: 0.8 wasserstein: 0.265" + +> rank_summary(results_1_2_2_1, par = c("mu", "sigma")) +[1] " max_diff: 19 wasserstein: 0.95" +[1] " max_diff: 4.581 wasserstein: 0.851" + +> +> rank_summary(results_2_1_2_1, par = c("mu", "sigma")) +[1] " max_diff: 1 wasserstein: 0.298" +[1] " max_diff: 1 wasserstein: 0.402" + +> rank_summary(results_2_1_1_1, par = c("mu", "sigma")) +[1] " max_diff: 0.8 wasserstein: 0.375" +[1] " max_diff: 0.8 wasserstein: 0.356" + +> rank_summary(results_2_1_1_2, par = c("mu", "sigma")) +[1] " max_diff: 1 wasserstein: 0.406" +[1] " max_diff: 0.8 wasserstein: 0.374" + +Graphical comparison. +```{R} + +plot_ecdf_diff(results_1_1_1_1) +plot_ecdf_diff(results_1_1_1_2) +plot_ecdf_diff(results_1_1_2_1) # mu is fixed to 0 and therefore have a therefore highly under-dispersed. + +plot_ecdf_diff(results_1_2_1_2) +plot_ecdf_diff(results_1_2_1_1) +plot_ecdf_diff(results_1_2_2_1) + +plot_ecdf_diff(results_2_1_2_1) +plot_ecdf_diff(results_2_1_1_1) +plot_ecdf_diff(results_2_1_1_2) +``` + +As can be seen from by comparing (`results_1_1_2_1` , `results_2_1_1_1` ) (`results_1_2_2_1`, `results_2_1_1_2`) rank scores are not symmetric. Compared to the `generator` model, `backend` model seem to have a greater effect on the outcome. + +The next question is, how would this metric affect the final goal, finding the model with high elpd within the model network? What common trait would model that are close would have? If we calculate rank summaries with `lp__`, what implication would this have? For this let's experiment with bram + +``` +template_data = data.frame(y = rep(0, 15), x = rnorm(15)) +priors <- prior(normal(0,1), class = "b") + + prior(normal(0,1), class = "Intercept") + + prior(normal(0,1), class = "sigma") +generator <- brms_SBC_generator(y ~ x, data = template_data, prior = priors, + thin = 50, warmup = 10000, refresh = 2000, + # Will generate the log density - this is useful, + #but a bit computationally expensive + generate_lp = TRUE + ) +``` + +then this would mean for This is a 1d summary of two models; At first, it is recommended to explore diverse models; heterogenity is diff --git a/vignettes/model_interaction/simple1_1 b/vignettes/model_interaction/simple1_1 new file mode 100755 index 0000000..caff433 Binary files /dev/null and b/vignettes/model_interaction/simple1_1 differ diff --git a/vignettes/model_interaction/simple1_1.stan b/vignettes/model_interaction/simple1_1.stan new file mode 100644 index 0000000..a9c72c9 --- /dev/null +++ b/vignettes/model_interaction/simple1_1.stan @@ -0,0 +1,13 @@ +data { + int N; + vector[N] y; +} +parameters { + real mu; + real sigma; +} +model { + sigma ~ lognormal(0, 1); + mu ~ normal(0, 1); + y ~ normal(mu, sigma); +} diff --git a/vignettes/model_interaction/simple1_2 b/vignettes/model_interaction/simple1_2 new file mode 100755 index 0000000..c09527e Binary files /dev/null and b/vignettes/model_interaction/simple1_2 differ diff --git a/vignettes/model_interaction/simple1_2.stan b/vignettes/model_interaction/simple1_2.stan new file mode 100644 index 0000000..f78c9b1 --- /dev/null +++ b/vignettes/model_interaction/simple1_2.stan @@ -0,0 +1,13 @@ +data { + int N; + vector[N] y; +} +parameters { + real mu; + real sigma; +} +model { + mu ~ normal(0, 1); + sigma ~ exponential(1); + y ~ normal(mu, sigma); +} diff --git a/vignettes/model_interaction/simple2_1 b/vignettes/model_interaction/simple2_1 new file mode 100755 index 0000000..7658203 Binary files /dev/null and b/vignettes/model_interaction/simple2_1 differ diff --git a/vignettes/model_interaction/simple2_1.stan b/vignettes/model_interaction/simple2_1.stan new file mode 100644 index 0000000..d393bd4 --- /dev/null +++ b/vignettes/model_interaction/simple2_1.stan @@ -0,0 +1,15 @@ +data { + int N; + vector[N] y; +} +parameters { + real sigma; +} +transformed parameters{ + real mu; + mu = 0; +} +model { + sigma ~ lognormal(0, 1); + y ~ normal(0, sigma); +} diff --git a/vignettes/model_interaction/simple2_2 b/vignettes/model_interaction/simple2_2 new file mode 100755 index 0000000..7435cd5 Binary files /dev/null and b/vignettes/model_interaction/simple2_2 differ diff --git a/vignettes/model_interaction/simple2_2.stan b/vignettes/model_interaction/simple2_2.stan new file mode 100644 index 0000000..f38d077 --- /dev/null +++ b/vignettes/model_interaction/simple2_2.stan @@ -0,0 +1,9 @@ +data { + int N; + vector[N] y; +} +parameters { +} +model { + y ~ normal(0, 1); +}