Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
from and export to in-memory formats used by popular toolchains like
'Seurat', 'Bioconductor', and even 'AnnData' using the companion Python
package.
Version: 0.1.16.9000
Version: 0.1.16.9001
Authors@R: c(
person(given = "Aaron",
family = "Wolen",
Expand Down
4 changes: 4 additions & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# tiledbsoma (development version)

## Features

- The `AnnotationMatrix`'s `to_matrix()` method now supports batched reads via the `batch_mode` argument. This functionality can also be leveraged from `SOMA`'s `get_seurat_dimreductions_list()` and `get_seurat_dimreduction()` methods.

## Fixes
* Don't use default assay name when recreating a `Seurat` object (thanks @dan11mcguire)

Expand Down
19 changes: 12 additions & 7 deletions apis/r/R/AnnotationMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#' Base class for matrix-like data with rows aligned to the observations or
#' features of a [`SOMA`].
#'
#' @param batch_mode logical, if `TRUE`, batch query mode is enabled, which
#' provides the ability to detect partial query results and resubmit until
#' all results are retrieved.
#' @export

AnnotationMatrix <- R6::R6Class(
Expand Down Expand Up @@ -42,15 +45,17 @@ AnnotationMatrix <- R6::R6Class(
},

#' @description Retrieve the annotation data from TileDB
#' @param attrs A character vector of the attribute names to retrieve. By
#' default, all attributes are retrieved.
#' @return A [`matrix`]
to_matrix = function() {
if (self$verbose) {
message(
sprintf("Reading %s into memory from '%s'", self$class(), self$uri)
)
}
to_matrix = function(attrs = NULL, batch_mode = FALSE) {

df <- private$read_data(
attrs = attrs,
batch_mode = batch_mode,
return_as = "data.frame"
)

df <- self$tiledb_array(return_as = "data.frame")[]
index_col <- self$dimnames()
attr_cols <- setdiff(colnames(df), index_col)

Expand Down
35 changes: 10 additions & 25 deletions apis/r/R/AssayMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,30 +107,11 @@ AssayMatrix <- R6::R6Class(
#' all attributes are retrieved.
#' @return A [`data.frame`].
to_dataframe = function(attrs = NULL, batch_mode = FALSE) {
if (self$verbose) {
message(
sprintf("Reading %s into memory from '%s'", self$class(), self$uri)
)
}
arr <- self$object
tiledb::attrs(arr) <- attrs %||% character()
tiledb::return_as(arr) <- "data.frame"

if (batch_mode) {
if (self$verbose) message("...reading in batches")
batcher <- tiledb:::createBatched(arr)
results <- list()
i <- 1
while(isFALSE(tiledb::completedBatched(batcher))) {
if (self$verbose) message(sprintf("...retrieving batch %d", i))
results[[i]] <- tiledb::fetchBatched(arr, batcher)
i <- i + 1
}
results <- vctrs::vec_rbind(!!!results)
} else {
results <- arr[]
}
results
private$read_data(
attrs = attrs,
batch_mode = batch_mode,
return_as = "data.frame"
)
},

#' @description Retrieve assay data from TileDB as a 2D sparse matrix.
Expand All @@ -146,7 +127,11 @@ AssayMatrix <- R6::R6Class(
}
stopifnot(is_scalar_character(attr))

assay_data <- self$to_dataframe(attrs = attr, batch_mode = batch_mode)
assay_data <- private$read_data(
attrs = attr,
batch_mode = batch_mode,
return_as = "data.frame"
)

# reverse index columns if transposing array dimensions
if (transpose) assay_data <- assay_data[c(rev(self$dimnames()), attr)]
Expand Down
12 changes: 7 additions & 5 deletions apis/r/R/SOMA.R
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ SOMA <- R6::R6Class(
#' @param technique Name of the dimensionality reduction technique. Used to
#' identify which `obsm`/`varm` array will be retrieved. If `NULL`, we
#' default to the first `obsm/dimreduction_` array.
get_seurat_dimreduction = function(technique = NULL) {
get_seurat_dimreduction = function(technique = NULL, batch_mode = FALSE) {

# Identify all obsm/varm dimreduction_ arrays
prefix <- "dimreduction_"
Expand Down Expand Up @@ -613,7 +613,9 @@ SOMA <- R6::R6Class(
}

# TODO: validate we're only returning 1 array per dimension
mats <- lapply(arrays, function(x) x[[1]]$to_matrix())
mats <- lapply(arrays,
function(x) x[[1]]$to_matrix(batch_mode = batch_mode)
)

# TODO: validate all keys match? For now just take the first one
key <- unlist(arrays)[[1]]$get_metadata(key = "dimreduction_key")
Expand All @@ -627,13 +629,13 @@ SOMA <- R6::R6Class(
},

#' @description Retrieve a list of all [`SeuratObject::DimReduc`] objects.
get_seurat_dimreductions_list = function() {
arrays <-self$get_annotation_matrix_arrays(prefix = "dimreduction_")
get_seurat_dimreductions_list = function(batch_mode = FALSE) {
arrays <- self$get_annotation_matrix_arrays(prefix = "dimreduction_")
array_names <- names(unlist(arrays))
techniques <- unique(sub("(obs|var)m\\.dimreduction_", "", array_names))
sapply(
techniques,
function(x) self$get_seurat_dimreduction(x),
function(x) self$get_seurat_dimreduction(x, batch_mode = batch_mode),
simplify = FALSE,
USE.NAMES = TRUE
)
Expand Down
7 changes: 3 additions & 4 deletions apis/r/R/SOMACollection.R
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ SOMACollection <- R6::R6Class(
#' @description Convert to a [SeuratObject::Seurat] object.
#' @param project [`SeuratObject::Project`] name for the `Seurat` object
#' @param batch_mode logical, if `TRUE`, batch query mode is enabled for
#' retrieving `X` layers. See
#' retrieving `X`, `obsm`/`varm`, and `obsp`/`varp` layers. See
#' [`AssayMatrix$to_dataframe()`][`AssayMatrix`] for more information.
to_seurat = function(project = "SeuratProject", batch_mode = FALSE) {
stopifnot(is_scalar_character(project))
Expand Down Expand Up @@ -243,9 +243,8 @@ SOMACollection <- R6::R6Class(
# Retrieve list of all techniques used in any soma's obsm/varm
# dimensionality reduction arrays. The association between assay and
# dimreduction is maintained by the DimReduc's `assay.used` slot.
dimreductions <- lapply(
self$somas,
function(x) x$get_seurat_dimreductions_list()
dimreductions <- lapply(self$somas,
function(x) x$get_seurat_dimreductions_list(batch_mode)
)
object@reductions <- Reduce(base::c, dimreductions)

Expand Down
39 changes: 38 additions & 1 deletion apis/r/R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,43 @@ TileDBArray <- R6::R6Class(
},

# @description Ingest data into the TileDB array.
ingest_data = function() return(NULL)
ingest_data = function() return(NULL),

# @description Retrieve data from the TileDB array
# @param batch_mode logical, if `TRUE`, batch query mode is enabled, which
# provides the ability to detect partial query results and resubmit until
# all results are retrieved.
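# @param return_as Format of the returned data, passed to
# `tiledb::return_as()`: `"asis"`, `"array"`, `"matrix"`, `"data.frame"`,
# `"data.table"` or `"tibble"`. When `NULL` (the default), `"asis"` is used,
# which returns a `list`. Currently ignored when `batch_mode = TRUE`, in which
# case a `data.frame` is always returned.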
read_data = function(attrs = NULL, batch_mode = FALSE, return_as = NULL) {
# Shared reader: configures the array held in `self$object` and returns its
# contents, either with a single query or batch by batch.
# `attrs` holds the attribute names handed to `tiledb::attrs<-`; `NULL` is
# replaced by an empty character vector (presumably meaning "all attributes"
# -- TODO confirm against tiledb-r).
if (self$verbose) {
message(
sprintf("Reading %s into memory from '%s'", self$class(), self$uri)
)
}
# Apply the requested attribute selection and return type to the array
# before querying; a `NULL` `return_as` falls back to "asis".
arr <- self$object
tiledb::attrs(arr) <- attrs %||% character()
tiledb::return_as(arr) <- return_as %||% "asis"

if (batch_mode) {
if (self$verbose) message("...reading in batches")
batcher <- tiledb::createBatched(arr)
results <- list()
i <- 1
# Keep fetching until tiledb reports the batched query as completed; each
# fetched batch is stored as one element of `results`.
while (isFALSE(tiledb::completedBatched(batcher))) {
if (self$verbose) message(sprintf("...retrieving batch %d", i))
results[[i]] <- tiledb::fetchBatched(arr, batcher)
i <- i + 1
}

# TODO: currently tiledb-r's batched reader ignores return_as and a
# data.frame is always returned. When this is addressed we'll need to
# add class-specific concatenation logic here.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a tracking Shortcut story open?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do indeed: [sc-23436]

# Row-bind the per-batch results into a single object.
results <- vctrs::vec_rbind(!!!results)
} else {
# Non-batched path: read everything with a single `[]` query.
results <- arr[]
}
results
}
)
)
14 changes: 13 additions & 1 deletion apis/r/man/AnnotationMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 15 additions & 2 deletions apis/r/man/SOMA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apis/r/man/SOMACollection.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions apis/r/tests/testthat/test_AnnotationMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,8 @@ test_that("annotation matrix can be stored and retrieved", {
expect_equal(sort(colnames(mat2)), sort(colnames(mat)))

expect_identical(mat[rlabs, clabs], mat2[rlabs, clabs])

# test that result is identical with batch mode
mat3 <- annotmat$to_matrix(batch_mode = TRUE)
expect_identical(mat2, mat3)
})