diff --git a/apis/r/DESCRIPTION b/apis/r/DESCRIPTION index bc9f9b9f9a..ed4e801718 100644 --- a/apis/r/DESCRIPTION +++ b/apis/r/DESCRIPTION @@ -7,7 +7,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices, from and export to in-memory formats used by popular toolchains like 'Seurat', 'Bioconductor', and even 'AnnData' using the companion Python package. -Version: 0.1.22 +Version: 0.1.22.9000 Authors@R: c( person(given = "Aaron", family = "Wolen", @@ -45,7 +45,7 @@ Imports: urltools, vctrs Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Suggests: rmarkdown, knitr, diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index 96229a05a7..20bb0b04a3 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -16,9 +16,11 @@ export(SOMACollection) export(TileDBArray) export(TileDBGroup) export(TileDBObject) +export(dataset_seurat_pbmc3k) import(tiledb) importFrom(Matrix,mat2triplet) importFrom(Matrix,nnzero) +importFrom(Matrix,readMM) importFrom(Matrix,sparseMatrix) importFrom(R6,R6Class) importFrom(SeuratObject,AddMetaData) @@ -37,4 +39,7 @@ importFrom(glue,glue_collapse) importFrom(methods,slot) importFrom(urltools,url_compose) importFrom(urltools,url_parse) +importFrom(utils,download.file) importFrom(utils,modifyList) +importFrom(utils,read.table) +importFrom(utils,untar) diff --git a/apis/r/NEWS.md b/apis/r/NEWS.md index 13a4a68544..97402a706f 100644 --- a/apis/r/NEWS.md +++ b/apis/r/NEWS.md @@ -1,3 +1,9 @@ +# tiledbsoma (development version) + +## Features + +* New function `dataset_seurat_pbmc3k()` to download the pbmc3k dataset from 10X and import as a `Seurat` object without requiring any extra dependencies. 
+
 # tiledbsoma 0.1.19
 
 ## Changes
diff --git a/apis/r/R/datasets.R b/apis/r/R/datasets.R
new file mode 100644
index 0000000000..2a68ea3902
--- /dev/null
+++ b/apis/r/R/datasets.R
@@ -0,0 +1,89 @@
+#' Seurat 3k PBMCs from 10x Genomics
+#'
+#' Create a [`SeuratObject::Seurat`] object containing the widely used 3k PBMCs
+#' dataset from 10x Genomics.
+#'
+#' @details
+#' The archive is downloaded into the session's temporary directory and
+#' extracted there, so repeated calls within one R session reuse the files
+#' already on disk. Files left behind by a failed download or extraction are
+#' removed so that the next call starts from a clean state.
+#'
+#' @returns a [`SeuratObject::Seurat`] object
+#' @seealso https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k
+#' @importFrom Matrix readMM
+#' @importFrom utils download.file read.table untar
+#' @export
+dataset_seurat_pbmc3k <- function() {
+  url <- "https://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"
+
+  tarfile <- file.path(tempdir(), basename(url))
+  tardir <- sub("\\.tar\\.gz$", "", tarfile)
+  datadir <- file.path(tardir, "filtered_gene_bc_matrices", "hg19")
+  datafiles <- file.path(datadir, c("matrix.mtx", "genes.tsv", "barcodes.tsv"))
+
+  # Reuse an earlier extraction only when every expected file is present; an
+  # interrupted run can leave the directory behind without its contents.
+  if (!all(file.exists(datafiles))) {
+    if (!file.exists(tarfile)) {
+      # Download under a temporary name and move it into place only on
+      # success, so a partial transfer is never mistaken for a cached archive.
+      partfile <- paste0(tarfile, ".part")
+      on.exit(unlink(partfile), add = TRUE)
+      status <- utils::download.file(
+        url = url,
+        destfile = partfile,
+        mode = "wb" # always transfer the archive as binary
+      )
+      if (status != 0 || !file.rename(partfile, tarfile)) {
+        stop("Failed to download ", url, call. = FALSE)
+      }
+    }
+
+    # Until extraction is verified, remove the archive and any partial output
+    # on exit so a corrupt archive is not reused by the next call.
+    extracted <- FALSE
+    on.exit(
+      if (!extracted) unlink(c(tarfile, tardir), recursive = TRUE),
+      add = TRUE
+    )
+    utils::untar(tarfile, exdir = tardir)
+    if (!all(file.exists(datafiles))) {
+      stop("Failed to extract ", basename(tarfile), call. = FALSE)
+    }
+    extracted <- TRUE
+  }
+
+  mat <- Matrix::readMM(file.path(datadir, "matrix.mtx"))
+  # The .tsv files are read as tab-delimited, header-less tables; quote and
+  # comment handling are disabled so every field is read verbatim.
+  genes <- utils::read.table(
+    file = file.path(datadir, "genes.tsv"),
+    header = FALSE,
+    sep = "\t",
+    quote = "",
+    comment.char = "",
+    col.names = c("id", "gene_name"),
+    stringsAsFactors = FALSE
+  )
+  barcodes <- utils::read.table(
+    file = file.path(datadir, "barcodes.tsv"),
+    header = FALSE,
+    sep = "\t",
+    quote = "",
+    comment.char = "",
+    col.names = "id",
+    stringsAsFactors = FALSE
+  )
+  # Rows are labelled with gene ids and columns with cell barcodes
+  dimnames(mat) <- list(genes$id, barcodes$id)
+
+  object <- SeuratObject::CreateSeuratObject(counts = mat)
+  # Store the gene names as feature-level metadata on the RNA assay
+  object[["RNA"]] <- SeuratObject::AddMetaData(
+    object = object[["RNA"]],
+    metadata = genes$gene_name,
+    col.name = "gene_name"
+  )
+  object
+}
diff --git a/apis/r/man/dataset_seurat_pbmc3k.Rd b/apis/r/man/dataset_seurat_pbmc3k.Rd
new file mode 100644
index 0000000000..a888335039
--- /dev/null
+++ b/apis/r/man/dataset_seurat_pbmc3k.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasets.R
+\name{dataset_seurat_pbmc3k}
+\alias{dataset_seurat_pbmc3k}
+\title{Seurat 3k PBMCs from 10x Genomics}
+\usage{
+dataset_seurat_pbmc3k()
+}
+\value{
+a \code{\link[SeuratObject:Seurat-class]{SeuratObject::Seurat}} object
+}
+\description{
+Create a \code{\link[SeuratObject:Seurat-class]{SeuratObject::Seurat}} object containing the widely used 3k PBMCs
+dataset from 10x Genomics.
+}
+\details{
+The archive is downloaded into the session's temporary directory and
+extracted there, so repeated calls within one R session reuse the files
+already on disk. Files left behind by a failed download or extraction are
+removed so that the next call starts from a clean state.
+}
+\seealso{
+https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k
+}