aertslab · EmielGeeraerts · Jan 20, 2025 · Nov 24, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -1,4 +1,6 @@
 version: 2
+sphinx:
+  configuration: docs/conf.py
 
 build:
   os: "ubuntu-20.04"

diff --git a/README.md b/README.md
@@ -43,3 +43,7 @@ $ pip install .
 ## References
 
 [Bravo Gonzalez-Blas, C. & De Winter, S. *et al.* (2022). SCENIC+: single-cell multiomic inference of enhancers and gene regulatory networks](https://www.biorxiv.org/content/10.1101/2022.08.19.504505v1)
+
+## license
+
+The SCENIC+ [license](https://github.com/aertslab/scenicplus/blob/main/LICENCE.txt) covers [SCENIC+](https://github.com/aertslab/scenicplus), [pycisTarget](https://github.com/aertslab/pycistarget), [pycisTopic](https://github.com/aertslab/pycisTopic) and the [cisTarget dabases](https://resources.aertslab.org/cistarget/).
diff --git a/src/scenicplus/cli/commands.py b/src/scenicplus/cli/commands.py
@@ -932,6 +932,7 @@ def calculate_auc(
         eRegulons_fname: pathlib.Path,
         multiome_mudata_fname: pathlib.Path,
         out_file: pathlib.Path,
+        temp_dir: pathlib.Path,
         n_cpu: int = 1):
     """
     Calculate eRegulon enrichment scores.
@@ -944,19 +945,30 @@ def calculate_auc(
         Path to multiome MuData file.
     out_file : pathlib.Path
         Path to store output.
+    temp_dir: pathlib.Path
+        Temporary directory for parallel processing.
     n_cpu : int
         Number of parallel processes to run.
 
     """
     from scenicplus.eregulon_enrichment import score_eRegulons
+
+    if temp_dir is not None:
+        if type(temp_dir) == str:
+            temp_dir = pathlib.Path(temp_dir)
+        if not temp_dir.exists():
+            Warning(f"{temp_dir} does not exist, creating it.")
+            os.makedirs(temp_dir)
     log.info("Reading data.")
-    mdata = mudata.read(multiome_mudata_fname.__str__())
+    mdata = mudata.read(multiome_mudata_fname.__str__(), backed=True)
     eRegulons = pd.read_table(eRegulons_fname)
+
     log.info("Calculating enrichment scores.")
     gene_region_AUC = score_eRegulons(
         eRegulons=eRegulons,
-        gex_mtx=mdata["scRNA"].to_df(),
-        acc_mtx=mdata["scATAC"].to_df(),
+        gex_mtx=mdata["scRNA"],
+        acc_mtx=mdata["scATAC"],
+        temp_dir=temp_dir.__str__(),
         n_cpu=n_cpu)
     mdata_AUC = mudata.MuData(
         {

diff --git a/src/scenicplus/cli/scenicplus.py b/src/scenicplus/cli/scenicplus.py
@@ -1003,6 +1003,7 @@ def aucell(arg):
             eRegulons_fname=arg.eRegulon_fname,
             multiome_mudata_fname=arg.multiome_mudata_fname,
             out_file=arg.aucell_out_fname,
+            temp_dir=arg.temp_dir,
             n_cpu=arg.n_cpu)
     parser.set_defaults(func=aucell)
     # Required arguments
@@ -1018,6 +1019,10 @@ def aucell(arg):
         "--aucell_out_fname", dest="aucell_out_fname",
         action="store", type=pathlib.Path, required=True,
         help="Path to store enrichment scores (.h5mu).")
+    parser.add_argument(
+        "--temp_dir", dest="temp_dir",
+        action="store", type=pathlib.Path, required=True,
+        help="Path to temp dir.")
     # Optional arguments
     parser.add_argument(
         "--n_cpu", dest="n_cpu",

diff --git a/src/scenicplus/eregulon_enrichment.py b/src/scenicplus/eregulon_enrichment.py
@@ -7,6 +7,9 @@
 from typing import Literal, Dict, List, Optional
 import numpy as np
 import pandas as pd
+import anndata as ad
+import joblib
+import pathlib
 from dataclasses import dataclass
 from scenicplus.scenicplus_class import SCENICPLUS
 
@@ -79,34 +82,108 @@ def get_eRegulons_as_signatures(
         lambda x: list(set(x))).to_dict()
     return {"Gene_based": gene_signatures, "Region_based": region_signatures}
 
+def _rank_and_enrich(
+    cell_df: pd.DataFrame,
+    signatures: Dict[str, List[str]],
+    auc_threshold: float = 0.05,
+    normalize: bool = False,
+    n_cpu: int = 1) -> pd.DataFrame:
+    """
+    Compute enrichment AUC scores for signatures in a cell x feature dataframe  
+    Nb. expects a dataframe to maintain pycistopic compatibility in signature_enrichment().
+    """
+
+    ranking = rank_data(cell_df)
+    auc = signature_enrichment(
+        ranking,
+        signatures,
+        enrichment_type='gene',
+        auc_threshold=auc_threshold,
+        normalize=normalize,
+        n_cpu=1  # force single-threaded inside worker
+    )
+
+    return auc
+
+def _chunk_scoring(
+    adata: ad.AnnData,
+    signatures: Dict[str, List[str]],
+    temp_dir: pathlib.Path,
+    chunk_size: int = 1000,
+    auc_threshold: float = 0.05,
+    normalize: bool = False,
+    n_cpu: int = 1) -> pd.DataFrame:
+    """
+    Helper function that splits AnnData into chunks of cells to score in parallel.
+    N.b. that the chunks are turned into a dense dataframe
+    Increasing the chunk_size increases speed but also memory usage.
+    """
+    # loop over sets of chunk_size cells - to avoid unsparsing the full 
+    # cell x feature matrix when creating ranks during _rank_and_enrich()
+    all_cells = adata.obs_names
+    cell_chunks = [all_cells[i:i + chunk_size] for i in range(0, len(all_cells), chunk_size)]
+
+    # get signature enrichment scores in parallel
+    # concatenate in dataframe
+    AUC_list: List[auc] = joblib.Parallel(
+        n_jobs=n_cpu,
+        temp_folder=temp_dir
+    )(
+        joblib.delayed(
+            _rank_and_enrich
+        )(
+            adata[cell_chunk].to_df().copy(),
+            signatures,
+            auc_threshold=auc_threshold,
+            normalize=normalize  
+        )
+        for cell_chunk in cell_chunks
+    )
+    AUC_scores = pd.concat(AUC_list)
+    # match order of indices
+    AUC_scores = AUC_scores.loc[adata.obs_names]
+
+    return AUC_scores
+
 def score_eRegulons(
         eRegulons: pd.DataFrame,
-        gex_mtx: pd.DataFrame,
-        acc_mtx: pd.DataFrame,
+        gex_mtx: ad.AnnData,
+        acc_mtx: ad.AnnData,
+        temp_dir: pathlib.Path,
         auc_threshold: float = 0.05,
         normalize: bool = False,
-        n_cpu: int = 1
-) -> Dict[str, pd.DataFrame]:
+        chunk_size: int = 1000,
+        n_cpu: int = 1) -> Dict[str, pd.DataFrame]:
     """
     """
     eRegulon_signatures = get_eRegulons_as_signatures(eRegulons=eRegulons)
-    gex_ranking = rank_data(gex_mtx)
-    acc_ranking = rank_data(acc_mtx)
-    gex_AUC = signature_enrichment(
-        gex_ranking,
-        eRegulon_signatures["Gene_based"],
-        enrichment_type='gene',
-        auc_threshold=auc_threshold,
-        normalize=normalize,
-        n_cpu=n_cpu)
-    acc_AUC = signature_enrichment(
-        acc_ranking,
-        eRegulon_signatures["Region_based"],
-        enrichment_type='gene',
-        auc_threshold=auc_threshold,
-        normalize=normalize,
-        n_cpu=n_cpu)
-    return {"Gene_based": gex_AUC, "Region_based": acc_AUC}
+
+    # get eRegulon enrichment scores for gene expression
+    gex_AUC = _chunk_scoring(
+        adata = gex_mtx,
+        signatures = eRegulon_signatures["Gene_based"],
+        temp_dir = temp_dir,
+        chunk_size = chunk_size,
+        auc_threshold = auc_threshold,
+        normalize = normalize,
+        n_cpu = n_cpu
+    )
+
+    # get eRegulon enrichment scores for accessibility
+    acc_AUC = _chunk_scoring(
+        adata = acc_mtx,
+        signatures = eRegulon_signatures["Region_based"],
+        temp_dir = temp_dir,
+        chunk_size = chunk_size,
+        auc_threshold = auc_threshold,
+        normalize = normalize,
+        n_cpu = n_cpu
+    )
+
+    return {
+        "Gene_based": gex_AUC, 
+        "Region_based": acc_AUC
+    }
 
 def binarize_AUC(scplus_obj: SCENICPLUS,
                  auc_key: Optional[str] = 'eRegulon_AUC',

diff --git a/src/scenicplus/snakemake/Snakefile b/src/scenicplus/snakemake/Snakefile
@@ -391,13 +391,18 @@ rule AUCell_direct:
         multiome_mudata_fname=config["output_data"]["combined_GEX_ACC_mudata"],
     output:
         aucell_direct_out_fname=config["output_data"]["AUCell_direct"]
+    params:
+        temp_dir=lambda wildcards: config["params_general"]["temp_dir"]
     threads: config["params_general"]["n_cpu"]
     shell:
         """
+        echo "Snakemake is passing sample: {params.temp_dir}"
+
         scenicplus grn_inference AUCell \
             --eRegulon_fname {input.eRegulons_direct} \
             --multiome_mudata_fname {input.multiome_mudata_fname} \
             --aucell_out_fname {output.aucell_direct_out_fname} \
+            --temp_dir {params.temp_dir} \
             --n_cpu {threads}
         """
 
@@ -407,13 +412,16 @@ rule AUCell_extended:
         multiome_mudata_fname=config["output_data"]["combined_GEX_ACC_mudata"],
     output:
         aucell_extended_out_fname=config["output_data"]["AUCell_extended"]
+    params:
+        temp_dir=lambda wildcards: config["params_general"]["temp_dir"]
     threads: config["params_general"]["n_cpu"]
     shell:
         """
         scenicplus grn_inference AUCell \
             --eRegulon_fname {input.eRegulons_extended} \
             --multiome_mudata_fname {input.multiome_mudata_fname} \
             --aucell_out_fname {output.aucell_extended_out_fname} \
+            --temp_dir {params.temp_dir} \
             --n_cpu {threads}
         """