Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ optional-dependencies.gpu = [
"cupy-cuda12x",
"rapids-singlecell",
]
optional-dependencies.vae = [
"torch>=2.0",
]
optional-dependencies.test = [ "coverage", "pytest" ]
# https://docs.pypi.org/project_metadata/#project-urls
urls.Documentation = "https://tfmindi.readthedocs.io/"
Expand Down
155 changes: 126 additions & 29 deletions src/tfmindi/tl/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@


def cluster_seqlets(
adata: AnnData, resolution: float = 3.0, pca_svd_solver: str | None = None, *, recompute: bool = False
adata: AnnData,
resolution: float = 3.0,
pca_svd_solver: str | None = None,
*,
reduction: str = "pca",
vae_kwargs: dict | None = None,
recompute: bool = False,
) -> None:
"""
Perform complete clustering workflow including dimensionality reduction, clustering, and functional annotation.

This function performs the following steps:
1. PCA on similarity matrix (GPU-accelerated if available) - skipped if already present
1. Dimensionality reduction on similarity matrix - skipped if already present (PCA or VAE, see ``reduction``)
2. Compute neighborhood graph (GPU-accelerated if available) - skipped if already present
3. Generate t-SNE embedding (GPU-accelerated if available) - skipped if already present
4. Leiden clustering at specified resolution (GPU-accelerated if available) - always computed
Expand All @@ -29,13 +35,15 @@ def cluster_seqlets(
7. Map leiden clusters to consensus DBD annotations

Performance Optimization:
By default, PCA, neighborhood graph, and t-SNE computations are reused if already present
in the AnnData object. This allows fast re-clustering with different resolutions without
recomputing expensive preprocessing steps.
By default, dimensionality reduction, neighborhood graph, and t-SNE computations are reused
if already present in the AnnData object. This allows fast re-clustering with different
resolutions without recomputing expensive preprocessing steps.

GPU Acceleration:
When tfmindi[gpu] is installed and CUDA is available, this function automatically uses
RAPIDS-accelerated implementations. The API remains identical between CPU and GPU versions.
RAPIDS-accelerated implementations for PCA, neighbors, t-SNE, and Leiden.
The API remains identical between CPU and GPU versions.
Note: of the two reduction methods, only PCA uses RAPIDS GPU acceleration; VAE training uses PyTorch's own device handling (see ``device`` in ``vae_kwargs``).

Parameters
----------
Expand All @@ -46,14 +54,49 @@ def cluster_seqlets(
Clustering resolution for Leiden algorithm (default: 3.0)
pca_svd_solver
svd_solver used for calculating pca see: https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.pca.html#scanpy.pp.pca (default: None, i.e. choose automatically).
Ignored when ``reduction="vae"``.
reduction
Dimensionality reduction method to use before building the neighbour graph.
Must be one of:

- ``"pca"`` *(default)* - linear PCA via scanpy, optionally GPU-accelerated.
Fast and parameter-free; recommended for most use cases.
- ``"vae"`` - trains a beta-VAE (non-linear) and stores posterior means in
``adata.obsm["X_vae"]``. Captures non-linear structure that PCA can miss,
useful when clusters are poorly separated with PCA.
Requires PyTorch (``pip install torch``).

The ``recompute`` flag applies to both methods: if the target key
(``X_pca`` or ``X_vae``) is already present in ``adata.obsm`` and
``recompute=False``, the reduction step is skipped.
vae_kwargs
Extra keyword arguments forwarded to
:func:`tfmindi.tl.vae.fit_vae_latents`. Only used when
``reduction="vae"``. Ignored otherwise.

Commonly tuned options::

vae_kwargs = dict(
latent_dim = 10, # bottleneck size; analogous to n_comps for PCA
hidden = 512, # MLP hidden layer width
n_layers = 2, # encoder/decoder depth
epochs = 50, # training epochs; increase to 100-200 for large data
beta = 0.1, # KL weight; higher = more regularised latent space
batch_size = 4096, # reduce if GPU runs out of memory
num_workers = 0, # DataLoader workers; 0 is safest in notebooks
device = "auto", # "cpu", "cuda", or "auto"
)

recompute
If False (default), reuse existing PCA and neighborhood graph computations if available.
If True, always recompute PCA, neighbors, and t-SNE from scratch.
If False (default), reuse existing dimensionality reduction and neighborhood
graph computations if available.
If True, always recompute from scratch.

Returns
-------
Modifies adata in-place with cluster assignments and annotations:
- adata.obsm["X_pca"]: PCA coordinates
- adata.obsm["X_pca"]: PCA coordinates (when ``reduction="pca"``)
- adata.obsm["X_vae"]: VAE latent coordinates (when ``reduction="vae"``)
- adata.obsm["X_tsne"]: t-SNE coordinates
- adata.obs["leiden"]: Cluster assignments
- adata.obs["mean_contrib"]: Mean contribution scores per seqlet
Expand All @@ -75,58 +118,112 @@ def cluster_seqlets(
>>>
>>> # Force recomputation of all steps
>>> tm.tl.cluster_seqlets(adata, resolution=3.0, recompute=True)
>>>
>>> # VAE-based clustering (non-linear; requires PyTorch)
>>> tm.tl.cluster_seqlets(
... adata,
... reduction="vae",
... vae_kwargs=dict(latent_dim=10, epochs=100, beta=0.5),
... resolution=3.0,
... )
>>> print(f"Found {adata.obs['leiden'].nunique()} clusters")
"""
if adata.X is None:
raise ValueError("adata.X is None. Similarity matrix is required for motif assignment.")

_valid_reductions = ("pca", "vae")
if reduction not in _valid_reductions:
raise ValueError(f"reduction must be one of {_valid_reductions!r}, got {reduction!r}.")

# Determine if we should use GPU at runtime
_using_gpu = get_backend() == "gpu" and is_gpu_available()
if _using_gpu:
import rapids_singlecell as rsc # type: ignore
backend_info = "GPU-accelerated" if _using_gpu else "CPU"
print(f"Using {backend_info} backend for clustering operations...")

# Check if PCA already exists and we don't need to recompute
if "X_pca" in adata.obsm and not recompute:
print("Reusing existing PCA...")
else:
print("Computing PCA...")
if _using_gpu:
try:
rsc.pp.pca(adata)
except Exception as e: # noqa: BLE001
warnings.warn(f"GPU PCA failed: {e}. Falling back to CPU.", UserWarning, stacklevel=2)
# ------------------------------------------------------------------
# Dimensionality reduction: PCA (default) or VAE
# ------------------------------------------------------------------
if reduction == "pca":
# Check if PCA already exists and we don't need to recompute
if "X_pca" in adata.obsm and not recompute:
print("Reusing existing PCA...")
else:
print("Computing PCA...")
if _using_gpu:
try:
rsc.pp.pca(adata)
except Exception as e: # noqa: BLE001
warnings.warn(f"GPU PCA failed: {e}. Falling back to CPU.", UserWarning, stacklevel=2)
sc.tl.pca(adata, svd_solver=pca_svd_solver)
else:
sc.tl.pca(adata, svd_solver=pca_svd_solver)
_reduction_rep = "X_pca"

else: # reduction == "vae"
if "X_vae" in adata.obsm and not recompute:
print("Reusing existing VAE embedding...")
else:
sc.tl.pca(adata, svd_solver=pca_svd_solver)
# Lazy import: torch is only required when reduction="vae"
from tfmindi.tl.vae import fit_vae_latents # noqa: PLC0415

_kw: dict = dict(
latent_dim=10,
hidden=512,
n_layers=2,
beta=0.1,
lr=1e-4,
epochs=50,
batch_size=4096,
num_workers=0,
use_amp=True,
dropout=0.1,
n_sample_stats=20_000,
device="auto",
verbose=True,
)
if vae_kwargs:
_kw.update(vae_kwargs)

print(
f"Training VAE (latent_dim={_kw['latent_dim']}, epochs={_kw['epochs']}, "
f"beta={_kw['beta']}, device={_kw['device']})..."
)
adata.obsm["X_vae"] = fit_vae_latents(adata.X, **_kw)
_reduction_rep = "X_vae"

# ------------------------------------------------------------------
# Neighbourhood graph (uses whichever reduction was computed above)
# ------------------------------------------------------------------

# Check if neighborhood graph already exists and we don't need to recompute
if "connectivities" in adata.obsp and "distances" in adata.obsp and not recompute:
print("Reusing existing neighborhood graph...")
else:
print("Computing neighborhood graph...")
print(f"Computing neighborhood graph (use_rep='{_reduction_rep}')...")
if _using_gpu:
try:
rsc.pp.neighbors(adata, use_rep="X_pca")
rsc.pp.neighbors(adata, use_rep=_reduction_rep)
except Exception as e: # noqa: BLE001
warnings.warn(f"GPU neighbors failed: {e}. Falling back to CPU.", UserWarning, stacklevel=2)
sc.pp.neighbors(adata, use_rep="X_pca")
sc.pp.neighbors(adata, use_rep=_reduction_rep)
else:
sc.pp.neighbors(adata, use_rep="X_pca")
sc.pp.neighbors(adata, use_rep=_reduction_rep)

# Check if t-SNE already exists and we don't need to recompute
if "X_tsne" in adata.obsm and not recompute:
print("Reusing existing t-SNE embedding...")
else:
print("Computing t-SNE embedding...")
print(f"Computing t-SNE embedding (use_rep='{_reduction_rep}')...")
if _using_gpu:
try:
rsc.tl.tsne(adata, use_rep="X_pca")
rsc.tl.tsne(adata, use_rep=_reduction_rep)
except Exception as e: # noqa: BLE001
warnings.warn(f"GPU t-SNE failed: {e}. Falling back to CPU.", UserWarning, stacklevel=2)
sc.tl.tsne(adata, use_rep="X_pca")
sc.tl.tsne(adata, use_rep=_reduction_rep)
else:
sc.tl.tsne(adata, use_rep="X_pca")
sc.tl.tsne(adata, use_rep=_reduction_rep)

print(f"Performing Leiden clustering with resolution {resolution}...")
if _using_gpu:
Expand Down Expand Up @@ -244,4 +341,4 @@ def get_dbd_min_pval(df: pd.Series) -> str:

# Generate colors for cluster DBD annotations
if "cluster_dbd" in adata.obs.columns:
ensure_colors(adata, "cluster_dbd", cmap="tab10")
ensure_colors(adata, "cluster_dbd", cmap="tab10")
Loading
Loading