Source code for spateo.tools.cluster.leiden

from typing import Literal, Optional, Union

import anndata
import igraph
import leidenalg
import numpy as np
import scipy
from sklearn.neighbors import kneighbors_graph

from ...configuration import SKM


[docs]def distance_knn_graph(dist: np.ndarray, num_neighbors: int): """Construct a k-nearest neighbor graph from a distance matrix. Args: dist: Pairwise distance matrix num_neighbors: Number of nearest neighbors Returns: G: K-nearest neighbor graph """ n = dist.shape[0] G = igraph.Graph() G.add_vertices(n) edges = [] weights = [] for i in range(n): sorted_ind = np.argsort(dist[i, :]) for j in range(1, 1 + num_neighbors): # if i < sorted_ind[j]: edges.append((i, sorted_ind[j])) weights.append(dist[i, sorted_ind[j]]) # weights.append(1.) G.add_edges(edges) G.es["weight"] = weights return G
[docs]def embedding_knn_graph(X: np.ndarray, num_neighbors: int): """Construct a k-nearest neighbor graph from an arbitrary array, of shape [n_samples, n_features] Args: X: Embedding matrix num_neighbors: Number of nearest neighbors Returns: G: K-nearest neighbor graph """ adj = kneighbors_graph(X, num_neighbors, include_self=False, mode="distance") G = igraph.Graph.Weighted_Adjacency(adj) return G
[docs]def adj_to_igraph(adj: np.ndarray): """Convert an adjacency matrix to an igraph graph.""" G = igraph.Graph.Adjacency(adj) return G
[docs]def calculate_leiden_partition( adj: Optional[Union[scipy.sparse.spmatrix, np.ndarray]] = None, input_mat: Optional[np.ndarray] = None, num_neighbors: int = 10, graph_type: Literal["distance", "embedding"] = "distance", resolution: float = 1.0, n_iterations: int = -1, ) -> np.ndarray: """Performs Leiden clustering on a given dataset. Args: adj: Optional precomputed adjacency matrix input_mat: Optional, will be used only if 'adj' is not given. The input data, will be interepreted as either a distance matrix (if :param `graph_type` is "distance" or an embedding matrix (if :param `graph_type` is "embedding") num_neighbors: Only used if 'adj' is not given- the number of nearest neighbors for constructing the graph graph_type: Only used if 'adj' is not given- specifies the input type, either 'distance' or 'embedding' resolution: The resolution parameter for the Leiden algorithm n_iterations: The number of iterations for the Leiden algorithm (-1 for unlimited iterations) Returns: clusters: Array containing cluster assignments """ if adj is None and input_mat is None: raise ValueError("Either `adj` or `input_mat` must be specified") if adj is not None: if isinstance(adj, np.ndarray): G = adj_to_igraph(adj.tolist()) else: G = igraph.Graph(n=adj.shape[0]) for i, j in zip(*adj.nonzero()): G.add_edge(i, j) else: if graph_type == "distance": G = distance_knn_graph(input_mat, num_neighbors) elif graph_type == "embedding": G = embedding_knn_graph(input_mat, num_neighbors) partition_kwargs = {"resolution_parameter": resolution, "seed": 888, "n_iterations": n_iterations} partition = leidenalg.find_partition(G, leidenalg.RBConfigurationVertexPartition, **partition_kwargs) clusters = np.array(partition.membership, dtype=int) return clusters