Source code for spateo.tools.spatial_correlation

from typing import Optional, Sequence, Union

import numpy as np
import pandas as pd
from anndata import AnnData
from esda.moran import Moran_BV, Moran_Local_BV
from libpysal.weights import WSP

from .find_neighbors import neighbors


[docs]def spatial_bv_moran_obs_genes( adata: AnnData, obs_key: str, connectivity_key: str = "spatial_connectivities", genes: Union[str, int, Sequence[str], Sequence[int], None] = None, n_neighbors: int = 10, mode: str = "moran", transformation: str = "r", permutations: Optional[int] = 999, copy: bool = False, ) -> Optional[pd.DataFrame]: """ Calculate global bivariate Moran's I between a spatial variable and gene expression Parameters ---------- adata AnnData object containing spatial data obs_key Key in `adata.obs` for the variable connectivity_key Key in `adata.obsp` for spatial connectivity matrix (default: 'spatial_connectivities') genes Genes to calculate (names or indices). If None, use all genes. mode Spatial correlation mode (only 'moran' supported) transformation Weight transformation method ('r' for row-standardization) permutations Number of permutations for significance testing copy Return a DataFrame instead of storing in AnnData Returns ------- If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys: I : float value of bivariate Moran's I sim : array (if permutations>0) vector of I values for permuted samples p_sim : float (if permutations>0) p-value based on permutations (one-sided) null: spatial randomness alternative: the observed I is extreme it is either extremely high or extremely low z_sim : array (if permutations>0) standardized I based on permutations p_z_sim : float (if permutations>0) p-value based on standard normal approximation from permutations Otherwise, modifies the ``adata`` with the following key: - :attr:`anndata.AnnData.uns` ``['{obs_key}_gene_bv_moranI']`` - the above mentioned dataframe``. """ # Validate inputs if mode != "moran": raise ValueError(f"Unsupported mode: {mode}. Only 'moran' is currently supported") if obs_key not in adata.obs: raise KeyError(f"'{obs_key}' not found in adata.obs") if connectivity_key not in adata.obsp: neighbors( adata, basis="spatial", spatial_key="spatial", n_neighbors_method="ball_tree", n_neighbors=n_neighbors, ) connectivity_key = "spatial_connectivities" # Extract target variable and spatial weights y = np.array(adata.obs[obs_key].values, dtype=np.float64) spatial_weights = adata.obsp[connectivity_key] wsp = WSP(spatial_weights) w = wsp.to_W() # Identify genes to process if genes is None: gene_names = adata.var_names.tolist() gene_indices = range(adata.n_vars) elif isinstance(genes, (str, int)): gene_names = [genes] if isinstance(genes, str) else [adata.var_names[genes]] gene_indices = [adata.var_names.get_loc(genes)] if isinstance(genes, str) else [genes] else: gene_names = [] gene_indices = [] for gene in genes: if isinstance(gene, str): gene_names.append(gene) gene_indices.append(adata.var_names.get_loc(gene)) else: gene_names.append(adata.var_names[gene]) gene_indices.append(gene) # Prepare results storage results = { "I": [], } if permutations is not None: results.update( { "EI_sim": [], "pval_sim": [], "pval_z_sim": [], "z_sim": [], } ) # Process each gene for idx in gene_indices: # Extract gene expression x = adata.X[:, idx] if hasattr(x, "toarray"): x = x.toarray().flatten() x = x.astype(np.float64) # Calculate bivariate Moran's I moran_bv = Moran_BV( x, y, w, transformation=transformation, permutations=permutations, ) # Store results results["I"].append(moran_bv.I) if permutations is not None: results["EI_sim"].append(moran_bv.EI_sim) results["pval_sim"].append(moran_bv.p_sim) results["pval_z_sim"].append(moran_bv.p_z_sim) results["z_sim"].append(moran_bv.z_sim) # Create results DataFrame df = pd.DataFrame(results, index=gene_names) # Store or return results if copy: return df else: adata.uns[f"{obs_key}_gene_bv_moranI"] = df return None
[docs]def spatial_bv_local_moran( adata: AnnData, feature1_key: str, feature2_key: str, connectivity_key: str = "spatial_connectivities", n_neighbors: int = 10, mode: str = "moran", transformation: str = "r", permutations: Optional[int] = 999, copy: bool = False, ) -> Optional[pd.DataFrame]: """ Calculate global bivariate Moran's I between a spatial variable and gene expression Parameters ---------- adata AnnData object containing spatial data feature1_key Key in `adata.obs` for the first variable or gene_name feature2_key Key in `adata.obs` for the seconda variable or gene_name connectivity_key Key in `adata.obsp` for spatial connectivity matrix (default: 'spatial_connectivities') mode Spatial correlation mode (only 'moran' supported) transformation Weight transformation method ('r' for row-standardization) permutations Number of permutations for significance testing copy Return a DataFrame instead of storing in AnnData Returns ------- If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys: I : float value of bivariate Moran's I q : array (if permutations>0) values indicate quandrant location 1 HH, 2 LH, 3 LL, 4 HL sim : array (if permutations>0) vector of I values for permuted samples p_sim : float (if permutations>0) p-value based on permutations (one-sided) null: spatial randomness alternative: the observed I is extreme it is either extremely high or extremely low z_sim : array (if permutations>0) standardized I based on permutations p_z_sim : float (if permutations>0) p-value based on standard normal approximation from permutations Otherwise, modifies the ``adata`` with the following key: - :attr:`anndata.AnnData.uns` ``['{feature1_key}_{feature2_key}_bv_local_moranI']`` - the above mentioned dataframe``. """ # Validate inputs if mode != "moran": raise ValueError(f"Unsupported mode: {mode}. Only 'moran' is currently supported") if feature1_key not in adata.obs and feature1_key not in adata.var_names: raise KeyError(f"'{feature1_key}' not found in adata.obs and a gene name") if feature2_key not in adata.obs and feature2_key not in adata.var_names: raise KeyError(f"'{feature2_key}' not found in adata.obs and a gene name") if connectivity_key not in adata.obsp: neighbors( adata, basis="spatial", spatial_key="spatial", n_neighbors_method="ball_tree", n_neighbors=n_neighbors, ) connectivity_key = "spatial_connectivities" # Extract target variable and spatial weights if feature1_key in adata.obs: x = np.array(adata.obs[feature1_key].values, dtype=np.float64) else: idx = adata.var_names.get_loc(feature1_key) x = adata.X[:, idx] if hasattr(x, "toarray"): x = x.toarray() x = x.squeeze().astype(np.float64) if feature2_key in adata.obs: y = np.array(adata.obs[feature2_key].values, dtype=np.float64) else: idx = adata.var_names.get_loc(feature2_key) print(hasattr(adata.X[:, idx], "toarray")) y = adata.X[:, idx] if hasattr(y, "toarray"): y = y.toarray() y = y.squeeze().astype(np.float64) spatial_weights = adata.obsp[connectivity_key] wsp = WSP(spatial_weights) w = wsp.to_W() local_moran_bv = Moran_Local_BV(x, y, w, transformation=transformation, permutations=permutations) df = pd.DataFrame(index=adata.obs_names) df["I"] = local_moran_bv.Is if permutations is not None: df["q"] = local_moran_bv.q df["EI_sim"] = local_moran_bv.EI_sim df["pval_sim"] = local_moran_bv.p_sim df["pval_z_sim"] = local_moran_bv.p_z_sim df["z_sim"] = local_moran_bv.z_sim # Store or return results if copy: return df else: adata.uns[f"{feature1_key}_{feature2_key}_bv_local_moranI"] = df return None