# Source code for spateo.preprocessing.aggregate
"""
Aggregate buckets of AnnData object by binning.
"""
import numpy as np
import pandas as pd
import scipy
from anndata import AnnData
from ..configuration import SKM
from ..logging import logger_manager as lm
@SKM.check_adata_is_type(SKM.ADATA_UMI_TYPE, "adata")
def bin_adata(
    adata: AnnData,
    bin_size: int = 1,
    coords_key: str = "spatial",
) -> AnnData:
    """Aggregate cell-based AnnData by bin size.

    Spatial coordinates are floor-divided by ``bin_size``; all observations
    that land on the same binned (x, y) coordinate are summed into a single
    bucket.

    Args:
        adata: Input AnnData object. It is not modified.
        bin_size: Shrinking factor to be applied to spatial coordinates; the size of this factor dictates the size of
            the regions that will be combined into one pseudo-cell (larger -> generally higher number of cells in
            each bin). Must be positive.
        coords_key: Key in .obsm where spatial coordinates are stored. The binned coordinates are stored under the
            same key in the returned object.

    Returns:
        adata_binned: New AnnData object with one observation per occupied bin. Observations are named
            ``"<x>_<y>"`` after their bin coordinates, ``.X`` holds the per-bin sums, and ``.uns["__type"]`` is
            set to ``"UMI"``.

    Raises:
        ValueError: If ``bin_size`` is not positive.
    """
    # Imported locally: `issparse` lives in `scipy.sparse`, not in the top-level `scipy` namespace.
    from scipy.sparse import issparse

    if bin_size <= 0:
        raise ValueError(f"`bin_size` must be a positive number, got {bin_size}.")

    # Work on a local array instead of copying the whole AnnData; only the coordinates, X and var_names are read.
    binned_coords = (np.asarray(adata.obsm[coords_key]) // bin_size).astype(np.int32)

    # groupby needs a dense table; `.toarray()` is the supported spelling of the deprecated `.A` shorthand.
    dense_x = adata.X.toarray() if issparse(adata.X) else adata.X
    df = pd.DataFrame(dense_x, columns=adata.var_names)

    # NOTE(review): a variable literally named "x" or "y" would collide with these helper columns -- confirm that
    # cannot happen for the datasets this is used on.
    df[["x", "y"]] = binned_coords
    binned = df.groupby(by=["x", "y"]).sum()

    adata_binned = AnnData(binned)
    adata_binned.uns["__type"] = "UMI"

    # Each index entry is an (x, y) tuple identifying one occupied bin.
    bin_keys = binned.index.to_list()
    adata_binned.obs_names = [f"{key[0]}_{key[1]}" for key in bin_keys]
    adata_binned.obsm[coords_key] = np.array([list(key) for key in bin_keys], dtype=np.float64)
    return adata_binned