Source code for spateo.tdr.interpolations.interpolation_dl

from typing import Optional, Union

import numpy as np
import pandas as pd
from anndata import AnnData
from numpy import ndarray
from scipy.sparse import issparse

from ...logging import logger_manager as lm
from .interpolation_deeplearn import DataSampler, DeepInterpolation, interpolation_nn


def deep_intepretation(
    source_adata: AnnData,
    target_points: Optional[ndarray] = None,
    keys: Optional[Union[str, list]] = None,
    spatial_key: str = "spatial",
    layer: str = "X",
    max_iter: int = 1000,
    data_batch_size: int = 2000,
    autoencoder_batch_size: int = 50,
    data_lr: float = 1e-4,
    autoencoder_lr: float = 1e-4,
    **kwargs,
) -> AnnData:
    """Learn a continuous mapping from space to gene expression pattern with a deep neural network model.

    Args:
        source_adata: AnnData object that contains spatial (numpy.ndarray) in the `obsm` attribute.
        target_points: The spatial coordinates of the new data points at which expression will be predicted.
        keys: Gene names (in ``.var_names``) and/or column names in the ``obs`` attribute whose interpolated
            expression across space needs to be learned.
        spatial_key: The key in ``.obsm`` that corresponds to the spatial coordinate of each bucket.
        layer: If ``'X'``, uses ``.X``, otherwise uses the representation given by ``.layers[layer]``.
        max_iter: The maximum number of iterations over which the network will be trained.
        data_batch_size: The size of the data sample batches to be generated in each iteration.
        autoencoder_batch_size: The size of the auto-encoder training batches to be generated in each
            iteration. Must be no greater than data_batch_size.
        data_lr: The learning rate for training the network.
        autoencoder_lr: The learning rate for training the auto-encoder. Has no effect if network_dim
            equals data_dim.
        **kwargs: Additional parameters that will be passed to the training step of the deep neural net.

    Returns:
        interp_adata: An AnnData object with the interpolated expression.
    """
    # Inference
    source_adata = source_adata.copy()
    source_adata.X = source_adata.X if layer == "X" else source_adata.layers[layer]

    source_spatial_data = source_adata.obsm[spatial_key]

    # Assemble the training targets: columns from ``.obs`` (obs_keys) and/or
    # genes from ``.var_names`` (var_keys), stacked column-wise.
    info_data = np.ones(shape=(source_spatial_data.shape[0], 1))
    assert keys is not None, "`keys` cannot be None."
    keys = [keys] if isinstance(keys, str) else keys

    obs_keys = [key for key in keys if key in source_adata.obs.keys()]
    if len(obs_keys) != 0:
        obs_data = np.asarray(source_adata.obs[obs_keys].values)
        info_data = np.c_[info_data, obs_data]

    var_keys = [key for key in keys if key in source_adata.var_names.tolist()]
    if len(var_keys) != 0:
        var_data = source_adata[:, var_keys].X
        if issparse(var_data):
            var_data = var_data.toarray()
        info_data = np.c_[info_data, var_data]

    # Drop the placeholder column used to initialize info_data.
    info_data = info_data[:, 1:]

    data_dict = {"X": source_spatial_data, "Y": info_data}

    velocity_data_sampler = DataSampler(data=data_dict, normalize_data=False)

    NN_model = DeepInterpolation(
        model=interpolation_nn,
        data_sampler=velocity_data_sampler,
        enforce_positivity=False,
    )

    NN_model.train(
        max_iter=max_iter,
        data_batch_size=data_batch_size,
        autoencoder_batch_size=autoencoder_batch_size,
        data_lr=data_lr,
        autoencoder_lr=autoencoder_lr,
        **kwargs,
    )

    # Interpolation: predict the obs/var values at the target coordinates.
    target_info_data = NN_model.predict(input_x=target_points)

    lm.main_info("Creating an adata object with the interpolated expression...")
    if len(obs_keys) != 0:
        obs_data = target_info_data[:, : len(obs_keys)]
        obs_data = pd.DataFrame(obs_data, columns=obs_keys)
    if len(var_keys) != 0:
        X = target_info_data[:, len(obs_keys) :]
        var_data = pd.DataFrame(index=var_keys)

    interp_adata = AnnData(
        X=X if len(var_keys) != 0 else None,
        obs=obs_data if len(obs_keys) != 0 else None,
        obsm={spatial_key: np.asarray(target_points)},
        var=var_data if len(var_keys) != 0 else None,
    )

    lm.main_finish_progress(progress_name="DeepLearnInterpolation")
    return interp_adata
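

A minimal usage sketch (illustrative, not part of the module): it assumes an existing ``adata`` object whose spatial coordinates live in ``.obsm["spatial"]`` and which contains a gene named ``"gene_a"``; both names, as well as the randomly sampled target grid, are hypothetical placeholders.

import numpy as np
from spateo.tdr.interpolations.interpolation_dl import deep_intepretation

# `adata` is a hypothetical AnnData with coordinates in .obsm["spatial"].
coords = adata.obsm["spatial"]

# Sample new target points uniformly inside the bounding box of the observed
# coordinates (works for 2D or 3D coordinates alike).
rng = np.random.default_rng(0)
target_points = rng.uniform(
    low=coords.min(axis=0),
    high=coords.max(axis=0),
    size=(5000, coords.shape[1]),
)

interp_adata = deep_intepretation(
    source_adata=adata,
    target_points=target_points,
    keys=["gene_a"],  # may mix gene names and .obs column names
    spatial_key="spatial",
    max_iter=1000,
)
# Genes from `keys` end up in interp_adata.X, .obs columns from `keys` in
# interp_adata.obs, and the new coordinates in interp_adata.obsm["spatial"].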