Source code for spateo.io.seqfish

"""IO functions for seqFISH-PLUS technology.
"""
import numpy as np
import pandas as pd
from anndata import AnnData
from scipy.sparse import csr_matrix

from ..configuration import SKM
from ..logging import logger_manager as lm


def read_seqfish_meta_as_dataframe(
    path: str,
    fov_offset: pd.DataFrame = None,
    accumulate_x: bool = False,
    accumulate_y: bool = False,
) -> pd.DataFrame:
    """Read a seqFISH cell centroid locations file.

    Args:
        path: Path to the CSV file. It must provide the columns
            `Field of View`, `Cell ID`, `X`, `Y` and `Region`.
        fov_offset: Optional dataframe holding the x/y offset of each fov
            (field of view), for example
            {'fov': [fov_1, ..], 'x_offset': [x_offset_1, ..], 'y_offset': [y_offset_1, ..]}.
            The dataframe is never modified; rows are processed in positional
            order regardless of its index. Cells whose fov is not listed are
            left unshifted.
        accumulate_x: If True, the x offset applied for a row of `fov_offset`
            is the running sum of `x_offset` up to and including that row.
        accumulate_y: Same as `accumulate_x`, for `y_offset`.

    Returns:
        Pandas DataFrame with the following columns.
            * `fov`: ID of field of view
            * `cell_id`: ID of cell in each fov
            * `x`, `y`: X, Y coordinates of the cell centroids (offsets applied)
            * `region`: sample region (tissue)
            * `spatial`: `[int(x), int(y)]` list for every cell
    """
    dtype = {
        "Field of View": np.uint8,
        "Cell ID": np.uint16,
        "X": np.float32,
        "Y": np.float32,
        "Region": "category",
    }
    df_loc = pd.read_csv(path, dtype=dtype)

    rename = {
        "Field of View": "fov",
        "Cell ID": "cell_id",
        "X": "x",
        "Y": "y",
        "Region": "region",
    }
    df_loc = df_loc.rename(columns=rename)

    if fov_offset is not None:
        # Work on a private copy: accumulating in place would corrupt the
        # caller's dataframe and double-accumulate on a second call.
        offsets = fov_offset.copy()
        if accumulate_x:
            offsets["x_offset"] = offsets["x_offset"].cumsum()
        if accumulate_y:
            offsets["y_offset"] = offsets["y_offset"].cumsum()

        for fov, x_shift, y_shift in zip(offsets["fov"], offsets["x_offset"], offsets["y_offset"]):
            in_fov = df_loc["fov"] == fov
            # `.loc` writes into df_loc itself; chained indexing
            # (df["x"][mask] = ...) is not guaranteed to and is a no-op under
            # pandas Copy-on-Write. Plain Python floats keep x/y as float32.
            df_loc.loc[in_fov, "x"] += float(x_shift)
            df_loc.loc[in_fov, "y"] += float(y_shift)

    # Integer-truncated coordinate pairs, one per cell.
    df_loc["spatial"] = [[int(x), int(y)] for x, y in zip(df_loc["x"], df_loc["y"])]
    return df_loc


def read_seqfish(
    path: str,
    meta_path: str,
    fov_offset: pd.DataFrame = None,
    accumulate_x: bool = False,
    accumulate_y: bool = False,
) -> AnnData:
    """Read seqFISH data as AnnData.

    Args:
        path: Path to seqFISH digital expression matrix CSV. Every column is
            read as `uint16`; rows become observations and columns variables.
        meta_path: Path to CSV file containing cell centroid locations. Its
            rows must correspond one-to-one, in order, to the rows of `path`.
        fov_offset: Optional dataframe containing the offset of each fov, for
            example
            {'fov': [fov_1, ..], 'x_offset': [x_offset_1, ..], 'y_offset': [y_offset_1, ..]}
        accumulate_x: whether to accumulate x_offset
        accumulate_y: whether to accumulate y_offset

    Returns:
        AnnData whose `X` is the sparse count matrix, whose `obs` carries
        `fov`, `cell_id` and `region`, and whose `obsm["spatial"]` holds the
        integer x/y centroid of each cell. The spatial scale is set to 1.0
        with no unit.

    Raises:
        ValueError: If the expression matrix and the metadata file do not
            have the same number of rows.
    """
    lm.main_info("Constructing count matrix.")
    df = pd.read_csv(path, dtype=np.uint16)
    df_loc = read_seqfish_meta_as_dataframe(meta_path, fov_offset, accumulate_x, accumulate_y)

    # Fail early with an explicit message rather than an opaque pandas
    # length-mismatch error when the per-cell annotations are attached below.
    if df.shape[0] != df_loc.shape[0]:
        raise ValueError(
            f"Expression matrix has {df.shape[0]} rows but metadata has {df_loc.shape[0]} rows; "
            "they must describe the same cells in the same order."
        )

    X = csr_matrix(df.to_numpy())
    # String labels up front, so AnnData does not have to convert an integer
    # index implicitly.
    obs = pd.DataFrame(index=df.index.astype(str))
    var = pd.DataFrame(index=df.columns.to_list())
    adata = AnnData(X=X, obs=obs, var=var)

    for column in ("fov", "cell_id", "region"):
        adata.obs[column] = df_loc[column].to_list()
    adata.obsm["spatial"] = np.array(df_loc["spatial"].to_list())

    scale, scale_unit = 1.0, None

    # Set uns
    SKM.init_adata_type(adata, SKM.ADATA_UMI_TYPE)
    SKM.init_uns_pp_namespace(adata)
    SKM.init_uns_spatial_namespace(adata)
    SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_KEY, scale)
    SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_UNIT_KEY, scale_unit)
    return adata