Source code for spateo.io.seqscope

"""IO functions for SeqScope technology.
"""
import os
from typing import Optional

import numpy as np
import pandas as pd
import scipy.io
from anndata import AnnData
from scipy.sparse import coo_matrix

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

from ..configuration import SKM
from ..logging import logger_manager as lm
from .utils import bin_indices, get_bin_props


[docs]def read_seqscope_as_anndata(matrix_dir: str) -> AnnData: """Read SeqScope matrix directory as AnnData. Args: matrix_dir: Path to directory containing matrix files. Returns: AnnData of barcode x genes. """ lm.main_info("Constructing count matrix.") obs = pd.read_csv(os.path.join(matrix_dir, "barcodes.tsv"), names=["barcode"]).set_index("barcode") var = pd.read_csv(os.path.join(matrix_dir, "features.tsv"), names=["gene_name", "gene_id", "library"]).set_index( "gene_id" ) X = scipy.io.mmread(os.path.join(matrix_dir, "matrix.mtx")).transpose().tocsr() return AnnData(X=X, obs=obs, var=var)
[docs]def read_seqscope_positions_as_dataframe(path: str) -> pd.DataFrame: """Read SeqScope barcode positions CSV as dataframe. Args: path: Path to file Returns: DataFrame containing barcode positions. """ dtype = { "barcode": "category", "lane": np.uint16, "tile": np.uint16, "x": np.uint32, "y": np.uint32, } df = pd.read_table(path, names=["barcode", "lane", "tile", "x", "y"], sep="\s+", dtype=dtype) return df
[docs]def read_seqscope( matrix_dir: str, positions_path: str, binsize: Optional[int] = 1, add_props: bool = True, version: Literal["seqscope"] = "seqscope", ) -> AnnData: """Read SeqScope data as AnnData. Args: matrix_dir: Directory containing matrix files (barcodes.tsv.gz, features.tsv.gz, matrix.mtx.gz) positions_path: Path to CSV containing spatial coordinates binsize: Size of pixel bins add_props: Whether or not to compute label properties, such as area, bounding box, centroid, etc. version: SeqScope technology version. Currently only used to set the scale and scale units of each unit coordinate. This may change in the future. """ if binsize is not None and abs(int(binsize)) != binsize: raise IOError("Positive integer `binsize` must be provided when `segmentation_adata` is not provided.") adata = read_seqscope_as_anndata(matrix_dir) positions = read_seqscope_positions_as_dataframe(positions_path) adata.obs = positions.set_index("barcode").loc[adata.obs_names] props = None if binsize is not None: lm.main_info(f"Using binsize={binsize}") if binsize < 2: lm.main_warning("Please consider using a larger bin size.") if binsize > 1: x_bin = bin_indices(adata.obs["x"].values, 0, binsize) y_bin = bin_indices(adata.obs["y"].values, 0, binsize) adata.obs["x"], adata.obs["y"] = x_bin, y_bin adata.obs["label"] = adata.obs["x"].astype(str) + "-" + adata.obs["y"].astype(str) if add_props: props = get_bin_props(adata.obs[["x", "y", "label"]].drop_duplicates(), binsize) adata.strings_to_categoricals() assert pd.api.types.is_categorical_dtype(adata.obs["label"]) cat = adata.obs["label"].values indicator = coo_matrix( (np.broadcast_to(True, adata.n_obs), (cat.codes, np.arange(adata.n_obs))), shape=(len(cat.categories), adata.n_obs), ) adata = AnnData( indicator @ adata.X, var=adata.var, obs=adata.obs.set_index("label").drop_duplicates().loc[cat.categories] ) if props is not None: ordered_props = props.loc[adata.obs_names] adata.obs["area"] = ordered_props["area"].values adata.obsm["spatial"] = ordered_props.filter(regex="centroid-").values adata.obsm["contour"] = ordered_props["contour"].values adata.obsm["bbox"] = ordered_props.filter(regex="bbox-").values else: adata.obsm["spatial"] = adata.obs[["x", "y"]].values scale, scale_unit = 1.0, None # if version in VERSIONS: # resolution = VERSIONS[version].resolution # scale, scale_unit = resolution.scale, resolution.unit # Set uns SKM.init_adata_type(adata, SKM.ADATA_UMI_TYPE) SKM.init_uns_pp_namespace(adata) SKM.init_uns_spatial_namespace(adata) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_BINSIZE_KEY, binsize) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_KEY, scale) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_UNIT_KEY, scale_unit) return adata