Source code for spateo.io.slideseq

"""IO functions for Slide-seq technology.
"""
from typing import NamedTuple, Optional

import numpy as np
import pandas as pd
from anndata import AnnData
from scipy.sparse import csr_matrix
from typing_extensions import Literal

from ..configuration import SKM
from ..logging import logger_manager as lm
from .utils import bin_indices, get_bin_props

try:
    import ngs_tools as ngs

[docs] VERSIONS = { "slide2": ngs.chemistry.get_chemistry("Slide-seqV2").resolution, }
except ModuleNotFoundError: class SpatialResolution(NamedTuple): scale: float = 1.0 unit: Optional[Literal["nm", "um", "mm"]] = None VERSIONS = {"slide2": SpatialResolution(10.0, "um")}
[docs]def read_slideseq_as_dataframe(path: str) -> pd.DataFrame: """Read a Slide-seq digital expression matrix as long-format pandas DataFrame. Args: path: Path to file Return: Pandas DataFrame with the the following standardized column names. * `barcode`: Bead barcode * `gene`: Gene name/ID * `count`: Observed UMIs. Zeros are filtered. """ df = pd.read_csv(path, sep="\t").rename(columns={"GENE": "gene"}) df = df.melt(id_vars="gene", var_name="barcode", value_name="count") df = df[df["count"] > 0] df["gene"] = df["gene"].astype("category") df["barcode"] = df["barcode"].astype("category") df["count"] = df["count"].astype(np.uint16) return df
[docs]def read_slideseq_beads_as_dataframe(path: str) -> pd.DataFrame: """Read a Slide-seq bead locations file. Args: path: Path to file Return: Pandas DataFrame with the following columns. * `barcode`: Bead barcode * `x`, `y`: X, Y coordinates """ skiprows = None with ngs.utils.open_as_text(path, "r") as f: line = f.readline() if line.startswith("barcode"): skiprows = 1 df = pd.read_csv(path, skiprows=skiprows, names=["barcode", "x", "y"], dtype={"barcode": "category"}) return df
[docs]def read_slideseq( path: str, beads_path: str, binsize: Optional[int] = None, version: Literal["slide2"] = "slide2" ) -> AnnData: """Read Slide-seq data as AnnData. Args: path: Path to Slide-seq digital expression matrix CSV. beads_path: Path to CSV file containing bead locations. binsize: Size of pixel bins. version: Slideseq technology version. Currently only used to set the scale and scale units of each unit coordinate. This may change in the future. """ data = read_slideseq_as_dataframe(path) beads = read_slideseq_beads_as_dataframe(beads_path) data = pd.merge(data, beads, on="barcode") if binsize is not None: lm.main_info(f"Using binsize={binsize}") x_bin = bin_indices(data["x"].values, 0, binsize) y_bin = bin_indices(data["y"].values, 0, binsize) data["x"], data["y"] = x_bin, y_bin data["label"] = data["x"].astype(str) + "-" + data["y"].astype(str) props = get_bin_props(data[["x", "y", "label"]].drop_duplicates(), binsize) else: data.rename(columns={"barcode": "label"}, inplace=True) props = ( data[["x", "y", "label"]] .drop_duplicates() .set_index("label") .rename({"x": "centroid-0", "y": "centroid-1"}) ) uniq_gene = sorted(data["gene"].unique()) uniq_cell = sorted(data["label"].unique()) shape = (len(uniq_cell), len(uniq_gene)) cell_dict = dict(zip(uniq_cell, range(len(uniq_cell)))) gene_dict = dict(zip(uniq_gene, range(len(uniq_gene)))) x_ind = data["label"].map(cell_dict).astype(int).values y_ind = data["gene"].map(gene_dict).astype(int).values lm.main_info("Constructing count matrix.") X = csr_matrix((data["count"].values, (x_ind, y_ind)), shape=shape) obs = pd.DataFrame(index=uniq_cell) var = pd.DataFrame(index=uniq_gene) adata = AnnData(X=X, obs=obs, var=var) ordered_props = props.loc[adata.obs_names] adata.obsm["spatial"] = ordered_props.filter(regex="centroid-").values scale, scale_unit = 1.0, None if version in VERSIONS: resolution = VERSIONS[version] scale, scale_unit = resolution.scale, resolution.unit # Set uns SKM.init_adata_type(adata, SKM.ADATA_UMI_TYPE) SKM.init_uns_pp_namespace(adata) SKM.init_uns_spatial_namespace(adata) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_BINSIZE_KEY, binsize) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_KEY, scale) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_UNIT_KEY, scale_unit) return adata