Source code for spateo.io.tenx

"""IO functions for 10x Visium technology.
"""
import os
from typing import List, NamedTuple, Optional, Union

import numpy as np
import pandas as pd
import scipy.io
from anndata import AnnData
from typing_extensions import Literal

from ..configuration import SKM
from ..logging import logger_manager as lm
from .utils import get_points_props

try:
    import ngs_tools as ngs

[docs] VERSIONS = { "visium": ngs.chemistry.get_chemistry("Visium").resolution, }
except ModuleNotFoundError: class SpatialResolution(NamedTuple): scale: float = 1.0 unit: Optional[Literal["nm", "um", "mm"]] = None VERSIONS = {"visium": SpatialResolution(55.0, "um")}
[docs]def read_10x_as_anndata(matrix_dir: str) -> AnnData: """Read 10x Visium matrix directory as AnnData. Args: matrix_dir: Path to directory containing matrix files. Returns: AnnData of barcodes x genes. """ obs = pd.read_csv(os.path.join(matrix_dir, "barcodes.tsv.gz"), names=["barcode"]).set_index("barcode") var = pd.read_csv(os.path.join(matrix_dir, "features.tsv.gz"), names=["gene_name", "gene_id", "library"]).set_index( "gene_id" ) X = scipy.io.mmread(os.path.join(matrix_dir, "matrix.mtx.gz")).tocsr() return AnnData(X=X, obs=obs, var=var)
[docs]def read_10x_positions_as_dataframe(path: str) -> pd.DataFrame: """Read 10x tissue positions CSV as dataframe. https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/images Args: path: Path to file Returns: DataFrame containing barcode positions. """ df = pd.read_csv( path, names=["barcode", "in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres"] ) return df
[docs]def read_10x(matrix_dir: str, positions_path: str, version: Literal["visium"] = "visium") -> AnnData: """Read 10x Visium data as AnnData. Args: matrix_dir: Directory containing matrix files (barcodes.tsv.gz, features.tsv.gz, matrix.mtx.gz) positions_path: Path to CSV containing spatial coordinates version: 10x technology version. Currently only used to set the scale and scale units of each unit coordinate. This may change in the future. """ adata = read_10x_as_anndata(matrix_dir) positions = read_10x_positions_as_dataframe(positions_path) adata.obs = positions.set_index("barcode").loc[adata.obs_names] adata.obsm["spatial"] = adata.obs[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values scale, scale_unit = 1.0, None if version in VERSIONS: resolution = VERSIONS[version] scale, scale_unit = resolution.scale, resolution.unit # Set uns SKM.init_adata_type(adata, SKM.ADATA_UMI_TYPE) SKM.init_uns_pp_namespace(adata) SKM.init_uns_spatial_namespace(adata) # SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_BINSIZE_KEY, binsize) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_KEY, scale) SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_UNIT_KEY, scale_unit) return adata