"""IO functions for STARmap technology."""importosimportnumpyasnpimportpandasaspdfromanndataimportAnnDatafromscipy.sparseimportcsr_matrixfrom..configurationimportSKMfrom..loggingimportlogger_manageraslmfrom.utilsimportget_points_props
def read_starmap_as_anndata(data_dir: str) -> AnnData:
    """Build a cell x gene AnnData from a STARmap data directory.

    Two headerless CSV files are read from `data_dir`:
    `cell_barcode_count.csv` (the count matrix, one row per cell) and
    `cell_barcode_names.csv` (gene annotations; the column at position 2 is
    used as the gene names).

    Args:
        data_dir: Path to directory containing STARmap files.

    Returns:
        AnnData of cells x genes. Counts are stored as a sparse CSR matrix of
        dtype uint16 and cells are named `Cell_0`, `Cell_1`, ...
    """
    lm.main_info("Constructing count matrix.")

    counts_path = os.path.join(data_dir, "cell_barcode_count.csv")
    names_path = os.path.join(data_dir, "cell_barcode_names.csv")
    counts = pd.read_csv(counts_path, header=None)
    gene_table = pd.read_csv(names_path, header=None)

    # Cells carry no identifiers in the input, so name them by row position.
    n_cells = counts.shape[0]
    cell_names = [f"Cell_{i}" for i in range(n_cells)]

    return AnnData(
        X=csr_matrix(counts, dtype=np.uint16),
        obs=pd.DataFrame(index=cell_names),
        var=pd.DataFrame(index=gene_table[2]),
    )
def read_starmap_positions_as_dataframe(path: str) -> pd.DataFrame:
    """Read STARmap cell positions npz as dataframe.

    The archive's `labels` array is converted to sparse COO form, so only
    nonzero entries produce rows. Labels are then filtered in two steps:

    1. Only labels whose pixel count is strictly greater than 1000 and
       strictly less than 100000 are kept.
    2. Of the labels that remain, the one with the largest value is dropped.

    Args:
        path: Path to the `.npz` file; it must contain a `labels` array.

    Returns:
        DataFrame with columns `x` (row index), `y` (column index) and
        `label` (value at that position), one row per retained pixel.
    """
    # np.load on an .npz archive returns a lazy NpzFile that keeps the file
    # open; load the array inside a context manager so the handle is released.
    with np.load(path) as archive:
        labels = archive["labels"]

    coo = csr_matrix(labels).tocoo()
    df_labels = pd.DataFrame({"x": coo.row, "y": coo.col, "label": coo.data})

    # To be consistent with the reference STARmap implementation:
    # https://github.com/weallen/STARmap/blob/0b1cddf459a69b73f935aca7f7e0008c349453c0/python/viz.py#L20
    unique_label, label_area = np.unique(df_labels["label"], return_counts=True)
    valid_area = np.logical_and(label_area > 1000, label_area < 100000)
    df_labels = df_labels[df_labels["label"].isin(unique_label[valid_area])]

    # Drop the largest remaining label, as the reference implementation does.
    df_labels = df_labels[df_labels["label"] != np.max(df_labels["label"])]
    return df_labels
def read_starmap(
    data_dir: str,
) -> AnnData:
    """Read STARmap data as AnnData.

    Combines the count matrix from `read_starmap_as_anndata` with per-cell
    geometry computed by `get_points_props` from `labels.npz` in the same
    directory.

    Args:
        data_dir: Path to directory containing STARmap files.

    Returns:
        AnnData with `area` added to `.obs`, and `spatial`, `contour` and
        `bbox` added to `.obsm`. The AnnData type and the spatial scale
        (1.0, with no unit) are registered through `SKM`.
    """
    adata = read_starmap_as_anndata(data_dir)

    labels_path = os.path.join(data_dir, "labels.npz")
    positions = read_starmap_positions_as_dataframe(labels_path)
    cell_props = get_points_props(positions)

    # NOTE(review): rows are relabelled positionally with the cell names, which
    # assumes get_points_props returns exactly one row per cell in count-matrix
    # order -- confirm. The subsequent .loc only re-selects in that same order.
    cell_props.index = adata.obs_names
    cell_props = cell_props.loc[adata.obs_names]

    adata.obs["area"] = cell_props["area"].values
    adata.obsm["spatial"] = cell_props.filter(regex="centroid-").values
    adata.obsm["contour"] = cell_props["contour"].values
    adata.obsm["bbox"] = cell_props.filter(regex="bbox-").values

    scale = 1.0
    scale_unit = None

    # Set uns
    SKM.init_adata_type(adata, SKM.ADATA_UMI_TYPE)
    SKM.init_uns_pp_namespace(adata)
    SKM.init_uns_spatial_namespace(adata)
    SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_KEY, scale)
    SKM.set_uns_spatial_attribute(adata, SKM.UNS_SPATIAL_SCALE_UNIT_KEY, scale_unit)
    return adata