Source code for spateo.sample_data

import ntpath
import os
import shutil
from pathlib import Path
from typing import Optional, Tuple, Union
from urllib.parse import urlparse
from urllib.request import urlretrieve

from anndata import AnnData, read_h5ad, read_loom

from .logging import logger_manager as lm


def download_data(url: str, file_path: Optional[str] = None, dir_name: str = "./data") -> str:
    """Download ``url`` into ``dir_name`` unless the target file already exists.

    Args:
        url: url that deposits the data.
        file_path: file name (relative to ``dir_name``) that will store the data locally. When ``None``, the name
            is taken from the last component of the URL path; query string and fragment are ignored.
        dir_name: name of the directory the file is stored in.

    Returns:
        The file path that points to the downloaded (or previously downloaded) data.

    Raises:
        ValueError: if ``file_path`` is ``None`` and no file name can be derived from ``url``.
    """
    if file_path is None:
        # Use only the path component, so "?dl=1" or "#..." never ends up in the local file name
        # (it would also hide the real extension from callers that inspect the suffix).
        file_path = ntpath.basename(urlparse(url).path)
        if not file_path:
            raise ValueError(f"Cannot derive a file name from url {url!r}; please pass `file_path` explicitly.")

    file_path = os.path.join(dir_name, file_path)
    lm.main_info("Downloading data to " + file_path)

    if not os.path.exists(file_path):
        # Create the real parent of the target, which may be deeper than `dir_name`.
        Path(file_path).parent.mkdir(parents=True, exist_ok=True)

        # Download to a side file and rename on success: an interrupted transfer must not leave a
        # truncated file at `file_path`, because the existence check above would then accept it.
        partial_path = file_path + ".part"
        try:
            urlretrieve(url, partial_path, reporthook=lm.get_main_logger().request_report_hook)
            os.replace(partial_path, file_path)
        finally:
            # No-op after a successful rename; removes the leftover on any failure.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return file_path
def get_adata(url: str, filename: Optional[str] = None, dir_name: str = "./data") -> AnnData:
    """Download example data to a local folder and load it as an AnnData object.

    Args:
        url: url that deposits the data.
        filename: file name that will store the data locally.
        dir_name: name of the directory.

    Returns:
        adata: :class:`~anndata.AnnData` object read from the downloaded file, with variable names made unique.

    Raises:
        ValueError: if the downloaded file is neither a ``.loom`` nor a ``.h5ad`` file.
    """
    file_path = download_data(url=url, file_path=filename, dir_name=dir_name)

    # `Path.suffix` is "" for extension-less names, so no index error on such paths.
    suffix = Path(file_path).suffix
    if suffix == ".loom":
        adata = read_loom(filename=file_path)
    elif suffix == ".h5ad":
        adata = read_h5ad(filename=file_path)
    else:
        # Fail with a clear message instead of hitting an unbound `adata` below.
        raise ValueError(f"Unsupported file type {suffix!r} for {file_path!r}; expected '.loom' or '.h5ad'.")

    adata.var_names_make_unique()
    return adata
def drosophila(
    filename: str = "E7-9h_cellbin_tdr_v2.h5ad",
    dir_name: str = "./data",
    backup: bool = False,
):
    """Multiple drosophila spatial transcriptome data.

    Args:
        filename: file name of the data. Available ``filename`` are:

            * ``E7-9h_cellbin_tdr_v1.h5ad``
            * ``E7-9h_cellbin_tdr_v2.h5ad``
            * ``E7-9h_cellbin_tdr_v2_midgut.h5ad``
            * ``E7-9h_cellbin_tdr_v3_midgut.h5ad``
            * ``E7-9h_cellbin_h5ad.zip``
            * ``E7-9h_bin20_h5ad.zip``
            * ``E9-10h_cellbin_tdr_v1.h5ad``
            * ``E9-10h_cellbin_tdr_v2.h5ad``
            * ``E9-10h_cellbin_tdr_v2_midgut.h5ad``
            * ``E9-10h_cellbin_tdr_v2_CNS.h5ad``
        dir_name: dir path that will store the data locally.
        backup: Whether to use an alternate (figshare) link to download data instead of the default one.

    Returns:
        For ``.h5ad`` / ``.loom`` files, the `adata` object. For ``.zip`` archives, a list with one `adata` object
        per ``.h5ad`` file found under the extracted ``<dir_name>/<archive name without .zip>`` folder (files of
        each directory are read in sorted order).

    Raises:
        KeyError: if ``filename`` is not one of the available data sets.
        ValueError: if no download link is registered for ``filename`` with the selected ``backup`` setting.
    """
    if backup is False:
        url_dict = {
            "E7-9h_cellbin_tdr_v1.h5ad": "https://www.dropbox.com/s/ow8xkge0538309a/E7-9h_cellbin_tdr_v1.h5ad?dl=1",
            "E7-9h_cellbin_tdr_v2.h5ad": "https://www.dropbox.com/s/bvstb3en5kc6wui/E7-9h_cellbin_tdr_v2.h5ad?dl=1",
            "E7-9h_cellbin_tdr_v2_midgut.h5ad": "https://www.dropbox.com/s/q020zgxxemxl7j4/E7-9h_cellbin_tdr_v2_midgut.h5ad?dl=1",
            "E7-9h_cellbin_tdr_v3_midgut.h5ad": "https://www.dropbox.com/s/cz2nqpmoc3oo5f3/E7-9h_cellbin_tdr_v3_midgut.h5ad?dl=1",
            # NOTE(review): the two .zip links below carry session-style parameters (e.g. `expires=8h`,
            # `time=...`) and look time-limited -- confirm they still resolve or replace them.
            "E7-9h_cellbin_h5ad.zip": "https://yq01-ct01.baidupcs.com/file/33a841239s21a64188a4dfa5ba6fc000?bkt=en-e031c0692dcd5a21cb9b7dc32086fda95e0ccccb0c3349d8cf10c6a227b624b16fa4148db441052c&fid=3627617064-250528-547706644850916&time=1670092976&sign=FDTAXUbGERLQlBHSKfWqi-DCb740ccc5511e5e8fedcff06b081203-ucK5LvvmMdebDCkUK6bIt7zl%2B84%3D&to=427&size=16158219&sta_dx=16158219&sta_cs=0&sta_ft=zip&sta_ct=0&sta_mt=0&fm2=MH%2CYangquan%2CAnywhere%2C%2C%E9%99%95%E8%A5%BF%2Cct&ctime=1670091825&mtime=1670091825&resv0=-1&resv1=0&resv2=rlim&resv3=5&resv4=16158219&vuk=3627617064&iv=0&htype=&randtype=&tkbind_id=0&newver=1&newfm=1&secfm=1&flow_ver=3&pkey=en-9054aa2cad924f0d599095a94ed4b854e5970e39aa0c9444cf57152bf7053f32510488b37501bfef&sl=76480590&expires=8h&rt=pr&r=574265491&vbdid=508923211&fin=cellbin_h5ad.zip&fn=cellbin_h5ad.zip&rtype=1&dp-logid=8971527268563088775&dp-callid=0.1&hps=1&tsl=80&csl=80&fsl=-1&csign=SO8aWwhnhOnGKGYv9cWXdJamoKk%3D&so=0&ut=6&uter=4&serv=0&uc=1058262541&ti=c77e04c9862927e5cc28032e1c6abcd6f293302b741abd8e54086130d5ff933c&hflag=30&from_type=1&adg=c_e1c2310f52475804a26726f9181d2ae8&reqlabel=250528_f_68129c39a137f978b08a95b72fcc7f8f_-1_4717521fbacb8ab13b0b72daacf708b3&by=themis",
            "E7-9h_bin20_h5ad.zip": "https://ucd1e2dec0e8649689333bc8f748.dl.dropboxusercontent.com/cd/0/get/Bx9-BFgcvjpaDYK2g4JzMhDf_EFQtaKEKa-eLjVOYSGaBRYsIL7KQby9Sw75TfXkXwcRBudmW81EBV12iAeh4IX1-MbJm8zwkLJHOoWlNWV-lmqV5i8v4fwxABqMLT1Ad86AuP9nuLwlpwghAusj2Wf61bl1Gq5n-gokf5IzrO1L0xmouroXbT3Sz4dTlX6nPhg/file#",
            "E9-10h_cellbin_tdr_v1.h5ad": "https://www.dropbox.com/s/q2l8mqpn7qvz2xr/E9-10h_cellbin_tdr_v1.h5ad?dl=1",
            "E9-10h_cellbin_tdr_v2.h5ad": "https://www.dropbox.com/s/q02sx6acvcqaf35/E9-10h_cellbin_tdr_v2.h5ad?dl=1",
            "E9-10h_cellbin_tdr_v2_midgut.h5ad": "https://www.dropbox.com/s/we2fkpd1p3ww33f/E9-10h_cellbin_tdr_v2_midgut.h5ad?dl=1",
            "E9-10h_cellbin_tdr_v2_CNS.h5ad": "https://www.dropbox.com/s/a7bllwm760dmda6/E9-10h_cellbin_tdr_v2_CNS.h5ad?dl=1",
        }
    else:
        url_dict = {
            "E7-9h_cellbin_tdr_v1.h5ad": "https://figshare.com/s/296ada88086141393702",
            "E7-9h_cellbin_tdr_v2.h5ad": "https://figshare.com/s/8f9623f1fe99e47ed1bf",
            "E7-9h_cellbin_tdr_v2_midgut.h5ad": "https://figshare.com/s/32ab3b9672e8a49426bc",
            "E7-9h_cellbin_tdr_v3_midgut.h5ad": "https://figshare.com/s/fb2097c552c3ff802a74",
            "E7-9h_cellbin_h5ad.zip": "https://figshare.com/s/60a8f8b7a350d4fbe23c",
            "E7-9h_bin20_h5ad.zip": "https://figshare.com/s/510b4118e6165519cfcb",
            "E9-10h_cellbin_tdr_v1.h5ad": "https://figshare.com/s/ee83e00ff016bb825e01",
            "E9-10h_cellbin_tdr_v2.h5ad": "https://figshare.com/s/174f15b4aa349269f90f",
            # No backup link is registered for this data set (handled explicitly below).
            "E9-10h_cellbin_tdr_v2_midgut.h5ad": "",
            "E9-10h_cellbin_tdr_v2_CNS.h5ad": "https://figshare.com/s/ea71722ad3c15199ebce",
        }

    # Resolve the link first so unknown names fail loudly whatever their extension
    # (previously a name with an unhandled extension silently returned None).
    try:
        url = url_dict[filename]
    except KeyError:
        raise KeyError(f"Unknown drosophila data set {filename!r}; available: {sorted(url_dict)}") from None
    if not url:
        raise ValueError(f"No download link is available for {filename!r} with backup={backup!r}.")

    if filename.endswith((".h5ad", ".loom")):
        return get_adata(url=url, filename=filename, dir_name=dir_name)

    if filename.endswith(".zip"):
        # Download next to where the archive is unpacked; `dir_name` used to be ignored here.
        archive_path = download_data(url=url, file_path=filename, dir_name=dir_name)
        shutil.unpack_archive(archive_path, dir_name)
        zip_folder = os.path.join(dir_name, filename[:-4])
        # Join with the walked `root` so files in nested folders resolve, and skip anything
        # that is not an .h5ad file (e.g. archive metadata).
        return [
            read_h5ad(filename=os.path.join(root, name))
            for root, _dirs, files in os.walk(zip_folder)
            for name in sorted(files)
            if name.endswith(".h5ad")
        ]

    raise ValueError(f"Unsupported file type for {filename!r}; expected '.h5ad', '.loom' or '.zip'.")
def mousebrain(
    filename: str,
    dir_name: str = "./data",
    backup: bool = False,
):
    """Mouse brain spatial transcriptome data.

    Args:
        filename: file name of the data. Available ``filename`` are:

            * ``mousebrain_bin30.h5ad``
            * ``mousebrain_bin50_raw.h5ad``
            * ``mousebrain_bin60.h5ad``
            * ``mousebrain_bin60_clustered.h5ad``
            * ``mousebrain_cellbin_clustered.h5ad``
        dir_name: dir path that will store the data locally.
        backup: Whether to use an alternate (figshare) link to download data instead of the Dropbox one.

    Returns:
        Returns `adata` object.

    Raises:
        KeyError: if ``filename`` is not one of the available data sets.
    """
    if backup is False:
        url_dict = {
            "mousebrain_bin30.h5ad": "https://www.dropbox.com/s/tyvhndoyj8se5xt/mousebrain_bin30.h5ad?dl=1",
            # `dl=1` requests the file itself; this entry used `dl=0` (Dropbox preview page),
            # unlike every other Dropbox link in this module.
            "mousebrain_bin50_raw.h5ad": "https://www.dropbox.com/s/vtapwsccpi885l2/mousebrain_bin50_raw.h5ad?dl=1",
            "mousebrain_bin60.h5ad": "https://www.dropbox.com/s/c5tu4drxda01m0u/mousebrain_bin60.h5ad?dl=1",
            "mousebrain_bin60_clustered.h5ad": "https://www.dropbox.com/s/wxgkim87uhpaz1c/mousebrain_bin60_clustered.h5ad?dl=1",
            "mousebrain_cellbin_clustered.h5ad": "https://www.dropbox.com/s/seusnva0dgg5de5/mousebrain_cellbin_clustered.h5ad?dl=1",
        }
    else:
        url_dict = {
            "mousebrain_bin30.h5ad": "https://figshare.com/s/06031809ad3d07f4ae47",
            "mousebrain_bin50_raw.h5ad": "https://figshare.com/s/5b990697c6710281bb94",
            "mousebrain_bin60.h5ad": "https://figshare.com/s/cdf561c40ff2445ae157",
            "mousebrain_bin60_clustered.h5ad": "https://figshare.com/s/b7eb6849985edba965a8",
            "mousebrain_cellbin_clustered.h5ad": "https://figshare.com/s/254ad2f3e6ed9d23d6f9",
        }

    adata = get_adata(url_dict[filename], filename=filename, dir_name=dir_name)
    return adata
def axolotl(
    filename,
    dir_name: str = "./data",
    backup: bool = False,
):
    """Fetch and load one of the axolotl spatial transcriptome data sets.

    Args:
        filename: name of the data set to load. Available ``filename`` are:

            * ``axolotl_2DPI.h5ad``
            * ``axolotl_2DPI_right.h5ad``
        dir_name: directory in which the downloaded file is stored.
        backup: when anything other than ``False``, the alternate figshare links are used instead of the
            Dropbox ones.

    Returns:
        The `adata` object produced by :func:`get_adata` for the selected file.
    """
    dropbox_links = {
        "axolotl_2DPI.h5ad": "https://www.dropbox.com/s/7w2jxf41xazrqxo/axolotl_2DPI.h5ad?dl=1",
        "axolotl_2DPI_right.h5ad": "https://www.dropbox.com/s/pm5vvqcd4leahsb/axolotl_2DPI_right.h5ad?dl=1",
    }
    figshare_links = {
        "axolotl_2DPI.h5ad": "https://figshare.com/s/216e022ff17d841dfc1f",
        "axolotl_2DPI_right.h5ad": "https://figshare.com/s/4995e72dc86b2349c54e",
    }

    # Identity test on purpose: only the literal `False` selects the Dropbox links.
    links = dropbox_links if backup is False else figshare_links
    return get_adata(links[filename], filename=filename, dir_name=dir_name)
def slideseq(
    filename="slideseq_mouse_hippocampus.h5ad",
    dir_name: str = "./data",
    backup: bool = False,
):
    """Fetch and load the Slide-seqV2 mouse hippocampus spatial transcriptomic sample.

    See:
        Stickels, R. R., Murray, E., Kumar, P., Li, J., Marshall, J. L., Di Bella, D. J., ... & Chen, F. (2021).
        Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2.
        Nature biotechnology, 39(3), 313-319.

    Args:
        filename: name of the data set to load. Available ``filename`` are:

            * ``slideseq_mouse_hippocampus.h5ad``
        dir_name: directory in which the downloaded file is stored.
        backup: when anything other than ``False``, the alternate figshare link is used instead of the
            Dropbox one.

    Returns:
        The `adata` object containing Slide-seq data, as produced by :func:`get_adata`.
    """
    dropbox_links = {
        "slideseq_mouse_hippocampus.h5ad": "https://www.dropbox.com/s/d3tpusisbyzn6jk/slideseq.h5ad?dl=1",
    }
    figshare_links = {
        "slideseq_mouse_hippocampus.h5ad": "https://figshare.com/s/b6c7ef4fd5abd05a45eb",
    }

    # Identity test on purpose: only the literal `False` selects the Dropbox link.
    links = dropbox_links if backup is False else figshare_links
    return get_adata(links[filename], filename=filename, dir_name=dir_name)
def seqfish(
    filename="seqfish_mouse_embryo.h5ad",
    dir_name: str = "./data",
    backup: bool = False,
):
    """Fetch and load the seqFISH sample taken at one timepoint of mouse organogenesis.

    See:
        Lohoff, T., Ghazanfar, S., Missarova, A., Koulena, N., Pierson, N., Griffiths, J. A., ... &
        Marioni, J. C. (2022). Integration of spatial and single-cell transcriptomic data elucidates mouse
        organogenesis. Nature biotechnology, 40(1), 74-85.

    Args:
        filename: name of the data set to load. Available ``filename`` are:

            * ``seqfish_mouse_embryo.h5ad``
        dir_name: directory in which the downloaded file is stored.
        backup: when anything other than ``False``, the alternate figshare link is used instead of the
            Dropbox one.

    Returns:
        The `adata` object containing seqFISH data, as produced by :func:`get_adata`.
    """
    dropbox_links = {
        "seqfish_mouse_embryo.h5ad": "https://www.dropbox.com/s/cm3uw8czhz5hu30/seqFISH.h5ad?dl=1",
    }
    figshare_links = {
        "seqfish_mouse_embryo.h5ad": "https://figshare.com/s/9f9c3f6a8d2690e8be53",
    }

    # Identity test on purpose: only the literal `False` selects the Dropbox link.
    links = dropbox_links if backup is False else figshare_links
    return get_adata(links[filename], filename=filename, dir_name=dir_name)
def merfish(
    filename="merfish_mouse_hypothalamus.h5ad",
    dir_name: str = "./data",
    backup: bool = False,
):
    """Fetch and load the MERFISH spatial transcriptomic sample from the mouse hypothalamus.

    See:
        Moffitt, J. R., Bambah-Mukku, D., Eichhorn, S. W., Vaughn, E., Shekhar, K., Perez, J. D., ... &
        Zhuang, X. (2018). Molecular, spatial, and functional single-cell profiling of the hypothalamic
        preoptic region. Science, 362(6416), eaau5324.

    Args:
        filename: name of the data set to load. Available ``filename`` are:

            * ``merfish_mouse_hypothalamus.h5ad``
        dir_name: directory in which the downloaded file is stored.
        backup: when anything other than ``False``, the alternate figshare link is used instead of the
            Dropbox one.

    Returns:
        The `adata` object containing MERFISH data, as produced by :func:`get_adata`.
    """
    dropbox_links = {
        "merfish_mouse_hypothalamus.h5ad": "https://www.dropbox.com/s/e8hwgqnrx2ob9h4/MERFISH.h5ad?dl=1",
    }
    figshare_links = {
        "merfish_mouse_hypothalamus.h5ad": "https://figshare.com/s/ca4158609f626e28aebd",
    }

    # Identity test on purpose: only the literal `False` selects the Dropbox link.
    links = dropbox_links if backup is False else figshare_links
    return get_adata(links[filename], filename=filename, dir_name=dir_name)
def seqscope(
    filename="seqscope_mouse_liver.h5ad",
    dir_name: str = "./data",
    backup: bool = False,
):
    """Fetch and load the Seq-Scope spatial transcriptomic sample from the mouse liver.

    See:
        Cho, C. S., Xi, J., Si, Y., Park, S. R., Hsu, J. E., Kim, M., ... & Lee, J. H. (2021). Microscopic
        examination of spatial transcriptome using Seq-Scope. Cell, 184(13), 3559-3572, and:
        Xi, J., Lee, J. H., Kang, H. M., & Jun, G. (2022). STtools: a comprehensive software pipeline for
        ultra-high-resolution spatial transcriptomics data. Bioinformatics Advances, 2(1), vbac061.

    Args:
        filename: name of the data set to load. Available ``filename`` are:

            * ``seqscope_mouse_liver.h5ad``
        dir_name: directory in which the downloaded file is stored.
        backup: when anything other than ``False``, the alternate figshare link is used instead of the
            Dropbox one.

    Returns:
        The `adata` object containing Seq-Scope data, as produced by :func:`get_adata`.
    """
    dropbox_links = {
        "seqscope_mouse_liver.h5ad": "https://www.dropbox.com/s/hci9up23dkuyexb/SeqScope.h5ad?dl=1",
    }
    figshare_links = {
        "seqscope_mouse_liver.h5ad": "https://figshare.com/s/e13d1bf7cda71eab74d9",
    }

    # Identity test on purpose: only the literal `False` selects the Dropbox link.
    links = dropbox_links if backup is False else figshare_links
    return get_adata(links[filename], filename=filename, dir_name=dir_name)
def starmap(
    filename="starmap_mouse_brain.h5ad",
    dir_name: str = "./data",
    backup: bool = False,
):
    """Fetch and load the STARmap spatial transcriptomic sample from the mouse brain.

    See:
        Wang, X., Allen, W. E., Wright, M. A., Sylwestrak, E. L., Samusik, N., Vesuna, S., ... &
        Deisseroth, K. (2018). Three-dimensional intact-tissue sequencing of single-cell transcriptional
        states. Science, 361(6400), eaat5691.

    Args:
        filename: name of the data set to load. Available ``filename`` are:

            * ``starmap_mouse_brain.h5ad``
        dir_name: directory in which the downloaded file is stored.
        backup: when anything other than ``False``, the alternate figshare link is used instead of the
            Dropbox one.

    Returns:
        The `adata` object containing STARmap data, as produced by :func:`get_adata`.
    """
    dropbox_links = {
        "starmap_mouse_brain.h5ad": "https://www.dropbox.com/s/zpvu387tajrwth7/STARmap.h5ad?dl=1",
    }
    figshare_links = {
        "starmap_mouse_brain.h5ad": "https://figshare.com/s/19cfa3ba3553508f02c7",
    }

    # Identity test on purpose: only the literal `False` selects the Dropbox link.
    links = dropbox_links if backup is False else figshare_links
    return get_adata(links[filename], filename=filename, dir_name=dir_name)