Source code for spateo.io.bbs

"""IO functions for calculating the bounding box.
"""

import math
from typing import List, Optional, Tuple, Union

import numpy as np
from scipy.spatial import Delaunay
from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
    multipolygon,
)
from shapely.ops import polygonize, unary_union

from ..configuration import SKM
from ..logging import logger_manager as lm
from .bgi import read_bgi_agg
from .utils import centroids


[docs]def alpha_shape( x: np.ndarray, y: np.ndarray, alpha: float = 1, buffer: float = 1, vectorize: bool = True, ) -> Tuple[Union[MultiPolygon, Polygon], List]: """Compute the alpha shape (concave hull) of a set of points. Code adapted from: https://gist.github.com/dwyerk/10561690 Args: x: x-coordinates of the DNA nanoballs or buckets, etc. y: y-coordinates of the DNA nanoballs or buckets, etc. alpha: alpha value to influence the gooeyness of the border. Smaller numbers don't fall inward as much as larger numbers. Too large, and you lose everything! buffer: the buffer used to smooth and clean up the shapley identified concave hull polygon. vectorize: Whether to vectorize the alpha-shape calculation instead of looping through. Returns: alpha_hull: The computed concave hull. edge_points: The coordinates of the edge of the resultant concave hull. """ coords = np.array([x.flatten(), y.flatten()]).transpose() points = MultiPoint(coords) if len(points) < 4: # When you have a triangle, there is no sense # in computing an alpha shape. return MultiPoint(list(points)).convex_hull tri = Delaunay(coords) if vectorize: # ia, ib, ic = indices of corner points of the triangle triangles = coords[tri.vertices] # Lengths of sides of triangle a = ((triangles[:, 0, 0] - triangles[:, 1, 0]) ** 2 + (triangles[:, 0, 1] - triangles[:, 1, 1]) ** 2) ** 0.5 b = ((triangles[:, 1, 0] - triangles[:, 2, 0]) ** 2 + (triangles[:, 1, 1] - triangles[:, 2, 1]) ** 2) ** 0.5 c = ((triangles[:, 2, 0] - triangles[:, 0, 0]) ** 2 + (triangles[:, 2, 1] - triangles[:, 0, 1]) ** 2) ** 0.5 # Semiperimeter of triangle s = (a + b + c) / 2.0 # Area of triangle by Heron's formula areas = (s * (s - a) * (s - b) * (s - c)) ** 0.5 circums = a * b * c / (4.0 * areas) # Here's the radius filter. filtered = triangles[circums < (1.0 / alpha)] edge1 = filtered[:, (0, 1)] edge2 = filtered[:, (1, 2)] edge3 = filtered[:, (2, 0)] edge_points = np.unique(np.concatenate((edge1, edge2, edge3)), axis=0).tolist() else: def add_edge(edges, edge_points, coords, i, j): """ Add a line between the i-th and j-th points, if not in the list already """ if (i, j) in edges or (j, i) in edges: # already added return edges.add((i, j)) edge_points.append(coords[[i, j]]) edges = set() edge_points = [] # loop over triangles: # ia, ib, ic = indices of corner points of the triangle for ia, ib, ic in tri.vertices: pa = coords[ia] pb = coords[ib] pc = coords[ic] # Lengths of sides of triangle a = math.sqrt((pa[0] - pb[0]) ** 2 + (pa[1] - pb[1]) ** 2) b = math.sqrt((pb[0] - pc[0]) ** 2 + (pb[1] - pc[1]) ** 2) c = math.sqrt((pc[0] - pa[0]) ** 2 + (pc[1] - pa[1]) ** 2) # Semiperimeter of triangle s = (a + b + c) / 2.0 # Area of triangle by Heron's formula area = math.sqrt(s * (s - a) * (s - b) * (s - c)) circum_r = a * b * c / (4.0 * area) # Here's the radius filter. if circum_r < 1.0 / alpha: add_edge(edges, edge_points, coords, ia, ib) add_edge(edges, edge_points, coords, ib, ic) add_edge(edges, edge_points, coords, ic, ia) triangles = list(polygonize(edge_points)) alpha_hull = unary_union(triangles) if buffer != 0: alpha_hull.buffer(buffer) return alpha_hull, edge_points
[docs]def get_concave_hull( path: str, binsize: int = 20, min_agg_umi: Optional[int] = None, alpha: float = 1.0, buffer: Optional[float] = None, ) -> Tuple[Polygon, List]: """Return the convex hull of all nanoballs that have non-zero UMI (or at least > min_agg_umi UMI). Args: path: Path to read file. binsize: The number of spatial bins to aggregate RNAs captured by DNBs in those bins. By default it is 20, which is close to the size of a single cell. If stereo-seq chip used is bigger than 1 x 1 mm, you may need to increase the binsize. min_agg_umi: the minimal aggregated UMI number for the bucket. alpha: alpha value to influence the gooeyness of the border. Smaller numbers don't fall inward as much as larger numbers. Too large, and you lose everything! buffer: the buffer used to smooth and clean up the shapley identified concave hull polygon. Returns: alpha_hull: The computed concave hull. edge_points: The coordinates of the edge of the resultant concave hull. """ adata = read_bgi_agg(path, binsize=binsize) if min_agg_umi is None: min_agg_umi = binsize - 1 i, j = (adata.X > min_agg_umi).nonzero() x_min, y_min = int(adata.obs_names[0]), int(adata.var_names[0]) # We use centroids function to get the true stereo-seq chip coordinates. if binsize != 1: i = centroids(i, coord_min=x_min, binsize=binsize) j = centroids(j, coord_min=y_min, binsize=binsize) else: i, j = i + x_min, j + y_min if buffer is None: buffer = binsize alpha_hull, edge_points = alpha_shape(i, j, alpha, buffer, vectorize=True) lm.main_warning(f"No alpha convex hull is identified, please set to be smaller than {alpha}") return alpha_hull, edge_points