# Source code for spateo.digitization.utils

"""Written by @Jinerhal, adapted by @Xiaojieqiu.
"""

import math
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
from anndata import AnnData
from skimage import morphology

from ..configuration import SKM
from ..logging import logger_manager as lm


@SKM.check_adata_is_type(SKM.ADATA_UMI_TYPE)
def fill_grid_label(
    adata: AnnData,
    spatial_key: str,
    seg_grid_img: np.ndarray,
    bdl_seg_coor_x: np.ndarray,
    bdl_seg_coor_y: np.ndarray,
    curr_layer: int,
    curr_sign: int,
    layer_label_key: str = "layer_label",
    column_label_key: str = "column_label",
    init: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """Assign the interior/exterior layer, column and grid to each bucket.

    Every grid cell delimited by two consecutive segmentation points on each of the two borderlines is flood-filled
    with `curr_layer` (layer image) and with its 1-based column number (column image). Each bucket whose label is
    still 0 then receives the pixel value found at its spatial coordinate; the layer value is multiplied by
    `curr_sign`. Buckets that fall on a grid line (pixel value 255) are reset to 0.

    Args:
        adata: The adata object to be used for assign the interior/exterior layers, columns and grid. Its `.obs` is
            modified in place.
        spatial_key: The key name in `adata.obsm` of the spatial coordinates.
        seg_grid_img: The matrix that stores the image information of the borderline between the source and target
            cluster(s), as well as the i and i+1-th borderlines.
        bdl_seg_coor_x: The segmentation points of the i-th borderline.
        bdl_seg_coor_y: The segmentation points of the i+1-th borderline.
        curr_layer: The number of the current layer.
        curr_sign: The sign of the current layer.
        layer_label_key: The key in `.obs` that points to the key of the layer labels.
        column_label_key: The key in `.obs` that points to the key of the column labels.
        init: Whether to generate (and potentially overwrite) the `layer_label_key` and `column_label_key` columns.
            When False, the columns are only created if they do not exist yet.

    Returns:
        layer_grid_img: A numpy array that store the image of the layers and layer grids.
        column_grid_img: A numpy array that store the image of the columns and column grids.
    """

    # mask image should be 2 pixels wider and higher, according to cv2.floodFill
    layer_grid_img = seg_grid_img.copy()
    layer_mask = np.zeros((layer_grid_img.shape[0] + 2, layer_grid_img.shape[1] + 2), dtype=np.uint8)
    layer_mask[1:-1, 1:-1] = layer_grid_img
    column_grid_img = seg_grid_img.copy()
    column_mask = np.zeros((column_grid_img.shape[0] + 2, column_grid_img.shape[1] + 2), dtype=np.uint8)
    column_mask[1:-1, 1:-1] = column_grid_img

    lm.main_info("Use cv2.floodFill to fill layer/column number.")
    # Only as many cells exist as the shorter borderline provides (same rule as `draw_seg_grid`); this also avoids
    # indexing past the end of `bdl_seg_coor_y`.
    seg_num = min(len(bdl_seg_coor_x), len(bdl_seg_coor_y))
    for i in range(seg_num - 1):
        curr_column = i + 1
        # identify the middle point for each layer/column and use that as the seed for cv2.floodFill.
        fpx = int(
            np.mean([bdl_seg_coor_x[i][0], bdl_seg_coor_x[i + 1][0], bdl_seg_coor_y[i][0], bdl_seg_coor_y[i + 1][0]])
        )
        fpy = int(
            np.mean([bdl_seg_coor_x[i][1], bdl_seg_coor_x[i + 1][1], bdl_seg_coor_y[i][1], bdl_seg_coor_y[i + 1][1]])
        )
        # Fills a connected component with the given color.
        cv2.floodFill(layer_grid_img, layer_mask, (fpx, fpy), curr_layer)
        cv2.floodFill(column_grid_img, column_mask, (fpx, fpy), curr_column)

    # (Re)create the label columns when requested or when they are missing.
    for label_key in (layer_label_key, column_label_key):
        if init or label_key not in adata.obs:
            adata.obs[label_key] = 0

    lm.main_info(
        f"Assign layer/column number for each bucket with the {layer_label_key} and {column_label_key}, "
        f"respectively."
    )

    # NOTE(review): the images are indexed as [coord[:, 0], coord[:, 1]] while the flood-fill seeds above are given
    # to cv2 as (x, y) -- confirm the axis convention of `adata.obsm[spatial_key]` against the caller.
    coords = np.asarray(adata.obsm[spatial_key])
    idx_0 = coords[:, 0].astype(int)
    idx_1 = coords[:, 1].astype(int)

    # Widen to int64 before applying the sign so that a negative `curr_sign` cannot overflow the uint8 pixel type.
    layer_values = layer_grid_img[idx_0, idx_1].astype(np.int64) * curr_sign
    column_values = column_grid_img[idx_0, idx_1].astype(np.int64)

    # Whole-column assignment instead of chained `obs[key][i] = ...`, which is not guaranteed to write back.
    layer_labels = adata.obs[layer_label_key].to_numpy().copy()
    unassigned = layer_labels == 0
    layer_labels[unassigned] = layer_values[unassigned]
    layer_labels[np.abs(layer_labels) == 255] = 0  # 255 marks grid-line pixels
    adata.obs[layer_label_key] = layer_labels

    column_labels = adata.obs[column_label_key].to_numpy().copy()
    unassigned = column_labels == 0
    column_labels[unassigned] = column_values[unassigned]
    column_labels[column_labels == 255] = 0  # 255 marks grid-line pixels
    adata.obs[column_label_key] = column_labels

    return layer_grid_img, column_grid_img


def order_borderline(
    borderline_img: np.ndarray,
    pt_start: Tuple[int, int],
    pt_end: Tuple[int, int],
) -> Tuple[List, np.ndarray]:
    """Extract the ordered borderline segment lying between a start and an end point.

    The external contour of `borderline_img` is traced with `cv2.findContours` and only the first contour is used.
    The run of contour points between the positions of `pt_start` and `pt_end` (whichever comes first in contour
    order) is returned and redrawn on a blank image.

    Args:
        borderline_img: The matrix that stores the image of the borderline.
        pt_start: The coordinate tuple of the start point. Must be a point of the traced contour.
        pt_end: The coordinate tuple of the end point. Must be a point of the traced contour.

    Returns:
        ordered_bdl_list: List of points along the borderline segment.
        ordered_bdl_img: A numpy array that stores the image of the borderline segment.

    Raises:
        ValueError: If `pt_start` or `pt_end` is not found among the contour points.
    """
    lm.main_info(
        f"Reorder the coordinates along the borderline with the givien start {pt_start} and end {pt_end} points."
    )

    contours, _ = cv2.findContours(borderline_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    ordered_bdl_img = np.zeros_like(borderline_img, dtype=np.uint8)

    # should only contain a single contour
    contour_pts = [(pt[0][0], pt[0][1]) for pt in contours[0]]

    lower, upper = sorted((contour_pts.index(pt_start), contour_pts.index(pt_end)))
    # NOTE(review): the slice keeps one extra point past the later of the two positions -- confirm this is intended.
    ordered_bdl_list = contour_pts[lower : upper + 2]

    # Redraw the segment by connecting each pair of consecutive points.
    for seg_from, seg_to in zip(ordered_bdl_list, ordered_bdl_list[1:]):
        cv2.line(ordered_bdl_img, seg_from, seg_to, 255, 1)

    lm.main_info(f"Extracted boundary line length: {len(ordered_bdl_list)}.")

    return ordered_bdl_list, ordered_bdl_img


def draw_seg_grid(
    borderline_img,
    bdl_seg_coor_x,
    bdl_seg_coor_y,
    gridline_width=1,
    mode="grid",
) -> Optional[np.ndarray]:
    """Draw the grid lines for each layer and column.

    For every pair of matching segmentation points a line connecting the two borderlines is drawn, and consecutive
    segmentation points on each borderline are joined as well. All lines are drawn with value 255 on a blank uint8
    image of the same shape as `borderline_img`.

    Args:
        borderline_img: The matrix that stores the image information of the borderline between the source and target
            cluster(s). Only used as a template for the output image.
        bdl_seg_coor_x: The segmentation points of the i-th layer borderline.
        bdl_seg_coor_y: The segmentation points of the i+1-th (the consecutive) layer borderline.
        gridline_width: Linewidth of the grid.
        mode: The mode to draw the grid line. Only `"grid"` produces an output.

    Returns:
        When `mode` is set to be `grid`, a matrix with the gridlines is created. For any other mode `None` is
        returned.
    """

    seg_grid_img = np.zeros_like(borderline_img, dtype=np.uint8)

    n_x, n_y = len(bdl_seg_coor_x), len(bdl_seg_coor_y)
    if n_x != n_y:
        lm.main_info("Warning: segmentation does not match between two borderlines. Using the shorter borderline.")

    seg_count = min(n_x, n_y)
    last_seg = seg_count - 1
    for seg in range(seg_count):
        # line across the layer, from one borderline to the other
        cv2.line(seg_grid_img, bdl_seg_coor_x[seg], bdl_seg_coor_y[seg], 255, gridline_width)
        if seg >= last_seg:
            continue
        # lines along each borderline, towards the next segmentation point
        cv2.line(seg_grid_img, bdl_seg_coor_x[seg], bdl_seg_coor_x[seg + 1], 255, gridline_width)
        cv2.line(seg_grid_img, bdl_seg_coor_y[seg], bdl_seg_coor_y[seg + 1], 255, gridline_width)

    # TODO: mode "gray" -- directly label each region in adata, function fill_grid_label can be merged.
    return seg_grid_img if mode == "grid" else None

def euclidean_dist(
    point_x: Tuple,  # geometric coordinate
    point_y: Tuple,
) -> float:
    """Calculate the euclidean distance between two 2D points.

    Args:
        point_x: The first point; only its first two components are used.
        point_y: The second point; only its first two components are used.

    Returns:
        The euclidean distance between the two points as a Python float.
    """
    # math.hypot works on floats, so squaring cannot wrap around for fixed-width numpy integers (e.g. the int32
    # coordinates produced by cv2 contours) and does not overflow for large magnitudes.
    return math.hypot(point_x[0] - point_y[0], point_x[1] - point_y[1])


def segment_bd_line(
    borderline_list: List,
    column_num: int,
):
    """Segment the borderline into `column_num` even segments based on the arclength along the borderline.

    The cumulative arclength is walked point by point. Each time the length accumulated since the last cut reaches
    the target segment length a cut is made; the accumulated over-/undershoot (`error_dist`) decides whether the
    cut is placed at the current position or one position earlier.

    Args:
        borderline_list: An order list of np.arrays of coordinates of the borderlines.
        column_num: Number of columns to segment for each layer.

    Returns:
        seg_point_ls: The numpy array of the segmentation points, i.e. `np.array(borderline_list)` indexed by the
            selected positions.

    Raises:
        ZeroDivisionError: If `column_num` is 0.
    """

    seg_index = []  # index for segmentation points

    # accumulative arclengths; entry i is the length of the line up to point i + 1
    arclen = 0
    arclen_ls = []
    for i in range(len(borderline_list) - 1):
        arclen += euclidean_dist(borderline_list[i + 1], borderline_list[i])
        arclen_ls.append(arclen)

    # length per line segment.
    len_per_seg = arclen / column_num
    lm.main_info(
        f"Line total length: {round(arclen, 2)}. Segmenting into {column_num} columns, with {round(len_per_seg, 2)} "
        f"each."
    )

    # The array that will keep the arclen of the line from the latest segment point dynamically.
    dynamic_arclen = np.array(arclen_ls)

    # NOTE(review): positions in `dynamic_arclen` are used directly as indices into `borderline_list`, so the last
    # borderline point (index len(borderline_list) - 1) is never selected -- confirm this is intended.
    first = True
    error_dist = 0.0  # accumulated deviation of the cuts made so far from the ideal segment length
    last_idx = len(dynamic_arclen) - 1
    for i in range(len(dynamic_arclen)):
        # always add the start and end index
        if i == 0 or i == last_idx:
            seg_index.append(i)
            continue

        # nothing to do until the length since the latest cut reaches the required segment length
        if dynamic_arclen[i] < len_per_seg:
            continue

        if first:
            # first step: cut here and restart the running arclength from this position
            error_dist = dynamic_arclen[i] - len_per_seg
            cut = i
            first = False
        elif error_dist > 0:
            # compensate the extra length from the previous segment(s) by cutting one position earlier
            error_dist = error_dist + dynamic_arclen[i - 1] - len_per_seg
            cut = i - 1
        else:
            # compensate the negative length from the previous segment(s). An error of exactly zero is handled here
            # too; otherwise an evenly divisible line would never be cut again after the first segment.
            error_dist = error_dist + dynamic_arclen[i] - len_per_seg
            cut = i

        seg_index.append(cut)
        dynamic_arclen = dynamic_arclen - dynamic_arclen[cut]

    seg_point_ls = np.array(borderline_list)[seg_index]

    return seg_point_ls  # segmentation point list


def extend_layer(
    borderline_img: np.ndarray,
    borderline_list: List,
    extend_width=10,
) -> Tuple[np.ndarray, List]:
    """Extend the layer to both interior and exterior sides.

    A filled disc of radius `extend_width` is stamped on every borderline point and the outer contour of the
    resulting band is traced. The parts of that contour that coincide with the contour of the discs around the
    first and last borderline points are then erased, and small leftover pixel groups are dropped.

    Args:
        borderline_img: The matrix that stores the image information of the borderline between the source and target
            cluster(s). Only used as a template for the working images.
        borderline_list: An order list of np.arrays of coordinates of the borderlines.
        extend_width: The layer width to extend.

    Returns:
        extend_layer_img: The matrix that stores the extended layer image (values 0 or 255).
        extend_layer_bdl: The list of `(x, y)` points of the extended layer borderline, in contour order.
    """

    lm.main_info("Generating layer area.")
    band_mask = np.zeros_like(borderline_img, dtype=np.uint8)
    for center in borderline_list:
        cv2.circle(band_mask, center, extend_width, 255, -1)

    extend_layer_contour, _ = cv2.findContours(band_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    extend_layer_img = np.zeros_like(borderline_img, dtype=np.uint8)
    cv2.drawContours(extend_layer_img, extend_layer_contour, -1, 255, 1)

    lm.main_info("Refining layer contour.")

    # extend only the start and end point of the border line.
    cap_mask = np.zeros_like(borderline_img, dtype=np.uint8)
    for endpoint in (borderline_list[0], borderline_list[-1]):
        cv2.circle(cap_mask, endpoint, extend_width, 255, -1)
    cap_contours, _ = cv2.findContours(cap_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cap_outline = np.zeros_like(borderline_img, dtype=np.uint8)
    cv2.drawContours(cap_outline, cap_contours, -1, 255, 1)

    # remove the contour points formed by the start/end point extensions to keep only two borderlines.
    extend_layer_img = np.where(cap_outline != 0, 0, extend_layer_img)
    cleaned = morphology.remove_small_objects(extend_layer_img.astype(bool), min_size=5, connectivity=2)
    extend_layer_img = cleaned.astype(np.uint8) * 255

    # Keep, in contour order, the band-contour points that survived the clean-up (should be a single contour).
    extend_layer_bdl = [
        (pt[0][0], pt[0][1]) for pt in extend_layer_contour[0] if extend_layer_img[pt[0][1], pt[0][0]] != 0
    ]

    return extend_layer_img, extend_layer_bdl


def field_contour_line(
    ctr_seq: np.ndarray,
    pnt_pos: np.ndarray,
    min_pnt: Tuple[int, int],
    max_pnt: Tuple[int, int],
) -> list:
    """Retrieve the field contour line given min and max points from an ordered set of contour points.

    Of the two ways to travel along the closed contour from `min_pnt` to `max_pnt`, the one that does not pass
    through any other tagged corner point is returned.

    Args:
        ctr_seq: The ordered sequence of points on the contour. The implementation relies on `.index`, `.copy` and
            `+` concatenation, i.e. it expects a Python list of points.
        pnt_pos: The array that tags the position of all four corner points (non-zero at corner positions).
        min_pnt: The point corresponds to the position with minimal heat value.
        max_pnt: The point corresponds to the position with maximal heat value.

    Returns:
        line_seq: The line segment that starts from the point with the minimal heat value to the point with maximal heat
        value, both end points included.

    Raises:
        ValueError: If `min_pnt` or `max_pnt` is not contained in `ctr_seq`.
    """
    start = ctr_seq.index(min_pnt)
    stop = ctr_seq.index(max_pnt) + 1  # exclusive slice bound right after max_pnt

    # In contour order, does the path from min_pnt to max_pnt run over the end of the sequence?
    wraps_around = start >= stop

    # Count the corner tags strictly between the two end points along the contour orientation.
    if wraps_around:
        corners_between = sum(pnt_pos[start + 1 :]) + sum(pnt_pos[: stop - 1])
    else:
        corners_between = sum(pnt_pos[start + 1 : stop - 1])

    if corners_between == 0:
        # contour orientation is the same as the min_pnt to max_pnt orientation
        if wraps_around:
            return ctr_seq[start:] + ctr_seq[:stop]
        return ctr_seq[start:stop]

    # There are other corner points on that path: walk the contour in the opposite direction instead.
    backward = ctr_seq[::-1].copy()
    rev_start = backward.index(min_pnt)
    rev_stop = backward.index(max_pnt) + 1
    if wraps_around:
        return backward[rev_start:rev_stop]
    # the end of the reversed sequence followed by its beginning
    return backward[rev_start:] + backward[:rev_stop]


def field_contours(
    contour: np.ndarray,
    pnt_xy: Tuple[int, int],
    pnt_Xy: Tuple[int, int],
    pnt_xY: Tuple[int, int],
    pnt_XY: Tuple[int, int],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Identify four boundary lines according to given corner points.

    The contour is converted to a list of point tuples, the positions of the four corners are tagged, and each
    boundary line is extracted with `field_contour_line` as the contour path between two corners that contains no
    other corner.

    Args:
        contour: Contours generated by `cv2.findContours`; indexed as `contour[:, 0]` to obtain the points.
        pnt_xy: Corner point to define an area of interest. pnt_xy corresponds to the point with minimal layer and
            minimal column value.
        pnt_Xy: Corner point corresponds to the point with maximal column value but minimal layer value.
        pnt_xY: Corner point corresponds to the point with minimal column value but maximal layer value.
        pnt_XY: Corner point corresponds to the point with maximal layer and maximal columns value.

    Returns:
        min_line_l: The points on the line from `pnt_xy` to `pnt_Xy` (minimal layer heat values).
        max_line_l: The points on the line from `pnt_xY` to `pnt_XY` (maximal layer heat values).
        min_line_c: The points on the line from `pnt_xy` to `pnt_xY` (minimal column heat values).
        max_line_c: The points on the line from `pnt_Xy` to `pnt_XY` (maximal column heat values).

    Raises:
        ValueError: If one of the corner points is not a point of `contour`.
    """

    ctr_seq = [tuple(pt) for pt in contour[:, 0]]

    # Tag the contour positions of the four corners.
    pnt_pos = np.zeros(len(ctr_seq))
    for corner in (pnt_xy, pnt_Xy, pnt_xY, pnt_XY):
        pnt_pos[ctr_seq.index(corner)] = 1

    # (start corner, end corner) of each boundary line, in the order they are returned.
    corner_pairs = (
        (pnt_xy, pnt_Xy),
        (pnt_xY, pnt_XY),
        (pnt_xy, pnt_xY),
        (pnt_Xy, pnt_XY),
    )
    min_line_l, max_line_l, min_line_c, max_line_c = [
        field_contour_line(ctr_seq, pnt_pos, line_start, line_end) for line_start, line_end in corner_pairs
    ]

    return min_line_l, max_line_l, min_line_c, max_line_c


def add_eh_boundary(
    heat_field: np.ndarray,
    field_line: np.ndarray,
    value: float,
) -> None:
    """Set equal heat value to the boundary line on the heat field.

    Args:
        heat_field: The field of the spatial domain of interests. Modified in place.
        field_line: The isoline on the field of the spatial domain of interests, as a sequence of `(x, y)` points.
            Each point is written to `heat_field[y, x]`.
        value: The value that will be assigned to the isoline.

    Returns:
        Nothing but set the provided isoline to the specific value.
    """

    for point in field_line:
        col, row = point
        heat_field[row, col] = value


def add_gh_boundary(
    heat_field: np.ndarray,
    field_line: np.ndarray,
    value_s: float,
    value_e: float,
) -> None:
    """Increase heat value progressively along the boundary line on the heat field.

    The values are evenly spaced between `value_s` and `value_e` (both included), one per point of the line.

    Args:
        heat_field: The field of the spatial domain of interest. Modified in place.
        field_line: The line on the field of the spatial domain of interests that should have increasing heat values,
            as a sequence of `(x, y)` points. Each point is written to `heat_field[y, x]`.
        value_s: Source heat value, assigned to the first point of the line.
        value_e: End heat value, assigned to the last point of the line.

    Returns:
        Nothing but set the provided line to the growing heat value.
    """

    ramp = np.linspace(value_s, value_e, len(field_line))
    for (col, row), heat in zip(field_line, ramp):
        heat_field[row, col] = heat


def effective_L2_error(
    heat_field_i: np.ndarray,
    heat_field_j: np.ndarray,
    field_mask: np.ndarray,
) -> float:
    """Calculate effective L2 error between two fields.

    Computes `sqrt(sum((j - i)^2 * mask) / sum(j^2 * mask))`.

    Args:
        heat_field_i: The target field used in solving the heat equation.
        heat_field_j: The source field used in solving the heat equation.
        field_mask: The domain of interests (1 if inside the domain and 0 otherwise).

    Returns:
        A float variable of the L2 difference between two fields, normalized by the source field.
    """

    masked_sq_diff = (heat_field_j - heat_field_i) ** 2 * field_mask
    masked_sq_source = heat_field_j**2 * field_mask
    return np.sqrt(np.sum(masked_sq_diff) / np.sum(masked_sq_source))


def domain_heat_eqn_solver(
    heat_field: np.ndarray,
    min_line: np.ndarray,
    max_line: np.ndarray,
    edge_line_a: np.ndarray,
    edge_line_b: np.ndarray,
    field_border: np.ndarray,
    field_mask: np.ndarray,
    max_err: float = 1e-5,
    max_itr: float = 1e5,
    lh: float = 1,
    hh: float = 100,
) -> np.ndarray:
    """Given the boundaries and boundary conditions of a close spatial domain, solve heat equation (a simple partial
    differential equation) to define the "heat" for each spatial pixel which can be used to digitize the
    spatial domain into different layers or columns. Diffusivity is set to be 1/4, thus the update rule is defined as:

        grid_field[1:-1, 1:-1] = 0.25 * (
            grid_field_pre[1:-1, 2:] + grid_field_pre[1:-1, :-2] + grid_field_pre[2:, 1:-1] + grid_field_pre[:-2, 1:-1]
        )

    After every update the pixels where `field_border` is non-zero are reset to their initial (boundary) values.
    Iteration stops once the relative L2 change between two consecutive fields is no larger than `max_err`, or after
    the iteration counter exceeds `max_itr`.

    Args:
        heat_field: The field of the spatial domain of interests. It is copied, not modified.
        min_line: The np array of the isoline points with minimal heat values.
        max_line: The np array of the isoline points with maximal  heat values.
        edge_line_a: The np array of the points with increasing heat values, orthogonal to the isolines.
        edge_line_b: The np array of the points with increasing heat values, orthogonal to the isolines.
        field_border: The border of the field of the spatial domain of interests.
        field_mask: The field of the spatial domain of interests, used for masking.
        max_err: The maximal tolerated error. Default to 1e-5.
        max_itr: The maximal number of diffusion iterations. Default to 1e5. The loop condition is `itr <= max_itr`,
            so up to `max_itr + 1` updates are performed.
        lh: Lowest heat value. Defaults to 1.
        hh: Highest heat value. Defaults to 100.

    Returns:
        grid_field: The resultant field filled with final values after solving the heat equation, multiplied by
            `field_mask`.
    """

    # Boundary conditions: constant heat on the two isolines, linearly growing heat along the two edges.
    init_field = heat_field.copy()
    for iso_line, iso_heat in ((min_line, lh), (max_line, hh)):
        add_eh_boundary(init_field, iso_line, iso_heat)
    for edge_line in (edge_line_a, edge_line_b):
        add_gh_boundary(init_field, edge_line, lh, hh)

    grid_field = init_field.copy()
    err, itr = 1, 0
    while err > max_err and itr <= max_itr:
        previous = grid_field.copy()

        # four-neighbour average of the previous field for all interior pixels
        right = previous[1:-1, 2:]
        left = previous[1:-1, :-2]
        below = previous[2:, 1:-1]
        above = previous[:-2, 1:-1]
        grid_field[1:-1, 1:-1] = 0.25 * (right + left + below + above)

        # re-impose the boundary conditions on the border pixels
        grid_field = np.where(field_border != 0, init_field, grid_field)

        err = effective_L2_error(grid_field, previous, field_mask)
        if itr >= max_itr:
            lm.main_info("Max iteration reached, with L2 error at: " + str(err))
        itr += 1

    lm.main_info("Total iteration: " + str(itr))

    return grid_field * field_mask