# Source code for spateo.segmentation.simulation_evaluation.allocate_cell

# from cProfile import label
import os
import pickle

# from select import select
import cv2
import numpy as np
import pandas as pd
from anndata import AnnData
from scipy.sparse import csr_matrix

# image = np.zeros([15,15])
# cv2.ellipse(img=image, center=(5,14), axes=(4,2), angle=0.0, startAngle=0, endAngle=360, color=1, thickness=-1)
# print(image)


class Cell:
    """A simulated elliptical cell that can be drawn into a label image.

    The attributes are stored exactly as given; ``shift_cells`` forwards them
    to ``cv2.ellipse`` as ``center``, ``axes``, ``color`` and ``angle``.

    Args:
        center: Position of the ellipse center.
        axes: Pair of axis lengths of the ellipse.
        color: Value written into the label image for this cell.
        angle: Rotation angle of the ellipse.
    """

    def __init__(self, center, axes, color, angle):
        self.center = center
        self.axes = axes
        self.color = color
        self.angle = angle

    def __repr__(self):
        return (
            f"{type(self).__name__}(center={self.center!r}, axes={self.axes!r}, "
            f"color={self.color!r}, angle={self.angle!r})"
        )

    def set_center(self, center):
        """Move the cell by replacing its center with ``center``."""
        self.center = center


def get_cell_pos(area_df, ltos, cell_num=100, height=500, width=500, seed=1, max_iter=20000, shift_length=100):
    """Simulate a label image populated with elliptical cells.

    Areas, axis lengths, centers and rotation angles are drawn for ``cell_num``
    cells, then ``shift_cells`` places the cells into the image, moving any
    cell that would overlap an already placed one.

    Args:
        area_df: Table passed to ``select_area`` to sample cell areas.
        ltos: Long-to-short axis ratios passed to
            ``get_axes_from_area_and_ltos``.
        cell_num: Number of cells to simulate; cells are labelled
            ``1..cell_num``.
        height: Number of rows of the label image.
        width: Number of columns of the label image.
        seed: Seed used for every random draw.
        max_iter: Maximum number of placement attempts in ``shift_cells``.
        shift_length: Maximum per-axis shift applied to an overlapping cell.

    Returns:
        A ``(height, width)`` ``uint16`` array in which 0 is background and
        each placed cell is filled with its label.
    """
    labels = np.zeros([height, width], dtype=np.uint16)

    areas = select_area(area_df, cell_num, seed)
    axes = get_axes_from_area_and_ltos(areas, ltos, seed)
    centers = get_center(height, width, cell_num, seed)
    colors = range(1, cell_num + 1)
    # Reseed right before drawing the angles so they depend only on ``seed``.
    np.random.seed(seed)
    angles = np.random.rand(cell_num) * 360

    cells = [
        Cell(center, cell_axes, color, angle)
        for center, cell_axes, color, angle in zip(centers, axes, colors, angles)
    ]

    shift_cells(cells, labels, max_iter, seed, shift_length)

    return labels


def shift_cells(cells, labels, max_iter, seed, shift_length=10):
    """Draw ``cells`` into ``labels`` in place, shifting cells that overlap.

    The first cell is drawn as is. Every other cell is test-drawn on a copy of
    ``labels``; if it covers pixels that already belong to a cell, its center
    is moved by a random offset and it is queued again, otherwise the drawing
    is committed. Cells still queued after ``max_iter`` attempts are not drawn
    and a message is printed.

    Args:
        cells: Sequence of ``Cell`` objects; their centers may be modified.
        labels: 2-D label image, modified in place.
        max_iter: Maximum number of placement attempts.
        seed: Seed for the random center shifts.
        shift_length: Maximum absolute shift per axis for one attempt.
    """
    from collections import deque

    if not cells:
        # Nothing to draw; avoids indexing into an empty sequence.
        return

    first = cells[0]
    cv2.ellipse(
        img=labels,
        center=first.center,
        axes=first.axes,
        color=first.color,
        # Use the cell's own rotation, like every other cell below.
        angle=first.angle,
        startAngle=0,
        endAngle=360,
        thickness=-1,
    )

    np.random.seed(seed)
    center_shifts = np.random.randint(-shift_length, shift_length + 1, 2 * max_iter + 2).reshape(-1, 2)

    # Largest valid pixel coordinates: index 0 of a center is x (columns),
    # index 1 is y (rows).
    max_x = labels.shape[1] - 1
    max_y = labels.shape[0] - 1

    pending = deque(cells[1:])
    attempts = 0
    while pending and attempts < max_iter:
        attempts += 1
        cell = pending.popleft()
        trial = labels.copy()
        cv2.ellipse(
            img=trial,
            center=cell.center,
            axes=cell.axes,
            color=cell.color,
            angle=cell.angle,
            startAngle=0,
            endAngle=360,
            thickness=-1,
        )
        # Pixels the new cell would occupy that were already labelled.
        if (labels[trial == cell.color] > 0).any():
            moved = np.array(cell.center) - center_shifts[attempts]
            # Keep the shifted center inside the image.
            new_x = int(np.clip(moved[0], 0, max_x))
            new_y = int(np.clip(moved[1], 0, max_y))
            cell.set_center((new_x, new_y))
            pending.append(cell)
        else:
            labels[:] = trial

    if pending:
        # Only warn when some cells really could not be placed.
        print("max iteration has reached, pleas check the result.")


def get_center(height, width, cell_num, seed):
    """Draw random cell centers inside a ``height`` x ``width`` image.

    Centers are returned as ``(x, y)`` pairs, i.e. (column, row), which is the
    order ``cv2.ellipse`` expects and the order ``shift_cells`` uses when it
    clamps centers to the image. For square images the result for a given
    seed is the same pair of draws as before; for non-square images the x
    coordinate is now bounded by ``width`` and y by ``height``.

    Args:
        height: Number of rows of the image.
        width: Number of columns of the image.
        cell_num: Number of centers to draw.
        seed: Seed for the random generator.

    Returns:
        A list of ``cell_num`` ``(x, y)`` tuples of Python ints with
        ``0 <= x < width`` and ``0 <= y < height``.
    """
    np.random.seed(seed)
    xs = np.random.randint(width, size=cell_num)
    ys = np.random.randint(height, size=cell_num)
    # Plain ints are accepted by OpenCV regardless of its version.
    return list(zip(xs.tolist(), ys.tolist()))


def select_area(area_df, cell_num, seed):
    """Sample ``cell_num`` cell areas from an area table.

    Rows with ``prob <= 0`` are ignored. Each remaining ``area`` is repeated
    ``cell_num`` (the column) times to form a pool; the pool is doubled until
    it holds at least ``cell_num`` (the argument) values, shuffled, and its
    first ``cell_num`` values are returned.

    Args:
        area_df: DataFrame with columns ``area``, ``prob`` and ``cell_num``.
        cell_num: Number of areas to return.
        seed: Seed for the shuffle.

    Returns:
        A 1-D array of ``cell_num`` areas.

    Raises:
        ValueError: If areas are requested but no row contributes any.
    """
    np.random.seed(seed)
    usable = area_df[area_df["prob"] > 0]
    # Negative counts contribute nothing, like an empty ``range``.
    counts = usable["cell_num"].astype(int).to_numpy().clip(min=0)
    pool = np.repeat(usable["area"].to_numpy(), counts)

    if cell_num > 0 and len(pool) == 0:
        # Doubling an empty pool would never terminate.
        raise ValueError("area_df has no rows with prob > 0 and cell_num > 0 to sample areas from")

    while len(pool) < cell_num:
        pool = np.tile(pool, 2)

    np.random.shuffle(pool)
    return pool[0:cell_num]


def select_ctoa(c_to_a_ratios, cell_num, seed):
    """Sample ``cell_num`` circumference-to-area ratios.

    The ratios are doubled until at least ``cell_num`` values are available,
    shuffled, and the first ``cell_num`` values are returned. The input is
    never modified.

    Args:
        c_to_a_ratios: 1-D sequence of ratios to sample from.
        cell_num: Number of ratios to return.
        seed: Seed for the shuffle.

    Returns:
        A 1-D array of ``cell_num`` ratios.

    Raises:
        ValueError: If ratios are requested but ``c_to_a_ratios`` is empty.
    """
    # Work on a copy so the in-place shuffle never touches the caller's data.
    pool = np.array(c_to_a_ratios)

    if cell_num > 0 and len(pool) == 0:
        # Doubling an empty pool would never terminate.
        raise ValueError("c_to_a_ratios is empty; cannot sample ratios from it")

    while cell_num > len(pool):
        pool = np.tile(pool, 2)

    np.random.seed(seed)
    np.random.shuffle(pool)
    return pool[0:cell_num]


def get_axes_from_area_and_ctoa(areas, ctoas, seed):
    """Compute ellipse axes from areas and circumference-to-area ratios.

    Relations stated by the original author, with ``a`` the long and ``b``
    the short axis:

        S = pi * a * b            (area)
        C = 2 * pi * b + 4 * (a - b)   (circumference approximation)
        R = C / S

    With ``x = R * S`` and ``y = S / pi`` the code evaluates
    ``long = sqrt(y - pi * y / 2 + x / 4)`` and ``short = y / long``.

    NOTE(review): the expression for ``long`` does not obviously follow from
    the relations above, and it yields NaN when the term under the square
    root is negative -- confirm the derivation before relying on this
    function.

    Args:
        areas: 1-D sequence of cell areas.
        ctoas: 1-D sequence of circumference-to-area ratios, same length as
            ``areas``.
        seed: Unused; kept for interface compatibility.

    Returns:
        A list of ``(long, short)`` tuples, one per input pair.
    """
    # Accept plain sequences as well as arrays.
    areas = np.asarray(areas)
    ctoas = np.asarray(ctoas)

    x = ctoas * areas
    y = areas / np.pi

    longs = np.sqrt(y - np.pi * y / 2 + x / 4)
    shorts = y / longs
    return list(zip(longs, shorts))


def get_axes_from_area_and_ltos(areas, ltos, seed):
    """Compute integer ellipse axes from areas and long-to-short ratios.

    With ``S = pi * a * b`` and ``R = a / b`` the short axis is
    ``b = sqrt(S / (R * pi))`` and the long axis is ``a = b * R``. Both are
    truncated to ``uint16``.

    Args:
        areas: 1-D sequence of cell areas.
        ltos: 1-D sequence of long-to-short axis ratios. It is repeated
            cyclically when shorter than ``areas`` and truncated when longer.
        seed: Seed applied to NumPy's global random generator; no random
            values are drawn here.

    Returns:
        A list of ``(long, short)`` tuples of ``uint16`` values, one per area.

    Raises:
        ValueError: If ``areas`` is non-empty but ``ltos`` is empty.
    """
    # Kept so the global random state after the call is unchanged for callers.
    np.random.seed(seed)

    areas = np.asarray(areas)
    ltos = np.asarray(ltos)
    if len(areas) > 0 and len(ltos) == 0:
        # There is nothing to repeat; the previous doubling loop never ended.
        raise ValueError("ltos is empty; cannot assign axis ratios to the areas")

    # Cyclic repeat / truncate to exactly one ratio per area.
    ltos = np.resize(ltos, len(areas))

    shorts = np.sqrt(areas / (ltos * np.pi))
    longs = (shorts * ltos).astype(np.uint16)
    shorts = shorts.astype(np.uint16)
    return list(zip(longs, shorts))


def _draw_signal_values(mean_df, count, what):
    """Draw ``count`` signal values from the index of ``mean_df``.

    Each index value of a row with ``prob > 0`` enters the pool
    ``int(prob * 1000)`` times; the pool is doubled until it holds at least
    ``count`` values, shuffled, and its first ``count`` values are returned.
    """
    usable = mean_df[mean_df["prob"] > 0]
    repeats = (usable["prob"] * 1000).astype(int).to_numpy()
    pool = np.repeat(usable.index.to_numpy(), repeats)

    if count > 0 and len(pool) == 0:
        # Doubling an empty pool would never terminate.
        raise ValueError(f"{what} has no rows with prob >= 0.001 to sample signal values from")

    while count > len(pool):
        pool = np.tile(pool, 2)
    np.random.shuffle(pool)
    return pool[0:count]


def add_sig_to_cell(labels, cell_mean_df, bg_mean_df, seed):
    """Assign a random signal value to every pixel of a label image.

    Pixels inside cells (``labels > 0``) receive values drawn from
    ``cell_mean_df``; background pixels (``labels == 0``) receive values
    drawn from ``bg_mean_df``. In both tables the index holds the signal
    value and the ``prob`` column its weight.

    Args:
        labels: Label image; 0 is background.
        cell_mean_df: DataFrame with a ``prob`` column for cell pixels.
        bg_mean_df: DataFrame with a ``prob`` column for background pixels.
        seed: Seed for the random draws.

    Returns:
        An ``int16`` array with the same shape as ``labels``.

    Raises:
        ValueError: If pixels need values but the matching table provides
            none.
    """
    np.random.seed(seed)

    in_cell = labels > 0
    in_bg = labels == 0

    # Cell values are drawn first, then background values, so the random
    # stream is consumed in a fixed order.
    cells = _draw_signal_values(cell_mean_df, np.sum(in_cell), "cell_mean_df")
    bgs = _draw_signal_values(bg_mean_df, np.sum(in_bg), "bg_mean_df")

    sigs = np.zeros_like(labels, dtype=np.int16)
    sigs[in_cell] = cells
    sigs[in_bg] = bgs

    return sigs


def simulate_cell_and_sig(
    area_df,
    ltos,
    cell_sig_df,
    bg_sig_df,
    prefix,
    cell_num=100,
    height=500,
    width=500,
    seed=1,
    max_iter=20000,
    shift_length=100,
):
    """Simulate cells and their signal and write both to ``prefix``.

    Two files are written into the directory ``prefix`` (created if needed):

    * ``seed<seed>.txt``: tab-separated table with columns ``geneID``
      (always ``"Malat1"``), ``x``, ``y`` and ``MIDCounts`` for every pixel
      with a positive signal; ``x`` is the first and ``y`` the second array
      index of the pixel.
    * ``seed<seed>.labels.pkl``: the pickled label image.

    Args:
        area_df: Area table forwarded to ``get_cell_pos``.
        ltos: Long-to-short axis ratios forwarded to ``get_cell_pos``.
        cell_sig_df: Signal table for cell pixels (see ``add_sig_to_cell``).
        bg_sig_df: Signal table for background pixels.
        prefix: Output directory.
        cell_num: Number of cells to simulate.
        height: Number of rows of the simulated image.
        width: Number of columns of the simulated image.
        seed: Seed for all random draws; also part of the file names.
        max_iter: Maximum number of placement attempts.
        shift_length: Maximum per-axis shift for an overlapping cell.
    """
    labels = get_cell_pos(
        area_df=area_df,
        ltos=ltos,
        cell_num=cell_num,
        height=height,
        width=width,
        seed=seed,
        max_iter=max_iter,
        shift_length=shift_length,
    )
    sigs = add_sig_to_cell(labels, cell_sig_df, bg_sig_df, seed)

    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(prefix, exist_ok=True)

    has_signal = sigs > 0
    x, y = np.where(has_signal)
    df = pd.DataFrame({"geneID": "Malat1", "x": x, "y": y, "MIDCounts": sigs[has_signal]})
    out_file = f"{prefix}/seed{seed}.txt"
    df.to_csv(out_file, sep="\t", index=False)

    labels_file = f"{prefix}/seed{seed}.labels.pkl"
    # The context manager closes the file even if pickling fails.
    with open(labels_file, "wb") as handle:
        pickle.dump(labels, handle)