Tutorial Hierarchy analysis#

This tutorial describes how to use the hierarchy function. This allows an user to identify for an expansions object to which “parent” object of the original mask.

%load_ext autoreload

import os 
import sys
import time
import logging
import re
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import json
import anndata as ad
import scanpy as sc
from natsort import natsorted
import numpy as np
np.random.seed(42)

sys.path.append(os.path.dirname(os.getcwd()))
import seaborn as sns
from gridgene import get_arrays as ga
from gridgene import contours, get_masks
from gridgene.mask_properties import MaskAnalysisPipeline, MaskDefinition
# define the logger :  can be None, and is set to INFO
# Custom logger setup
logger = logging.getLogger('contour_logger')
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

Hierarchy using Constrained Mask Expansion

We will use Cancer Stroma Interface analysis in Xenium.

Get the files for Xenium

tma1_path = '../../xenium_data/HLA/GD_TMA1_S3/fov_filtered'
files_tma1 = os.listdir(tma1_path)


to_exclude = [
    'TMA1_Selection14_filtered.csv' , # little tumour
    'TMA1_Selection15_filtered.csv', # tonsil
    'TMA1_Selection18_filtered.csv' , # normal
    'TMA1_Selection24_filtered.csv', # tonsl 
    'TMA1_Selection27_filtered.csv', # low quality
    'TMA1_Selection32_filtered.csv', # low quality
    'TMA1_Selection33_filtered.csv', # low quality
             ]

files = [os.path.join(tma1_path, file) for file in files_tma1 if file not in to_exclude]
print(len(files))
20

Derive cancer and stroma masks

Apply GRIDGEN to Xenium to derive cancer and stroma masks (similar to the previous notebook: “Cancer Stroma in Xenium”).

After this, expansions from the cancer mask inside the stroma mask are derived, and mask information is retrieved. We will expand 10 and 20 um.

# param tum
target_tum = ['EPCAM', 'SMIM22','CLDN3', 'KRT18','LGALS4', 'KRT8', 'ELF3','TSPAN8', 'STMN1', 'CD47', 'MYC', 'LGALS3'] 
density_th_tum = 10    
min_area_th_tum =  700 
kernel_size_tum = 10  
# param empty
density_th_empty = 40
min_area_th_empty = 400 
kernel_size_empty = 10
file_csv = files[9]
df_total = pd.read_csv(file_csv)
df_total = df_total[['x_location', 'y_location', 'feature_name']]
df_total = df_total.rename(columns={'feature_name': 'target'})
df_total = df_total[~df_total['target'].str.contains('System|egative')]
df_total['X'] = df_total['x_location'] - min(df_total['x_location'])
df_total['Y'] = df_total['y_location'] - min(df_total['y_location'])

n_genes = len(df_total['target'].unique())
height = int(max(df_total['X'])) + 1
width = int(max(df_total['Y'])) + 1

# this makes the sparse df to an array with the spatial information 
target_dict_total = {target: index for index, target in enumerate(df_total['target'].unique())}
array_total = ga.transform_df_to_array(df = df_total, target_dict=target_dict_total, array_shape = (height, width,len(target_dict_total))).astype(np.int8)

# creating subsets 
df_subset_tum, array_subset_tum, target_indices_subset_tum = ga.get_subset_arrays(df_total, array_total,target_dict_total,
                                                                     target_list=target_tum, target_col = 'target')

# obtain contours 
CTum = contours.ConvolutionContours(array_subset_tum, contour_name='tum')
CTum.get_conv_sum(kernel_size=kernel_size_tum, kernel_shape='square')
CTum.contours_from_sum(density_threshold = density_th_tum,
                       min_area_threshold = min_area_th_tum , directionality = 'higher')

CEmpty = contours.ConvolutionContours(array_total, contour_name='empty')
CEmpty.get_conv_sum(kernel_size=kernel_size_empty, kernel_shape='square')
CEmpty.contours_from_sum(density_threshold = density_th_empty,
                       min_area_threshold = min_area_th_empty, directionality = 'lower') # attention that directionality is lower here 

#### obtain masks
GM = get_masks.GetMasks(image_shape = (height, width))

mask_empty = GM.create_mask(CEmpty.contours)
mask_tum = GM.create_mask(CTum.contours)
mask_tum = GM.fill_holes(mask_tum)
mask_stroma = GM.subtract_masks(np.ones((height, width), dtype=np.uint8), mask_tum, mask_empty)          
mask_stroma = GM.filter_binary_mask_by_area(mask_stroma, min_area=700)

GM.plot_masks(masks=[mask_stroma, mask_tum], mask_names=['Stroma', 'Tumour'],
              background_color=(1, 1, 1), mask_colors={'Stroma': (65, 105, 225), 'Tumour': (255, 165, 0)},
              path=None, show=True, ax=None, figsize=(8, 6)) 

TA = get_masks.ConstrainedMaskExpansion(mask_tum, mask_stroma)
TA.expand_mask(expansion_pixels=[10,20], min_area=1000)
GM.plot_masks(masks=TA.binary_expansions.values(),
              mask_names=TA.binary_expansions.keys(),
              background_color=(1, 1, 1), mask_colors=None, path=None, show=True, ax=None,
             figsize=(8, 6))
2025-06-19 23:51:29,534 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-19 23:51:29,623 - gridgen.contours.tum - INFO - get_conv_sum took 0.0886 seconds
2025-06-19 23:51:29,706 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 7
2025-06-19 23:51:29,707 - gridgen.contours.tum - INFO - contours_from_sum took 0.0829 seconds
2025-06-19 23:51:29,707 - gridgen.contours.empty - INFO - Initialized GetContour
2025-06-19 23:51:30,277 - gridgen.contours.empty - INFO - get_conv_sum took 0.5697 seconds
2025-06-19 23:51:30,843 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 29
2025-06-19 23:51:30,844 - gridgen.contours.empty - INFO - contours_from_sum took 0.5662 seconds
2025-06-19 23:51:30,844 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-19 23:51:30,859 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
../_images/09e0532f45d8a186e4dacb41c62ff79177828eb285862b422e037357cc4cc6ee.png
2025-06-19 23:51:31,143 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/3ed643e00fd7c96ae858029534ce99f7e267fc66763fda0313d8c933e4951da3.png

We can extract the information from the masks Pipeline run will extract counts and morphology

#  1. Define your masks using MaskDefinition
mask_definitions = [
    MaskDefinition(mask=TA.binary_expansions['constraint_remaining'], mask_name='Stroma_remaining', analysis_type='bulk'),
    MaskDefinition(mask=TA.binary_expansions['seed_mask'], mask_name='Cancer', analysis_type='per_object'),
    MaskDefinition(mask=TA.binary_expansions['expansion_10'], mask_name='CI_10um', analysis_type='per_object'),
    MaskDefinition(mask=TA.binary_expansions['expansion_20'], mask_name='CI_20um', analysis_type='per_object'),
]

# 2. Initialize and run the analysis pipeline
pipeline = MaskAnalysisPipeline(mask_definitions=mask_definitions, 
                                array_counts = array_total,
                        target_dict = target_dict_total,
  )
results = pipeline.run()
df = pipeline.get_results_df()
df.head()
2025-06-19 23:51:31,890 - gridgen.mask_properties.GetMasks - INFO - Initialized MaskAnalysisPipeline with 4 masks.
2025-06-19 23:51:31,890 - gridgen.mask_properties.GetMasks - INFO - Processing mask: Stroma_remaining (bulk)
2025-06-19 23:51:32,825 - gridgen.mask_properties.GetMasks - INFO - Processing mask: Cancer (per_object)
2025-06-19 23:51:33,356 - gridgen.mask_properties.GetMasks - INFO - Processing mask: CI_10um (per_object)
2025-06-19 23:51:33,823 - gridgen.mask_properties.GetMasks - INFO - Processing mask: CI_20um (per_object)
run took 2.3947 seconds
area object_id IL6ST IGHG1 COL1A1 CTSH SIRPA TIMP2 SPARC LMO2 ... analysis_type perimeter eccentricity solidity centroid_y centroid_x min_row min_col max_row max_col
0 1749628.0 bulk 5880 2682 20626 1271 1022 13634 31112 461 ... bulk NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 9614.0 1 50 11 95 81 29 29 90 0 ... per_object 556.060967 0.954892 0.929607 104.747764 977.762118 74.0 865.0 145.0 1088.0
2 9459.0 2 68 22 59 96 44 24 56 0 ... per_object 617.961508 0.981762 0.908122 397.413046 1533.974627 294.0 1455.0 500.0 1621.0
3 136753.0 3 741 98 484 1649 499 586 796 66 ... per_object 2205.493650 0.734512 0.846480 749.492940 1472.664453 446.0 1316.0 1054.0 1713.0
4 7150.0 4 54 37 76 106 47 29 109 1 ... per_object 481.203102 0.965569 0.906555 568.757063 152.850629 485.0 103.0 659.0 202.0

5 rows × 493 columns

pipeline.map_hierarchies will map the hierarchy objects

hierarchy_defs = {
    "CI_10um": {
        "labels": TA.referenced_expansions['expansion_10'],
        "level_hierarchy": "Cancer"
    },
    "CI_20um": {
        "labels": TA.referenced_expansions['expansion_20'],
        "level_hierarchy": "Cancer"
    },
        "Cancer": {
        "labels": TA.referenced_expansions['seed_mask'],
        "level_hierarchy": "Cancer"
    }
}


df_hierarchy = pipeline.map_hierarchies(hierarchy_defs, save_dir=None)
df_hierarchy
map_hierarchies took 0.0556 seconds
mask_name object_id parent_mask parent_ids
0 CI_10um 1 Cancer [1]
1 CI_10um 2 Cancer [2]
2 CI_10um 3 Cancer [3]
3 CI_10um 4 Cancer [4]
4 CI_10um 5 Cancer [5]
5 CI_20um 1 Cancer [1]
6 CI_20um 2 Cancer [2]
7 CI_20um 3 Cancer [3]
8 CI_20um 4 Cancer [4]
9 CI_20um 5 Cancer [3, 5]
10 Cancer 1 Cancer [1]
11 Cancer 2 Cancer [2]
12 Cancer 3 Cancer [3]
13 Cancer 4 Cancer [4]
14 Cancer 5 Cancer [5]

Lets check another example

file_csv = files[15]
df_total = pd.read_csv(file_csv)
df_total = df_total[['x_location', 'y_location', 'feature_name']]
df_total = df_total.rename(columns={'feature_name': 'target'})
df_total = df_total[~df_total['target'].str.contains('System|egative')]
df_total['X'] = df_total['x_location'] - min(df_total['x_location'])
df_total['Y'] = df_total['y_location'] - min(df_total['y_location'])

n_genes = len(df_total['target'].unique())
height = int(max(df_total['X'])) + 1
width = int(max(df_total['Y'])) + 1

# this makes the sparse df to an array with the spatial information 
target_dict_total = {target: index for index, target in enumerate(df_total['target'].unique())}
array_total = ga.transform_df_to_array(df = df_total, target_dict=target_dict_total, array_shape = (height, width,len(target_dict_total))).astype(np.int8)

# creating subsets 
df_subset_tum, array_subset_tum, target_indices_subset_tum = ga.get_subset_arrays(df_total, array_total,target_dict_total,
                                                                     target_list=target_tum, target_col = 'target')

# obtain contours 
CTum = contours.ConvolutionContours(array_subset_tum, contour_name='tum')
CTum.get_conv_sum(kernel_size=kernel_size_tum, kernel_shape='square')
CTum.contours_from_sum(density_threshold = density_th_tum,
                       min_area_threshold = min_area_th_tum , directionality = 'higher')

CEmpty = contours.ConvolutionContours(array_total, contour_name='empty')
CEmpty.get_conv_sum(kernel_size=kernel_size_empty, kernel_shape='square')
CEmpty.contours_from_sum(density_threshold = density_th_empty,
                       min_area_threshold = min_area_th_empty, directionality = 'lower') # attention that directionality is lower here 

#### obtain masks
GM = get_masks.GetMasks(image_shape = (height, width))

mask_empty = GM.create_mask(CEmpty.contours)
mask_tum = GM.create_mask(CTum.contours)
mask_tum = GM.fill_holes(mask_tum)
mask_stroma = GM.subtract_masks(np.ones((height, width), dtype=np.uint8), mask_tum, mask_empty)          
mask_stroma = GM.filter_binary_mask_by_area(mask_stroma, min_area=700)

TA = get_masks.ConstrainedMaskExpansion(mask_tum, mask_stroma)
TA.expand_mask(expansion_pixels=[10,20], min_area=None)
GM.plot_masks(masks=TA.binary_expansions.values(),
              mask_names=TA.binary_expansions.keys(),
              background_color=(1, 1, 1), mask_colors=None, path=None, show=True, ax=None,
             figsize=(8, 6))

#  1. Define your masks using MaskDefinition
mask_definitions = [
    MaskDefinition(mask=TA.binary_expansions['constraint_remaining'], mask_name='Stroma_remaining', analysis_type='bulk'),
    MaskDefinition(mask=TA.binary_expansions['seed_mask'], mask_name='Cancer', analysis_type='per_object'),
    MaskDefinition(mask=TA.binary_expansions['expansion_10'], mask_name='CI_10um', analysis_type='per_object'),
    MaskDefinition(mask=TA.binary_expansions['expansion_20'], mask_name='CI_20um', analysis_type='per_object'),
]

# 2. Initialize and run the analysis pipeline
pipeline = MaskAnalysisPipeline(mask_definitions=mask_definitions, 
                                array_counts = array_total,
                        target_dict = target_dict_total,
  )
results = pipeline.run()
df = pipeline.get_results_df()
df


hierarchy_defs = {
        "Cancer": {
        "labels": TA.referenced_expansions['seed_mask'],
        "level_hierarchy": "Cancer"
    }, 
    "CI_10um": {
        "labels": TA.referenced_expansions['expansion_10'],
        "level_hierarchy": "Cancer"
    },
    "CI_20um": {
        "labels": TA.referenced_expansions['expansion_20'],
        "level_hierarchy": "Cancer"
    },

}

df_hierarchy = pipeline.map_hierarchies(hierarchy_defs, save_dir=None)
df_hierarchy
2025-06-19 23:51:40,419 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-19 23:51:40,497 - gridgen.contours.tum - INFO - get_conv_sum took 0.0771 seconds
2025-06-19 23:51:40,569 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 25
2025-06-19 23:51:40,569 - gridgen.contours.tum - INFO - contours_from_sum took 0.0721 seconds
2025-06-19 23:51:40,570 - gridgen.contours.empty - INFO - Initialized GetContour
2025-06-19 23:51:41,128 - gridgen.contours.empty - INFO - get_conv_sum took 0.5579 seconds
2025-06-19 23:51:41,673 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 39
2025-06-19 23:51:41,674 - gridgen.contours.empty - INFO - contours_from_sum took 0.5453 seconds
2025-06-19 23:51:41,674 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-19 23:51:41,692 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-19 23:51:41,766 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/4c0463a291fade9384013a912ecbe31bf2078adeea50ba76bf2688c3f23843a0.png
2025-06-19 23:51:42,491 - gridgen.mask_properties.GetMasks - INFO - Initialized MaskAnalysisPipeline with 4 masks.
2025-06-19 23:51:42,494 - gridgen.mask_properties.GetMasks - INFO - Processing mask: Stroma_remaining (bulk)
2025-06-19 23:51:43,017 - gridgen.mask_properties.GetMasks - INFO - Processing mask: Cancer (per_object)
2025-06-19 23:51:44,098 - gridgen.mask_properties.GetMasks - INFO - Processing mask: CI_10um (per_object)
2025-06-19 23:51:45,018 - gridgen.mask_properties.GetMasks - INFO - Processing mask: CI_20um (per_object)
run took 3.5887 seconds
map_hierarchies took 0.2382 seconds
mask_name object_id parent_mask parent_ids
0 Cancer 1 Cancer [1]
1 Cancer 2 Cancer [2]
2 Cancer 3 Cancer [3]
3 Cancer 4 Cancer [4]
4 Cancer 5 Cancer [5]
... ... ... ... ...
114 CI_20um 68 Cancer [7]
115 CI_20um 69 Cancer [4]
116 CI_20um 70 Cancer [3]
117 CI_20um 71 Cancer [3]
118 CI_20um 72 Cancer [4, 15]

119 rows × 4 columns

Lets visualize the masks and expansions

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.patches import Patch

# Mask dictionaries from your object
dicts = {
    "Binary Masks": TA.binary_expansions,
    "Labeled Masks": TA.labeled_expansions,
    "Referenced Masks": TA.referenced_expansions,
}

# Keys to display
keys = ['seed_mask', 'expansion_10', 'expansion_20']
seed_labels = np.unique(dicts["Labeled Masks"]["expansion_10"])
seed_labels = seed_labels[seed_labels != 0]  # Remove background
all_labels = sorted(seed_labels)

cmap = cm.get_cmap('tab20', 20)
label_to_color = {label: cmap((label - 1) % 20) for label in all_labels}

# STEP 3 — Color mapping function
def map_labels_to_rgb(mask):
    rgb = np.ones(mask.shape + (4,), dtype=np.float32)  # white background
    for label, color in label_to_color.items():
        rgb[mask == label] = color
    return rgb

# STEP 4 — Plotting
n_rows = len(dicts)
n_cols = len(keys)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
if axs.ndim == 1:
    axs = axs.reshape((n_rows, n_cols))

for row_idx, (row_name, d) in enumerate(dicts.items()):
    for col_idx, key in enumerate(keys):
        mask = d[key]
        rgb_img = map_labels_to_rgb(mask)
        ax = axs[row_idx, col_idx]
        ax.imshow(rgb_img)
        ax.set_title(f"{row_name}\n{key}", fontsize=10)
        ax.axis('off')

# STEP 5 — Legend (for seed labels only)
legend_patches = [
    Patch(facecolor=label_to_color[label], edgecolor='black', label=str(label))
    for label in all_labels
]

fig.legend(
    handles=legend_patches,
    loc='lower center',
    bbox_to_anchor=(0.5, -0.05),
    ncol=min(len(legend_patches), 10),
    fontsize=8,
    title="Global Label Colors"
)

plt.tight_layout()
plt.subplots_adjust(bottom=0.2)
plt.show()
/tmp/ipykernel_784325/143693451.py:19: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.
  cmap = cm.get_cmap('tab20', 20)
../_images/52ff7c3ef54fed8a0ca4cb372c4b7c346920aad875f49e7166fb441235580b95.png

Hierarchy using Single Object Mask Expansion

We will use putative γδ T cell areas iin CosMx

cosmx_path_s3 =  '../../cosmx_data/S3/S3/20230628_151317_S3/AnalysisResults/yxyz3r7ufm'

folder_names_s3 = [folder_name for folder_name in os.listdir(cosmx_path_s3) if
                os.path.isdir(os.path.join(cosmx_path_s3, folder_name))]


target_files_s3 = [
    os.path.join(cosmx_path_s3, folder, file)
    for folder in os.listdir(cosmx_path_s3)
    if os.path.isdir(os.path.join(cosmx_path_s3, folder))
    for file in os.listdir(os.path.join(cosmx_path_s3, folder))
    if '__target_call_coord.csv' in file
]

files_cosmx = natsorted(target_files_s3)
len(files_cosmx)
20
target_gd = ['TRGC1/TRGC2', 'TRDC']
target_ab = ['TRBC1/TRBC2', 'TRAC']

target_cd8 = ['CD8A','CD8B', 'TRBC1/TRBC2', 'TRAC']
target_cd4 = ['CD4']

target_tum = ['EPCAM', 'CEACAM6', 'CLDN4', 'CDH1', 'RNF43', 'SPINK1', 'SOX9', 'CD24', 'KRT19', 'AREG',
              'REG1A', 'AGR2', 'PLAC8', 'CALB1', 'S100P', 'ITGA6', 'DMBT1', 'DUSP4',
              'KRT8', 'S100A6', 'RPL37', 'RPL32', 'KRT18', 'OLFM4',
              'PRSS2', 'CD55', 'EPHB4', 'ADGRL1', 'KRT17', 'ITGB8', 'ADGRE5', 'GDF15', 'IL27RA', 'AZGP1'
              ]  # cadherin 'PIGR',  'LYZ','SERPINA1'

density_th_gd = 2
min_area_th_gd = 5  # 40
kernel_size_gd = 45 #90 
fov = 'FOV010'
file_csv = [file for file in files_cosmx if fov in file][0]  
    
df_total = pd.read_csv(file_csv)
df_total = df_total.rename(columns={'x': 'X', 'y': 'Y'})
df_total = df_total[~df_total['target'].str.contains('System|egative')]

n_genes = len(df_total['target'].unique())
height = int(max(df_total['X'])) + 1
width = int(max(df_total['Y'])) + 1

target_dict_total = {target: index for index, target in enumerate(df_total['target'].unique())}
array_total = ga.transform_df_to_array(df = df_total, target_dict=target_dict_total, array_shape = (height, width,len(target_dict_total))).astype(np.int8)

# creating subsets 
df_subset_gd, array_subset_gd, target_indices_subset_gd = ga.get_subset_arrays(df_total, array_total,
                                                                                   target_dict_total,
                                                                                   target_list=target_gd,
                                                                           target_col='target')

df_subset_ab, array_subset_ab, target_indices_subset_ab = ga.get_subset_arrays(df_total, array_total,
                                                                                       target_dict_total,
                                                                                       target_list=target_ab,
                                                                                       target_col='target')
    
    
df_subset_g, array_subset_g, target_indices_subset_g = ga.get_subset_arrays(df_total, array_total,
                                                                                       target_dict_total,
                                                                                       target_list=['TRGC1/TRGC2'],
                                                                                       target_col='target')
    
df_subset_d, array_subset_d, target_indices_subset_d = ga.get_subset_arrays(df_total, array_total,
                                                                                       target_dict_total,
                                                                                       target_list=['TRDC'],
                                                                                       target_col='target')


df_subset_cd8, array_subset_cd8, target_indices_subset_cd8 = ga.get_subset_arrays(df_total, array_total,
                                                                                      target_dict_total,
                                                                                      target_list=target_cd8,
                                                                                      target_col='target')
                                                                               

df_subset_TRAC_gene, array_subset_TRAC_gene, target_indices_subset_TRAC_gene = ga.get_subset_arrays(df_total, 
                                                                                                 array_total,
                                                                                      target_dict_total,
                                                                                      target_list= ['TRAC'],
                                                                                      target_col='target')

df_subset_TRBC_gene, array_subset_TRBC_gene, target_indices_subset_TRBC_gene = ga.get_subset_arrays(df_total, 
                                                                                                 array_total,
                                                                                      target_dict_total,
                                                                                      target_list= ['TRBC1/TRBC2'],
                                                                                      target_col='target')


df_subset_cd8_gene, array_subset_cd8_gene, target_indices_subset_cd8_gene = ga.get_subset_arrays(df_total, 
                                                                                                 array_total,
                                                                                      target_dict_total,
                                                                                      target_list= ['CD8A', 'CD8B'],
                                                                                      target_col='target')

df_subset_cd4_gene, array_subset_cd4_gene, target_indices_subset_cd4_gene = ga.get_subset_arrays(df_total, 
                                                                                                 array_total,
                                                                                      target_dict_total,
                                                                                      target_list= ['CD4'],
                                                                                      target_col='target')
CGD = contours.ConvolutionContours(array_subset_gd, contour_name='GD', logger=logger)
CGD.get_conv_sum(kernel_size=kernel_size_gd, kernel_shape='square')  #
CGD.contours_from_sum(density_threshold=density_th_gd,
                      min_area_threshold=min_area_th_gd, directionality='higher')

### Filtering 
CGD.filter_contours_by_gene_comparison(gene_array1 = array_subset_gd, gene_array2 = array_subset_ab,
                                       gene_name1 = "gd", gene_name2 = "ab") # gene 1 > gene2 --> valid contour 
# G>0 D>0
CGD.filter_contours_by_gene_threshold(gene_array = array_subset_d.squeeze(), threshold = 1, gene_name = 'TRDC')# >= 1
CGD.filter_contours_by_gene_threshold(gene_array = np.sum(array_subset_g, axis=-1), threshold = 1, gene_name = 'TRGC1_2')# >= 1
2025-06-19 23:51:57,741 - contour_logger - INFO - Initialized GetContour
2025-06-19 23:51:58,156 - contour_logger - INFO - get_conv_sum took 0.4139 seconds
2025-06-19 23:51:58,374 - contour_logger - INFO - Number of contours after filtering no counts: 26
2025-06-19 23:51:58,374 - contour_logger - INFO - contours_from_sum took 0.2178 seconds
2025-06-19 23:51:59,317 - contour_logger - INFO - Excluding contour 7. gd count 3.00 ≤ ab count 4.00
2025-06-19 23:51:59,719 - contour_logger - INFO - Excluding contour 13. gd count 1.00 ≤ ab count 2.00
2025-06-19 23:51:59,785 - contour_logger - INFO - Excluding contour 14. gd count 3.00 ≤ ab count 3.00
2025-06-19 23:52:00,514 - contour_logger - INFO - Contours remaining after gene comparison: 23
2025-06-19 23:52:00,580 - contour_logger - INFO - Excluding contour 1. Gene TRDC count  0.0 is below threshold 1
2025-06-19 23:52:00,613 - contour_logger - INFO - Excluding contour 2. Gene TRDC count  0.0 is below threshold 1
2025-06-19 23:52:01,004 - contour_logger - INFO - Excluding contour 14. Gene TRDC count  0.0 is below threshold 1
2025-06-19 23:52:01,268 - contour_logger - INFO - Excluding contour 22. Gene TRDC count  0.0 is below threshold 1
2025-06-19 23:52:01,268 - contour_logger - INFO - Number of contours remaining: 19
2025-06-19 23:52:01,396 - contour_logger - INFO - Excluding contour 2. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,430 - contour_logger - INFO - Excluding contour 3. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,498 - contour_logger - INFO - Excluding contour 5. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,532 - contour_logger - INFO - Excluding contour 6. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,599 - contour_logger - INFO - Excluding contour 8. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,667 - contour_logger - INFO - Excluding contour 10. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,734 - contour_logger - INFO - Excluding contour 12. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,835 - contour_logger - INFO - Excluding contour 15. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,869 - contour_logger - INFO - Excluding contour 16. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,903 - contour_logger - INFO - Excluding contour 17. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,937 - contour_logger - INFO - Excluding contour 18. Gene TRGC1_2 count  0.0 is below threshold 1
2025-06-19 23:52:01,937 - contour_logger - INFO - Number of contours remaining: 8
GM = get_masks.GetMasks(image_shape=(height, width))

SA = get_masks.SingleClassObjectAnalysis(GM,contours_object=CGD.contours)
SA.get_mask_objects(exclude_masks=None)  # Not excluding any tum or empty 

SA.get_objects_expansion(expansions_pixels=[10, 20, 30],)  
2025-06-19 23:52:01,941 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-19 23:52:01,943 - gridgen.get_masks.GetMasks - INFO - Mask for objects created.
#  1. Define your masks using MaskDefinition
mask_definitions = [
    MaskDefinition(mask=SA.binary_expansions['seed_mask'], mask_name='GD', analysis_type='per_object'),
    MaskDefinition(mask=SA.binary_expansions['expansion_10'], mask_name='GD_10um', analysis_type='per_object'),
    MaskDefinition(mask=SA.binary_expansions['expansion_20'], mask_name='GD_20um', analysis_type='per_object'),
    MaskDefinition(mask=SA.binary_expansions['expansion_30'], mask_name='GD_30um', analysis_type='per_object'),

]

# 2. Initialize and run the analysis pipeline
pipeline = MaskAnalysisPipeline(mask_definitions=mask_definitions, 
                                array_counts = array_total,
                        target_dict = target_dict_total,
  )
results = pipeline.run()
df = pipeline.get_results_df()
df


hierarchy_defs = {
        "GD": {
        "labels": SA.referenced_expansions['seed_mask'],
        "level_hierarchy": "GD"
    }, 
    "GD_10um": {
        "labels": SA.referenced_expansions['expansion_10'],
        "level_hierarchy": "GD"
    },
    "GD_20um": {
        "labels": SA.referenced_expansions['expansion_20'],
        "level_hierarchy": "GD"
    },
    "GD_30um": {
    "labels": SA.referenced_expansions['expansion_30'],
    "level_hierarchy": "GD"
    }
}
    
df_hierarchy = pipeline.map_hierarchies(hierarchy_defs, save_dir=None)
df_hierarchy
2025-06-19 23:52:05,112 - gridgen.mask_properties.GetMasks - INFO - Initialized MaskAnalysisPipeline with 4 masks.
2025-06-19 23:52:05,113 - gridgen.mask_properties.GetMasks - INFO - Processing mask: GD (per_object)
2025-06-19 23:52:21,889 - gridgen.mask_properties.GetMasks - INFO - Processing mask: GD_10um (per_object)
2025-06-19 23:52:39,244 - gridgen.mask_properties.GetMasks - INFO - Processing mask: GD_20um (per_object)
2025-06-19 23:52:57,697 - gridgen.mask_properties.GetMasks - INFO - Processing mask: GD_30um (per_object)
run took 70.1832 seconds
map_hierarchies took 0.5748 seconds
mask_name object_id parent_mask parent_ids
0 GD 1 GD [1]
1 GD 2 GD [2]
2 GD 3 GD [3]
3 GD 4 GD [4]
4 GD 5 GD [5]
5 GD 6 GD [6]
6 GD 7 GD [7]
7 GD 8 GD [8]
8 GD_10um 1 GD [1]
9 GD_10um 2 GD [2]
10 GD_10um 3 GD [3]
11 GD_10um 4 GD [4]
12 GD_10um 5 GD [5]
13 GD_10um 6 GD [6]
14 GD_10um 7 GD [7]
15 GD_10um 8 GD [8]
16 GD_20um 1 GD [1]
17 GD_20um 2 GD [2]
18 GD_20um 3 GD [3]
19 GD_20um 4 GD [4]
20 GD_20um 5 GD [5]
21 GD_20um 6 GD [6]
22 GD_20um 7 GD [7]
23 GD_20um 8 GD [8]
24 GD_30um 1 GD [1]
25 GD_30um 2 GD [2]
26 GD_30um 3 GD [3]
27 GD_30um 4 GD [4]
28 GD_30um 5 GD [5]
29 GD_30um 6 GD [6]
30 GD_30um 7 GD [7]
31 GD_30um 8 GD [8]
# Mask dictionaries from your object
dicts = {
    "Binary Masks": SA.binary_expansions,
    "Labeled Masks": SA.labeled_expansions,
    "Referenced Masks": SA.referenced_expansions,
}

# Keys to display
keys = ['seed_mask', 'expansion_10', 'expansion_20', 'expansion_30']
seed_labels = np.unique(dicts["Labeled Masks"]["expansion_10"])
seed_labels = seed_labels[seed_labels != 0]  # Remove background
all_labels = sorted(seed_labels)

cmap = cm.get_cmap('tab20', 20)
label_to_color = {label: cmap((label - 1) % 20) for label in all_labels}

# STEP 3 — Color mapping function
def map_labels_to_rgb(mask):
    rgb = np.ones(mask.shape + (4,), dtype=np.float32)  # white background
    for label, color in label_to_color.items():
        rgb[mask == label] = color
    return rgb

# STEP 4 — Plotting
n_rows = len(dicts)
n_cols = len(keys)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
if axs.ndim == 1:
    axs = axs.reshape((n_rows, n_cols))

for row_idx, (row_name, d) in enumerate(dicts.items()):
    for col_idx, key in enumerate(keys):
        mask = d[key]
        rgb_img = map_labels_to_rgb(mask)
        ax = axs[row_idx, col_idx]
        ax.imshow(rgb_img)
        ax.set_title(f"{row_name}\n{key}", fontsize=10)
        ax.axis('off')

# STEP 5 — Legend (for seed labels only)
legend_patches = [
    Patch(facecolor=label_to_color[label], edgecolor='black', label=str(label))
    for label in all_labels
]

fig.legend(
    handles=legend_patches,
    loc='lower center',
    bbox_to_anchor=(0.5, -0.05),
    ncol=min(len(legend_patches), 10),
    fontsize=8,
    title="Global Label Colors"
)

plt.tight_layout()
plt.subplots_adjust(bottom=0.2)
plt.show()
/tmp/ipykernel_784325/1730548250.py:14: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.
  cmap = cm.get_cmap('tab20', 20)
../_images/b8a135f631d1fe1b34db41447aa52753d28dcb95f1c5215603e63cbf42aac6a1.png