Analysis of Cancer Stroma Interface in Xenium#

%load_ext autoreload

import os 
import sys
import time
import logging
import re
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import json
import anndata as ad
import scanpy as sc
from natsort import natsorted

import numpy as np
np.random.seed(42)

# import itables
# from itables import show
# itables.init_notebook_mode(all_interactive=True)

sys.path.append(os.path.dirname(os.getcwd()))
import seaborn as sns
from gridgene import get_arrays as ga
from gridgene import contours, get_masks
# from GRIDGEN.src.mask_properties2 import GetMasksProperties
from gridgene.mask_properties2 import MaskAnalysisPipeline, MaskDefinition 
# define the logger :  can be None, and is set to INFO
# Custom logger setup
logger = logging.getLogger('contour_logger')
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

Get the files for Xenium

tma1_path = '../../xenium_data/HLA/GD_TMA1_S3/fov_filtered'
files_tma1 = os.listdir(tma1_path)


to_exclude = [
    'TMA1_Selection14_filtered.csv' , # little tumour
    'TMA1_Selection15_filtered.csv', # tonsil
    'TMA1_Selection18_filtered.csv' , # normal
    'TMA1_Selection24_filtered.csv', # tonsl 
    'TMA1_Selection27_filtered.csv', # low quality
    'TMA1_Selection32_filtered.csv', # low quality
    'TMA1_Selection33_filtered.csv', # low quality
             ]

files = [os.path.join(tma1_path, file) for file in files_tma1 if file not in to_exclude]
print(len(files))

metadata_file = '../../xenium_data/HLA/map_HLA_19_09_2024.xlsx'
metadata = pd.read_excel(metadata_file)
metadata['name'] = metadata['XENIUM_TMA'] + '_' + metadata['Selection'].str.replace(' ', '')
metadata = metadata.loc[metadata["XENIUM_TMA"] == "TMA1"]
metadata.head()
20
XENIUM_TMA Selection annotation Hes TMA COSMX selection Case Block Unnamed: 8 MMR HLA B2M abcam IHC group B2M gene defect Final group name
0 TMA1 Selection 7 NaN yes GD_TMA1 COSMX 17/H15722 1I S00159 MMRd negative mut pattern HLA defect no mutation HLA defect TMA1_Selection7
1 TMA1 Selection 8 NaN yes GD_TMA1 COSMX 17/H15137 1G S00154 MMRd positive positive HLA+ no mutation HLA+ TMA1_Selection8
2 TMA1 Selection 9 NaN yes GD_TMA1 COSMX 17/H09934 1M S00120 MMRd positive positive HLA+ NaN HLA+ TMA1_Selection9
3 TMA1 Selection 11 NaN yes GD_TMA1 COSMX 17/H08334 1E S00102 MMRd negative positive HLA defect mutation (exon 2) B2M defect TMA1_Selection11
4 TMA1 Selection 12 NaN yes GD_TMA1 COSMX 17/H07092 1AD S00096 MMRd cytoplasmic negative B2M defect mutation (exon 1) B2M defect TMA1_Selection12

Derive cancer and stroma masks

Apply GRIDGEN to Xenium to derive cancer and stroma masks (similar to the previous notebook: “Cancer Stroma in Xenium”).

After this, expansions from the cancer mask inside the stroma mask are derived, and mask information is retrieved.

We will show this in one example first and then apply to the cohort.

# param tum
target_tum = ['EPCAM', 'SMIM22','CLDN3', 'KRT18','LGALS4', 'KRT8', 'ELF3','TSPAN8', 'STMN1', 'CD47', 'MYC', 'LGALS3'] 
density_th_tum = 10    # 50  30
min_area_th_tum =  700 #1000     check how much is a cell 
kernel_size_tum = 10   # 20

# param empty
density_th_empty = 40
min_area_th_empty = 400 #400
kernel_size_empty = 10
file_csv = files[9]
df_total = pd.read_csv(file_csv)
df_total = df_total[['x_location', 'y_location', 'feature_name']]
df_total = df_total.rename(columns={'feature_name': 'target'})
df_total = df_total[~df_total['target'].str.contains('System|egative')]
df_total['X'] = df_total['x_location'] - min(df_total['x_location'])
df_total['Y'] = df_total['y_location'] - min(df_total['y_location'])

n_genes = len(df_total['target'].unique())
height = int(max(df_total['X'])) + 1
width = int(max(df_total['Y'])) + 1

print('df shape', df_total.shape)
print('shape',height,width,n_genes)

# this makes the sparse df to an array with the spatial information 
target_dict_total = {target: index for index, target in enumerate(df_total['target'].unique())}
array_total = ga.transform_df_to_array(df = df_total, target_dict=target_dict_total, array_shape = (height, width,len(target_dict_total))).astype(np.int8)

# creating subsets 
df_subset_tum, array_subset_tum, target_indices_subset_tum = ga.get_subset_arrays(df_total, array_total,target_dict_total,
                                                                     target_list=target_tum, target_col = 'target')

# obtain contours 
CTum = contours.ConvolutionContours(array_subset_tum, contour_name='tum')
CTum.get_conv_sum(kernel_size=kernel_size_tum, kernel_shape='square')
CTum.contours_from_sum(density_threshold = density_th_tum,
                       min_area_threshold = min_area_th_tum , directionality = 'higher')

CEmpty = contours.ConvolutionContours(array_total, contour_name='empty')
CEmpty.get_conv_sum(kernel_size=kernel_size_empty, kernel_shape='square')
CEmpty.contours_from_sum(density_threshold = density_th_empty,
                       min_area_threshold = min_area_th_empty, directionality = 'lower') # attention that directionality is lower here 

# PLOTs -- instead of plotting just with the function for example: CTum.plot_contours_scatter, we will make a composite image 

fig, axs = plt.subplots(1, 2, figsize=(30, 15))
CTum.plot_contours_scatter(path=None, show=False, s=0.05, alpha=0.5, linewidth=1,
                           c_points= 'blue',c_contours= 'red', ax=axs[0])
axs[0].set_title('Tum points and tum contours')  


CEmpty.plot_contours_scatter(path=None, show=False, s=0.05, alpha=0.5, linewidth=1,
                           c_points= 'blue',c_contours= 'red', ax=axs[1])
axs[0].set_title('total points and contours for empty')  
plt.subplots_adjust(wspace=0.02)  # Adjust to your preference

plt.show()


fig, axs = plt.subplots(1, 2, figsize=(15, 10))

CTum.plot_conv_sum(cmap='plasma', c_countour='white', ax=axs[0])
axs[0].set_title('Tum points and tum contours')

CEmpty.plot_conv_sum(cmap='plasma', c_countour='white', ax=axs[1])
axs[1].set_title('total points and contours for empty')

plt.show()

#### obtain masks
GM = get_masks.GetMasks(image_shape = (height, width))

mask_empty = GM.create_mask(CEmpty.contours)
mask_tum = GM.create_mask(CTum.contours)
mask_tum = GM.fill_holes(mask_tum)
mask_stroma = GM.subtract_masks(np.ones((height, width), dtype=np.uint8), mask_tum, mask_empty)          
mask_stroma = GM.filter_binary_mask_by_area(mask_stroma, min_area=700)

GM.plot_masks(masks=[mask_stroma, mask_tum], mask_names=['Stroma', 'Tumour'],
              background_color=(1, 1, 1), mask_colors={'Stroma': (65, 105, 225), 'Tumour': (255, 165, 0)},
              path=None, show=True, ax=None, figsize=(8, 6)) 
df shape (2084362, 5)
shape 1817 1817 480
2025-06-13 15:12:09,584 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:12:09,723 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 7
2025-06-13 15:12:09,723 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0801 seconds
contours_from_sum took 0.0590 seconds
get_conv_sum took 0.5682 seconds
2025-06-13 15:12:10,869 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 29
contours_from_sum took 0.5778 seconds
../_images/b5e2e919bdf56f13368bbdee6693e0466212063249d6b21cc8f646f56988229a.png ../_images/1fb01aa90bdbbf3e3823a947a20a64b2286d15da46cbc10ffa7bfa00f774634c.png
2025-06-13 15:12:12,882 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:12:12,901 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
../_images/09e0532f45d8a186e4dacb41c62ff79177828eb285862b422e037357cc4cc6ee.png

Now with cancer and stroma masks we can derive the interface.

The interface is the Cancer region expanded inside the stroma area. We will expand 10 and 20 um.

%autoreload
from gridgene import get_arrays as ga
from gridgene import contours, get_masks
# from GRIDGEN.src.mask_properties2 import GetMasksProperties
from gridgene.mask_properties2 import MaskAnalysisPipeline

TA = get_masks.ConstrainedMaskExpansion(mask_tum, mask_stroma)
TA.expand_mask(expansion_pixels=[10,20], min_area=1000)

# change names and colos«rs! 
mask_colors = {'expansion_10':(0, 165, 0),
               'expansion_20':(100, 200, 0),
               'seed_mask':(255, 165, 0),
               'constraint_remaining':(65, 105, 225) } # Strom}

GM.plot_masks(masks=TA.binary_expansions.values(),
              mask_names=TA.binary_expansions.keys(),
              background_color=(1, 1, 1), mask_colors=mask_colors, path=None, show=True, ax=None,
             figsize=(8, 6))
2025-06-13 15:17:01,377 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/e0b0f4edd975e5b6bb60764f3ad52ff5f0ad4c3db9bf89fb3f9156623216393c.png

Finally we can extract the information from the masks

# 1. Define your masks using MaskDefinition
mask_definitions = [
    MaskDefinition(mask=TA.binary_expansions['constraint_remaining'], mask_name='Stroma_remaining', analysis_type='bulk'),
    MaskDefinition(mask=TA.binary_expansions['seed_mask'], mask_name='Cancer', analysis_type='bulk'),
    MaskDefinition(mask=TA.binary_expansions['expansion_10'], mask_name='CI_10um', analysis_type='bulk'),
    MaskDefinition(mask=TA.binary_expansions['expansion_20'], mask_name='CI_20um', analysis_type='bulk'),
]

# 2. Initialize and run the analysis pipeline
pipeline = MaskAnalysisPipeline(mask_definitions=mask_definitions, 
                                array_counts = array_total,
                        target_dict = target_dict_total,
  )
results = pipeline.run()
df = pipeline.get_results_df()
df
run took 8.3321 seconds
area object_id IL6ST IGHG1 COL1A1 CTSH SIRPA TIMP2 SPARC LMO2 ... IFNB1 IGHD CD45RA CCL19 CD7 EBI3 LST1 VPREB3 mask_name analysis_type
0 1749628 bulk 5880 2682 20626 1271 1022 13634 31112 461 ... 40 6 3 40 14 26 0 8 Stroma_remaining bulk
1 217909 bulk 1304 212 890 2249 758 754 1281 71 ... 23 6 0 17 10 11 1 4 Cancer bulk
2 31943 bulk 163 60 910 174 97 399 1212 25 ... 1 0 0 3 2 7 0 0 CI_10um bulk
3 31051 bulk 163 127 793 93 45 380 1049 22 ... 1 1 0 2 0 12 0 0 CI_20um bulk

4 rows × 484 columns

We will now set up the code and apply the Cancer Stroma interface analysis to the cohort. We will save the results.

start_time = time.time()
save_path_ = 'results/tb_xenium_gr_/'
if not os.path.exists(save_path_):
    os.makedirs(save_path_)

logger = logging.getLogger('contour_logger')
handler = logging.FileHandler(f'{save_path_}/log_file.log')  # Change this to your desired log file path
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.propagate = False

# TUM / STROMA + TM BORDER
for file_csv in tqdm(files, desc="Processing Folders", unit="folder"):
    start_time_lap = time.time()
    logger.info(f'Processing folder {file_csv}')
    base_name = os.path.basename(file_csv)
    file_name = "_".join(base_name.split("_")[:2])
    save_path = os.path.join(save_path_, file_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    df_total = pd.read_csv(file_csv)
    df_total = df_total[['x_location', 'y_location', 'feature_name']]
    df_total = df_total.rename(columns={'feature_name': 'target'})
    df_total = df_total[~df_total['target'].str.contains('System|egative')]
    df_total['X'] = df_total['x_location'] - min(df_total['x_location'])
    df_total['Y'] = df_total['y_location'] - min(df_total['y_location'])
    
    n_genes = len(df_total['target'].unique())
    height = int(max(df_total['X'])) + 1
    width = int(max(df_total['Y'])) + 1
        
    # define arrays
    target_dict_total = {target: index for index, target in enumerate(df_total['target'].unique())}
    array_total = ga.transform_df_to_array(df = df_total, target_dict=target_dict_total, array_shape = (height, width,len(target_dict_total))).astype(np.int8)
    df_subset_tum, array_subset_tum, target_indices_subset_tum = ga.get_subset_arrays(df_total, array_total,target_dict_total,
                                                                         target_list=target_tum, target_col = 'target')
        
    # obtain contours 
    CTum = contours.ConvolutionContours(array_subset_tum, contour_name='tum')
    CTum.get_conv_sum(kernel_size=kernel_size_tum, kernel_shape='square')
    CTum.contours_from_sum(density_threshold = density_th_tum,
                           min_area_threshold = min_area_th_tum , directionality = 'higher')

    CEmpty = contours.ConvolutionContours(array_total, contour_name='empty')
    CEmpty.get_conv_sum(kernel_size=kernel_size_empty, kernel_shape='square')
    CEmpty.contours_from_sum(density_threshold = density_th_empty,
                           min_area_threshold = min_area_th_empty, directionality = 'lower') # attention that directionality is lower here 

    fig, axs = plt.subplots(1, 2, figsize=(15, 10))
    
    CTum.plot_conv_sum(cmap='plasma', c_countour='white', ax=axs[0])
    axs[0].set_title('Tum points and tum contours')
    
    CEmpty.plot_conv_sum(cmap='plasma', c_countour='white', ax=axs[1])
    axs[1].set_title('total points and contours for empty')
    
    plt.show()
    
    #### obtain masks
    GM = get_masks.GetMasks(image_shape = (height, width))
    
    mask_empty = GM.create_mask(CEmpty.contours)
    mask_tum = GM.create_mask(CTum.contours)
    mask_tum = GM.fill_holes(mask_tum)
    mask_stroma = GM.subtract_masks(np.ones((height, width), dtype=np.uint8), mask_tum, mask_empty)          
    mask_stroma = GM.filter_binary_mask_by_area(mask_stroma, min_area=700)

    #### Interface analysis
    TA = get_masks.ConstrainedMaskExpansion(mask_tum, mask_stroma)
    TA.expand_mask(expansion_pixels=[10,20], min_area=1000)

    # change names and colos«rs! 
    mask_colors = {'expansion_10':(0, 165, 0),
                   'expansion_20':(100, 200, 0),
                   'seed_mask':(255, 165, 0),
                   'constraint_remaining':(65, 105, 225) } # Strom}

    GM.plot_masks(masks=TA.binary_expansions.values(),
                  mask_names=TA.binary_expansions.keys(),
                  background_color=(1, 1, 1), mask_colors=mask_colors, path=None, show=True, ax=None,
                 figsize=(8, 6))


    mask_definitions = [
        MaskDefinition(mask=TA.binary_expansions['constraint_remaining'], mask_name='Stroma_remaining', analysis_type='per_object'),
        MaskDefinition(mask=TA.binary_expansions['seed_mask'], mask_name='Cancer', analysis_type='per_object'),
        MaskDefinition(mask=TA.binary_expansions['expansion_10'], mask_name='CI_10um', analysis_type='per_object'),
        MaskDefinition(mask=TA.binary_expansions['expansion_20'], mask_name='CI_20um', analysis_type='per_object'),
    ]

    # 2. Initialize and run the analysis pipeline
    pipeline = MaskAnalysisPipeline(mask_definitions=mask_definitions, 
                                    array_counts = array_total,
                            target_dict = target_dict_total,
      )
    results = pipeline.run()
    df = pipeline.get_results_df()
    df.to_csv(os.path.join(save_path + f'/{file_name}_tb.csv'), index=False)

# Record the end time
end_time = time.time()

# Calculate the duration
duration = start_time - time.time()
logging.info(f'full analysis in {duration}seconds')
Processing Folders:   0%|                                                                                                                                | 0/20 [00:00<?, ?folder/s]2025-06-13 15:24:48,699 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection23_filtered.csv
2025-06-13 15:24:53,483 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:24:53,628 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 29
2025-06-13 15:24:53,629 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0813 seconds
contours_from_sum took 0.0635 seconds
get_conv_sum took 0.6328 seconds
2025-06-13 15:24:54,905 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 94
contours_from_sum took 0.6411 seconds
../_images/842627f68ce1f557b051bace882be5330a77b5c8bef0958020be28e044102192.png
2025-06-13 15:24:55,597 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:24:55,617 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:24:55,736 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/fb34fe43139c99813fda7d923e1f111b643ba2216ade4addcfd391ae77f6acb3.png
Processing Folders:   5%|██████                                                                                                                  | 1/20 [00:16<05:09, 16.30s/folder]2025-06-13 15:25:04,995 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection21_filtered.csv
run took 8.5054 seconds
2025-06-13 15:25:10,567 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:25:10,727 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 43
2025-06-13 15:25:10,728 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0830 seconds
contours_from_sum took 0.0766 seconds
get_conv_sum took 0.7076 seconds
2025-06-13 15:25:12,086 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 40
contours_from_sum took 0.6504 seconds
../_images/47968be82e0b165b816a7228851e783849a26ccb76f285702301d6dc85f3855d.png
2025-06-13 15:25:12,737 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:25:12,755 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:25:12,824 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/c184e61da08fb116cffd288262516fcb45e48a38bd3e9d4f85eb32f0d7606983.png
Processing Folders:  10%|████████████                                                                                                            | 2/20 [00:33<05:01, 16.75s/folder]2025-06-13 15:25:22,069 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection17_filtered.csv
run took 8.6011 seconds
2025-06-13 15:25:25,416 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:25:25,543 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 10
2025-06-13 15:25:25,544 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0696 seconds
contours_from_sum took 0.0576 seconds
get_conv_sum took 0.5580 seconds
2025-06-13 15:25:26,665 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 114
contours_from_sum took 0.5631 seconds
../_images/91c458690145112ea9c372675bfe60c0045311934983df0d5e7b81c27a0af631.png
2025-06-13 15:25:27,284 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:25:27,299 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:25:27,316 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/958213023866067c70a78da20bd18b247aece37961eadd9492767f38071566bc.png
Processing Folders:  15%|██████████████████                                                                                                      | 3/20 [00:46<04:14, 14.99s/folder]2025-06-13 15:25:34,956 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection31_filtered.csv
run took 7.1751 seconds
2025-06-13 15:25:37,672 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:25:37,796 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 14
2025-06-13 15:25:37,796 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0654 seconds
contours_from_sum took 0.0584 seconds
get_conv_sum took 0.5543 seconds
2025-06-13 15:25:38,894 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 11
contours_from_sum took 0.5432 seconds
../_images/d96de991ade94ee25c6f8ee326e2f2a7a6c6e2b44f49427bed335abc6bd7ef17.png
2025-06-13 15:25:39,433 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:25:39,447 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:25:39,474 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/762330b085568381093d20ad38477df1a913e1bfc8dc600bd7943dda9e2e31b3.png
Processing Folders:  20%|████████████████████████                                                                                                | 4/20 [00:58<03:40, 13.76s/folder]2025-06-13 15:25:46,821 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection7_filtered.csv
run took 6.8109 seconds
2025-06-13 15:25:50,694 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:25:50,831 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 10
2025-06-13 15:25:50,832 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0812 seconds
contours_from_sum took 0.0559 seconds
get_conv_sum took 0.6345 seconds
2025-06-13 15:25:52,047 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 78
contours_from_sum took 0.5817 seconds
../_images/03a3947860a48cb9a9feca13af671d6744d6e0190dec85d74857b3a820aa0eac.png
2025-06-13 15:25:52,676 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:25:52,691 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:25:52,714 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/4743c61c040824a4786d7d7588e5c760f2e72326af230ffca321d8aca4c98e4e.png
Processing Folders:  25%|██████████████████████████████                                                                                          | 5/20 [01:13<03:32, 14.16s/folder]2025-06-13 15:26:01,706 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection11_filtered.csv
run took 8.4426 seconds
2025-06-13 15:26:09,129 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:26:09,298 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 55
2025-06-13 15:26:09,298 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0832 seconds
contours_from_sum took 0.0849 seconds
get_conv_sum took 0.6679 seconds
2025-06-13 15:26:10,607 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 6
contours_from_sum took 0.6403 seconds
../_images/4893ee60eede22ba3e5dcf489aa66943e1a9d5238a28d5af51ccd3550fb6ee07.png
2025-06-13 15:26:11,363 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:26:11,377 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:26:11,422 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/08f8f9feab1ad96c77f0b2f07b9c6f57c51ff400e538b2441820e4c5d55de21a.png
Processing Folders:  30%|████████████████████████████████████                                                                                    | 6/20 [01:32<03:41, 15.85s/folder]2025-06-13 15:26:20,845 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection25_filtered.csv
run took 8.7425 seconds
2025-06-13 15:26:27,196 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:26:27,360 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 36
2025-06-13 15:26:27,360 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0827 seconds
contours_from_sum took 0.0808 seconds
get_conv_sum took 0.7205 seconds
2025-06-13 15:26:28,787 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 8
contours_from_sum took 0.7060 seconds
../_images/25db8672918624458920b655d7b37ea89880023f832c2d1e67d27e3dc0fa9d96.png
2025-06-13 15:26:29,418 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:26:29,436 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:26:29,496 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/33441528d22c7c9411f1d63677fe933f8849968f19622db1e4a5fab87618f961.png
Processing Folders:  35%|██████████████████████████████████████████                                                                              | 7/20 [01:50<03:35, 16.55s/folder]2025-06-13 15:26:38,816 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection22_filtered.csv
run took 8.6170 seconds
2025-06-13 15:26:45,233 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:26:45,392 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 40
2025-06-13 15:26:45,393 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0814 seconds
contours_from_sum took 0.0776 seconds
get_conv_sum took 0.6747 seconds
2025-06-13 15:26:46,732 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 29
contours_from_sum took 0.6644 seconds
../_images/4ee96175bec0d9a7ebf2699817ef538626ff8686603a5d3ff5c2a52fb0ad287a.png
2025-06-13 15:26:47,368 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:26:47,386 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:26:47,435 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/026c7b988397f5009bcb8cc1183ebe4fff29d451220ee1e3d5fe9819aeeafaba.png
Processing Folders:  40%|████████████████████████████████████████████████                                                                        | 8/20 [02:07<03:22, 16.90s/folder]2025-06-13 15:26:56,472 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection30_filtered.csv
run took 8.1412 seconds
2025-06-13 15:27:02,064 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:27:02,201 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 15
2025-06-13 15:27:02,202 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0789 seconds
contours_from_sum took 0.0582 seconds
get_conv_sum took 0.5776 seconds
2025-06-13 15:27:03,358 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 17
contours_from_sum took 0.5787 seconds
../_images/aeb6ebd2ae48a8ef4775a3cb8c7b7be4c0a9b6132f7f435af5e0ac43e52c01f7.png
2025-06-13 15:27:04,007 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:27:04,021 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:27:04,093 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/f343a6857d52f79129dcc192d2846f22f73090fb3a80a613a3748331c8d70177.png
Processing Folders:  45%|██████████████████████████████████████████████████████                                                                  | 9/20 [02:24<03:05, 16.84s/folder]2025-06-13 15:27:13,193 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection20_filtered.csv
run took 8.4403 seconds
2025-06-13 15:27:16,320 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:27:16,463 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 7
2025-06-13 15:27:16,464 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0802 seconds
contours_from_sum took 0.0626 seconds
get_conv_sum took 0.5847 seconds
2025-06-13 15:27:17,734 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 29
contours_from_sum took 0.6858 seconds
../_images/1fb01aa90bdbbf3e3823a947a20a64b2286d15da46cbc10ffa7bfa00f774634c.png
2025-06-13 15:27:18,346 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:27:18,358 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:27:18,409 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/e0b0f4edd975e5b6bb60764f3ad52ff5f0ad4c3db9bf89fb3f9156623216393c.png
Processing Folders:  50%|███████████████████████████████████████████████████████████▌                                                           | 10/20 [02:38<02:39, 15.98s/folder]2025-06-13 15:27:27,249 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection16_filtered.csv
run took 8.2607 seconds
2025-06-13 15:27:31,353 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:27:31,472 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 25
2025-06-13 15:27:31,472 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0625 seconds
contours_from_sum took 0.0555 seconds
get_conv_sum took 0.5592 seconds
2025-06-13 15:27:32,571 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 33
contours_from_sum took 0.5396 seconds
../_images/120e6e07b57b16f0a14512d7681e755627c2727dab478be8ff7097db1682c291.png
2025-06-13 15:27:33,141 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:27:33,156 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:27:33,176 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/548703eaf1c9718c0cd432a15e28edbe0a1a4657a1dc6952419d707e571f563c.png
Processing Folders:  55%|█████████████████████████████████████████████████████████████████▍                                                     | 11/20 [02:51<02:15, 15.04s/folder]2025-06-13 15:27:40,140 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection26_filtered.csv
run took 6.3943 seconds
2025-06-13 15:27:45,103 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:27:45,210 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 42
2025-06-13 15:27:45,211 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0580 seconds
contours_from_sum took 0.0492 seconds
get_conv_sum took 0.4936 seconds
2025-06-13 15:27:46,195 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 42
contours_from_sum took 0.4902 seconds
../_images/701410673378833982cb785c0185ccf4f8ec2354bed7c5ab7bafc600e01384b8.png
2025-06-13 15:27:46,780 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:27:46,797 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:27:46,877 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/2bd27dd8c3546093bbb0303a92fda66a53660e26c7edb8c90d8b621a8f56895c.png
Processing Folders:  60%|███████████████████████████████████████████████████████████████████████▍                                               | 12/20 [03:04<01:56, 14.58s/folder]2025-06-13 15:27:53,662 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection8_filtered.csv
run took 6.1695 seconds
2025-06-13 15:27:57,985 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:27:58,107 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 7
2025-06-13 15:27:58,107 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0714 seconds
contours_from_sum took 0.0502 seconds
get_conv_sum took 0.5146 seconds
2025-06-13 15:27:59,129 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 17
contours_from_sum took 0.5074 seconds
../_images/f0861fdfec50b33d5718430130ae4ccffcf597125209e17fab38506f03c2ef83.png
2025-06-13 15:27:59,723 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:27:59,737 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:27:59,754 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/563e4440d5b1357b63f0b064ea63e9ce57a6626e9acac414ad71403942f58079.png
Processing Folders:  65%|█████████████████████████████████████████████████████████████████████████████▎                                         | 13/20 [03:18<01:40, 14.40s/folder]2025-06-13 15:28:07,657 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection29_filtered.csv
run took 7.4426 seconds
2025-06-13 15:28:10,960 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:28:11,104 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 68
2025-06-13 15:28:11,105 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0685 seconds
contours_from_sum took 0.0752 seconds
get_conv_sum took 0.4979 seconds
2025-06-13 15:28:12,102 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 120
contours_from_sum took 0.4993 seconds
../_images/abf430564159122d0aac9d2b97e7f70a44213df2a5f69db115f9746cf2a72b8e.png
2025-06-13 15:28:12,774 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:28:12,796 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:28:12,896 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/a15b8ff77387fa6d104526741a60226dabaf8b8497e75bac1cc9160d7f0e69f4.png
Processing Folders:  70%|███████████████████████████████████████████████████████████████████████████████████▎                                   | 14/20 [03:32<01:24, 14.02s/folder]2025-06-13 15:28:20,809 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection13_filtered.csv
run took 7.0631 seconds
2025-06-13 15:28:26,650 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:28:26,805 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 50
2025-06-13 15:28:26,805 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0826 seconds
contours_from_sum took 0.0718 seconds
get_conv_sum took 0.7140 seconds
2025-06-13 15:28:28,206 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 28
contours_from_sum took 0.6869 seconds
../_images/1a38817590d1fd656d6feb2f8c024bf072900b07f69fe48651871d81319bf8aa.png
2025-06-13 15:28:28,885 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:28:28,903 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:28:28,981 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/bc0bae21adddefdfc494a69d240cf0bcffa89c8f9fad363a0c93151e7356239f.png
Processing Folders:  75%|█████████████████████████████████████████████████████████████████████████████████████████▎                             | 15/20 [03:49<01:15, 15.08s/folder]2025-06-13 15:28:38,339 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection12_filtered.csv
run took 8.6615 seconds
2025-06-13 15:28:44,303 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:28:44,441 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 25
2025-06-13 15:28:44,442 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0769 seconds
contours_from_sum took 0.0616 seconds
get_conv_sum took 0.6044 seconds
2025-06-13 15:28:45,575 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 39
contours_from_sum took 0.5282 seconds
../_images/f034036f9c976a08e6dd492df472abc839c712aa6f4ee6ea320c09ec4dc66a0c.png
2025-06-13 15:28:46,181 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:28:46,189 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:28:46,225 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/f142be9c0283bc015821b300b798b8166334fc97ce6c6c89c899079c08873e15.png
Processing Folders:  80%|███████████████████████████████████████████████████████████████████████████████████████████████▏                       | 16/20 [04:06<01:01, 15.48s/folder]2025-06-13 15:28:54,761 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection28_filtered.csv
run took 7.7920 seconds
2025-06-13 15:28:59,173 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:28:59,310 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 36
2025-06-13 15:28:59,310 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0769 seconds
contours_from_sum took 0.0598 seconds
get_conv_sum took 0.6886 seconds
2025-06-13 15:29:00,574 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 67
contours_from_sum took 0.5752 seconds
../_images/d258661f335ed0a8a2bedd6bbf0123a9e7f3aacc6f6d9dfc25e63274c7fffe74.png
2025-06-13 15:29:01,208 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:29:01,214 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:29:01,260 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/59d3c063cbf1d6011042d31e99f366830bccc68214892af0b7eb3924a21a22a4.png
Processing Folders:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                 | 17/20 [04:21<00:46, 15.45s/folder]2025-06-13 15:29:10,145 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection34_filtered.csv
run took 8.2578 seconds
2025-06-13 15:29:14,999 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:29:15,155 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 28
2025-06-13 15:29:15,155 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0866 seconds
contours_from_sum took 0.0686 seconds
get_conv_sum took 0.7575 seconds
2025-06-13 15:29:16,650 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 52
contours_from_sum took 0.7374 seconds
../_images/d980d297e0566a5b955d99469b48dc16f520a83910a6f81c70fa9c078c451af8.png
2025-06-13 15:29:17,430 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:29:17,450 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:29:17,535 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/404e8f312b597b862e228dedd81b54759a18d8f7a3e4e2de07eae7f7c73796e5.png
Processing Folders:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████            | 18/20 [04:38<00:32, 16.02s/folder]2025-06-13 15:29:27,479 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection9_filtered.csv
run took 9.2704 seconds
/tmp/ipykernel_793845/3712573084.py:24: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.
  df_total = pd.read_csv(file_csv)
2025-06-13 15:29:31,787 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:29:31,928 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 56
2025-06-13 15:29:31,928 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0750 seconds
contours_from_sum took 0.0645 seconds
get_conv_sum took 0.5177 seconds
2025-06-13 15:29:32,963 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 48
contours_from_sum took 0.5166 seconds
../_images/d97f38efb5226b2deb237e1f64c41962e3deec9f9fa2ae7cb081a399306f45bf.png
2025-06-13 15:29:33,612 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:29:33,629 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:29:33,673 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/bb2539ed41b9fea150bb67da58e072a50ff6a15a93046caea5a52026f350fa59.png
Processing Folders:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 19/20 [04:53<00:15, 15.54s/folder]2025-06-13 15:29:41,890 - contour_logger - INFO - Processing folder ../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection19_filtered.csv
run took 7.6432 seconds
2025-06-13 15:29:45,775 - gridgen.contours.tum - INFO - Initialized GetContour
2025-06-13 15:29:45,909 - gridgen.contours.tum - INFO - Number of contours after filtering no counts: 38
2025-06-13 15:29:45,909 - gridgen.contours.empty - INFO - Initialized GetContour
get_conv_sum took 0.0672 seconds
contours_from_sum took 0.0664 seconds
get_conv_sum took 0.5556 seconds
2025-06-13 15:29:47,016 - gridgen.contours.empty - INFO - Number of contours after filtering no counts: 61
contours_from_sum took 0.5510 seconds
../_images/2ad8e21f7f9aa3e0f9036e7d98208b7f1b228b4f416fb63462209700e4bf1b20.png
2025-06-13 15:29:47,631 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
2025-06-13 15:29:47,639 - gridgen.get_masks.GetMasks - INFO - Subtracted masks from base mask.
2025-06-13 15:29:47,696 - gridgen.get_masks.GetMasks - INFO - Initialized GetMasks
../_images/265cd5b4e4a9584c0e8ad276d1a5804f300b5b5f3d7723c2c7793dd86dcc9343.png
Processing Folders: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [05:06<00:00, 15.34s/folder]
run took 7.2035 seconds

Analysis of the results from cancer stroma interface

To be noted that each total mask per sample is one object. This is, per sample, we have one cancer object, one stroma object, one 10 um interface and one 20 um interface.

Grab information stored in csv

save_path_ = 'results/tb_xenium_gr_/'

dfs = []
for file in files: 
    base_name = os.path.basename(file)
    selection_name = "_".join(base_name.split("_")[:2])
    df = pd.read_csv(f'{save_path_}/{selection_name}/{selection_name}_tb.csv')
    df['selection'] = selection_name
    dfs.append(df)


full_df = pd.concat(dfs, ignore_index=True)
full_df.head()
area object_id BTN2A2 NT5E MDM2 LGALS3 EMP1 PODXL SPARC REG1A ... EBI3 CXCL14 CSF2 TNFRSF18 LST1 VPREB3 CD45RA mask_name analysis_type selection
0 335788 bulk 136 362 642 649 2303 597 12760 2600 ... 16 31 7 4 1 0 0.0 Stroma_remaining bulk TMA1_Selection23
1 1265974 bulk 1260 12727 9217 34220 7574 4859 17094 278407 ... 27 287 39 7 4 3 2.0 Cancer bulk TMA1_Selection23
2 169633 bulk 77 295 390 471 1948 689 13848 5154 ... 14 45 2 1 0 0 0.0 CI_10um bulk TMA1_Selection23
3 88214 bulk 32 119 215 210 1037 308 7496 1188 ... 5 29 0 4 0 0 0.0 CI_20um bulk TMA1_Selection23
4 723049 bulk 405 1654 1419 2402 7528 1021 26972 171 ... 53 74 480 8 9 9 0.0 Stroma_remaining bulk TMA1_Selection21

5 rows × 485 columns

Transform into adata object

obs_columns = ['area', 'object_id', 'mask_name','analysis_type','selection',]
#                'perimeter', 'centroid', 'min_x', 'min_y', 'max_x', 'max_y', 'vertices', 
#                'BoundingBox', 'mask_name', 'per_object', 'selection',]
counts_columns = full_df.columns.difference(obs_columns)
counts = full_df[counts_columns]

obs = full_df[obs_columns]


adata = ad.AnnData(counts)
adata.obs = obs
adata.layers["counts"] = adata.X.copy()

adata.obs = adata.obs.merge(metadata, left_on='selection', right_on='name', how='left')

#combine TB of 10 and 20in an unque TB
combined_replacements = {
    'CI_10um': 'Interface',
    'CI_20um': 'Interface',
    'Stroma_remaining': 'Stroma'

}
obs['mask_name_combined'] = obs['mask_name'].replace(combined_replacements).astype('category')
adata.obs['mask_name_combined'] = obs['mask_name_combined']

print('number of cores: ', len(adata.obs['name'].unique()))
print('number of cores: ', len(adata.obs['name'].unique()))
adata.obs.head()
number of cores:  20
number of cores:  20
/home/martinha/miniconda3/envs/GRIDGEN/lib/python3.11/site-packages/anndata/utils.py:311: UserWarning: X converted to numpy array with dtype float64
  warnings.warn(f"{name} converted to numpy array with dtype {arr.dtype}")
/home/martinha/miniconda3/envs/GRIDGEN/lib/python3.11/site-packages/anndata/_core/aligned_df.py:68: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
/tmp/ipykernel_793845/2097024825.py:23: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  obs['mask_name_combined'] = obs['mask_name'].replace(combined_replacements).astype('category')
area object_id mask_name analysis_type selection XENIUM_TMA Selection annotation Hes TMA ... Block Unnamed: 8 MMR HLA B2M abcam IHC group B2M gene defect Final group name mask_name_combined
0 335788 bulk Stroma_remaining bulk TMA1_Selection23 TMA1 Selection 23 NaN yes GD_TMA1 ... 1G S00067 MMRd positive positive HLA+ NaN HLA+ TMA1_Selection23 Stroma
1 1265974 bulk Cancer bulk TMA1_Selection23 TMA1 Selection 23 NaN yes GD_TMA1 ... 1G S00067 MMRd positive positive HLA+ NaN HLA+ TMA1_Selection23 Cancer
2 169633 bulk CI_10um bulk TMA1_Selection23 TMA1 Selection 23 NaN yes GD_TMA1 ... 1G S00067 MMRd positive positive HLA+ NaN HLA+ TMA1_Selection23 Interface
3 88214 bulk CI_20um bulk TMA1_Selection23 TMA1 Selection 23 NaN yes GD_TMA1 ... 1G S00067 MMRd positive positive HLA+ NaN HLA+ TMA1_Selection23 Interface
4 723049 bulk Stroma_remaining bulk TMA1_Selection21 TMA1 Selection 21 NaN yes GD_TMA1 ... 1E S00102 MMRd negative positive HLA defect mutation (exon 2) B2M defect TMA1_Selection21 Stroma

5 rows × 22 columns

Areas of the mask

plt.figure(figsize=(12, 6))
sns.boxplot(x='mask_name', y='area', data=adata.obs, showfliers=False)

# Overlay the points
sns.stripplot(x='mask_name', y='area', data=adata.obs, alpha=0.5)

plt.title('Boxplot of Mask Areas')
plt.xlabel('Mask Name')
plt.ylabel('Area')
plt.tight_layout()
plt.show()
../_images/df73abb27b5805e6ca4fce0bc6a4fa1356adadf8ec607805f97e0f8b63bed3ad.png

Normalization

We will normalize the values, as it was single cell data

# Normalize the counts in the 'X' matrix
adata.X = np.nan_to_num(adata.X / adata.obs['area'].values[:, None])*100
# sc.pp.scale(adata)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

sc.tl.pca(adata)
pct = adata.uns['pca']['variance_ratio'] / sum(adata.uns['pca']['variance_ratio']) * 100
cumu = np.cumsum(pct)
# Point 1.
co1 = list(np.where(np.logical_and(cumu > 90, pct < 5))[0])[0]
# Point 2.
x = list(np.where(pct[0:len(pct)-1] - pct[1:len(pct)] > 0.1)[0])
x.sort(reverse=True)
co2 = x[0]+1
# Elbow:
elbow = min(co1, co2) + 1 # Without the +1, it gives the index and not the PC number
                          # (indices in python start with 0, unlike R that start with 1)
print('Elbow at: ', elbow)

sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True)

sc.pp.neighbors(adata, n_neighbors=20) #, n_pcs=elbow)

sc.tl.umap(adata) 

sc.pl.umap(
    adata,
    color= ["mask_name", "selection"],
    size=15,
)
Elbow at:  15
../_images/7d9a54baf3e59af66bd3e8f7ddb82f83d25178baf11ed3893a3242b83d937534.png ../_images/b6788493082af3cbccdf64efdcaed9cfb86bed87ecf7a77d6df394cbac783b63.png

Cancer masks are clearly distinct from Stroma masks, which it is to be expected. From the UMAP there is no clear separation between Stroma and interface masks.

Differential gene expression

Stroma vs Interface vs Cancer

Stroma vs Interface

Interface vs cancer

# function for rank genes and derive dfs
def rank_subgroup_results_dotplot(groupcl, column_name = 'Final group', save_fig=None):
    # groupcl.uns['log1p']['base'] = None
    sc.tl.rank_genes_groups(groupcl,column_name, method='wilcoxon', pts=True)
    sc.tl.filter_rank_genes_groups(groupcl, groupby=column_name, min_fold_change=1)

    # Extract the results
    result = groupcl.uns['rank_genes_groups']
    groups = result['names'].dtype.names

    # Create a DataFrame to store the results
    result_df = pd.DataFrame()
    for group in groups:
        group_df = pd.DataFrame({
            'gene': result['names'][group],
            'logfoldchanges': result['logfoldchanges'][group],
            'pvals': result['pvals'][group],
            'pvals_adj': result['pvals_adj'][group],
            'scores': result['scores'][group]
        })
        group_df['mask'] = group
        result_df = pd.concat([result_df, group_df], ignore_index=True)

    if save_fig:
        sc.pl.rank_genes_groups_dotplot(groupcl, groupby=column_name, standard_scale="var", n_genes=15,save='dotplot.png')
    else:
        sc.pl.rank_genes_groups_dotplot(groupcl, groupby=column_name, standard_scale="var", n_genes=15)

    return result_df
column_name = 'mask_name_combined'
groupcl = adata
groupcl.obs[column_name] = pd.Categorical(
    groupcl.obs[column_name],
    categories=['Stroma','Interface','Cancer',],
    ordered=True
)

# groupcl.uns['log1p']['base'] = None
sc.tl.rank_genes_groups(groupcl,column_name, method='wilcoxon', pts=True)
sc.tl.filter_rank_genes_groups(groupcl, groupby=column_name, min_fold_change=1)

# Extract the results
result = groupcl.uns['rank_genes_groups']
groups = result['names'].dtype.names

# Create a DataFrame to store the results
results_df = pd.DataFrame()
for group in groups:
    group_df = pd.DataFrame({
        'gene': result['names'][group],
        'logfoldchanges': result['logfoldchanges'][group],
        'pvals': result['pvals'][group],
        'pvals_adj': result['pvals_adj'][group],
        'scores': result['scores'][group]
    })
    group_df['mask'] = group
    results_df = pd.concat([results_df, group_df], ignore_index=True)

sc.set_figure_params(scanpy=True, fontsize=14)

dotplot = sc.pl.rank_genes_groups_dotplot(
    groupcl,
    groupby=column_name,
    standard_scale="var",
    n_genes=15, #15
    dendrogram=True,
    return_fig=True,
#     swap_axes=True,
)



dotplot.savefig('figures/dotplot_highres.png', dpi=500, bbox_inches='tight')
results_df
gene logfoldchanges pvals pvals_adj scores mask
0 OGN 1.377607 1.707981e-05 9.676217e-04 4.300000 Stroma
1 TAGLN 1.534681 5.244734e-05 1.798195e-03 4.044445 Stroma
2 THBS1 1.016705 1.383436e-04 3.320246e-03 3.811111 Stroma
3 MFAP5 1.286742 2.797832e-04 5.371837e-03 3.633333 Stroma
4 IGF1 1.382461 2.797832e-04 5.371837e-03 3.633333 Stroma
... ... ... ... ... ... ...
1435 COL4A2 -2.389624 2.616785e-11 5.410896e-10 -6.666667 Cancer
1436 LUM -2.758327 2.616785e-11 5.410896e-10 -6.666667 Cancer
1437 TAGLN -2.997534 2.616785e-11 5.410896e-10 -6.666667 Cancer
1438 VIM -2.002866 2.616785e-11 5.410896e-10 -6.666667 Cancer
1439 LGALS1 -2.143261 2.616785e-11 5.410896e-10 -6.666667 Cancer

1440 rows × 6 columns

../_images/7c2183660b16b12fb7264d154bb31f69c782ef0f18d2a8b1a0296d6cfa6d0a51.png

Plot genes back to 2 core samples

s_genes = results_df[results_df['mask']=='Stroma'].sort_values(by='scores', ascending=False)['gene'][:10]
t_genes = results_df[results_df['mask']=='Cancer'].sort_values(by='scores', ascending=False)['gene'][:10]
tb_genes = results_df[results_df['mask']=='Interface'].sort_values(by='scores', ascending=False)['gene'][:10]

fig1 = '../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection30_filtered.csv'
fig2 = '../../xenium_data/HLA/GD_TMA1_S3/fov_filtered/TMA1_Selection13_filtered.csv'

df1 = pd.read_csv(fig1)
df1 = df1[['x_location', 'y_location', 'feature_name']]
df1 = df1.rename(columns={'feature_name': 'target'})
df1['X'] = df1['x_location'] - min(df1['x_location'])
df1['Y'] = df1['y_location'] - min(df1['y_location'])
height1 = int(max(df1['X'])) + 1
width1 = int(max(df1['Y'])) + 1
    
df2 = pd.read_csv(fig2)
df2 = df2[['x_location', 'y_location', 'feature_name']]
df2 = df2.rename(columns={'feature_name': 'target'})
df2['X'] = df2['x_location'] - min(df2['x_location'])
df2['Y'] = df2['y_location'] - min(df2['y_location'])
height2 = int(max(df2['X'])) + 1
width2 = int(max(df2['Y'])) + 1


def plot_data(ax, df, s_genes, t_genes, tb_genes):
    s_df = df[df['target'].isin(s_genes)]
    ax.scatter(s_df['X'], s_df['Y'],
                   c='blue',
                   s=1, # Default size for blue
                   alpha=0.8, # Default alpha for blue
                   label='Stroma' # Add label for legend
                  )
    t_df = df[df['target'].isin(t_genes)]
    ax.scatter(t_df['X'], t_df['Y'],
                   c='red',
                   s=1, # Default size for blue
                   alpha=0.8, 
                   label='Cancer' # Add label for legend
                  )
    ci_df = df[df['target'].isin(tb_genes)]
    ax.scatter(ci_df['X'], ci_df['Y'],
                   c='green',
                   s=1,
                   alpha=1.0, 
                   label='Cancer Interface' # Add label for legend
                  )
    ax.grid(False)

    # Remove x and y tick labels and marks
    ax.set_xticks([])
    ax.set_yticks([])
    ax.tick_params(left=False, bottom=False)


# Set up the figure and axes
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(50, 25))

# Plot data for both dataframes
plot_data(ax2, df1, list(s_genes), list(t_genes), list(tb_genes))
plot_data(ax1, df2,list(s_genes), list(t_genes), list(tb_genes))

# Adjust layout and display the plot
plt.tight_layout()
plt.show()

extent1 = ax1.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig('figures/plot1.png', bbox_inches=extent1, dpi=300)

# Save the second subplot
extent2 = ax2.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig('figures/plot2.png', bbox_inches=extent2, dpi=300)
../_images/a733d8a5d99c25e9fc5cf6cadae5531de4bf1bd73ff900daffd3dde866fe9274.png
import math
zoom_centers = [
    (1000, 800), 
    (900,350),
    (900,900),
    (1350,1150)
]


half_width = 250
half_height = 250

n_zooms = len(zoom_centers)
n_cols = 2
n_rows = math.ceil(n_zooms / n_cols)

fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows))
axes = axes.flatten()

for i, (center_x, center_y) in enumerate(zoom_centers):
    ax = axes[i]
    plot_data(ax, df1, list(s_genes), list(t_genes), list(tb_genes))
    ax.set_xlim(center_x - half_width, center_x + half_width)
    ax.set_ylim(center_y - half_height, center_y + half_height)
    # ax.set_title(f'Zoom {i+1}: Center ({center_x}, {center_y})')
    
    # Save each individual axis to its own figure
    single_fig = plt.figure()
    single_ax = single_fig.add_subplot(111)
    
    plot_data(single_ax, df1, list(s_genes), list(t_genes), list(tb_genes))
    single_ax.set_xlim(center_x - half_width, center_x + half_width)
    single_ax.set_ylim(center_y - half_height, center_y + half_height)
    single_ax.axis('off')
    
    single_fig.savefig(f'figures/zoom_section_{i+1}.png', dpi=300, bbox_inches='tight', pad_inches=0)
    plt.close(single_fig)  # Close to avoid clutter

# Hide extra unused axes
for j in range(i + 1, len(axes)):
    axes[j].axis('off')

plt.tight_layout()
plt.show()
../_images/ccafe51e058dc13b045051b0de53c213c7e14a70ff6fc282ff9e40b23b9926e9.png
zoom_centers = [
    (700, 500),  # center 1
    (1000, 800), # center 2
    (1200, 1200), # center 3
    (1300,650),
    (1300,850),
    (1000,750)
]

half_width = 250
half_height = 250

n_zooms = len(zoom_centers)
n_cols = 2
n_rows = math.ceil(n_zooms / n_cols)

fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows))
axes = axes.flatten()

for i, (center_x, center_y) in enumerate(zoom_centers):
    ax = axes[i]
    plot_data(ax, df2, list(s_genes), list(t_genes), list(tb_genes))
    
    ax.set_xlim(center_x - half_width, center_x + half_width)
    ax.set_ylim(center_y - half_height, center_y + half_height)
    # ax.set_title(f'Zoom {i+1}: Center ({center_x}, {center_y})')
    
    # Save each individual axis to its own figure
    single_fig = plt.figure()
    single_ax = single_fig.add_subplot(111)
    
    plot_data(single_ax, df2, list(s_genes), list(t_genes), list(tb_genes))
    single_ax.set_xlim(center_x - half_width, center_x + half_width)
    single_ax.set_ylim(center_y - half_height, center_y + half_height)
    single_ax.axis('off')
    
    single_fig.savefig(f'figures/2_zoom_section_{i+1}.png', dpi=300, bbox_inches='tight', pad_inches=0)
    plt.close(single_fig)  # Close to avoid clutter

# Hide extra unused axes
for j in range(i + 1, len(axes)):
    axes[j].axis('off')

plt.tight_layout()
plt.show()
../_images/b126e41fe6d88d363ba95b3d31bb5609c00f25d653ee54e9b43fb8f531feefd8.png