Tutorial 6: Multi-sample integration of spatial transcriptomic mouse brain sagittal samples across different dissected regions¶
The spatial transcriptomic mouse brain sagittal samples, including a sagittal anterior section and a sagittal posterior section, were generated by the 10x Genomics Visium platform.
[1]:
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import anndata as ad
import scanpy as sc
import episcanpy.api as epi
from PRESENT import gene_sets_alignment, PRESENT_BC_function
## Notebook-wide configuration
# Silence every warning category for the rest of the session (keeps the tutorial output short).
warnings.filterwarnings("ignore")
# Default scanpy figure settings: 80 dpi, 4x4 inch panels, white figure background.
sc.set_figure_params(dpi=80, figsize=(4,4), facecolor="white")
[2]:
## Load SRT data from multiple dissected regions
# One entry per Visium section: (obs-name prefix, slice label, data directory, count matrix file)
section_specs = [
    ("Anterior-", "Sagittal-Anterior",
     "V2_Mouse_Brain_Serial_Section_2_Sagittal-Anterior",
     "V1_Mouse_Brain_Sagittal_Anterior_Section_2_filtered_feature_bc_matrix.h5"),
    ("Posterior-", "Sagittal-Posterior",
     "V2_Mouse_Brain_Serial_Section_2_Sagittal-Posterior",
     "V1_Mouse_Brain_Sagittal_Posterior_Section_2_filtered_feature_bc_matrix.h5"),
]
loaded_sections = []
for name_prefix, slice_label, visium_dir, counts_file in section_specs:
    section = sc.read_visium(path=visium_dir, count_file=counts_file)
    # Tag every spot with the section it came from (used later as the batch key)
    section.obs["slice_name"] = slice_label
    # Prefix the barcodes so spot names stay unique once the sections are concatenated
    section.obs_names = [name_prefix + barcode for barcode in section.obs_names]
    # Store the spatial coordinates as integers
    section.obsm["spatial"] = section.obsm["spatial"].astype(int)
    loaded_sections.append(section)
# mouse brain sagittal anterior section / mouse brain sagittal posterior section
adata_anterior, adata_posterior = loaded_sections

## feature sets unification
# Copies are passed so the per-section objects stay untouched for plotting later on
adata_list = gene_sets_alignment([adata_anterior.copy(), adata_posterior.copy()])
adata = ad.concat(adata_list)
print(adata)
AnnData object with n_obs × n_vars = 6112 × 32285
obs: 'in_tissue', 'array_row', 'array_col', 'slice_name'
obsm: 'spatial'
Run PRESENT model¶
[3]:
## Integrate both sections with the batch-correction entry point of PRESENT.
## The import cell only brings in `PRESENT_BC_function`; calling the un-imported
## `PRESENT_function` raises a NameError on a fresh kernel.
## NOTE: `adata` is overwritten by the returned object (the printed result below has
## 50 variables and an `embeddings` entry in obsm), so re-run the loading cell before
## re-running this one.
adata = PRESENT_BC_function(
    spatial_key = "spatial", ## obsm key under which to load the spatial matrix of spots
    batch_key = "slice_name", ## obs key under which to load the batch indices of spots
    adata_rna = adata, ## The RNA raw count matrix of spots in anndata.AnnData format
    gene_min_cells = 1, ## Minimum number of cells expressed required for a gene to pass filtering
    num_hvg = 3000, ## Number of highly variable genes to select for RNA data
    device = "cuda", ## Device used for training: cuda or cpu
    nclusters = 28, ## Number of clusters to search for
    device_id = 2 ## presumably the index of the CUDA device to use -- adjust to a GPU available on your machine
)
print(adata)
Loading data and parameters...
Input data has been loaded
First-stage trains basic model: 24%|██▍ | 24/100 [00:13<00:42, 1.77it/s, NLL_loss=0.464, BNN_loss=0.319, MSE_loss=0.118, ES counter=20, ES patience=20]
Early stop the first-stage training process
Second-stage trains BC model: 19%|▏| 19/100 [04:32<19:23, 14.36s/it, IBA_loss=1.53, IBP_loss=0.208, DISC_loss=0.703, GEN_loss=-0.7, ES counter=10, ES patience
Early stop the second-stage training process
Succeed to find 28 clusters at resolution 1.312
AnnData object with n_obs × n_vars = 6112 × 50
obs: 'in_tissue', 'array_row', 'array_col', 'slice_name', 'batch', 'n_genes', 'leiden', 'LeidenClusters'
uns: 'neighbors', 'leiden'
obsm: 'spatial', 'embeddings'
obsp: 'distances', 'connectivities'
Visualization¶
[4]:
## Build one distinct color per cluster label.
# sns.color_palette(n_colors=60) without a palette name only cycles the current
# color cycle, so colors repeat and different clusters are drawn identically.
# "husl" samples evenly spaced hues instead, so every cluster gets its own color.
cluster_labels = [str(label) for label in adata.obs["LeidenClusters"].astype("category").cat.categories]
colors = list(sns.color_palette("husl", n_colors=len(cluster_labels)))
# Keys are the cluster labels as strings, matching how the palette is looked up when plotting
colors_dict = dict(zip(cluster_labels, colors))
[5]:
## Show the joint clustering on each tissue section, side by side.
# The per-section objects are used for plotting; cluster labels are copied over by spot name.
section_panels = [(adata_anterior, "Anterior"), (adata_posterior, "Posterior")]
for section, _ in section_panels:
    section.obs["LeidenClusters"] = adata[section.obs_names, :].obs["LeidenClusters"]

fig, axs = plt.subplots(1, 2, figsize=(12, 6))
for panel_ax, (section, panel_title) in zip(axs, section_panels):
    sc.pl.spatial(
        section,
        color="LeidenClusters",
        img_key="hires",
        palette=colors_dict,
        ax=panel_ax,
        show=False,
        title=panel_title,
        bw=True,
    )
# Drop the axis labels on both panels
for panel_ax in axs:
    panel_ax.set_xlabel("")
for panel_ax in axs:
    panel_ax.set_ylabel("")
# Keep a single legend: remove the one attached to the left panel
axs[0].legend().remove()
plt.tight_layout()
plt.show()
[ ]: