单细胞空间转录组: 细胞类型共定位分析(基于 MISTy)
1. 模块简介
本模块基于 MISTy 算法,用于对空间转录组数据进行多尺度的细胞类型共定位与空间依赖性分析。
MISTy 是一个可解释的机器学习框架,旨在从空间组学数据中提取不同空间环境 (视图,Views) 下细胞特征之间的相互作用。本流程主要实现以下核心分析目标:
- 揭示空间依赖性:评估一种特定细胞类型的分布,是否在空间上依赖于其近邻 (Juxta) 或更广泛的组织微环境 (Para) 中其他细胞类型的存在。
- 多尺度空间互作挖掘:同时分析直接接触级别的共定位模式与旁分泌级别的远距离共定位模式,从而推断细胞间潜在的通讯机制。
💡Note
本流程输入为Seurat 兼容格式数据 (需包含空间坐标与细胞类型注释),分析结果将生成展示细胞空间依赖关系的热图及网络图。
算法原理简述:
1.构建空间视图 (Views):提取细胞自身的特征作为内部视图 (Intra-view) ,并基于空间坐标,分别构建反映直接物理接触的近邻视图 (Juxtaview,基于固定邻居距离) 和反映信号扩散的远距离视图 (Paraview,基于距离衰减权重) 。
2.多视图建模:使用随机森林等机器学习模型,以不同视图中其他细胞类型的丰度作为预测变量,来预测目标细胞类型的丰度。
3.提取互作重要性:从训练好的模型中提取特征重要性分数。正值表示空间共定位 (相互吸引) ,负值表示相互排斥。
2. 输入文件准备
本模块需要提供包含细胞类型注释和物理空间坐标的 Seurat 对象 (.rds 格式) 以及对应的 Metadata。
核心输入参数要求:
- Seurat 对象 (
rds_path):必须包含降维槽@reductions$spatial,用于存储细胞在组织切片上的物理空间坐标。 - Metadata (
meta_path):必须包含样本名称列 (由col_sam指定) 和细胞类型注释列 (由col_celltype指定) 。
空间距离参数设置:
juxta:指定近距离共定位 (直接相邻/旁分泌近端) 的范围阈值 (默认 200) 。para:指定远距离共定位 (更广泛微环境) 的范围或衰减权重距离 (默认 500) 。
3. 数据加载与预处理
准备运行 MISTy 所需的环境和参数配置。
# --- 环境准备 ---
library(Seurat)
library(tibble)
library(dplyr)
library(mistyR)
library(purrr)
library(tidyverse)
library(base64enc)
library(figpatch)
library(scuttle)
library(scater)
library(cowplot)
library(phenoptr)The following object is masked from ‘package:distances’:
distance_matrix
# --- 输入参数配置 ---
# 细胞类型共定位模块输入参数介绍:
# rds_path:指定输入 Seurat 对象的 rds 路径
rds_path = "./rds/25020230_nao_CS.rds"
# meta_path:指定 metadata 路径
meta_path = "./rds/meta.tsv"
# col_sam:指定 metadata 的样本列名
col_sam = "Sample"
# sample_name:指定样本名,必须在 col_sam 列中存在
sample_name = "25020230_nao_CS_expression"
# col_celltype:指定 metadata 的细胞类型列名
col_celltype = "RNA_snn_res.0.2"
# outdir:指定输出目录路径
outdir = "MISTy_out"
# juxta:指定细胞类型共定位的近距离范围
# para:指定细胞类型共定位的远距离范围
juxta = 200
para = 5004. MISTy 核心计算
本节定义并执行 MISTy 分析流程,包括提取空间特征、构建近邻与远距离视图 (Juxtaview & Paraview) 、训练多视图模型并收集最终的相互作用结果。
run_misty_seurat <- function(visium.slide,
# 包含空间转录组数据的 Seurat 对象
view.assays,
# 为每个视图指定的 Assay 命名列表
view.features = NULL,
# 指定要使用的特征/标记基因的命名列表
# 默认使用所有特征
view.types,
# 指定从 Assay 构建的视图类型的命名列表
#
view.params,
# 包含每个视图参数(NULL 或具体值)的命名列表
#
spot.ids = NULL,
# 要使用的 spot ID,默认使用所有
out.alias = "results"
# 输出文件夹名称
) {
mistyR::clear_cache()
# 提取空间几何坐标
#geometry <- GetTissueCoordinates(visium.slide,
# cols = c("row", "col"), scale = NULL
#)
geometry <- visium.slide@reductions$spatial@cell.embeddings
colnames(geometry) <- c("row","col")
geometry <- geometry*0.265385
# 提取数据
view.data <- map(view.assays,
extract_seurat_data,
geometry = geometry,
visium.slide = visium.slide
)
# 构建并运行 MISTy 分析工作流
build_misty_pipeline(
view.data = view.data,
view.features = view.features,
view.types = view.types,
view.params = view.params,
geometry = geometry,
spot.ids = spot.ids,
out.alias = out.alias
)
}
# 从 Seurat 对象的特定 Assay 中提取数据
# 并将细胞 ID 与空间坐标对齐
extract_seurat_data <- function(visium.slide,
assay,
geometry) {
print(assay)
data <- GetAssayData(visium.slide, assay = assay) %>%
as.matrix() %>%
t() %>%
as_tibble(rownames = NA)
return(data %>% slice(match(rownames(.), rownames(geometry))))
}
# 过滤数据,仅保留感兴趣的特征
filter_data_features <- function(data,
features) {
if (is.null(features)) features <- colnames(data)
return(data %>% rownames_to_column() %>%
select(rowname, all_of(features)) %>% rename_with(make.names) %>%
column_to_rownames())
}
# 根据定义的参数构建空间视图
create_default_views <- function(data,
view.type,
view.param,
view.name,
spot.ids,
geometry) {
mistyR::clear_cache()
view.data.init <- create_initial_view(data)
if (!(view.type %in% c("intra", "para", "juxta"))) {
view.type <- "intra"
}
if (view.type == "intra") {
data.red <- view.data.init[["intraview"]]$data %>%
rownames_to_column() %>%
filter(rowname %in% spot.ids) %>%
select(-rowname)
} else if (view.type == "para") {
view.data.tmp <- view.data.init %>%
add_paraview(geometry, l = view.param)
data.ix <- paste0("paraview.", view.param)
data.red <- view.data.tmp[[data.ix]]$data %>%
mutate(rowname = rownames(data)) %>%
filter(rowname %in% spot.ids) %>%
select(-rowname)
} else if (view.type == "juxta") {
view.data.tmp <- view.data.init %>%
add_juxtaview(
positions = geometry,
neighbor.thr = view.param
)
data.ix <- paste0("juxtaview.", view.param)
data.red <- view.data.tmp[[data.ix]]$data %>%
mutate(rowname = rownames(data)) %>%
filter(rowname %in% spot.ids) %>%
select(-rowname)
}
if (is.null(view.param) == TRUE) {
misty.view <- create_view(
paste0(view.name),
data.red
)
} else {
misty.view <- create_view(
paste0(view.name, "_", view.param),
data.red
)
}
return(misty.view)
}
# 自动构建 MISTy 工作流并运行
build_misty_pipeline <- function(view.data,
view.features,
view.types,
view.params,
geometry,
spot.ids = NULL,
out.alias = "default") {
# 如果未定义 spot ID,则添加所有的 spot ID
if (is.null(spot.ids)) {
spot.ids <- rownames(view.data[[1]])
}
# 首先从数据中过滤出指定特征
view.data.filt <- map2(view.data, view.features, filter_data_features)
# 创建初始视图 (Intra-view)
views.main <- create_initial_view(view.data.filt[[1]] %>%
rownames_to_column() %>%
filter(rowname %in% spot.ids) %>%
select(-rowname))
# 创建其他视图 (Juxta-view, Para-view 等)
view.names <- names(view.data.filt)
all.views <- pmap(list(
view.data.filt[-1],
view.types[-1],
view.params[-1],
view.names[-1]
),
create_default_views,
spot.ids = spot.ids,
geometry = geometry
)
pline.views <- add_views(
views.main,
unlist(all.views, recursive = FALSE)
)
# 运行 MISTy 模型训练
run_misty(pline.views, out.alias, cached = FALSE)
}
#
# 修复收集结果时的潜在错误
#
collect_results_v2 <- function(folders){
samples <- R.utils::getAbsolutePath(folders)
message("\nCollecting improvements")
improvements <- samples %>% furrr::future_map_dfr(function(sample) {
performance <- readr::read_table2(paste0(sample, .Platform$file.sep,
"performance.txt"), na = c("", "NA", "NaN"), col_types = readr::cols()) %>%
dplyr::distinct()
performance %>% dplyr::mutate(sample = sample, gain.RMSE = 100 *
(.data$intra.RMSE - .data$multi.RMSE)/.data$intra.RMSE,
gain.R2 = 100 * (.data$multi.R2 - .data$intra.R2),
)
}, .progress = TRUE) %>% tidyr::pivot_longer(-c(.data$sample,
.data$target), names_to = "measure")
message("\nCollecting contributions")
contributions <- samples %>% furrr::future_map_dfr(function(sample) {
coefficients <- readr::read_table2(paste0(sample, .Platform$file.sep,
"coefficients.txt"), na = c("", "NA", "NaN"), col_types = readr::cols()) %>%
dplyr::distinct()
coefficients %>% dplyr::mutate(sample = sample, .after = "target") %>%
tidyr::pivot_longer(cols = -c(.data$sample, .data$target),
names_to = "view")
}, .progress = TRUE)
improvements.stats <- improvements %>% dplyr::filter(!stringr::str_starts(.data$measure,
"p\\.")) %>% dplyr::group_by(.data$target, .data$measure) %>%
dplyr::summarise(mean = mean(.data$value), sd = stats::sd(.data$value),
cv = .data$sd/.data$mean, .groups = "drop")
contributions.stats <- dplyr::inner_join((contributions %>%
dplyr::filter(!stringr::str_starts(.data$view, "p\\.") &
.data$view != "intercept") %>% dplyr::group_by(.data$target,
.data$view) %>% dplyr::summarise(mean = mean(.data$value),
.groups = "drop_last") %>% dplyr::mutate(fraction = abs(.data$mean)/sum(abs(.data$mean))) %>%
dplyr::ungroup()), (contributions %>% dplyr::filter(stringr::str_starts(.data$view,
"p\\.") & !stringr::str_detect(.data$view, "intercept")) %>%
dplyr::group_by(.data$target, .data$view) %>% dplyr::mutate(view = stringr::str_remove(.data$view,
"^p\\.")) %>% dplyr::summarise(p.mean = mean(.data$value),
p.sd = stats::sd(.data$value), .groups = "drop")), by = c("target",
"view"))
message("\nCollecting importances")
importances <- samples %>% furrr::future_map(function(sample) {
targets <- contributions.stats %>% dplyr::pull(.data$target) %>%
unique() %>% sort()
views <- contributions.stats %>% dplyr::pull(.data$view) %>%
unique()
maps <- views %>% furrr::future_map(function(view) {
all.importances <- targets %>% purrr::map(~readr::read_csv(paste0(sample,
.Platform$file.sep, "importances_", .x, "_",
view, ".txt"), col_types = readr::cols()) %>%
dplyr::distinct() %>% dplyr::rename(feature = target))
features <- all.importances %>% purrr::map(~.x$feature) %>%
unlist() %>% unique() %>% sort()
pvalues <- contributions %>% dplyr::filter(sample ==
!!sample, view == paste0("p.", !!view)) %>% dplyr::mutate(value = 1 -
.data$value)
all.importances %>% purrr::imap_dfc(~tibble::tibble(feature = features,
zero.imp = 0) %>% dplyr::left_join(.x, by = "feature") %>%
dplyr::arrange(.data$feature) %>% dplyr::mutate(imp = scale(.data$imp)[,
1], `:=`(!!targets[.y], .data$zero.imp + (.data$imp *
(pvalues %>% dplyr::filter(target == targets[.y]) %>%
dplyr::pull(.data$value))))) %>% dplyr::select(targets[.y])) %>%
dplyr::mutate(Predictor = features)
}) %>% `names<-`(views)
}, .progress = TRUE) %>% `names<-`(samples)
message("\nAggregating")
importances.aggregated <- importances %>% purrr::reduce(function(acc,
l) {
acc %>% purrr::map2(l, ~(((.x %>% dplyr::select(-.data$Predictor)) +
(.y %>% dplyr::select(-.data$Predictor))) %>% dplyr::mutate(Predictor = .x %>%
dplyr::pull(.data$Predictor))))
}) %>% purrr::map(~.x %>% dplyr::mutate_if(is.numeric, ~./length(samples)))
return(list(improvements = improvements, improvements.stats = improvements.stats,
contributions = contributions, contributions.stats = contributions.stats,
importances = importances, importances.aggregated = importances.aggregated))
}run_colocalization <- function(slide,
assay,
useful_features,
out_label,
misty_out_alias = "./") {
# 定义每个视图 (view) 所使用的 Assay ---------------
view_assays <- list("main" = assay,
"juxta" = assay,
"para" = assay)
# 定义每个视图 (view) 所使用的特征 (features) ------------
view_features <- list("main" = useful_features,
"juxta" = useful_features,
"para" = useful_features)
# 定义每个视图 (view) 的空间上下文环境 -----
view_types <- list("main" = "intra",
"juxta" = "juxta",
"para" = "para")
# 定义附加参数(Para-view 中的 l 参数,
# 或 Juxta-view 中的近邻数量 n) --------
view_params <- list("main" = NULL,
"juxta" = juxta,
"para" = para)
misty_out <- paste0(misty_out_alias,"/",
out_label, "_", assay)
run_misty_seurat(visium.slide = slide,
view.assays = view_assays,
view.features = view_features,
view.types = view_types,
view.params = view_params,
spot.ids = NULL,
out.alias = misty_out)
return(misty_out)
}
sample_name = str_split(sample_name,",")[[1]]
data = readRDS(rds_path)
meta <- read.table(meta_path,header=T,sep='\t',check.names=F)
if(colnames(meta)[1]=="barcode"){
meta = column_to_rownames(meta,"barcode")
} else if(colnames(meta)[1]=="barcodes"){
meta = column_to_rownames(meta,"barcodes")
}
data@meta.data <- meta
ll_heatmap = list()
ll_network = list()
misty_outs <- map(sample_name, function(sample_name){
print(sample_name)
data_subset = subset(data,cells = rownames(data@meta.data[data@meta.data[[col_sam]] %in% sample_name,]))
metadata = data_subset@meta.data
metadata = rownames_to_column(metadata,var = "barcode")
metadata = metadata[,c(col_celltype,"barcode")]
colnames(metadata)[1] = "celltype"
metadata$exit = 1
metadata_width = reshape2::dcast(data = metadata,formula = celltype~barcode)
metadata_width = column_to_rownames(metadata_width,var = "celltype")
metadata_width[is.na(metadata_width)] = 0
data_subset[['cellpro']] = CreateAssayObject(counts = metadata_width)
assay = "cellpro"
DefaultAssay(data_subset) = assay
useful_features = rownames(data_subset)
mout = run_colocalization(slide = data_subset,
useful_features = useful_features,
out_label = sample_name,
assay = assay,
misty_out_alias = outdir)
misty_res_slide = collect_results(mout)
return(mout)
})Using exit as value column: use value.var to override.
Warning message in mistyR::clear_cache():
“Cache folder doesn't exist.”
[1] "cellpro"
[1] "cellpro"
[1] "cellpro"
Warning message in mistyR::clear_cache():
“Cache folder doesn't exist.”
Computing triangulation
Generating juxtaview
Warning message in mistyR::clear_cache():
“Cache folder doesn't exist.”
Generating paraview
Warning message in run_misty(pline.views, out.alias, cached = FALSE):
“Targets X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19 have fewer unique values than cv.folds. This might result in errors during modeling.”
Training models
Registered S3 method overwritten by 'pROC':
method from
plot.roc spatstat.explore
Collecting improvements
Collecting contributions
Collecting importances
Aggregating
5. 细胞类型共定位综合展示
通过热图矩阵和直观的网络拓扑图的形式,全面展示所有被分析的细胞类型两两之间的空间依赖程度与群落结构。
图注说明:下图左侧展示细胞类型共定位热图,右侧展示细胞类型共定位网络图。
左侧热图 (Heatmap):
- 横轴 (Predictor):表示作为“预测变量”的细胞类型,即微环境中存在的其他细胞。
- 纵轴 (Target):表示作为“目标变量”的细胞类型,即我们想要预测其分布的细胞。
- 颜色映射 (Importance):代表共定位的程度 (特征重要性) 。颜色越深 (正值) ,代表横轴细胞类型的存在强烈预示着纵轴细胞类型的出现,即高度共定位 (相互吸引);颜色较浅或空白,则表示共定位关系较弱或无明显空间依赖。
右侧网络图 (Network):
- 节点 (Node):每个圆点代表一种特定的细胞类型。
- 模块/社区 (Community):处在同一个大圈 (背景阴影) 内的多个细胞类型,代表它们具有高度密集的相互共定位关系,构成了一个特定的“空间微环境生态位”。
- 连线 (Edge):节点之间的连线代表空间依赖关系。箭头的方向代表预测方向 (从预测变量指向目标变量) 。
综合判读:热图用于快速扫描并量化组织中哪些细胞类型倾向于在空间上聚集在一起;网络图则进一步将这些依赖关系转化为拓扑结构,用于发现组织中由多种细胞类型共同构成的复杂生态位 (Niche) 或功能模块。
options(repr.plot.width = 16, repr.plot.height = 6)
for(i in 1:length(sample_name)){
print(sample_name[i])
misty_res_slide = collect_results(misty_outs[[i]])
for(j in c("juxta","para")){
if(j=="juxta"){
p_para_heat <- mistyR::plot_interaction_heatmap(misty_res_slide, paste0("juxta_", juxta), cutoff = 0)
p_para_net <- wrap_elements(panel = ~ mistyR::plot_interaction_communities(misty_res_slide, paste0("juxta_", juxta), cutoff = 0.5))
print(p_para_heat + p_para_net)
} else if(j=="para"){
p_para_heat <- mistyR::plot_interaction_heatmap(misty_res_slide, paste0("para_", para), cutoff = 0)
p_para_net <- wrap_elements(panel = ~ mistyR::plot_interaction_communities(misty_res_slide, paste0("para_", para), cutoff = 0.5))
print(p_para_heat + p_para_net)
}
}
}

6. 细胞空间分布密度分析
基于物理距离量化不同细胞类型相对于目标细胞的空间分布密度情况。
图注说明:下图展示细胞类型共定位密度图。
- 横坐标 (Distance to Target):表示某一类细胞到目标细胞 (Target Cell) 的空间物理距离。
- 纵坐标 (Density):表示细胞在不同距离区间的分布密度。
- 颜色分组 (Phenotype):不同的颜色曲线代表不同的细胞类型 (Phenotype) 。
- 判读:曲线的峰值越靠左 (距离越近) ,说明该细胞类型在空间上越倾向于与目标细胞共定位 (聚集) ;曲线越平缓或峰值越靠右,说明该细胞类型与目标细胞在空间上分布较远或呈随机分布。
sub_position = Embeddings(data,"spatial")
sub_position = sub_position*0.2653
sub_position = as.data.frame(sub_position)
sub_position = rownames_to_column(sub_position,"Cell ID")
sub_position = cbind(sub_position,data@meta.data[,col_celltype])
colnames(sub_position)[2:4] = c("Cell X Position","Cell Y Position","Phenotype")
sub_position$Phenotype <- as.character(sub_position$Phenotype)
distances=find_nearest_distance(sub_position)
csd_with_distance=bind_cols(sub_position, distances)
csd_with_distance$Phenotype <- factor(
csd_with_distance$Phenotype,
levels = as.character(sort(as.numeric(unique(csd_with_distance$Phenotype))))
)# 自定义 40 种颜色
custom_40_colors <- c(
'#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896',
'#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7',
'#bcbd22', '#dbdb8d', '#17becf', '#9edae5', '#393b79', '#5254a3', '#6b6ecf', '#9c9ede',
'#637939', '#8ca252', '#b5cf6b', '#cedb9c', '#8c6d31', '#bd9e39', '#e7ba52', '#e7cb94',
'#843c39', '#794044', '#d6616b', '#e7969c', '#7b4173', '#a55194', '#ce6dbd', '#de9ed6'
)
# 绘图
ggplot(csd_with_distance, aes(x = `Distance to 1`, color = Phenotype)) +
geom_density(size = 1, key_glyph = "path") +
scale_color_manual(
values = custom_40_colors,
guide = guide_legend(ncol = 2)
) +
labs(
title = paste0("Sample: ",sample_name[1]), # 添加主标题
x = "Distance to 1", # 明确 X 轴标签
y = "Density" # 明确 Y 轴标签
) +
theme_bw() +
theme(
# 主标题样式:加粗、放大
plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
# 坐标轴标题放大
axis.title = element_text(size = 14),
# 坐标轴刻度数字放大
axis.text = element_text(size = 12),
# 图例标题放大、加粗
legend.title = element_text(size = 12, face = "bold"),
# 图例文字放大
legend.text = element_text(size = 10),
# 图例位置保持在右侧
legend.position = "right"
)
