单细胞空间邻域统计:细胞空间分布与邻近类型分析
时长: 13 分钟
字数: 2.6k 字
更新: 2026-02-28
阅读: 0 次
加载分析 R 包
R
library(Seurat)
library(qs)
library(dplyr)
library(tibble)
library(stringr)
library(ggplot2)
library(phenoptr)output
Attaching SeuratObject
qs 0.27.3. Announcement: https://github.com/qsbase/qs/issues/103
Warning message:
“package ‘dplyr’ was built under R version 4.3.3”
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Warning message:
“package ‘tibble’ was built under R version 4.3.3”
Warning message:
“package ‘stringr’ was built under R version 4.3.3”
Warning message:
“package ‘ggplot2’ was built under R version 4.3.3”
qs 0.27.3. Announcement: https://github.com/qsbase/qs/issues/103
Warning message:
“package ‘dplyr’ was built under R version 4.3.3”
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Warning message:
“package ‘tibble’ was built under R version 4.3.3”
Warning message:
“package ‘stringr’ was built under R version 4.3.3”
Warning message:
“package ‘ggplot2’ was built under R version 4.3.3”
填写输入对象
- col_SAM: metadata 对应的样本列名
- SAMple_name: 分析的样本名
- col_celltype:metadata 对应的细胞类型列名
- analysis_celltype:分析的细胞类型
R
col_sam = "Sample"
sample_name = "T1"
col_celltype = "large_celltype"
analysis_celltype = "Neutrophils"读取 rds 和 metadata,对数据预处理
R
data = readRDS("../../data/AY1739779861133/input.rds")R
meta = read.delim("../../data/AY1739779861133/meta.tsv")
rownames(meta) = meta$barcode
data@meta.data = meta- 提取 rds 里想要分析的空间样本信息
- 对 SeekSpace 数据提供的空间位置信息进行转变,像素点→实际点(转化倍数 0.2653)
- 提取想要分析的细胞类型
- 计算每个细胞点之间的距离
R
sub = subset(data,cells = rownames(data@meta.data[data@meta.data[[col_sam]] %in% sample_name,]))
sub@meta.data$barcode = rownames(sub@meta.data)
df_labels = sub@meta.data[,c("barcode",col_celltype)]
sub_position = Embeddings(sub,"spatial")
sub_position = sub_position*0.2653
sub_position = as.data.frame(sub_position)
Neu_barcode = rownames(sub@meta.data[sub@meta.data[[col_celltype]] %in% analysis_celltype,])
dist_matrix <- as.matrix(dist(sub_position, method = "euclidean"))分别计算 4 个不同范围内的空间邻域组成
R
neigh_freq1 = data.frame()
for(threshold in c(20,50,100,200)){
# 找出每个点周围内的邻近点(排除自身)
neighbors = apply(dist_matrix, 1, function(row) {
nearby = which(row <= threshold & row > 0) # row > 0 排除自身
if (length(nearby) > 0) {
paste(names(nearby), collapse = ";") # 用分号分隔邻近点
} else {
NA # 如果没有邻近点,返回NA
}
})
# 将结果转换为数据框
result = data.frame(
Point = rownames(sub_position),
X = sub_position$spatial_1,
Y = sub_position$spatial_2,
Neighbors = neighbors,
stringsAsFactors = FALSE
)
result_Neu = result[result$Point %in% Neu_barcode,]
map_neighbors_to_labels = function(neighbor_ids, label_df) {
if (is.na(neighbor_ids)) return(NA)
ids = strsplit(neighbor_ids, ";")[[1]] # 拆分ID
labels = label_df$large_celltype[match(ids, label_df$barcode)] # 匹配细胞类型
paste(labels, collapse = "; ") # 合并为字符串
}
# 应用到 Neighbors 列
result_Neu$Neighbor_Labels = sapply(
result_Neu$Neighbors,
map_neighbors_to_labels,
label_df = df_labels
)
merged_string = paste(result_Neu$Neighbors, collapse = ";")
a = str_split(merged_string,";")[[1]]
neigh = data.frame(barcode = unique(a))
neigh = left_join(neigh,sub@meta.data[,c("barcode",col_celltype)])
neigh_freq = as.data.frame(table(neigh[[col_celltype]]))
colnames(neigh_freq) = c("Celltype","Cell_number")
neigh_freq$Distance = paste0(threshold,"um")
neigh_freq1 = rbind(neigh_freq1,neigh_freq)
}output
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
R
neigh_freq1| Celltype | Cell_number | Distance |
|---|---|---|
| <fct> | <int> | <chr> |
| B_cells | 49 | 20um |
| Endothelial_cells | 9 | 20um |
| Epithelial_cells | 68 | 20um |
| Fibroblasts | 105 | 20um |
| Macrophages | 35 | 20um |
| Mast_cells | 2 | 20um |
| Nerve | 3 | 20um |
| Neutrophils | 39 | 20um |
| Plasma_cells | 34 | 20um |
| SMC | 4 | 20um |
| T_cells | 94 | 20um |
| B_cells | 188 | 50um |
| Endothelial_cells | 41 | 50um |
| Epithelial_cells | 443 | 50um |
| Fibroblasts | 312 | 50um |
| Macrophages | 106 | 50um |
| Mast_cells | 4 | 50um |
| Nerve | 7 | 50um |
| Neutrophils | 162 | 50um |
| Plasma_cells | 119 | 50um |
| SMC | 35 | 50um |
| T_cells | 310 | 50um |
| B_cells | 386 | 100um |
| Endothelial_cells | 82 | 100um |
| Epithelial_cells | 1401 | 100um |
| Fibroblasts | 596 | 100um |
| Macrophages | 188 | 100um |
| Mast_cells | 5 | 100um |
| Nerve | 10 | 100um |
| Neutrophils | 289 | 100um |
| Plasma_cells | 235 | 100um |
| SMC | 79 | 100um |
| T_cells | 556 | 100um |
| B_cells | 603 | 200um |
| Endothelial_cells | 111 | 200um |
| Epithelial_cells | 2902 | 200um |
| Fibroblasts | 800 | 200um |
| Macrophages | 262 | 200um |
| Mast_cells | 6 | 200um |
| Nerve | 14 | 200um |
| Neutrophils | 383 | 200um |
| Plasma_cells | 307 | 200um |
| SMC | 121 | 200um |
| T_cells | 750 | 200um |
- 计算每个距离范围内分析目标细胞类型周围邻域细胞类型占比
R
neigh_freq1 = neigh_freq1 %>%
group_by(Distance) %>% # 第一步:按Region分组
mutate(Region_Pct = Cell_number / sum(Cell_number) * 100) %>% # 第二步:计算组内占比
ungroup()
neigh_freq1$Distance = factor(neigh_freq1$Distance,c("20um","50um","100um","200um"))R
neigh_freq1| Celltype | Cell_number | Distance | Region_Pct |
|---|---|---|---|
| <fct> | <int> | <fct> | <dbl> |
| B_cells | 49 | 20um | 11.08597285 |
| Endothelial_cells | 9 | 20um | 2.03619910 |
| Epithelial_cells | 68 | 20um | 15.38461538 |
| Fibroblasts | 105 | 20um | 23.75565611 |
| Macrophages | 35 | 20um | 7.91855204 |
| Mast_cells | 2 | 20um | 0.45248869 |
| Nerve | 3 | 20um | 0.67873303 |
| Neutrophils | 39 | 20um | 8.82352941 |
| Plasma_cells | 34 | 20um | 7.69230769 |
| SMC | 4 | 20um | 0.90497738 |
| T_cells | 94 | 20um | 21.26696833 |
| B_cells | 188 | 50um | 10.88592936 |
| Endothelial_cells | 41 | 50um | 2.37405906 |
| Epithelial_cells | 443 | 50um | 25.65141865 |
| Fibroblasts | 312 | 50um | 18.06601042 |
| Macrophages | 106 | 50um | 6.13781123 |
| Mast_cells | 4 | 50um | 0.23161552 |
| Nerve | 7 | 50um | 0.40532716 |
| Neutrophils | 162 | 50um | 9.38042849 |
| Plasma_cells | 119 | 50um | 6.89056167 |
| SMC | 35 | 50um | 2.02663578 |
| T_cells | 310 | 50um | 17.95020266 |
| B_cells | 386 | 100um | 10.08622942 |
| Endothelial_cells | 82 | 100um | 2.14267050 |
| Epithelial_cells | 1401 | 100um | 36.60830938 |
| Fibroblasts | 596 | 100um | 15.57355631 |
| Macrophages | 188 | 100um | 4.91246407 |
| Mast_cells | 5 | 100um | 0.13065064 |
| Nerve | 10 | 100um | 0.26130128 |
| Neutrophils | 289 | 100um | 7.55160700 |
| Plasma_cells | 235 | 100um | 6.14058009 |
| SMC | 79 | 100um | 2.06428011 |
| T_cells | 556 | 100um | 14.52835119 |
| B_cells | 603 | 200um | 9.63412686 |
| Endothelial_cells | 111 | 200um | 1.77344624 |
| Epithelial_cells | 2902 | 200um | 46.36523406 |
| Fibroblasts | 800 | 200um | 12.78159450 |
| Macrophages | 262 | 200um | 4.18597220 |
| Mast_cells | 6 | 200um | 0.09586196 |
| Nerve | 14 | 200um | 0.22367790 |
| Neutrophils | 383 | 200um | 6.11918837 |
| Plasma_cells | 307 | 200um | 4.90493689 |
| SMC | 121 | 200um | 1.93321617 |
| T_cells | 750 | 200um | 11.98274485 |
结果可视化
可视化目标细胞类型周围细胞类型的占比
R
ggplot(neigh_freq1, aes(fill = Celltype, y = Region_Pct, x = Distance)) +
theme_bw() +
geom_bar(position = "fill", stat = "identity") +
scale_fill_manual(
values = c("#93cc82", "#4d97cd", "#f6f5ee", "#ea9c9d", "#c74546",
"#db6968", "#4d97cd", "#99cbeb", "#459943",
"#fdc58f", "#e8c559", "#a3d393", "#f8984e"), #可修改自己设定颜色
name = "Celltype" # 自定义图例名称
) +
theme(
plot.title = element_text(hjust = 0.5),
panel.background = element_blank()
)
可视化目标细胞类型周围指定细胞类型的数量
R
plot = neigh_freq1[neigh_freq1$Celltype %in% "T_cells",]
plot$Distance = factor(plot$Distance,levels = c("20um","50um","100um","200um"))R
plot| Celltype | Cell_number | Distance | Region_Pct |
|---|---|---|---|
| <fct> | <int> | <fct> | <dbl> |
| T_cells | 94 | 20um | 21.26697 |
| T_cells | 310 | 50um | 17.95020 |
| T_cells | 556 | 100um | 14.52835 |
| T_cells | 750 | 200um | 11.98274 |
R
ggplot(plot, aes(x = Distance, y = Cell_number, fill = Distance)) +
geom_col() + # 使用 geom_col() 代替 geom_bar(stat="identity")
scale_fill_manual(
values = c("#8c510a", "#d8b365", "#f6e8c3", "#c7eae5"),
name = "Range" # 自定义图例名称
) +
labs(
x = "Neutrophils Neighborhood", # x轴标签
y = "Epithelial Cells Number", # y轴标签
) +
theme_minimal()+ # 使用简洁的主题
geom_hline(yintercept = 750,color="red")+
annotate(
"text",
x = Inf, # 最右侧显示
y = 750,
label = "T1 total Epithelial cells:750",
color = "red",
hjust = 1.1, # 靠右对齐
vjust = -1
)
可视化目标细胞类型周围细胞类型密度分布
R
sub_position = rownames_to_column(sub_position,"Cell ID")
sub_position = cbind(sub_position,sub@meta.data[,col_celltype])
colnames(sub_position)[2:4] = c("Cell X Position","Cell Y Position","Phenotype")
sub_positionR
distances=find_nearest_distance(sub_position)
csd_with_distance=bind_cols(sub_position, distances)
ggplot(csd_with_distance,aes(`Distance to Neutrophils`, color=Phenotype))+geom_density(size=1)+theme_bw()
