Skip to content

单细胞空间邻域统计:细胞空间分布与邻近类型分析

作者: SeekGene
时长: 13 分钟
字数: 2.6k 字
更新: 2026-02-28
阅读: 0 次
空间转录组 Notebooks 生态位分析

加载分析 R 包

R
library(Seurat)
library(qs)
library(dplyr)
library(tibble)
library(stringr)
library(ggplot2)
library(phenoptr)
output
Attaching SeuratObject

qs 0.27.3. Announcement: https://github.com/qsbase/qs/issues/103

Warning message:
“package ‘dplyr’ was built under R version 4.3.3”

Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

filter, lag


The following objects are masked from ‘package:base’:

intersect, setdiff, setequal, union


Warning message:
“package ‘tibble’ was built under R version 4.3.3”
Warning message:
“package ‘stringr’ was built under R version 4.3.3”
Warning message:
“package ‘ggplot2’ was built under R version 4.3.3”

填写输入对象

  • col_sam: metadata 对应的样本列名
  • sample_name: 分析的样本名
  • col_celltype:metadata 对应的细胞类型列名
  • analysis_celltype:分析的细胞类型
R
# Analysis parameters -- edit these four values to match your own dataset.
col_sam <- "Sample"                 # metadata column that holds the sample name
sample_name <- "T1"                 # sample to analyse
col_celltype <- "large_celltype"    # metadata column that holds the cell type
analysis_celltype <- "Neutrophils"  # cell type whose neighborhood is analysed

读取 rds 和 metadata,对数据预处理

R
# Load the serialized object with the spatial dataset (used below as a Seurat
# object: `subset()`, `@meta.data`, `Embeddings()`).
data <- readRDS(file = "../../data/AY1739779861133/input.rds")
R
# Read the external per-cell metadata table and index it by cell barcode.
meta <- read.delim("../../data/AY1739779861133/meta.tsv")
rownames(meta) <- meta$barcode

# Overwriting `@meta.data` wholesale is only safe when the table has exactly
# one row per cell of `data`, in the same order as the cells. Fail loudly when
# cells are missing instead of silently attaching misaligned annotations.
missing_cells <- setdiff(colnames(data), rownames(meta))
if (length(missing_cells) > 0) {
  stop(
    length(missing_cells), " cell(s) in `data` have no row in meta.tsv, e.g. ",
    missing_cells[1],
    call. = FALSE
  )
}

# Reorder rows to the object's cell order and drop rows for unknown barcodes.
data@meta.data <- meta[colnames(data), , drop = FALSE]
  • 提取 rds 里想要分析的空间样本信息
  • 将 SeekSpace 数据提供的空间坐标由像素单位换算为实际物理距离(μm),换算系数为 0.2653
  • 提取想要分析的细胞类型
  • 计算样本内所有细胞两两之间的欧氏距离
R
# Keep only the cells that belong to the requested sample.
sub <- subset(
  data,
  cells = rownames(data@meta.data)[data@meta.data[[col_sam]] %in% sample_name]
)

# Barcode -> cell type lookup table for the selected sample.
sub@meta.data$barcode <- rownames(sub@meta.data)
df_labels <- sub@meta.data[, c("barcode", col_celltype)]

# Spatial coordinates, rescaled by the fixed factor 0.2653 (pixel -> physical
# distance according to the accompanying text).
sub_position <- as.data.frame(Embeddings(sub, "spatial") * 0.2653)

# Barcodes of the target cell type.
Neu_barcode <- rownames(sub@meta.data)[
  sub@meta.data[[col_celltype]] %in% analysis_celltype
]

# Full pairwise Euclidean distance matrix between all cells of the sample
# (rows and columns are named by barcode).
dist_matrix <- as.matrix(dist(sub_position, method = "euclidean"))

分别计算 4 个不同范围内的空间邻域组成

R
# Neighborhood composition of the target cell type at several radii.
#
# For each radius, collect the distinct cells that lie within that distance of
# at least one target cell (`Neu_barcode`), then count how many cells of each
# type that set contains. The result `neigh_freq1` has one row per
# (cell type, radius) with columns Celltype, Cell_number and Distance.
thresholds <- c(20, 50, 100, 200)

# Only the rows that belong to target cells are ever used, so slice them once
# instead of scanning the full cell-by-cell matrix for every radius.
target_dist <- dist_matrix[Neu_barcode, , drop = FALSE]

neigh_freq_list <- lapply(thresholds, function(threshold) {
  # A cell counts as a neighbor when it is within `threshold` of any target
  # cell. `> 0` excludes each target cell from its own neighborhood.
  # NOTE(review): this also excludes distinct cells with identical coordinates.
  is_neighbor <- colSums(target_dist > 0 & target_dist <= threshold) > 0
  neighbor_barcodes <- colnames(target_dist)[is_neighbor]

  # Look the cell types up through `col_celltype` rather than a hard-coded
  # column name, so changing the parameter keeps the analysis working.
  neighbor_labels <- df_labels[[col_celltype]][
    match(neighbor_barcodes, df_labels$barcode)
  ]

  neigh_freq <- as.data.frame(table(neighbor_labels), stringsAsFactors = TRUE)
  colnames(neigh_freq) <- c("Celltype", "Cell_number")
  # rep() keeps this valid even when no neighbor was found (zero rows).
  neigh_freq$Distance <- rep(paste0(threshold, "um"), nrow(neigh_freq))
  neigh_freq
})

# Bind once at the end instead of growing a data frame inside a loop.
neigh_freq1 <- do.call(rbind, neigh_freq_list)
output
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
Joining with \`by = join_by(barcode)\`
R
# Display the neighbor counts: one row per cell type and distance range.
neigh_freq1
A data.frame: 44 × 3
Celltype           Cell_number  Distance
<fct>              <int>        <chr>
B_cells            49           20um
Endothelial_cells  9            20um
Epithelial_cells   68           20um
Fibroblasts        105          20um
Macrophages        35           20um
Mast_cells         2            20um
Nerve              3            20um
Neutrophils        39           20um
Plasma_cells       34           20um
SMC                4            20um
T_cells            94           20um
B_cells            188          50um
Endothelial_cells  41           50um
Epithelial_cells   443          50um
Fibroblasts        312          50um
Macrophages        106          50um
Mast_cells         4            50um
Nerve              7            50um
Neutrophils        162          50um
Plasma_cells       119          50um
SMC                35           50um
T_cells            310          50um
B_cells            386          100um
Endothelial_cells  82           100um
Epithelial_cells   1401         100um
Fibroblasts        596          100um
Macrophages        188          100um
Mast_cells         5            100um
Nerve              10           100um
Neutrophils        289          100um
Plasma_cells       235          100um
SMC                79           100um
T_cells            556          100um
B_cells            603          200um
Endothelial_cells  111          200um
Epithelial_cells   2902         200um
Fibroblasts        800          200um
Macrophages        262          200um
Mast_cells         6            200um
Nerve              14           200um
Neutrophils        383          200um
Plasma_cells       307          200um
SMC                121          200um
T_cells            750          200um
  • 计算每个距离范围内分析目标细胞类型周围邻域细胞类型占比
R
# Share (in percent) of every neighbor cell type within each distance range.
neigh_freq1 <- neigh_freq1 %>%
  group_by(Distance) %>% # step 1: one group per distance range
  mutate(Region_Pct = Cell_number / sum(Cell_number) * 100) %>% # step 2: share within the group
  ungroup() %>%
  # Order the distance ranges by first appearance, i.e. the order in which the
  # radii were computed. Deriving the levels from the data (instead of
  # repeating the radius list here) avoids silent NA levels when the radii
  # are changed upstream.
  mutate(Distance = factor(Distance, levels = unique(as.character(Distance))))
R
# Display the table again, now with the Region_Pct column.
neigh_freq1
A tibble: 44 × 4
Celltype           Cell_number  Distance  Region_Pct
<fct>              <int>        <fct>     <dbl>
B_cells            49           20um      11.08597285
Endothelial_cells  9            20um      2.03619910
Epithelial_cells   68           20um      15.38461538
Fibroblasts        105          20um      23.75565611
Macrophages        35           20um      7.91855204
Mast_cells         2            20um      0.45248869
Nerve              3            20um      0.67873303
Neutrophils        39           20um      8.82352941
Plasma_cells       34           20um      7.69230769
SMC                4            20um      0.90497738
T_cells            94           20um      21.26696833
B_cells            188          50um      10.88592936
Endothelial_cells  41           50um      2.37405906
Epithelial_cells   443          50um      25.65141865
Fibroblasts        312          50um      18.06601042
Macrophages        106          50um      6.13781123
Mast_cells         4            50um      0.23161552
Nerve              7            50um      0.40532716
Neutrophils        162          50um      9.38042849
Plasma_cells       119          50um      6.89056167
SMC                35           50um      2.02663578
T_cells            310          50um      17.95020266
B_cells            386          100um     10.08622942
Endothelial_cells  82           100um     2.14267050
Epithelial_cells   1401         100um     36.60830938
Fibroblasts        596          100um     15.57355631
Macrophages        188          100um     4.91246407
Mast_cells         5            100um     0.13065064
Nerve              10           100um     0.26130128
Neutrophils        289          100um     7.55160700
Plasma_cells       235          100um     6.14058009
SMC                79           100um     2.06428011
T_cells            556          100um     14.52835119
B_cells            603          200um     9.63412686
Endothelial_cells  111          200um     1.77344624
Epithelial_cells   2902         200um     46.36523406
Fibroblasts        800          200um     12.78159450
Macrophages        262          200um     4.18597220
Mast_cells         6            200um     0.09586196
Nerve              14           200um     0.22367790
Neutrophils        383          200um     6.11918837
Plasma_cells       307          200um     4.90493689
SMC                121          200um     1.93321617
T_cells            750          200um     11.98274485

结果可视化

可视化目标细胞类型周围细胞类型的占比

R
# Stacked bar chart: composition of the neighborhood for each distance range.
# `position = "fill"` rescales every bar to a total of 1, so the y axis shows
# proportions (0-1) rather than the 0-100 values stored in Region_Pct.
ggplot(neigh_freq1, aes(fill = Celltype, y = Region_Pct, x = Distance)) +
  theme_bw() +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_manual(
    # One colour per cell type, assigned in factor-level order; edit freely.
    # Every entry must be unique: the original palette repeated "#4d97cd" in
    # positions 2 and 7, which gave two cell types the same colour.
    values = c("#93cc82", "#4d97cd", "#f6f5ee", "#ea9c9d", "#c74546",
               "#db6968", "#8e6bbf", "#99cbeb", "#459943",
               "#fdc58f", "#e8c559", "#a3d393", "#f8984e"),
    name = "Celltype"  # legend title
  ) +
  labs(x = "Distance", y = "Proportion of neighboring cells") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank()
  )

可视化目标细胞类型周围指定细胞类型的数量

R
# Rows of a single neighbor cell type, with the distance ranges in plotting
# order.
plot <- neigh_freq1 %>%
  filter(Celltype %in% "T_cells") %>%
  mutate(
    Distance = factor(Distance, levels = c("20um", "50um", "100um", "200um"))
  )
R
# Display the filtered table that feeds the bar chart below.
plot
A tibble: 4 × 4
Celltype  Cell_number  Distance  Region_Pct
<fct>     <int>        <fct>     <dbl>
T_cells   94           20um      21.26697
T_cells   310          50um      17.95020
T_cells   556          100um     14.52835
T_cells   750          200um     11.98274
R
# Bar chart: number of cells of one neighbor type found around the target cell
# type at each distance range, with a reference line at the total number of
# cells of that type in the sample.

# Read the plotted neighbor type back from the table so the labels cannot drift
# from the data (the original labels said "Epithelial" while plotting T_cells).
neighbor_type <- as.character(unique(plot$Celltype))
# Total number of cells of that type in the analysed sample; the bars cannot
# exceed it because each neighboring cell is counted once per distance range.
neighbor_total <- sum(sub@meta.data[[col_celltype]] %in% neighbor_type)

ggplot(plot, aes(x = Distance, y = Cell_number, fill = Distance)) +
  geom_col() +
  scale_fill_manual(
    values = c("#8c510a", "#d8b365", "#f6e8c3", "#c7eae5"),
    name = "Range"  # legend title
  ) +
  labs(
    x = paste(paste(analysis_celltype, collapse = "/"), "Neighborhood"),
    y = paste(paste(neighbor_type, collapse = "/"), "Number")
  ) +
  theme_minimal() +
  geom_hline(yintercept = neighbor_total, color = "red") +
  annotate(
    "text",
    x = Inf,  # anchor at the right edge of the panel
    y = neighbor_total,
    label = paste0(
      paste(sample_name, collapse = "+"), " total ",
      paste(neighbor_type, collapse = "/"), ": ", neighbor_total
    ),
    color = "red",
    hjust = 1.1,  # pull the text back inside the panel
    vjust = -1    # place it just above the line
  )

可视化各细胞类型到最近目标细胞(Neutrophils)距离的密度分布

R
# Reshape the coordinate table into the column layout passed to
# find_nearest_distance() below: "Cell ID", "Cell X Position",
# "Cell Y Position" and "Phenotype".
# NOTE(review): not idempotent -- re-running this cell on the already reshaped
# table fails; re-create `sub_position` first.
sub_position <- rownames_to_column(sub_position, var = "Cell ID")
colnames(sub_position)[2:3] <- c("Cell X Position", "Cell Y Position")
# Assumes the rows of `sub_position` and `sub@meta.data` are in the same cell
# order, as in the original column bind.
sub_position[["Phenotype"]] <- sub@meta.data[, col_celltype]
sub_position
R
# For every cell, distance to the nearest cell of each phenotype; phenoptr
# returns one "Distance to <phenotype>" column per phenotype.
distances <- find_nearest_distance(sub_position)
csd_with_distance <- bind_cols(sub_position, distances)

# Build the column name from the analysis parameter instead of hard-coding
# "Distance to Neutrophils". Only the first target type is plotted.
dist_col <- paste("Distance to", analysis_celltype[1])

# Density of the distance to the nearest target cell, one curve per phenotype.
# `linewidth` replaces `size`, which is deprecated for lines in ggplot2 >= 3.4.
ggplot(csd_with_distance, aes(x = .data[[dist_col]], color = Phenotype)) +
  geom_density(linewidth = 1) +
  labs(x = dist_col) +
  theme_bw()
0 条评论·0 条回复