{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cd57bfb6",
   "metadata": {},
   "source": [
    "---\n",
    "title: 细胞组成分析：样本与分组间的细胞类型占比柱状图\n",
    "author: SeekGene\n",
    "date: 2026-01-29\n",
    "tags:\n",
    "  - 3' 转录组\n",
    "  - 5' + 免疫组库\n",
    "  - ATAC + RNA 双组学\n",
    "  - FFPE 单细胞转录组\n",
    "  - Notebooks\n",
    "  - 全序列转录组\n",
    "  - 分析指南\n",
    "  - 甲基化 + RNA 双组学\n",
    "  - 空间转录组\n",
    "  - 绘图\n",
    "---\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44129c9c-bd85-4f50-91b4-af0ce55d8db6",
   "metadata": {},
   "source": [
    "# 细胞组成分析：样本与分组间的细胞类型占比柱状图"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ec29f87-ab87-4893-9271-9f78f379ce33",
   "metadata": {},
   "source": [
    "## 加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c64f0d16-9686-4fc1-9feb-5d7d55bbc097",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach every package the later cells rely on, so the notebook runs on a fresh kernel\n",
    "library(Seurat)     # AddMetaData\n",
    "library(dplyr)      # %>%, count, mutate, distinct, arrange, ...\n",
    "library(ggplot2)    # bar charts\n",
    "library(patchwork)  # stacking plots with `/` and plot_layout()\n",
    "\n",
    "# Directory holding the cloud-platform project data; edit this single line to use another project\n",
    "data_dir <- \"/home/demo-seekgene-com/workspace/data/AY1752565399550\"\n",
    "\n",
    "# Load the Seurat object (rds) and the matching meta.data table\n",
    "data <- readRDS(file.path(data_dir, \"input.rds\"))\n",
    "# The first column of meta.tsv is used as the row names of the metadata table\n",
    "meta <- read.table(file.path(data_dir, \"meta.tsv\"),\n",
    "                   header = TRUE,\n",
    "                   sep = \"\\t\",\n",
    "                   row.names = 1)\n",
    "# Add the metadata columns to the Seurat object\n",
    "data <- AddMetaData(data, meta.data = meta)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26473011-a10f-4e8c-92d8-5b4c82073d6e",
   "metadata": {},
   "source": [
    "### 计算每种细胞类型细胞总数和在不同样本中的占比"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d158abd7-7383-45f5-b3e3-ba8cebf073f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Total number of cells per cell type. `data` is the Seurat object and CellAnnotation\n",
    "#    is the meta.data column that holds the annotation result.\n",
    "cell_counts <- data@meta.data %>%\n",
    "  count(CellAnnotation) %>%\n",
    "  mutate(n_thousands = n / 1000)\n",
    "\n",
    "# 2. Percentage contributed by each sample within every cell type. CellAnnotation, Sample\n",
    "#    and Group are meta.data column names; adjust them to match your own data.\n",
    "cell_percentages <- data@meta.data %>%\n",
    "  count(CellAnnotation, Sample, Group) %>%\n",
    "  group_by(CellAnnotation) %>%  # percentages are computed within each cell type\n",
    "  mutate(percentage = n / sum(n) * 100) %>%\n",
    "  ungroup()\n",
    "\n",
    "# 3. Sample -> group lookup table.\n",
    "#    The Sample column must be kept here: arrange() and pull() below both use it.\n",
    "sample_info <- data@meta.data %>%\n",
    "  distinct(Sample, Group) %>%\n",
    "  arrange(Group, Sample)\n",
    "\n",
    "# Samples that belong to the AD and Ctrl groups\n",
    "ad_samples <- sample_info %>% filter(Group == \"AD\") %>% pull(Sample)\n",
    "ctrl_samples <- sample_info %>% filter(Group == \"Ctrl\") %>% pull(Sample)\n",
    "\n",
    "# Sample colours: the name is the sample ID, the value is its colour\n",
    "sample_colors <- c(\"3329\"=\"#004983\",\"4305\"=\"#0040d1\",\"4313\"=\"#3472ff\",\"4443\"=\"#5b8dff\",\n",
    "                   \"4481\"=\"#6f9bff\",\"4482\"=\"#96b6ff\",\"4627\"=\"#aac4ff\",\"1224\"=\"#ff2668\",\n",
    "                   \"1230\"=\"#ff3a76\",\"1238\"=\"#ff4e84\",\"3586\"=\"#ff6292\",\"HCT17HEX\"=\"#ff89ad\",\n",
    "                   \"HCTZZT\"=\"#ffb0c8\",\"NT1261\"=\"#ffc4d6\",\"NT1271\"=\"#ffebf1\")\n",
    "sample_order <- names(sample_colors)  # stacking/legend order follows the colour map\n",
    "\n",
    "# factor() turns any sample that has no entry in sample_colors into NA, so report those\n",
    "# samples instead of letting them disappear silently.\n",
    "samples_without_color <- setdiff(unique(as.character(cell_percentages$Sample)), sample_order)\n",
    "if (length(samples_without_color) > 0) {\n",
    "  warning(\"Samples missing from sample_colors (they will become NA): \",\n",
    "          paste(samples_without_color, collapse = \", \"))\n",
    "}\n",
    "cell_percentages$Sample <- factor(cell_percentages$Sample, levels = sample_order)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "96fe96a0-155e-49ec-aa0a-b90cadbe4dc2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-02T02:24:51.158104Z",
     "iopub.status.busy": "2025-09-02T02:24:51.115578Z",
     "iopub.status.idle": "2025-09-02T02:24:51.181088Z",
     "shell.execute_reply": "2025-09-02T02:24:51.179212Z"
    }
   },
   "source": [
    "### ggplot2 画每种细胞类型细胞总数的柱状图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e60bf30a-f94a-411a-b4f9-e012bcaa1577",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Upper panel: bar chart of the total cell count (in thousands) per cell type\n",
    "n_cell_types <- length(unique(cell_counts$CellAnnotation))\n",
    "\n",
    "# Use the my36colors palette when the session provides one that is long enough;\n",
    "# otherwise fall back to ggplot2's default hue palette so the cell still runs.\n",
    "cell_type_colors <- if (exists(\"my36colors\") && length(my36colors) >= n_cell_types) {\n",
    "  my36colors[seq_len(n_cell_types)]\n",
    "} else {\n",
    "  scales::hue_pal()(n_cell_types)\n",
    "}\n",
    "\n",
    "upper_plot <- ggplot(cell_counts, aes(x = CellAnnotation, y = n_thousands, fill = CellAnnotation)) +\n",
    "  geom_bar(stat = \"identity\", width = 0.7) +\n",
    "  scale_fill_manual(values = cell_type_colors) +\n",
    "  labs(title = \"Cell Count by Subclustering Type\",\n",
    "       x = \"Cell Type\", y = \"Cell Count (x1000)\") +\n",
    "  theme_minimal() +\n",
    "  theme(axis.text.x = element_text(angle = 45, hjust = 1),\n",
    "        legend.position = \"none\",  # bar colours repeat the x axis, so no legend is needed\n",
    "        panel.grid.major = element_blank(),\n",
    "        panel.grid.minor = element_blank(),\n",
    "        panel.border = element_rect(fill = NA, color = \"black\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf56a6a5-941b-4e9d-9430-aab1657cb068",
   "metadata": {},
   "source": [
    "### ggplot2 画每种细胞类型在不同样本中百分比柱状图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b3e12e0-7ab6-4ae7-b838-cbc18c5077b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Lower panel: stacked bar chart of the percentage each sample contributes to a cell type\n",
    "\n",
    "# Sample -> Group lookup for the legend, built from the Sample and Group columns that\n",
    "# cell_percentages already carries.\n",
    "sample_groups <- cell_percentages %>% distinct(Sample, Group)\n",
    "\n",
    "# Legend label of the form \"<sample> (<group>)\"\n",
    "donor_label <- function(x) {\n",
    "  paste0(x, \" (\", sample_groups$Group[match(x, sample_groups$Sample)], \")\")\n",
    "}\n",
    "\n",
    "lower_plot <- ggplot(cell_percentages, aes(x = CellAnnotation, y = percentage, fill = Sample)) +\n",
    "  geom_bar(stat = \"identity\", position = \"stack\", width = 0.7, color = \"black\", linewidth = 0.3) +\n",
    "  scale_fill_manual(values = sample_colors,\n",
    "                    name = \"Donor\",\n",
    "                    labels = donor_label) +\n",
    "  labs(x = \"Cell Type\", y = \"% From Donor\") +\n",
    "  theme_classic() +\n",
    "  theme(\n",
    "    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),\n",
    "    axis.text.y = element_text(size = 10),\n",
    "    axis.title.x = element_text(size = 12, face = \"bold\"),\n",
    "    axis.title.y = element_text(size = 12, face = \"bold\"),\n",
    "    legend.position = \"right\",\n",
    "    legend.title = element_text(face = \"bold\"),\n",
    "    legend.key.size = unit(0.4, \"cm\"),\n",
    "    panel.grid.major.y = element_line(color = \"grey80\", linewidth = 0.2),\n",
    "    panel.border = element_rect(color = \"black\", fill = NA, linewidth = 0.5)\n",
    "  ) +\n",
    "  # Append a percent sign to the axis labels; no padding below 0, 5% above the top\n",
    "  scale_y_continuous(labels = function(x) paste0(x, \"%\"),\n",
    "                     expand = expansion(mult = c(0, 0.05)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6c396d4-14e8-44ef-b498-becc80473c58",
   "metadata": {},
   "source": [
    "### 图片组合和保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c042fb4-56f0-4a3c-a3d5-67b28f00493c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. Stack the two panels vertically; the lower panel gets slightly more height (1 : 1.2)\n",
    "combined_plot <- upper_plot / lower_plot +\n",
    "  plot_layout(heights = c(1, 1.2))\n",
    "\n",
    "# 7. Display the combined figure\n",
    "print(combined_plot)\n",
    "\n",
    "# 8. Save as a 300 dpi PNG with a white background\n",
    "ggsave(\"cell_distribution_plot.png\", combined_plot, \n",
    "       width = 10, height = 8, dpi = 300, bg = \"white\")\n",
    "\n",
    "# 9. Validate: the sample percentages within each cell type must add up to 100\n",
    "cat(\"验证每个细胞类型的百分比总和:\\n\")\n",
    "validation <- cell_percentages %>%\n",
    "  group_by(CellAnnotation) %>%\n",
    "  summarise(total_percent = sum(percentage), .groups = \"drop\")\n",
    "print(validation)\n",
    "# Fail loudly instead of relying on someone reading the printed table\n",
    "stopifnot(all(abs(validation$total_percent - 100) < 1e-6))\n",
    "\n",
    "cat(\"\\n样本信息:\\n\")\n",
    "print(sample_info)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "common_r",
   "language": "R",
   "name": "common_r"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
