Skip to content

SeekSpace 高级分析:空间基因表达密度分布图绘制

作者: SeekGene
时长: 17 分钟
字数: 3.4k 字
更新: 2026-02-28
阅读: 0 次
空间转录组 Notebooks 绘图
python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from mpl_toolkits.mplot3d import Axes3D

数据加载

先利用 R 从 rds 中保存空间坐标和注释结果

读取基因表达矩阵

python
celltype_df_all1 = pd.read_csv("/PROJ2/FLOAT/weichendan/seekspace/SGS2401017/updata_allsamples/10Density/26samples_IGF2_marker.csv")
python
celltype_df_all1
cell_idsample_idIGF2IGF1RIGF2RIGF2_markerIGF1R_markerIGF2R_marker
0AAGTACCATGTATGATGTAGTCATACGWTH10510.0000001.3377670.000000non_IGF2IGF1Rnon_IGF2R
1AAGTTCGATGCGCATCCAAGGCACTATWTH10510.0000000.0000000.000000non_IGF2non_IGF1Rnon_IGF2R
2ACAAGCTATGGTCATCTGGACTGTGGAWTH10510.0000000.0000000.000000non_IGF2non_IGF1Rnon_IGF2R
3ACCCTTGTACAAGTCATCTAAGTACGAWTH10510.0000000.0000000.000000non_IGF2non_IGF1Rnon_IGF2R
4ACTACGATACCGAGTGCGCTCTGCTTCWTH10510.0000001.3403610.000000non_IGF2IGF1Rnon_IGF2R
...........................
394417GTATTTCGCTGTAGAATGCTCTGCTTCNormal_50.0000000.2492310.000000non_IGF2IGF1Rnon_IGF2R
394418TGAGGTTGCTCGAGTTTGCTCTGCTTCNormal_54.1618860.0000000.445782IGF2non_IGF1RIGF2R
394419ATTCCCGATGCAACTGATAGTCATACGNormal_53.7983900.0000000.192101IGF2non_IGF1RIGF2R
394420TTGCATTATGCGCTTCGTTCTGCGCTCNormal_53.6578900.0000000.119740IGF2non_IGF1RIGF2R
394421CAAGCTAGCTGAGAATTCCTAATAACGNormal_53.7149300.1080640.108064IGF2IGF1RIGF2R

394422 rows × 8 columns

python
celltype_df_all = celltype_df_all1.drop(columns=celltype_df_all1.columns[5:8])
python
celltype_df_all
cell_idsample_idIGF2IGF1RIGF2R
0AAGTACCATGTATGATGTAGTCATACGWTH10510.0000001.3377670.000000
1AAGTTCGATGCGCATCCAAGGCACTATWTH10510.0000000.0000000.000000
2ACAAGCTATGGTCATCTGGACTGTGGAWTH10510.0000000.0000000.000000
3ACCCTTGTACAAGTCATCTAAGTACGAWTH10510.0000000.0000000.000000
4ACTACGATACCGAGTGCGCTCTGCTTCWTH10510.0000001.3403610.000000
..................
394417GTATTTCGCTGTAGAATGCTCTGCTTCNormal_50.0000000.2492310.000000
394418TGAGGTTGCTCGAGTTTGCTCTGCTTCNormal_54.1618860.0000000.445782
394419ATTCCCGATGCAACTGATAGTCATACGNormal_53.7983900.0000000.192101
394420TTGCATTATGCGCTTCGTTCTGCGCTCNormal_53.6578900.0000000.119740
394421CAAGCTAGCTGAGAATTCCTAATAACGNormal_53.7149300.1080640.108064

394422 rows × 5 columns

python
celltype_df_all = celltype_df_all.set_index('cell_id')

提取单样本的基因表达情况

python
samples = celltype_df_all["sample_id"].unique()
python
samples
output
array(['WTH1051', 'LGG_3', 'LGG_4', 'LGG_6_1', 'LGG_6_2', 'LGG_7_2',
'LGG_7_1', 'LGG_8', 'WTH973', 'WTH1029', 'WTH1030', 'ndGBM_4_2',
'ndGBM_5_1', 'ndGBM_5_2', 'ndGBM_6_2', 'ndGBM_6_3', 'ndGBM_5_3',
'ndGBM_7', 'ndGBM_8', 'nd_GBM_8', 'WTH1052', 'WTH1068', 'WTH974',
'rGBM_2', 'rGBM_3', 'Normal_5'], dtype=object)
python
sample = samples[13]
sample
output
'ndGBM_5_2'
python
celltype_df = celltype_df_all[celltype_df_all['sample_id'].str.contains(sample)]
python
celltype_df
sample_idIGF2IGF1RIGF2R
cell_id
AGAGCCCCGATAGCTACCCTAATAACGndGBM_5_20.0000000.0000000.000000
CACCGTTATGGGAAACGTCCTATTAGGndGBM_5_20.0000000.0000000.000000
GCCATTCTACGCCTTTGCTAAGTACGAndGBM_5_20.0000000.0000000.000000
TAAGTCGCGACATCCGGCTAAGTACGAndGBM_5_20.0000000.0000000.000000
TACGTCCTACGAAGTGGGGACTGTGGAndGBM_5_20.0000000.0000000.000000
...............
CGCGTGACGATGCAAGGAAGGCACTATndGBM_5_20.0208620.0809570.225520
GCCAGGTATGAGTCCGGGGACTGTGGAndGBM_5_20.0207700.1370690.273685
TGGGTTAATGACGTCGAAAGGCACTATndGBM_5_20.0206760.0607890.303916
CCCGGAACGAGTCAGCCCCTAATAACGndGBM_5_20.0000000.0912170.323553
TCAGGGCGCTTTCTATCAGCGACGGATndGBM_5_20.0000000.1561620.262727

14055 rows × 4 columns

读取空间坐标

python
spatial_df = pd.read_csv(f"/PROJ2/FLOAT/weichendan/seekspace/SGS2401017/allsamples/00data/down_oss/{sample}/{sample}_barcode_centers.csv",index_col=0,sep=',')
spatial_df.drop(columns=['RNA_snn_res.0.8'], inplace=True)
spatial_df = spatial_df[spatial_df.index.isin(celltype_df.index)]
python
spatial_df
xy
barcode
AAACCCAATGACAGCGTGCTCTGCTTC11109227
AAACCCAATGACTACGATTCTGCGCTC1186317525
AAACCCAATGGGCTTCTAAGGCACTAT3632610478
AAACCCAATGTCAATCCGGACTGTGGA402017168
AAACCCACGAAACGGTGGGACTGTGGA351762370
.........
TTTGTTGATGACAGCTGTTCTGCGCTC3485015295
TTTGTTGGCTCGGAAGTTTCTGCGCTC25424716
TTTGTTGTACAAACCATTTCTGCGCTC381557606
TTTGTTGTACCGTGCTCTTCTGCGCTC2550214087
TTTGTTGTACGTTCTATTAGTCATACG487647264

14055 rows × 2 columns

合并数据

python
merged_df = pd.merge(celltype_df, spatial_df, left_index=True, right_index=True,how="inner")
python
merged_df
sample_idIGF2IGF1RIGF2Rxy
AGAGCCCCGATAGCTACCCTAATAACGndGBM_5_20.0000000.0000000.0000002791315055
CACCGTTATGGGAAACGTCCTATTAGGndGBM_5_20.0000000.0000000.000000299204106
GCCATTCTACGCCTTTGCTAAGTACGAndGBM_5_20.0000000.0000000.000000345381969
TAAGTCGCGACATCCGGCTAAGTACGAndGBM_5_20.0000000.0000000.0000002768010535
TACGTCCTACGAAGTGGGGACTGTGGAndGBM_5_20.0000000.0000000.0000004313511168
.....................
CGCGTGACGATGCAAGGAAGGCACTATndGBM_5_20.0208620.0809570.2255203352812544
GCCAGGTATGAGTCCGGGGACTGTGGAndGBM_5_20.0207700.1370690.273685405507503
TGGGTTAATGACGTCGAAAGGCACTATndGBM_5_20.0206760.0607890.3039162095213660
CCCGGAACGAGTCAGCCCCTAATAACGndGBM_5_20.0000000.0912170.3235534163116713
TCAGGGCGCTTTCTATCAGCGACGGATndGBM_5_20.0000000.1561620.2627273437911688

14055 rows × 6 columns

绘制 3D 空间密度图

以细胞类型 AC 为例,绘制其 3D 空间密度图

python
for gene in ["IGF2","IGF1R","IGF2R"]:
        # 提取坐标和基因表达值
        xy = merged_df[['x', 'y']].values.T  # 形状为 (2, N)
        expression = merged_df[gene].values  # 基因表达值

        # 新增:判断数据点数和表达值有效性
        if xy.shape[1] <= 1 or np.all(np.isnan(expression)) or np.all(expression == 0):
            print(f"{sample} {gene} 数据点不足或表达值无效,跳过。")
            continue

        # 使用基因表达值作为权重进行核密度估计
        # 注意:这里直接使用表达值作为 z 轴高度
        kde_weights = gaussian_kde(xy, weights=expression, bw_method=0.2)

        # 创建网格
        xmin, xmax = merged_df.x.min(), merged_df.x.max()
        ymin, ymax = merged_df.y.min(), merged_df.y.max()
        xgrid = np.linspace(xmin, xmax, 200)
        ygrid = np.linspace(ymin, ymax, 200)
        X, Y = np.meshgrid(xgrid, ygrid)
        grid_points = np.vstack([X.ravel(), Y.ravel()])

        # 计算加权密度(即基因表达的空间分布)
        Z = kde_weights(grid_points).reshape(X.shape)

        # 创建画布
        fig = plt.figure(figsize=(7, 5))
        ax3d = fig.add_subplot(111, projection='3d')

        # 设置长宽比
        ax3d.set_box_aspect([15, 5, 5])

        # 美化设置
        ax3d.grid(False)
        ax3d.set_facecolor('white')
        ax3d.xaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
        ax3d.yaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
        ax3d.zaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))

        # 隐藏坐标轴
        ax3d.set_xticks([])
        ax3d.set_yticks([])
        ax3d.set_zticks([])
        ax3d.set_xticklabels([])
        ax3d.set_yticklabels([])
        ax3d.set_xlabel('')
        ax3d.set_ylabel('')
        ax3d.set_zlabel('')

        # 绘制基因表达曲面
        surf = ax3d.plot_surface(
            X, Y, Z,
            #cmap='viridis',          # 使用热图色系表示表达强度
            cmap='RdYlGn_r',
            rstride=5,               # 行采样步长
            cstride=5,               # 列采样步长
            alpha=0.8,               # 适当透明度
            antialiased=True,
            edgecolor='none',
            linewidth=0.1
        )

        # 添加颜色条表示表达强度
        cbar = fig.colorbar(surf, ax=ax3d, shrink=0.4, aspect=10, pad=0.05)
        cbar.set_label(f'{gene} Expression', fontsize=10)

        # 设置视角(从y轴方向俯视)
        ax3d.view_init(elev=30, azim=-90)

        # 调整布局并保存
        plt.tight_layout()
        plt.subplots_adjust(left=0, right=0.95, top=0.95, bottom=0.05)
        #plt.savefig(f'{sample}_{gene}_expression3d.pdf')
        plt.show()

绘制 3D 空间密度图的映射图

以细胞类型 AC 为例,绘制其 3D 空间密度图的映射图

python
for gene in ["IGF2","IGF1R","IGF2R"]:
        # 提取坐标和基因表达值
        xy = merged_df[['x', 'y']].values.T  # 形状为 (2, N)
        expression = merged_df[gene].values  # 基因表达值

        # 新增:判断数据点数和表达值有效性
        if xy.shape[1] <= 1 or np.all(np.isnan(expression)) or np.all(expression == 0):
            print(f"{sample} {gene} 数据点不足或表达值无效,跳过。")
            continue

        # 使用基因表达值作为权重进行核密度估计
        # 注意:这里直接使用表达值作为 z 轴高度
        kde_weights = gaussian_kde(xy, weights=expression, bw_method=0.2)

        # 创建网格
        xmin, xmax = merged_df.x.min(), merged_df.x.max()
        ymin, ymax = merged_df.y.min(), merged_df.y.max()
        xgrid = np.linspace(xmin, xmax, 200)
        ygrid = np.linspace(ymin, ymax, 200)
        X, Y = np.meshgrid(xgrid, ygrid)
        grid_points = np.vstack([X.ravel(), Y.ravel()])

        # 计算加权密度(即基因表达的空间分布)
        Z = kde_weights(grid_points).reshape(X.shape)

        # 创建画布
        fig = plt.figure(figsize=(7, 5))
        ax3d = fig.add_subplot(111, projection='3d')

        # 设置长宽比
        ax3d.set_box_aspect([15, 5, 0.01])

        # 美化设置
        ax3d.grid(False)
        ax3d.set_facecolor('white')
        ax3d.xaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
        ax3d.yaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
        ax3d.zaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))

        # 隐藏坐标轴
        ax3d.set_xticks([])
        ax3d.set_yticks([])
        ax3d.set_zticks([])
        ax3d.set_xticklabels([])
        ax3d.set_yticklabels([])
        ax3d.set_xlabel('')
        ax3d.set_ylabel('')
        ax3d.set_zlabel('')

        # 绘制基因表达曲面
        surf = ax3d.plot_surface(
            X, Y, Z,
            #cmap='viridis',          # 使用热图色系表示表达强度
            cmap='RdYlGn_r',
            rstride=5,               # 行采样步长
            cstride=5,               # 列采样步长
            alpha=0.8,               # 适当透明度
            antialiased=True,
            edgecolor='none',
            linewidth=0.1
        )

        # 添加颜色条表示表达强度
        cbar = fig.colorbar(surf, ax=ax3d, shrink=0.4, aspect=10, pad=0.05)
        cbar.set_label(f'{gene} Expression', fontsize=10)

        # 设置视角(从y轴方向俯视)
        ax3d.view_init(elev=30, azim=-90)

        # 调整布局并保存
        plt.tight_layout()
        plt.subplots_adjust(left=0, right=0.95, top=0.95, bottom=0.05)
#        plt.savefig(f'{sample}_{gene}_expression2d.pdf')
        plt.show()

绘制 2D 密度图

python
for gene in ["IGF2","IGF1R","IGF2R"]:
        # 提取坐标和基因表达值
        xy = merged_df[['x', 'y']].values.T  # 形状为 (2, N)
        expression = merged_df[gene].values  # 基因表达值

        # 新增:判断数据点数和表达值有效性
        if xy.shape[1] <= 1 or np.all(np.isnan(expression)) or np.all(expression == 0):
            print(f"{sample} {gene} 数据点不足或表达值无效,跳过。")
            continue

        # 使用基因表达值作为权重进行核密度估计
        # 注意:这里直接使用表达值作为 z 轴高度
        kde_weights = gaussian_kde(xy, weights=expression, bw_method=0.2)

        # 创建网格
        xmin, xmax = merged_df.x.min(), merged_df.x.max()
        ymin, ymax = merged_df.y.min(), merged_df.y.max()
        xgrid = np.linspace(xmin, xmax, 200)
        ygrid = np.linspace(ymin, ymax, 200)
        X, Y = np.meshgrid(xgrid, ygrid)
        grid_points = np.vstack([X.ravel(), Y.ravel()])

        # 计算加权密度(即基因表达的空间分布)
        Z = kde_weights(grid_points).reshape(X.shape)

        fig2d, ax2d = plt.subplots(figsize=(6, 4))
        contour = ax2d.contourf(X, Y, Z, levels=20, cmap='RdYlGn_r')

        # 坐标轴设置
        ax2d.set(xlim=(xmin, xmax), ylim=(ymin, ymax), aspect='equal')

        # 取消坐标轴刻度
        ax2d.set_xticks([])
        ax2d.set_yticks([])

        # 取消坐标轴标签
        ax2d.set_xlabel('')
        ax2d.set_ylabel('')

        plt.show()
python
0 条评论·0 条回复