Visualization of Single-cell Gene Expression Correlation Scatter Plots

Author: SeekGene

Time: 11 min

Words: 2.2k words

Updated: 2026-02-27

Reads: 0 times

3' scRNA-seq 5' + Immune Profiling FFPE scRNA-seq Notebooks Plotting Spatial-seq scATAC + RNA-seq scFAST-seq scMethyl + RNA-seq

Environment Setup

# Load required R packages
library(Seurat)      # For single-cell data analysis
library(ggplot2)     # For data visualization
library(ggpubr)      # For adding statistical info to plots
library(ggExtra)     # For adding marginal distribution plots
library(dplyr)       # For data processing and pipeline operations

Data Loading and Preprocessing

# Read data and preprocessing
seurat.obj <- readRDS("data/AY1739512568405/input.rds")  # Read Seurat object
meta <- read.table("data/AY1739512568405/meta.tsv", header=T, sep="\t", row.names = 1)  # Read metadata
obj <- AddMetaData(seurat.obj, meta)  # Add metadata to Seurat object
DefaultAssay(obj) = "RNA"  # Set default assay to RNA expression data
head(obj@meta.data)  # View first few rows of metadata

# View sample and cell type information
unique(obj@meta.data$Sample)  # View all unique sample names
unique(obj@meta.data$celltype)  # View all unique cell types

# Set target cell type and sample
target_celltypes <- c('T cell')  # Specify target cell type as T cell
target_samples1 <- c('S133T')    # Specify single target sample
# Create subset objects
sub_obj1 <- subset(obj, subset = Sample %in% target_samples1 & celltype %in% target_celltypes)  # Create single sample T cell subset
sub_obj2 <- subset(obj, subset = celltype %in% target_celltypes)  # Create all samples T cell subset

# Verify subset data
unique(sub_obj1@meta.data$Sample)    # Check sample of subset 1
unique(sub_obj1@meta.data$celltype)  # Check cell type of subset 1
unique(sub_obj2@meta.data$Sample)    # Check sample of subset 2
unique(sub_obj2@meta.data$celltype)  # Check cell type of subset 2

Single Sample Correlation Analysis

# Extract single sample gene expression data
plot_data1 <- FetchData(sub_obj1, 
                      vars = c("PPT1", "TRAF1"),
                      slot = "data")  # Extract PPT1 and TRAF1 expression values from normalized data

# Data quality check
print("Data summary:")
summary(plot_data1)  # Show statistical summary of data
print("Number of NA values:")
print(colSums(is.na(plot_data1)))  # Check number of NA values
print("Number of 0 values:")
print(colSums(plot_data1 == 0))  # Check number of 0 values
print("Number of cells with both genes as 0:")
sum(plot_data1$PPT1== 0 & plot_data1$TRAF1 == 0)  # Check number of cells with double zero expression

# Filter data
plot_data1_filtered <- plot_data1[plot_data1$PPT1 > 0 & plot_data1$TRAF1 > 0, ]  # Filter double gene expressing cells
print("Data summary after filtering:")
summary(plot_data1_filtered)  # Show statistical summary after filtering
print(paste0("Cell count after filtering:", nrow(plot_data1_filtered)))  # Show cell count after filtering

# Create single sample correlation scatter plot
options(repr.plot.height=8, repr.plot.width=8)  # Set figure size
p1 <- ggplot(plot_data1, aes(x = PPT1, y = TRAF1)) +
    geom_point(size = 1, alpha = 0.5, color = "grey30") +  # Add points
    geom_smooth(method = "lm", color = "blue", se = TRUE, fill = "grey90") +  # Add fitted line
    labs(x = "PPT1 expression", y = "TRAF1 expression") +  # Set axis labels
    theme_bw() +  # Set theme
    # Custom theme details
    theme(
        panel.grid.major = element_line(color = "grey90", linewidth = 0.3),
        panel.grid.minor = element_line(color = "grey90", linewidth = 0.3),
        panel.background = element_rect(fill = "white"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
        axis.ticks = element_line(color = "black", linewidth = 0.3),
        axis.text = element_text(color = "black", size = 10),
        axis.title = element_text(color = "black", size = 16),
        axis.ticks.length = unit(0.2, "cm"),
        plot.background = element_rect(fill = "white")
    ) +
    # Set x-axis ticks
    scale_x_continuous(
        breaks = seq(0, max(plot_data1$PPT1), by = 1),
        minor_breaks = seq(0, max(plot_data1$TRAF1), by = 0.5)
    ) +
    # Set y-axis ticks
    scale_y_continuous(
        limits = c(0, max(plot_data1$TRAF1)),
        breaks = seq(0, max(plot_data1$TRAF1), by = 1),
        minor_breaks = seq(0, max(plot_data1$TRAF1), by = 0.5)
    ) +
    # Add correlation statistics
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top",
             size = 4)
# Add marginal distribution plots
p1_with_marginal <- ggMarginal(p1,
                             type = "density",  # Set marginal plot type to density
                             margins = "both",  # Show marginal distribution for both x and y axes
                             size = 5,
                             xparams = list(fill = "orange", alpha = 1),  # Set x-axis marginal plot style
                             yparams = list(fill = "blue", alpha = 1))    # Set y-axis marginal plot style
p1_with_marginal  # Show plot

# Create scatter plot for filtered data
options(repr.plot.height=8, repr.plot.width=8)  # Set figure size
p1_filtered <- ggplot(plot_data1_filtered, aes(x = PPT1, y = TRAF1)) +
    geom_point(size = 1, alpha = 0.5, color = "grey30") +  # Add points
    geom_smooth(method = "lm", color = "blue", se = TRUE, fill = "grey90") +  # Add fitted line and confidence interval
    labs(x = "PPT1 expression", y = "TRAF1 expression") +  # Set axis labels
    # Set theme and style
    theme_bw() +
    theme(
        panel.grid.major = element_line(color = "grey90", linewidth = 0.3),  # Major grid lines
        panel.grid.minor = element_line(color = "grey90", linewidth = 0.3),  # Minor grid lines
        panel.background = element_rect(fill = "white"),  # Background color
        panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),  # Border
        axis.ticks = element_line(color = "black", linewidth = 0.3),  # Ticks
        axis.text = element_text(color = "black", size = 10),  # Axis text
        axis.title = element_text(color = "black", size = 16),  # Axis title
        axis.ticks.length = unit(0.2, "cm"),  # Tick length
        plot.background = element_rect(fill = "white")  # Plot background
    ) +
    # Set x-axis ticks
    scale_x_continuous(
        breaks = seq(0, max(plot_data1_filtered$PPT1), by = 1),
        minor_breaks = seq(0, max(plot_data1_filtered$TRAF1), by = 0.5)
    ) +
    # Set y-axis ticks
    scale_y_continuous(
        limits = c(0, max(plot_data1_filtered$TRAF1)),
        breaks = seq(0, max(plot_data1_filtered$TRAF1), by = 1),
        minor_breaks = seq(0, max(plot_data1_filtered$TRAF1), by = 0.5)
    ) +
    # Add correlation statistics
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top",
             size = 4)

# Add marginal distribution to filtered scatter plot
p1_filtered_with_marginal <- ggMarginal(p1_filtered,
                             type = "density",  # Set marginal plot type to density
                             margins = "both",  # Show marginal distribution for both axes
                             size = 5,
                             xparams = list(fill = "orange", alpha = 1),  # x-axis marginal plot style
                             yparams = list(fill = "blue", alpha = 1))    # y-axis marginal plot style
p1_filtered_with_marginal  # Show plot

Figure Legend

This is a gene expression correlation scatter plot showing the relationship between PPT1 and TRAF1 expression levels in cells:

(1) Main Plot

X-axis: Represents PPT1 gene expression level
Y-axis: Represents TRAF1 gene expression level
Points: Each gray dot represents a cell
Fitted Line: Blue line is the fitted line, indicating the overall trend of expression
Confidence Interval: Light gray area is the confidence interval of the fitted line

(2) Statistics in the plot

R = 0.72: Correlation coefficient, ranging from -1 to 1. 0.72 indicates a strong positive correlation
p = 2.5e-16: Extremely small p-value, indicating statistical significance

(3) Marginal Distribution Plots

Top orange curve: Shows distribution of PPT1 expression
Right blue curve: Shows distribution of TRAF1 expression

Biological Significance PPT1 and TRAF1 expression show significant positive correlation; when one increases, the other tends to increase. This correlation may suggest functional linkage or shared regulatory mechanisms.

Multi-sample Correlation Analysis

# Extract multi-sample expression data and group info
plot_data2 <- FetchData(sub_obj2, 
                      vars = c("PPT1", "TRAF1", "Sample"),  # Extract gene expression values and sample info
                      slot = "data")  # Use normalized data

# Data quality check
print("Data summary:")
summary(plot_data2)  # Show data summary
print("Number of NA values:")
print(colSums(is.na(plot_data2)))  # Check NA values
print("Number of 0 values:")
print(colSums(plot_data2 == 0))  # Check 0 values
print("Number of cells with both genes as 0:")
sum(plot_data2$PPT1== 0 & plot_data2$TRAF1 == 0)  # Check double zero expressing cells

# Filter multi-sample data
plot_data2_filtered <- plot_data2[plot_data2$PPT1 > 0 & plot_data2$TRAF1 > 0, ]  # Filter double gene expressing cells
print("Data summary after filtering:")
summary(plot_data2_filtered)  # Show statistical summary after filtering
print(paste0("Cell count after filtering:", nrow(plot_data2_filtered)))  # Show cell count after filtering

# Create multi-sample scatter plot
options(repr.plot.height=8, repr.plot.width=8)  # Set figure size
p2 <- ggplot(plot_data2, aes(x = PPT1, y = TRAF1, color = Sample)) +  # Color by sample
    geom_point(size = 1.5, alpha = 0.8) +  # Add points
    labs(title = "PPT1 vs TRAF1 Correlation",  # Set title
         x = "PPT1 expression",  # x-axis label
         y = "TRAF1 expression") +  # y-axis label
    # Set theme and style
    theme_bw() +
    theme(
        panel.grid.major = element_line(color = "grey90", linewidth = 0.3),  # Major grid lines
        panel.grid.minor = element_line(color = "grey95", linewidth = 0.3),  # Minor grid lines
        panel.background = element_rect(fill = "white"),  # Background
        panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),  # Border
        axis.ticks = element_line(color = "black", linewidth = 0.3),  # Ticks
        axis.text = element_text(color = "black", size = 16),  # Axis text
        axis.title = element_text(color = "black", size = 16),  # Axis title
        legend.title = element_text(size = 16),  # Legend title
        legend.text = element_text(size = 16),  # Legend text
        legend.position = "right",  # Legend position
        plot.title = element_text(size = 16, hjust = 0)  # Plot title
    ) +
    # Set x-axis ticks
    scale_x_continuous(
        breaks = seq(0, max(plot_data2$PPT1), by = 1),
        minor_breaks = seq(0, max(plot_data2$TRAF1), by = 0.5)
    ) +
    # Set y-axis ticks
    scale_y_continuous(
        limits = c(0, max(plot_data2$TRAF1)),
        breaks = seq(0, max(plot_data2$TRAF1), by = 1),
        minor_breaks = seq(0, max(plot_data2$TRAF1), by = 0.5)
    )
p2  # Show plot

Figure Legend

This is a multi-sample gene expression correlation scatter plot showing the relationship between PPT1 and TRAF1 expression in different samples (Correlation coefficients and P-values for each sample can be found in the output correlation_results.txt below):

(1) Basic Structure

X-axis: PPT1 gene expression level
Y-axis: TRAF1 gene expression level
Legend: Right legend, different colors represent different samples (S133A to S159T; 'A' suffix for adjacent normal, 'T' for tumor)
Points: Each dot represents a cell

(2) Data Features

Positive correlation between PPT1 and TRAF1 is observed in multiple samples
Many points near 0 on x/y axes indicate low expression of both genes in some cells
Correlation patterns may vary slightly between samples

Biological Significance

Compare expression relationship differences between samples (esp. tumor vs adjacent)
Helps understand regulatory relationships in different pathological states
May reveal potential sample-specific expression patterns

# Calculate correlation statistics for each sample
correlation_results <- plot_data2 %>%
  group_by(Sample) %>%  # Group by sample
  summarise(
    Correlation = cor(PPT1, TRAF1, method = "pearson"),  # Calculate Pearson correlation
    P_value = cor.test(PPT1, TRAF1, method = "pearson")$p.value,  # Calculate p-value
    Sample_size = n()  # Calculate sample size
  ) %>%
  mutate(
    P_value = format(P_value, scientific = TRUE, digits = 3)  # Format p-value to scientific notation
  )

# Save correlation results
write.table(correlation_results, 
            file = "Tcell_allsampes_PPT1_TRAF1_correlation_results.txt", 
            sep = "\t",             # Tab separated
            row.names = FALSE,      # Exclude row names
            quote = FALSE)          # Do not quote

# Create filtered multi-sample scatter plot
options(repr.plot.height=8, repr.plot.width=8)  # Set figure size
p2_filtered <- ggplot(plot_data2_filtered, aes(x = PPT1, y = TRAF1, color = Sample)) +  # Color by sample
    geom_point(size = 1.5, alpha = 0.8) +  # Add points
    labs(title = "PPT1 vs TRAF1 Correlation",  # Set title
         x = "PPT1 expression",  # x-axis label
         y = "TRAF1 expression") +  # y-axis label
    # Set theme and style
    theme_bw() +
    theme(
        panel.grid.major = element_line(color = "grey90", linewidth = 0.3),  # Major grid lines
        panel.grid.minor = element_line(color = "grey95", linewidth = 0.3),  # Minor grid lines
        panel.background = element_rect(fill = "white"),  # Background
        panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),  # Border
        axis.ticks = element_line(color = "black", linewidth = 0.3),# Axis ticks
        axis.text = element_text(color = "black", size = 16),  # Axis text
        axis.title = element_text(color = "black", size = 16),  # Axis title
        legend.title = element_text(size = 16),  # Legend title
        legend.text = element_text(size = 16),  # Legend text
        legend.position = "right",  # Legend position
        plot.title = element_text(size = 16, hjust = 0)  # Plot title
    ) +
    # Set x-axis ticks
    scale_x_continuous(
        breaks = seq(0, max(plot_data2_filtered$PPT1), by = 1),
        minor_breaks = seq(0, max(plot_data2_filtered$TRAF1), by = 0.5)
    ) +
    # Set y-axis ticks
    scale_y_continuous(
        limits = c(0, max(plot_data2_filtered$TRAF1)),
        breaks = seq(0, max(plot_data2_filtered$TRAF1), by = 1),
        minor_breaks = seq(0, max(plot_data2_filtered$TRAF1), by = 0.5)
    )
p2_filtered  # Show plot

# Calculate correlation statistics for each sample after filtering
correlation_results_filtered <- plot_data2_filtered %>%
  group_by(Sample) %>%  # Group by sample
  summarise(
    Correlation = cor(PPT1, TRAF1, method = "pearson"),  # Calculate Pearson correlation
    # Calculate p-value
    P_value = cor.test(PPT1, TRAF1, method = "pearson")$p.value,  # Calculate p-value
    Sample_size = n()  # Calculate sample size
  ) %>%
  # Convert values to scientific notation
  mutate(
    P_value = format(P_value, scientific = TRUE, digits = 3)
  )

# Save correlation results
write.table(correlation_results_filtered,  # Format p-values as scientific notation
            file = "Tcell_allsampes_PPT1_TRAF1_filtered_correlation_results.txt", 
            sep = "\t",             # Tab separated
            row.names = FALSE,      # Exclude row names
            quote = FALSE)          # Do not quote strings

# Save plot
ggsave("Tcell_S133T_PPT1_TRAF1_filtered_correlation_with_marginal.pdf", 
       plot = p1_filtered_with_marginal, width = 8, height = 8)  # Save filtered single-sample plot
ggsave("Tcell_allsamples_PPT1_TRAF1_correlation.pdf", 
       plot = p2, width = 8, height = 8)  # Save original multi-sample plot
ggsave("Tcell_allsamples_PPT1_TRAF1_filtered_correlation.pdf", 
       plot = p2_filtered, width = 8, height = 8)  # Save filtered multi-sample plot

Visualization of Single-cell Gene Expression Correlation Scatter Plots ​

Environment Setup ​

Data Loading and Preprocessing ​

Single Sample Correlation Analysis ​

Figure Legend ​

Multi-sample Correlation Analysis ​

Figure Legend ​

Visualization of Single-cell Gene Expression Correlation Scatter Plots

Environment Setup

Data Loading and Preprocessing

Single Sample Correlation Analysis

Figure Legend

Multi-sample Correlation Analysis

Figure Legend