Posts
Distribution plot 5
Jan 1, 0001
Data (CSV format):
,cell_line-1,cell_line-2,cell_line-3,cell_line-4,cell_line-5,cell_line-6,cell_line-7,cell_line-8
BCL2L1,-4.3,-2.545,-0.915,-0.442,-3.16,-3.035,-4.67,-3.99
CFLAR,-0.945,-2.665,0.396,-1.85,-3.665,-1.97,-3.995,-4.3
NXF1,-1.3,-1.231,-1.3,-0.392,-1.756,-1.488,-2.57,-2.835
COPA,-0.318,-1.88,-0.447,-0.85,-2.105,-2.085,-0.64,-2.91
EIF4A3,-2.86,-0.692,0.29,-0.345,-2.025,-0.186,-2.72,-1.784
HSPA5,-1.365,-2.595,-0.221,-1.057,-2.87,-2.3,-0.32,-0.53
LOC100507462,-0.579,-1.575,-0.166,-1.74,-0.717,0.236,-3.21,-3.85
COPB1,-1.116,-2.21,-0.533,-0.612,-3.025,-2.315,-0.168,-1.035
COPB2,-0.847,-1.31,-0.573,-0.416,-1.126,-1.308,-0.555,-1.355
SFPQ,-1.6,-0.792,-0.93,-1.027,-0.384,-0.352,-1.505,-1.738
Plot:
library(tidyverse)
# Read the wide table (first column: gene, every other column: one cell line)
# and reshape it so that each row holds a single gene / cell line / log2FC
# measurement. The first column has no header in the CSV, so it is renamed by
# position.
data <- read_csv("data.csv") |>
  rename(gene = 1) |>
  pivot_longer(cols = -gene, names_to = "cell_line", values_to = "log2FC")

# Genes sorted by their median log2FC across cell lines (lowest first); this
# determines the left-to-right order on the x axis.
median_per_gene <- summarise(group_by(data, gene), median = median(log2FC))
gene_order <- pull(arrange(median_per_gene, median), gene)

# Apply the ordering by turning the gene column into a factor.
plot_data <- mutate(data, gene = factor(gene, levels = gene_order))

# One point per measurement, colored by cell line, with a crossbar collapsed
# onto the per-gene median (min = max = median) and a dashed reference at 0.
ggplot(plot_data, aes(gene, log2FC, group = gene, color = cell_line)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "#34495e") +
  geom_point() +
  stat_summary(
    geom = "crossbar",
    fun = median,
    fun.min = median,
    fun.max = median,
    width = 0.5,
    show.legend = FALSE
  ) +
  scale_color_brewer(name = "Cell line", palette = "Set1") +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major.x = element_blank()
  )

# No plot object is passed, so ggsave() writes the last plot displayed.
ggsave("5.png", width = 6, height = 4)
Dot plot
Jan 1, 0001
More context (and code) for this plot can be found in my scRNA-seq workflow in the chapter “Expression of individual genes”.
library(tidyverse)
library(Seurat)
# load a single cell expression data set (generated in the lab I work at)
seurat <- readRDS('seurat.rds')

# cells will be grouped by the clusters that they have been assigned to
cluster_ids <- levels(seurat@meta.data$seurat_clusters)

# select the genes to show: for every cluster, the marker gene with the lowest
# adjusted p-value; unique() guards against the same gene being the top marker
# of several clusters, which would otherwise create duplicated columns below
genes_to_show <- seurat@misc$marker_genes$by_cluster %>%
  group_by(cluster) %>%
  arrange(p_val_adj) %>%
  slice(1) %>%
  pull(gene) %>%
  unique()

# expression values of the selected genes (rows) in all cells (columns);
# drop = FALSE keeps the matrix shape even if only one gene is selected
expression_of_selected_genes <-
  seurat@assays$SCT@data[genes_to_show, , drop = FALSE]

# for every cluster-gene combination, calculate the average expression across
# all cells of the cluster and then transform the data into a long data frame
# NOTE: every call returns one value per gene, so FUN.VALUE must have the
# length of genes_to_show (not of cluster_ids)
expression_levels_per_cluster <- vapply(
  cluster_ids,
  FUN.VALUE = numeric(length(genes_to_show)),
  function(x) {
    # spell out the full column name instead of relying on partial matching
    cells_in_current_cluster <- which(seurat@meta.data$seurat_clusters == x)
    # drop = FALSE: a cluster with a single cell must still be a matrix
    Matrix::rowMeans(
      expression_of_selected_genes[, cells_in_current_cluster, drop = FALSE]
    )
  }
) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column('cluster') %>%
  pivot_longer(
    cols = -cluster,
    names_to = 'gene',
    values_to = 'expression'
  ) %>%
  mutate(id_to_merge = paste0(cluster, '_', gene))

# number of cells per cluster; the cluster column is converted to character so
# that it has the same type as the cluster names used as join key below
cells_per_cluster <- seurat@meta.data %>%
  count(seurat_clusters, name = 'n') %>%
  transmute(cluster = as.character(seurat_clusters), n)

# for every cluster-gene combination, count the cells with a non-zero value
# for the gene (these are considered to express it), divide by the number of
# cells in the cluster and then transform the data into a long data frame
percentage_of_cells_expressing_gene <- vapply(
  cluster_ids,
  FUN.VALUE = numeric(length(genes_to_show)),
  function(x) {
    cells_in_current_cluster <- which(seurat@meta.data$seurat_clusters == x)
    Matrix::rowSums(
      expression_of_selected_genes[, cells_in_current_cluster, drop = FALSE] != 0
    )
  }
) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column('cluster') %>%
  pivot_longer(
    cols = -cluster,
    names_to = 'gene',
    values_to = 'cell_count'
  ) %>%
  left_join(cells_per_cluster, by = 'cluster') %>%
  mutate(
    id_to_merge = paste0(cluster, '_', gene),
    # fraction between 0 and 1; formatted as percentage in the size legend
    percent_cells = cell_count / n
  )

# merge the two data frames created before and plot the data: color encodes the
# average expression, point size the fraction of expressing cells
p <- left_join(
  expression_levels_per_cluster,
  percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
  by = 'id_to_merge'
) %>%
  # reversed levels so that the first cluster ends up at the top of the y axis
  mutate(cluster = factor(cluster, levels = rev(cluster_ids))) %>%
  ggplot(aes(gene, cluster)) +
  geom_point(aes(color = expression, size = percent_cells)) +
  scale_color_distiller(
    palette = 'Reds',
    direction = 1,
    name = 'Log-normalised\nexpression',
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  ) +
  scale_size(name = 'Percent\nof cells', labels = scales::percent) +
  labs(y = 'Cluster', color = 'Expression') +
  coord_fixed() +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
ggsave('4.png', p, height = 7, width = 8)
Gene set enrichment results
Jan 1, 0001
Context and code for this plot can be found in my scRNA-seq workflow in the chapter “Gene set enrichment analysis”.
Heatmap 1
Jan 1, 0001
library(tidyverse)
library(Seurat)
library(ComplexHeatmap)
library(circlize)
library(gridExtra)
library(ggplotify)
# create color palette from flatuicolors.com
colors_dutch <- c(
  '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67',
  '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471',
  '#EE5A24','#009432','#0652DD','#9980FA','#833471',
  '#EA2027','#006266','#1B1464','#5758BB','#6F1E51'
)
colors_spanish <- c(
  '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2',
  '#2c2c54','#474787','#aaa69d','#227093','#218c74',
  '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79',
  '#b33939','#cd6133','#84817a','#cc8e35','#ccae62'
)
# plain character vector holding all 40 colors
custom_colors <- c(colors_dutch, colors_spanish)

# load a single cell expression data set (generated in the lab I work at)
seurat <- readRDS('seurat.rds')

# calculate average expression value for all variable genes for each cluster
# (cells become rows, genes become columns, then cells are averaged per cluster)
average_expression_profiles_by_cluster <-
  seurat@assays$RNA@data[seurat@assays$RNA@var.features, , drop = FALSE] %>%
  t() %>%
  as.matrix() %>%
  as_tibble() %>%
  mutate(cluster = seurat@meta.data$seurat_clusters) %>%
  select(cluster, everything()) %>%
  group_by(cluster) %>%
  summarise(across(everything(), mean))

# calculate Spearman correlation matrix between the cluster profiles
# (after transposing, every column is one cluster)
correlation_matrix <- average_expression_profiles_by_cluster %>%
  select(-cluster) %>%
  as.matrix() %>%
  t() %>%
  cor(method = 'spearman')

# save cluster names for later; they are taken from the summarised table so
# that they always match the rows that went into the correlation, even if a
# factor level has no cells
cluster <- as.character(average_expression_profiles_by_cluster$cluster)

# assign row and column names
rownames(correlation_matrix) <- cluster
colnames(correlation_matrix) <- cluster

# assign a color to each cluster; custom_colors is an atomic vector, so it is
# indexed directly (there is no `$discrete` element to extract)
stopifnot(length(cluster) <= length(custom_colors))
colors_for_clusters <- custom_colors[seq_along(cluster)]
names(colors_for_clusters) <- cluster

# create annotation function that writes the cluster name into each cell;
# ComplexHeatmap documents the cell_fun arguments as
# (j, i, x, y, width, height, fill), with j the column index and i the row
# index, so the cluster name of a cell is looked up by its row index
func_cell_cluster <- function(j, i, x, y, width, height, fill) {
  grid.text(cluster[i], x = x, y = y, gp = gpar(fontsize = 8))
}

# create main heatmap
ht_matrix <- Heatmap(
  correlation_matrix,
  name = 'Spearman\ncorrelation',
  col = colorRamp2(
    c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
    c('#00007F', 'blue', '#007FFF', 'cyan', '#7FFF7F', 'yellow', '#FF7F00', 'red', '#7F0000')
  ),
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = TRUE,
  show_column_names = TRUE,
  heatmap_legend_param = list(
    title = 'Spearman correlation',
    legend_height = unit(6, 'cm'),
    legend_width = unit(1, 'cm'),
    title_position = 'lefttop-rot',
    border = 'black'
  )
)

# create heatmap for annotation of clusters (a single column with one colored,
# labelled cell per cluster)
ht_cluster <- Heatmap(
  cluster,
  name = 'Cluster',
  cell_fun = func_cell_cluster,
  show_row_names = FALSE,
  show_column_names = FALSE,
  width = unit(15, 'mm'),
  col = colors_for_clusters,
  show_heatmap_legend = FALSE,
  top_annotation = HeatmapAnnotation(
    cn = anno_text('Cluster', rot = 0, just = 'center', gp = gpar(fontface = 'bold')),
    height = max_text_height('Cluster')
  )
)

# plot: draw both heatmaps side by side, capture the grid output and convert
# it to a ggplot object so that it can be saved with ggsave()
p <- as.ggplot(grid.grabExpr(draw(ht_matrix + ht_cluster)))
ggsave('1.png', p, height = 6, width = 7)
library(tidyverse)
library(Seurat)
library(ComplexHeatmap)
library(circlize)
library(gridExtra)
library(ggplotify)
# create color palette from flatuicolors.com
colors_dutch <- c(
  '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67',
  '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471',
  '#EE5A24','#009432','#0652DD','#9980FA','#833471',
  '#EA2027','#006266','#1B1464','#5758BB','#6F1E51'
)
colors_spanish <- c(
  '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2',
  '#2c2c54','#474787','#aaa69d','#227093','#218c74',
  '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79',
  '#b33939','#cd6133','#84817a','#cc8e35','#ccae62'
)
# plain character vector holding all 40 colors
custom_colors <- c(colors_dutch, colors_spanish)

# load a single cell expression data set (generated in the lab I work at)
seurat <- readRDS('seurat.rds')

# calculate average expression value for all variable genes for each cluster
# (cells become rows, genes become columns, then cells are averaged per cluster)
average_expression_profiles_by_cluster <-
  seurat@assays$RNA@data[seurat@assays$RNA@var.features, , drop = FALSE] %>%
  t() %>%
  as.matrix() %>%
  as_tibble() %>%
  mutate(cluster = seurat@meta.data$seurat_clusters) %>%
  select(cluster, everything()) %>%
  group_by(cluster) %>%
  summarise(across(everything(), mean))

# calculate Spearman correlation matrix between the cluster profiles
# (after transposing, every column is one cluster)
correlation_matrix <- average_expression_profiles_by_cluster %>%
  select(-cluster) %>%
  as.matrix() %>%
  t() %>%
  cor(method = 'spearman')

# save cluster names for later; they are taken from the summarised table so
# that they always match the rows that went into the correlation, even if a
# factor level has no cells
cluster <- as.character(average_expression_profiles_by_cluster$cluster)

# assign row and column names
rownames(correlation_matrix) <- cluster
colnames(correlation_matrix) <- cluster

# assign a color to each cluster; custom_colors is an atomic vector, so it is
# indexed directly (there is no `$discrete` element to extract)
stopifnot(length(cluster) <= length(custom_colors))
colors_for_clusters <- custom_colors[seq_along(cluster)]
names(colors_for_clusters) <- cluster

# create annotation function that writes the cluster name into each cell;
# ComplexHeatmap documents the cell_fun arguments as
# (j, i, x, y, width, height, fill), with j the column index and i the row
# index, so the cluster name of a cell is looked up by its row index
func_cell_cluster <- function(j, i, x, y, width, height, fill) {
  grid.text(cluster[i], x = x, y = y, gp = gpar(fontsize = 8))
}

# create main heatmap
ht_matrix <- Heatmap(
  correlation_matrix,
  name = 'Spearman\ncorrelation',
  col = colorRamp2(
    c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
    c('#00007F', 'blue', '#007FFF', 'cyan', '#7FFF7F', 'yellow', '#FF7F00', 'red', '#7F0000')
  ),
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = TRUE,
  show_column_names = TRUE,
  heatmap_legend_param = list(
    title = 'Spearman correlation',
    legend_height = unit(6, 'cm'),
    legend_width = unit(1, 'cm'),
    title_position = 'lefttop-rot',
    border = 'black'
  )
)

# create heatmap for annotation of clusters (a single column with one colored,
# labelled cell per cluster)
ht_cluster <- Heatmap(
  cluster,
  name = 'Cluster',
  cell_fun = func_cell_cluster,
  show_row_names = FALSE,
  show_column_names = FALSE,
  width = unit(15, 'mm'),
  col = colors_for_clusters,
  show_heatmap_legend = FALSE,
  top_annotation = HeatmapAnnotation(
    cn = anno_text('Cluster', rot = 0, just = 'center', gp = gpar(fontface = 'bold')),
    height = max_text_height('Cluster')
  )
)

# plot: draw both heatmaps side by side, capture the grid output and convert
# it to a ggplot object so that it can be saved with ggsave()
p <- as.ggplot(grid.grabExpr(draw(ht_matrix + ht_cluster)))
ggsave('1.png', p, height = 6, width = 7)
Intensity plot 1
Jan 1, 0001
library(tidyverse)
library(wesanderson)
# load the built-in iris data set
data(iris)

# filled contours of the 2D density estimate of sepal length vs. sepal width;
# the fill maps to the computed `level` variable, referenced with after_stat()
# (the older stat() helper is deprecated)
p <- ggplot(iris) +
  stat_density_2d(
    aes(Sepal.Length, Sepal.Width, fill = after_stat(level)),
    geom = 'polygon'
  ) +
  theme_bw() +
  # only x and y are labelled; no size aesthetic is mapped in this plot
  labs(x = 'Sepal length', y = 'Sepal width') +
  scale_fill_gradientn(
    colours = wes_palette('Zissou1', 21, type = 'continuous'),
    guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black')
  ) +
  theme(legend.position = 'right')
ggsave('1.png', p, height = 5, width = 6)
library(tidyverse)
library(wesanderson)
# load the built-in iris data set
data(iris)

# filled contours of the 2D density estimate of sepal length vs. sepal width;
# the fill maps to the computed `level` variable, referenced with after_stat()
# (the older stat() helper is deprecated)
p <- ggplot(iris) +
  stat_density_2d(
    aes(Sepal.Length, Sepal.Width, fill = after_stat(level)),
    geom = 'polygon'
  ) +
  theme_bw() +
  # only x and y are labelled; no size aesthetic is mapped in this plot
  labs(x = 'Sepal length', y = 'Sepal width') +
  scale_fill_gradientn(
    colours = wes_palette('Zissou1', 21, type = 'continuous'),
    guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black')
  ) +
  theme(legend.position = 'right')
ggsave('1.png', p, height = 5, width = 6)