Roman Hillje

Posts

Distribution plot 5

Jan 1, 0001

Data (CSV format):

,cell_line-1,cell_line-2,cell_line-3,cell_line-4,cell_line-5,cell_line-6,cell_line-7,cell_line-8
BCL2L1,-4.3,-2.545,-0.915,-0.442,-3.16,-3.035,-4.67,-3.99
CFLAR,-0.945,-2.665,0.396,-1.85,-3.665,-1.97,-3.995,-4.3
NXF1,-1.3,-1.231,-1.3,-0.392,-1.756,-1.488,-2.57,-2.835
COPA,-0.318,-1.88,-0.447,-0.85,-2.105,-2.085,-0.64,-2.91
EIF4A3,-2.86,-0.692,0.29,-0.345,-2.025,-0.186,-2.72,-1.784
HSPA5,-1.365,-2.595,-0.221,-1.057,-2.87,-2.3,-0.32,-0.53
LOC100507462,-0.579,-1.575,-0.166,-1.74,-0.717,0.236,-3.21,-3.85
COPB1,-1.116,-2.21,-0.533,-0.612,-3.025,-2.315,-0.168,-1.035
COPB2,-0.847,-1.31,-0.573,-0.416,-1.126,-1.308,-0.555,-1.355
SFPQ,-1.6,-0.792,-0.93,-1.027,-0.384,-0.352,-1.505,-1.738

Plot:

library(tidyverse)

data <- read_csv("data.csv") %>%
  pivot_longer(cols=c(2:length(.)), names_to = "cell_line", values_to = "log2FC")

colnames(data)[1] = "gene"

gene_order <- data |> group_by(gene) |> summarise(median = median(log2FC)) |>
  arrange(median) |> pull(gene)

data |>
  mutate(gene = factor(gene, levels=gene_order)) |>
  ggplot(aes(gene, log2FC, group=gene, color=cell_line)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "#34495e") +
  geom_point() +
  stat_summary(fun = median, fun.min = median, fun.max = median,
               geom = "crossbar", width = 0.5, show.legend = FALSE) +
  scale_color_brewer(name = "Cell line", palette = "Set1") +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major.x = element_blank()
  )

ggsave("5.png", width = 6, height = 4)

Dose-response curve

Jan 1, 0001

The code for this plot can be found in this blog post.

Dot plot

Jan 1, 0001

More context (and code) for this plot can be found in my scRNA-seq workflow in the chapter “Expression of individual genes”.

library(tidyverse)
library(Seurat)

# load a single cell expression data set (generated in the lab I work at)
seurat <- readRDS('seurat.rds')

# cells will be grouped by clusters that they have been assigned to
cluster_ids <- levels(seurat@meta.data$seurat_clusters)

# select a set of genes for which we want to show expression
genes_to_show <- seurat@misc$marker_genes$by_cluster %>%
  group_by(cluster) %>%
  arrange(p_val_adj) %>%
  slice(1) %>%
  pull(gene)

# for every cluster-gene combination, calculate the average expression across
# all cells and then transform the data into a data frame
expression_levels_per_cluster <- vapply(
    cluster_ids, FUN.VALUE = numeric(length(cluster_ids)), function(x) {
      cells_in_current_cluster <- which(seurat@meta.data$seurat_cluster == x)
      Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_cluster])
    }
  ) %>%
  t() %>%
  as.data.frame() %>%
  mutate(cluster = rownames(.)) %>%
  select(cluster, everything()) %>%
  pivot_longer(
    cols = c(2:ncol(.)),
    names_to = 'gene'
  ) %>%
  rename(expression = value) %>%
  mutate(id_to_merge = paste0(cluster, '_', gene))

# for every cluster-gene combination, calculate the percentage of cells in the
# respective group that has at least 1 transcript (this means we consider it
# as expressing the gene) and then transform the data into a data frame
percentage_of_cells_expressing_gene <- vapply(
    cluster_ids, FUN.VALUE = numeric(length(cluster_ids)), function(x) {
      cells_in_current_cluster <- which(seurat@meta.data$seurat_cluster == x)
      Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_cluster] != 0)
    }
  ) %>%
  t() %>%
  as.data.frame() %>%
  mutate(cluster = rownames(.)) %>%
  select(cluster, everything()) %>%
  pivot_longer(
    cols = c(2:ncol(.)),
    names_to = 'gene'
  ) %>%
  rename(cell_count = value) %>%
  left_join(
    .,
    seurat@meta.data %>%
      group_by(seurat_clusters) %>%
      tally() %>%
      rename(cluster = seurat_clusters),
    by = 'cluster') %>%
  mutate(
    id_to_merge = paste0(cluster, '_', gene),
    percent_cells = cell_count / n
  )

# merge the two data frames created before and plot the data
p <- left_join(
    expression_levels_per_cluster,
    percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
    by = 'id_to_merge'
  ) %>%
  mutate(cluster = factor(cluster, levels = rev(cluster_ids))) %>%
  ggplot(aes(gene, cluster)) +
  geom_point(aes(color = expression, size = percent_cells)) +
  scale_color_distiller(
    palette = 'Reds',
    direction = 1,
    name = 'Log-normalised\nexpression',
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  ) +
  scale_size(name = 'Percent\nof cells', labels = scales::percent) +
  labs(y = 'Cluster', color = 'Expression') +
  coord_fixed() +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
ggsave('4.png', p, height = 7, width = 8)

Gene set enrichment results

Jan 1, 0001

Context and code for this plot can be found in my scRNA-seq workflow in the chapter “Gene set enrichment analysis”.

Heatmap 1

Jan 1, 0001

library(tidyverse)
library(Seurat)
library(ComplexHeatmap)
library(circlize)
library(gridExtra)
library(ggplotify)

# create color palette from flatuicolors.com
colors_dutch <- c(
  '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67',
  '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471',
  '#EE5A24','#009432','#0652DD','#9980FA','#833471',
  '#EA2027','#006266','#1B1464','#5758BB','#6F1E51'
)
colors_spanish <- c(
  '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2',
  '#2c2c54','#474787','#aaa69d','#227093','#218c74',
  '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79',
  '#b33939','#cd6133','#84817a','#cc8e35','#ccae62'
)
custom_colors <- c(colors_dutch, colors_spanish)

# load a single cell expression data set (generated in the lab I work at)
seurat <- readRDS('seurat.rds')

# calculate average expression value for all variable genes for each cluster
average_expression_profiles_by_cluster <- seurat@assays$RNA@data[seurat@assays$RNA@var.features,] %>%
  t() %>%
  as.matrix() %>%
  as_tibble() %>%
  mutate(cluster = seurat@meta.data$seurat_clusters) %>%
  select(cluster, everything()) %>%
  group_by(cluster) %>%
  summarize_all(~mean(.))

# calculate Spearman correlation matrix
correlation_matrix <- average_expression_profiles_by_cluster %>%
  select(-1) %>%
  as.matrix() %>%
  t() %>%
  cor(method = 'spearman')

# assign row and column names
rownames(correlation_matrix) <- levels(seurat@meta.data$seurat_clusters)
colnames(correlation_matrix) <- levels(seurat@meta.data$seurat_clusters)

# save cluster names for later
cluster <- rownames(correlation_matrix)

# assign a color to each cluster
colors_for_clusters <- c(custom_colors$discrete[1:length(cluster)])
names(colors_for_clusters) <- cluster

# create annotation function
func_cell_cluster <- function(i, j, x, y, width, height, fill) {
  grid.text(cluster[j], x = x, y = y, gp = gpar(fontsize = 8))
}

# create main heatmap
ht_matrix <- Heatmap(
  correlation_matrix,
  name = 'Spearman\ncorrelation',
  col = colorRamp2(
    c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
    c('#00007F', 'blue', '#007FFF', 'cyan', '#7FFF7F', 'yellow', '#FF7F00', 'red', '#7F0000')
  ),
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = TRUE,
  show_column_names = TRUE,
  heatmap_legend_param = list(
    title = 'Spearman correlation',
    legend_height = unit(6, 'cm'),
    legend_width = unit(1, 'cm'),
    title_position = 'lefttop-rot',
    border = 'black'
  )
)

# create heatmap for annotation of clusters
ht_cluster <- Heatmap(
  cluster,
  name = 'Cluster',
  cell_fun = func_cell_cluster,
  show_row_names = FALSE,
  show_column_names = FALSE,
  width = unit(15, 'mm'),
  col = colors_for_clusters,
  show_heatmap_legend = FALSE,
  top_annotation = HeatmapAnnotation(
    cn = anno_text('Cluster', rot = 0, just = 'center', gp = gpar(fontface = 'bold')),
    height = max_text_height('Cluster')
  )
)

# plot
p <- as.ggplot(grid.grabExpr(draw(ht_matrix + ht_cluster)))
ggsave('1.png', p, height = 6, width = 7)

Intensity plot 1

Jan 1, 0001

library(tidyverse)
library(wesanderson)

data(iris)

p <- ggplot(iris) +
  stat_density_2d(aes(Sepal.Length, Sepal.Width, fill = stat(level)), geom = 'polygon') +
  theme_bw() +
  labs(x = 'Sepal length', y = 'Sepal width', size = 'Petal width') +
  scale_fill_gradientn(
    colours = wes_palette('Zissou1', 21, type = 'continuous'),
    guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black')
  ) +
  theme(legend.position = 'right')
ggsave('1.png', p, height = 5, width = 6)