Posts

Scatter plot 4

Jan 1, 0001

library(tidyverse)

data(iris)

correlation_pearson <- cor(iris$Sepal.Length, iris$Petal.Length, method = 'pearson')
correlation_spearman <- cor(iris$Sepal.Length, iris$Petal.Length, method = 'spearman')

label <- list(
  paste0('Pearson~italic(R)^2 == ', round(correlation_pearson, 3)),
  paste0('Spearman~italic(R)^2 == ', round(correlation_spearman, 3))
)

p <- iris %>%
  mutate(Petal.Width.larger = Petal.Width * 1.5) %>%
  ggplot(aes(Sepal.Length, Petal.Length, color = Sepal.Width, size = Petal.Width)) +
  geom_point(aes(size = Petal.Width.larger), color = 'black', show.legend = FALSE) +
  geom_point() +
  geom_point() +
  geom_smooth(method = 'lm', formula = y~x, color = 'black', show.legend = FALSE) +
  scale_color_distiller(
    palette = 'YlOrRd', name = 'Sepal width', direction = 1,
    guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black')
  ) +
  labs(x = 'Sepal length', y = 'Petal length', size = 'Petal width') +
  theme_bw() +
  annotate('text', x = 4.5, y = c(7.75, 7.25), hjust = 0, parse = TRUE, label = label)
ggsave('4.png', p, height = 5, width = 6)

Scatter plot 5

Jan 1, 0001

library(tidyverse)
library(ggExtra)
library(viridis)

cells <- tibble(
  cell = colnames(transcripts$raw$merged),
  nCount = colSums(transcripts$raw$merged),
  nFeature = colSums(transcripts$raw$merged != 0)
)

cells %>% glimpse()
# Observations: 6,000
# Variables: 3
# $ cell     <chr> "TGCCCATGTGTTCGAT-A", "ACGGCCACAAGAAGAG-A", "CTGTGCTGTCAGAAG…
# $ nCount   <dbl> 5784, 6037, 4655, 42398, 14765, 4621, 14368, 4570, 6635, 113…
# $ nFeature <dbl> 1655, 1397, 1300, 5387, 2548, 295, 2929, 1056, 1720, 638, 27…

p <- ggplot(cells, aes(nCount, nFeature)) +
  geom_pointdensity(adjust = 500) +
  scale_x_continuous(name = 'Number of transcripts', labels = scales::comma) +
  scale_y_continuous(name = 'Numner of expressed genes', labels = scales::comma) +
  scale_color_viridis() +
  theme_bw() +
  theme(legend.position = 'none')
p <- ggMarginal(p, type = 'histogram', fill = 'grey', bins = 50)

ggsave('5.png', p, height = 4, width = 6)

Showing compositions in a bar plot

Jan 1, 0001

library(tidyverse)
library(patchwork)
library(plyr)
library(nycflights13)

colors_dutch <- c(
  '#FFC312','#C4E538','#12CBC4','#FDA7DF','#ED4C67',
  '#F79F1F','#A3CB38','#1289A7','#D980FA','#B53471',
  '#EE5A24','#009432','#0652DD','#9980FA','#833471',
  '#EA2027','#006266','#1B1464','#5758BB','#6F1E51'
)

colors_spanish <- c(
  '#40407a','#706fd3','#f7f1e3','#34ace0','#33d9b2',
  '#2c2c54','#474787','#aaa69d','#227093','#218c74',
  '#ff5252','#ff793f','#d1ccc0','#ffb142','#ffda79',
  '#b33939','#cd6133','#84817a','#cc8e35','#ccae62'
)

custom_colors <- c(colors_dutch, colors_spanish)

data(flights)

t <- flights %>%
  dplyr::select(origin, carrier) %>%
  group_by(origin, carrier) %>%
  tally() %>%
  ungroup() %>%
  filter(n > 2000) %>%
  mutate(carrier = factor(carrier, levels = carrier %>% unique() %>% sort()))

t_labels_number <- t %>%
  mutate(carrier = as.character(carrier)) %>%
  mutate(carrier = factor(carrier, levels = carrier %>% unique() %>% sort(decreasing = TRUE))) %>%
  arrange(origin, carrier) %>%
  plyr::ddply('origin', transform, pos = cumsum(n) - (0.5 * n))

p1 <- ggplot() +
  geom_bar(data = t, aes(origin, n, fill = carrier), color = 'black', stat = 'identity') +
  geom_label(
    data = t_labels_number,
    aes(x = origin, y = pos, label = paste0(carrier, ': ', scales::comma(n)), group = carrier),
    alpha = 0.75,
    size = 3,
    fill = 'white',
    color = 'black'
  ) +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = scales::comma, expand = c(0.01,0)) +
  labs(x = 'Airport of departure', y = 'Number of flights', fill = 'Carrier') +
  theme_bw() +
  theme(
    legend.position = 'none',
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

inv_cumsum <- function(x) {
  sum(x) - cumsum(x) + x
}

t_labels_percent <- plyr::ddply(
  t, 'origin', plyr::mutate,
  prop = n / sum(n),
  cumprop = inv_cumsum(n) / sum(n),
  ylabel = (inv_cumsum(n) - n / 2) / sum(n)
)

p2 <- ggplot() +
  geom_bar(data = t, aes(origin, n, fill = carrier), color = 'black', position = 'fill', stat = 'identity') +
  geom_label(
    data = t_labels_percent,
    aes(x = origin, y = ylabel, label = paste0(carrier, ': ', scales::percent(prop, accuracy = 0.1)), group = carrier),
    alpha = 0.75,
    size = 3,
    fill = 'white',
    color = 'black'
  ) +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = scales::percent_format(), expand = c(0.01,0)) +
  ggtitle('') +
  labs(x = 'Airport of departure', y = 'Percent of flights', fill = 'Carrier') +
  theme_bw() +
  theme(
    legend.position = 'none',
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

ggsave('flights_by_airport_and_carrier.png', p1 + p2, height = 9, width = 10)

Time zones

Jan 1, 0001

library(tidyverse)
library(sf)
library(lwgeom)
library(patchwork)
library(wesanderson)

blankbg <- theme(
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.background = element_blank(),
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  plot.background = element_blank(),
)

# download SHP files for country borders and time zones from
# https://www.naturalearthdata.com/downloads/
countries <- read_sf('ne_10m_admin_0_countries.shp')
time_zones <- read_sf('ne_10m_time_zones.shp')

p <-
  ggplot() +
  geom_sf(
    data = countries, fill = 'grey', color = 'black', alpha = 1, size = 0.1,
    show.legend = FALSE
  ) +
  geom_sf(
    data = time_zones %>% mutate(name = as.integer(name)), aes(fill = name),
    alpha = 0.8, color = 'black', size = 0.35, show.legend = FALSE
  ) +
  blankbg +
  scale_fill_gradientn(
    colours = wes_palette('Zissou1', 21, type = 'continuous')
  ) +
  theme(plot.background = element_rect(fill = 'white', color = NA))

ggsave('time_zones_1.png',p , width = 9.5, height = 5)

World map

Jan 1, 0001

library(tidyverse)
library(cowplot)
library(sf)
library(lwgeom)
library(rworldmap)

world_sf <- st_as_sf(getMap(resolution = "low"))

crs_wintri <- "+proj=wintri +datum=WGS84 +no_defs +over"
world_wintri <- st_transform_proj(world_sf, crs = crs_wintri)

# we will highlight the countries from which people attended our event
countries_to_highlight <- c(
  'Austria', 'Bosnia and Herzegovina', 'China', 'Colombia', 'Croatia',
  'Czech Republic', 'Denmark', 'France', 'Germany', 'Greece', 'India',
  'Indonesia', 'Iran', 'Ireland', 'Israel', 'Italy', 'Mexico', 'Netherlands',
  'Nigeria', 'Poland', 'Romania', 'Russia', 'Serbia', 'Spain', 'Sweden',
  'Switzerland', 'UK'
)

plot_data <- world_wintri %>%
  mutate(highlight = NAME %in% countries_to_highlight)

grat_wintri <-
  st_graticule(lat = c(-89.9, seq(-80, 80, 20), 89.9)) %>%
  st_transform_proj(crs = crs_wintri)

p <- ggplot() +
  geom_sf(data = grat_wintri, color = "gray30", size = 0.1) +
  geom_sf(
    data = subset(plot_data, highlight == FALSE),
    aes(fill = highlight),
    color = "#1e92bd", size = 0.2, show.legend = FALSE
  ) +
  geom_sf(
    data = subset(plot_data, highlight == TRUE),
    aes(fill = highlight),
    color = 'white', size = 0.3, show.legend = FALSE
  ) +
  coord_sf(datum = NULL) +
  scale_fill_manual(values = c("#9fd4e1", "#1e92bd")) +
  scale_color_manual(values = c("#1e92bd", "white")) +
  theme_map()
ggsave('world_1.png', p, height = 4, width = 6.43)