Scraping

library(tidyverse)
library(rvest)

get_wimbledon <- function(year, gender) {
  url <- paste0("https://en.wikipedia.org/wiki/", 
                year,
                "_Wimbledon_Championships_-_",
                gender,
                "'s_Singles")
  
  info <- read_html(url) %>%
    html_text() %>%
    str_replace_all("\\)", "\\)\n") %>%
    read_lines()
  
  result <- tibble(info) %>%
    filter(str_detect(info, "^[0-9]{2}\\.")) %>%
    mutate(info = str_squish(info),
           year = year,
           gender = gender)
}

mens <- tibble()
for (year in 2001:2019) {
  mens <- mens %>% 
    bind_rows(get_wimbledon(year, "Men"))
}

womens <- tibble()
for (year in 2001:2019) {
  womens <- womens %>% 
    bind_rows(get_wimbledon(year, "Women"))
}

wimbledon <- mens %>% 
  bind_rows(womens) %>% 
  mutate(
    seed = as.numeric(str_remove(info, "\\..*")),
    name = str_sub(info,
                   str_locate(info, "\\.")[, 1] + 1,
                   str_locate(info, "\\(")[, 1] - 1),
    result = case_when(
      str_detect(info, "Champion") ~ "W",
      str_detect(info, "Final") ~ "F",
      str_detect(info, "Semifinal") ~ "SF",
      str_detect(info, "Quarterfinal") ~ "QF",
      str_detect(info, "Fourth") ~ "4R",
      str_detect(info, "Third") ~ "3R",
      str_detect(info, "Second") ~ "2R",
      str_detect(info, "First") ~ "1R",
      str_detect(info, "Withdrew") ~ "WD",
      TRUE ~ as.character(info),
    ),
    result = factor(result,
                    levels = c("WD", "1R", "2R", "3R", "4R", "QF", "SF", "F", "W"))
  )

Plotting

p1 <- wimbledon %>%
  filter(gender == "Women") %>% 
  ggplot(aes(x = seed, y = year, fill = result, group = name)) +
  geom_tile(color = "white") +
  scale_y_continuous(breaks = 2001:2019) +
  scale_x_continuous(breaks = 1:34, position = "top") +
  scale_fill_brewer(palette = "PRGn") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5, face = "italic"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.margin = margin(-10, 15, 5, 0),
    legend.key.size = unit(0.4, "cm"),
    axis.text.y = element_text(margin = margin(0, -20, 0, 0)),
    axis.text.x.top = element_text(margin = margin(0, 0, -9, 0))
  ) +
  guides(fill = guide_legend(nrow = 1)) + 
  labs(
    x = "",
    y = "",
    title = "\nWimbledon Women's Seedings and Performances",
    subtitle = "(since seedings increased to 32 in 2001)",
    caption = "*Seeds higher than 32 were given to replacements when seeded players withdrew after the draw has been made"
  )
p1

p2 <- wimbledon %>%
  filter(gender == "Men") %>% 
  ggplot(aes(x = seed, y = year, fill = result, group = name)) +
  geom_tile(color = "white") +
  scale_y_continuous(breaks = 2001:2019) +
  scale_x_continuous(breaks = 1:35, position = "top") +
  scale_fill_brewer(palette = "PRGn") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5, face = "italic"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.margin = margin(-10, 15, 5, 0),
    legend.key.size = unit(0.4, "cm"),
    axis.text.y = element_text(margin = margin(0, -20, 0, 0)),
    axis.text.x.top = element_text(margin = margin(0, 0, -9, 0))
  ) +
  guides(fill = guide_legend(nrow = 1)) + 
  labs(
    x = "",
    y = "",
    title = "\nWimbledon Men's Seedings and Performances",
    subtitle = "(since seedings increased to 32 in 2001)",
    caption = "*Seeds higher than 32 were given to replacements when seeded players withdrew after the draw has been made"
  )
p2

library(plotly)
ggplotly(p1) %>% layout(showlegend = FALSE)
ggplotly(p2) %>% layout(showlegend = FALSE)