Analysing Transcontinental race 2022. Part 2: Leaderboards and route choices

data
cycling
trc
trcno8
web scraping
Tekijä
Julkaistu

4. syyskuuta 2022

In the Part 1 I demonstrated how to obtain the data from the tracking service, and in this part I will analyse route choices between control points.

[/kode]
library(dplyr)
library(sf)
library(ggplot2)
library(tidyr)
library(reactable)
library(lubridate)
library(leaflet)
library(leaflet.extras)

In order to create the leaderboards, we first need the locations of the control points. These are sourced manually from the tracking service, and are formed into a spatial data below.

[/kode]
cps <- structure(list(cp = c("Start", "CP1", "CP2", "CP3", "CP4", 
"Finish"), loc = c(" Muur-Kapelmuur, Geraardsbergen, Belgium", 
" Krupka, Czech Republic", " Passo di Gavia, Italy", "Durmitor National Park, Montenegro", 
" Drumul Strategic Transalpina, Romania", " Burgas, Bulgaria"
), geom = structure(list(structure(c(3.88277, 50.77159), class = c("XY", 
"POINT", "sfg")), structure(c(13.856462, 50.706776), class = c("XY", 
"POINT", "sfg")), structure(c(10.491406, 46.412645), class = c("XY", 
"POINT", "sfg")), structure(c(18.84155, 43.153476), class = c("XY", 
"POINT", "sfg")), structure(c(23.636142, 45.434891), class = c("XY", 
"POINT", "sfg")), structure(c(27.53645, 42.56094), class = c("XY", 
"POINT", "sfg"))), n_empty = 0L, crs = structure(list(input = "+proj=longlat +datum=WGS84", 
    wkt = "GEOGCRS[\"unknown\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]],\n        ID[\"EPSG\",6326]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433],\n        ID[\"EPSG\",8901]],\n    CS[ellipsoidal,2],\n        AXIS[\"longitude\",east,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433,\n                ID[\"EPSG\",9122]]],\n        AXIS[\"latitude\",north,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433,\n                ID[\"EPSG\",9122]]]]"), class = "crs"), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 3.88277, ymin = 42.56094, 
xmax = 27.53645, ymax = 50.77159), class = "bbox"))), row.names = c(NA, 
6L), sf_column = "geom", agr = structure(c(cp = NA_integer_, 
loc = NA_integer_), class = "factor", levels = c("constant", 
"aggregate", "identity")), class = c("sf", "tbl_df", "tbl", "data.frame"
))

Creating leaderboards from replay data

In order to find out the positions at each control point, I will create a 5 km buffer around each point, and claim the rider first crossing the buffer the leader. Because of this method, the times and time differences may differ from the one observed in place.

[/kode]
start_time <- as.POSIXct("2022-07-24 22:00:00", tz = "CEST")


cps_buff <- sf::st_buffer(cps, dist = 5000)

cpoints <- unique(cps$cp)
cpoints <- cpoints[cpoints != "Start"]
lst <- list()
for (i in seq(cpoints)){
lst[[i]] <- dat_all_points %>% 
  st_intersection(cps_buff %>% filter(cp == cpoints[i])) %>% 
  st_drop_geometry() %>% 
  group_by(riderName) %>% 
  filter(time == min(time))  
}
leaderdata <- do.call("bind_rows", lst) %>%
  filter(!grepl("Car", riderName)) %>% 
  select(time,riderName,teamNumber,cp) %>% 
  group_by(cp) %>% 
  arrange(time) %>% 
  mutate(position = 1:n()) %>% 
  select(cp,position,riderName,time,teamNumber) %>% 
  # compute the time difference from start
  mutate(behind = difftime(time, min(time), units = "secs"),
         behind = seconds_to_period(behind),
        behind = sprintf('%01dd %02d:%02d', behind@day, behind@hour, minute(behind)),
         race_time = difftime(time, start_time, units = "secs"),
         race_time = seconds_to_period(race_time),
                 race_time = sprintf('%02dd %02d:%02d', race_time@day, race_time@hour, minute(race_time)))

Leaderboard at each control points

Leaderboard at CP1

[/kode]
rmarkdown::paged_table(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
                         filter(cp == "CP1"))

Leaderboard at CP2

[/kode]
rmarkdown::paged_table(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
                         filter(cp == "CP2"))

Leaderboard at CP3

[/kode]
rmarkdown::paged_table(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
                         filter(cp == "CP3"))

Leaderboard at CP4

[/kode]
rmarkdown::paged_table(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
                         filter(cp == "CP4"))

Leaderboard at Finish

[/kode]
rmarkdown::paged_table(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
                         filter(cp == "Finish"))
[/kode]
leaderdata_leg <- do.call("bind_rows", lst) %>%
  filter(!grepl("Car", riderName)) %>% 
  filter(data_quality == "good") %>% 
  select(time,riderName,teamNumber,cp) %>% 
  # filter(grepl("Kainu", riderName)) %>% 
  arrange(cp) %>% 
  group_by(riderName) %>%
  mutate(split_time = difftime(time, lag(time), units = "secs"),
         split_time = seconds_to_period(split_time)) %>% 
  filter(!is.na(split_time)) %>% 
  mutate(leg = case_when(
    cp == "CP2" ~ "2. CP1 to CP2",
    cp == "CP3" ~ "3. CP2 to CP3",
    cp == "CP4" ~ "4. CP3 to CP4",
    cp == "Finish" ~ "5. CP4 to Finish"
  )) %>% 
  ungroup() %>%
  filter(!is.na(leg), split_time > 0) %>%
  # filter(split_time == min(split_time))
  # arrange(split_time)
  group_by(leg) %>% 
  # filter(leg == "CP1 to CP2") %>% 
  # summarise(split_time = min(split_time))
  arrange(split_time) %>% 
  mutate(position = 1:n(),
         behind = split_time - split_time[1]
         ) %>% 
  select(leg,position,riderName,teamNumber,split_time,behind) %>%
  mutate(
      split_time = sprintf('%01dd %02d:%02d', split_time@day, split_time@hour, minute(split_time)),
      behind = sprintf('%01dd %02d:%02d', behind@day, behind@hour, minute(behind))
  ) %>% 
  ungroup() %>% 
  arrange(leg,position)

# reactable::reactable(leaderdata_leg %>% filter(grepl("^2.", leg)),
#           filterable = TRUE)

Route choices between legs

Route choices are split into legs by control points, and analyzed by each leg. Route choices are analyzed based on speed, distance and elevation gain.

Fastest routes by leg

[/kode]
ld <- leaderdata %>% 
  ungroup() %>% 
  distinct(riderName,teamNumber,cp,time) %>% 
  pivot_wider(names_from = cp, values_from = time)
  
# dat_all_points_fin <- dat_all_points %>% 
#   filter(grepl("Finland", country))

dat_all_points %>%
  filter(data_quality == "good",
         scratched == 0) %>%
  left_join(ld) %>% 
  mutate(leg = case_when(
    time < CP1 ~ "1. Start to CP1",
    time >= CP1 & time < CP2 ~ "2. CP1 to CP2",
    time >= CP2 & time < CP3 ~ "3. CP2 to CP3",
    time >= CP3 & time < CP4 ~ "4. CP3 to CP4",
    time >= CP4  ~ "5. CP4 to Finish"
  )) -> dat_all_points_leg

dat_all_routes2 <- dat_all_points_leg %>% 
  filter(!is.na(leg)) %>%
group_by(riderName,leg) %>% 
  summarize(m = mean(row_number()),
            leg = leg[1],
            country = country[1],
            groupHeaderLabel = groupHeaderLabel[1],
            movingTimePercentage = movingTimePercentage[1],
            totalDistance = max(totalDistance),
            finishTime = min(finishTime),
            do_union=FALSE) %>%
  st_cast("MULTILINESTRING") %>% 
  ungroup()

leaderdata_leg_total <- bind_rows(
leaderdata_leg,
# add start to CP1
leaderdata %>% 
  ungroup() %>% 
  filter(cp == "CP1") %>% 
  select(position, riderName,teamNumber,race_time,behind) %>% 
  rename(split_time = race_time) %>% 
  mutate(leg = "1. Start to CP1")
)

dat_all_routes_leg <- left_join(dat_all_routes2, 
                                leaderdata_leg_total) %>% 
  mutate(lab = paste0(stringr::str_pad(position, width = 2, pad = 0), ". ", stringr::word(riderName)))



legs <- sort(unique(dat_all_routes_leg$leg))
# lst <- list()
# for(i in seq(legs)){
# p <- ggplot(dat_all_routes_leg %>% 
#          filter(leg == legs[i],
#                 position %in% 1:9
#                 ), 
#          aes(color = lab)) + 
#   geom_sf(alpha = .8) +
#   scale_color_viridis_d() +
#   labs(title = legs[i], color = NULL) +
#   theme(panel.grid.major = element_blank(),
#         panel.grid.minor = element_blank(),
#         axis.text.x = element_blank(),
#         axis.text.y = element_blank())
# # if (i != 1) p <- p + theme(legend.position = "none")
# lst[[i]] <- p
# }
# patchwork::wrap_plots(lst, ncol = 1)

ddat <- dat_all_linestring %>% 
  filter(grepl("Mäkipää|Marin", riderName))
ddat2 <- dat_all_points %>% 
  filter(grepl("Mäkipää|Marin", riderName))


create_leaflet_fastest <- function(legi = 1){
ddat <- dat_all_routes_leg %>% 
         filter(leg == legs[legi],
                position %in% 1:10
                )

pal <- leaflet::colorFactor(palette = "Set1", domain = ddat$riderName)
labels <- sprintf(
  "%s<br/>%s behind",
  ddat$riderName, ddat$behind
) %>% lapply(htmltools::HTML)

leaflet(ddat) %>% 
  addProviderTiles(provider = providers$OpenTopoMap, options = providerTileOptions(opacity = .4)) %>%
  addPolylines(color = ~pal(riderName),
              # label = labels,
              # labelOptions = labelOptions(noHide = FALSE,
              #                           style = list("font-family" = "Lato",
              #                           "font-size" = "1.0em",
              #                           "line-height" = "1",
              #                           "font-weight" = "700",
              #                           "background-color" = "rgba(0,0,0,0)",
              #                           "border-color" = "rgba(0,0,0,0)"))
  ) %>% 
  addLegend(pal = pal, 
            values = ~riderName, opacity = 0.7, title = "riderName",
            position = "bottomright")  %>% 
  addCircleMarkers(data = ddat2,color = ~pal(riderName)) %>% 
  addFullscreenControl()
}
[/kode]
# reactable(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
#             filter(cp == "CP1"),
#           filterable = TRUE)
# 
rmarkdown::paged_table(leaderdata %>% select(cp,position,riderName,teamNumber,race_time, behind) %>% 
             filter(cp == "CP1"))
[/kode]
create_leaflet_fastest(legi = 1)
[/kode]
rmarkdown::paged_table(leaderdata_leg %>% filter(grepl("^2.", leg)))
[/kode]
create_leaflet_fastest(legi = 2)
[/kode]
rmarkdown::paged_table(leaderdata_leg %>% filter(grepl("^3.", leg)))
[/kode]
create_leaflet_fastest(legi = 3)
[/kode]
rmarkdown::paged_table(leaderdata_leg %>% filter(grepl("^4.", leg)))
[/kode]
create_leaflet_fastest(legi = 4)
[/kode]
rmarkdown::paged_table(leaderdata_leg %>% filter(grepl("^5.", leg)))
[/kode]
create_leaflet_fastest(legi = 5)

Shortest and longest routes by leg

Here are the top 5 shortest and longest routes for each leg

[/kode]
dat_all_routes_leg_distance <- 
  dat_all_routes_leg %>% 
  filter(!is.na(finishTime)) %>% 
  mutate(distance = as.integer(st_length(geometry)/1000)) %>% 
  group_by(leg) %>% 
  arrange(distance) %>% 
  mutate(rank_dist = 1:n()) %>%
  mutate(lab = paste0(stringr::str_pad(rank_dist, width = 3, pad = 0), ". ", stringr::word(riderName), " ", distance, "km")) %>% 
  ungroup()

legs <- sort(unique(dat_all_routes_leg_distance$leg))
# lst <- list()
# for(i in seq(legs)){
# p <- ggplot(dat_all_routes_leg_distance %>% 
#          filter(leg == legs[i]) %>% 
#          filter(rank_dist %in% c(1:5,(nrow(.)-4):nrow(.))), 
#          aes(color = lab)) + 
#   geom_sf(alpha = .8) +
#   scale_color_viridis_d() +
#   labs(title = legs[i], color = NULL) +
#   theme(panel.grid.major = element_blank(),
#         panel.grid.minor = element_blank(),
#         axis.text.x = element_blank(),
#         axis.text.y = element_blank())
# lst[[i]] <- p
# }
# patchwork::wrap_plots(lst, ncol = 1)

create_leaflet_shortest <- function(legi = 1){

  ddat <- dat_all_routes_leg_distance %>% 
         filter(grepl(paste0("^", legi), leg)) %>% 
         filter(rank_dist %in% c(1:5,(nrow(.)-4):nrow(.)))

pal <- leaflet::colorFactor(palette = "Set1", domain = ddat$lab)
labels <- sprintf(
  "%s. %s %s km",
  
  stringr::str_pad(ddat$rank_dist, width = 3, pad = 0), ddat$riderName, ddat$distance
) %>% lapply(htmltools::HTML)

leaflet(ddat) %>% 
  addProviderTiles(provider = providers$OpenTopoMap, options = providerTileOptions(opacity = .4)) %>% 
  addPolylines(color = ~pal(lab),
              label = labels,
              labelOptions = labelOptions(noHide = FALSE,
                                        style = list("font-family" = "Lato",
                                        "font-size" = "1.0em",
                                        "line-height" = "1",
                                        "font-weight" = "700",
                                        "background-color" = "rgba(0,0,0,0)",
                                        "border-color" = "rgba(0,0,0,0)"))
  ) %>% 
  addLegend(pal = pal, 
            values = ~lab, opacity = 0.7, title = "riderName",
            position = "bottomright")  %>% 
  addFullscreenControl()
}
[/kode]
create_leaflet_shortest(legi = 1)
[/kode]
create_leaflet_shortest(legi = 2)
[/kode]
create_leaflet_shortest(legi = 3)
[/kode]
create_leaflet_shortest(legi = 4)
[/kode]
create_leaflet_shortest(legi = 5)

Routes with highest and lowest elevation gain

[/kode]
dat_all_points_leg %>% 
  st_drop_geometry() %>%
  filter(scratched == 0, 
         data_quality == "good") %>%
  group_by(riderName,leg) %>% 
  arrange(time) %>% 
  mutate(gain = elevation-lag(elevation),
         gain = ifelse(gain < 0, 0, gain),
         gain = ifelse(is.na(gain), 0, gain)#,
         ) %>% 
  summarise(total_gain = sum(gain, na.rm = TRUE)) %>% 
  arrange(leg,desc(total_gain)) %>% 
  filter(!is.na(total_gain)) %>% 
  group_by(leg) %>% 
  mutate(climb_rank = 1:n()) %>% 
  ungroup() -> climb_by_leg

Here are the top 6 routes with least total climb for each leg

[/kode]
dat_all_routes_leg_climb <- 
  dat_all_routes_leg %>% 
  left_join(climb_by_leg) %>%
  filter(!is.na(climb_rank)) %>% 
  mutate(lab = paste0(stringr::str_pad(climb_rank, width = 3, pad = 0), ". ", stringr::word(riderName), " ", total_gain, " m")) %>% 
  ungroup()

# legs <- sort(unique(dat_all_routes_leg_climb$leg))
# lst <- list()
# for(i in seq(legs)){
# p <- ggplot(dat_all_routes_leg_climb %>% 
#          filter(leg == legs[i]) %>% 
#          filter(climb_rank %in% c(1:5,(nrow(.)-4):nrow(.))
#                 ), 
#          aes(color = lab)) + 
#   geom_sf(alpha = .8) +
#   scale_color_viridis_d() +
#   labs(title = legs[i], color = NULL) +
#   theme(panel.grid.major = element_blank(),
#         panel.grid.minor = element_blank(),
#         axis.text.x = element_blank(),
#         axis.text.y = element_blank())
# # if (i != 1) p <- p + theme(legend.position = "none")
# lst[[i]] <- p
# }
# patchwork::wrap_plots(lst, ncol = 1)

create_leaflet_elevate <- function(legi = 1){

  ddat <- dat_all_routes_leg_climb %>% 
         filter(grepl(paste0("^", legi), leg)) %>% 
         filter(climb_rank %in% c(1:5,(nrow(.)-4):nrow(.)))

pal <- leaflet::colorFactor(palette = "Set1", domain = ddat$lab)
labels <- sprintf(
  "%s. %s %s km",
  
  stringr::str_pad(ddat$climb_rank, width = 3, pad = 0), ddat$riderName, ddat$total_gain
) %>% lapply(htmltools::HTML)

leaflet(ddat) %>% 
  addProviderTiles(provider = providers$OpenTopoMap, options = providerTileOptions(opacity = .4)) %>% 
  addPolylines(color = ~pal(lab),
              label = labels,
              labelOptions = labelOptions(noHide = FALSE,
                                        style = list("font-family" = "Lato",
                                        "font-size" = "1.0em",
                                        "line-height" = "1",
                                        "font-weight" = "700",
                                        "background-color" = "rgba(0,0,0,0)",
                                        "border-color" = "rgba(0,0,0,0)"))
  ) %>% 
  addLegend(pal = pal, 
            values = ~lab, opacity = 0.7, title = "riderName",
            position = "bottomright")  %>% 
  addFullscreenControl()
}
[/kode]
create_leaflet_elevate(legi = 1)
[/kode]
create_leaflet_elevate(legi = 2)
[/kode]
create_leaflet_elevate(legi = 3)
[/kode]
create_leaflet_elevate(legi = 4)
[/kode]
create_leaflet_elevate(legi = 5)

Uudelleenkäyttö

Viittaus

BibTeX-viittaus:
@online{kainu2022,
  author = {Kainu, Markus},
  title = {Analysing Transcontinental race 2022. Part 2: Leaderboards
    and route choices},
  date = {2022-09-04},
  url = {https://markuskainu.fi/posts/2022-09-04-tcrno8-leaderboards},
  langid = {fi}
}
Viitatkaa tähän teokseen seuraavasti:
Kainu, Markus. 2022. “Analysing Transcontinental race 2022. Part 2: Leaderboards and route choices.” September 4, 2022. https://markuskainu.fi/posts/2022-09-04-tcrno8-leaderboards.