This project compares and classifies gravel bike frames.

Notes on the project

  1. The project uses 2D scatterplots of frame measures to compare all bikes in the database across all sizes and across all frames that are spec’d to my size (generally M or 56, but this varies among makes and models). There is noise in size charts, which adds some arbitrariness to the precise location of a model, especially for the length measures that vary across frame sizes.
  2. The project uses hierarchical trees to give a sense of frame geometry similarity. There are many subjective decisions in tree building. so do not take these as objective or even fixed. The clustering is pretty stable but not rigid – adding a bike can occasionally move a frame from one cluster to another!
  3. The project uses the hierarchical trees to divide the frames spec’d to my size into three classes: “gravel-race”, “gravel-endurance”, and “gravel-trail”. There are multiple subjective decisions on the workflow to this classification.
  4. The 2D scatterplots show the bikes on the boundary between different classes and why new data or different tree algorithms can create slightly different classifications.

Some more notes

  1. This page is disorganized and some measures may be left undefined and analyses unexplained. This is a spare-time project – it will get cleaned and polished.
  2. I don’t have a permanent web location for this. Hoping to get one.

Notes on data

  1. All bike data were taken from manufacturer’s web sites. Some missing data were computed based on other measures or taken from online reviews.
  2. bikinsights.com, geometrygeeks.bike and 99spokes.com are invaluable sites. This project offers a different way of comparing frames.

Some bike geometry links:

The bike geometry Bible - Everything you need to know about the shape of your bike

Frame Geometry Masterclass: Does The Evil Chamois Hagar Make ANY Sense?

MATTER of FACT: How to Understand Gravel Bike Geometry

Advanced Bicycle Frame Geometry: Steering Speed, Weight Distribution, Tipping Angles (YouTube)

knitr::opts_chunk$set(echo = TRUE,
                      message = FALSE,
                      warning = FALSE,
                      knitr.kable.NA = '')
# wrangling packages
library(here) # here makes a project transportable
library(janitor) # clean_names
library(readxl) # read excel, duh!
library(data.table) # magical data frames
library(magrittr) # pipes
library(stringr) # string functions
library(forcats) # factor functions

# analysis packages
library(emmeans) # the workhorse for inference
library(nlme) # gls and some lmm
library(lme4) # linear mixed models
library(lmerTest) # linear mixed model inference
library(afex) # ANOVA linear models
library(glmmTMB) # generalized linear models
library(MASS) # negative binomial and some other functions
library(car) # model checking and ANOVA
library(DHARMa) # model checking
library(mvtnorm)

# graphing packages
library(ggsci) # color palettes
library(ggpubr) # publication quality plots
library(ggforce) # better jitter
library(cowplot) # combine plots
library(knitr) # kable tables
library(kableExtra) # kable_styling tables
library(ggdendro) # dendrogram
library(dendextend) # better dendrogram
library(ggiraph)
library(GGally)

# ggplot_the_model.R packages not loaded above
library(insight)
library(lazyWeave)

# use here from the here package
here <- here::here
# use clean_names from the janitor package
clean_names <- janitor::clean_names
# use transpose from data.table
transpose <- data.table::transpose

# load functions used by this text written by me
# ggplot_the_model.R needs to be in the folder "R"
# if you didn't download this and add to your R folder in your
# project, then this line will cause an error
#source_path <- here("R", "ggplot_the_model.R")
#source(source_path)

data_folder <- "data"
image_folder <- "images"
output_folder <- "output"
pal_okabe_ito <- c(
  "#E69F00",
  "#56B4E9",
  "#009E73",
  "#F0E442",
  "#0072B2",
  "#D55E00",
  "#CC79A7"
)
pal_okabe_ito_blue <- pal_okabe_ito[c(5,6,1,2,3,7,4)] 
pal_okabe_ito_red <- pal_okabe_ito[c(6,5,3,1,2,7,4)] 
pal_okabe_ito_2 <- pal_okabe_ito[c(5,6)]
pal_okabe_ito_3 <- pal_okabe_ito[c(5,6,7)]
pal_okabe_ito_3_light <- pal_okabe_ito[c(1,2,7)]
pal_okabe_ito_4 <- pal_okabe_ito[c(5,6,7,2)]

1 Functions

1.1 General

deg_2_rad <- function(x){
  rad <- x*pi/180
  return(rad)
}

1.2 Dendrogram

# https://atrebas.github.io/post/2019-06-08-lightweight-dendrograms/
dendro_data_k <- function(hc, k) {
  hcdata    <-  ggdendro::dendro_data(hc, type = "rectangle")
  seg       <-  hcdata$segments
  labclust  <-  cutree(hc, k)[hc$order]
  segclust  <-  rep(0L, nrow(seg))
  heights   <-  sort(hc$height, decreasing = TRUE)
  height    <-  mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
  
  for (i in 1:k) {
    xi      <-  hcdata$labels$x[labclust == i]
    idx1    <-  seg$x    >= min(xi) & seg$x    <= max(xi)
    idx2    <-  seg$xend >= min(xi) & seg$xend <= max(xi)
    idx3    <-  seg$yend < height
    idx     <-  idx1 & idx2 & idx3
    segclust[idx] <- i
  }
  
  idx                    <-  which(segclust == 0L)
  segclust[idx]          <-  segclust[idx + 1L]
  hcdata$segments$clust  <-  segclust
  hcdata$segments$line   <-  as.integer(segclust < 1L)
  hcdata$labels$clust    <-  labclust
  
  hcdata
}

set_labels_params <- function(nbLabels,
                              direction = c("tb", "bt", "lr", "rl"),
                              fan       = FALSE) {
  if (fan) {
    angle       <-  360 / nbLabels * 1:nbLabels + 90
    idx         <-  angle >= 90 & angle <= 270
    angle[idx]  <-  angle[idx] + 180
    hjust       <-  rep(0, nbLabels)
    hjust[idx]  <-  1
  } else {
    angle       <-  rep(0, nbLabels)
    hjust       <-  0
    if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
    if (direction %in% c("tb", "rl")) { hjust <- 1 }
  }
  list(angle = angle, hjust = hjust, vjust = 0.5)
}

plot_ggdendro <- function(hcdata,
                          direction   = c("lr", "rl", "tb", "bt"),
                          fan         = FALSE,
                          scale.color = NULL,
                          branch.size = 1,
                          label.size  = 3,
                          nudge.label = 0.01,
                          expand.y    = 0.1) {
  

  direction <- match.arg(direction) # if fan = FALSE
  ybreaks   <- pretty(segment(hcdata)$y, n = 5)
  ymax      <- max(segment(hcdata)$y)
  
  ## branches
  p <- ggplot() +
    geom_segment(data         =  segment(hcdata),
                 aes(x        =  x,
                     y        =  y,
                     xend     =  xend,
                     yend     =  yend,
                     linetype =  factor(line),
                     colour   =  factor(clust)),
                 lineend      =  "round",
                 show.legend  =  FALSE,
                 size         =  branch.size)
  
  ## orientation
  if (fan) {
    p <- p +
      coord_polar(direction = -1) +
      scale_x_continuous(breaks = NULL,
                         limits = c(0, nrow(label(hcdata)))) +
      scale_y_reverse(breaks = ybreaks)
  } else {
    p <- p + scale_x_continuous(breaks = NULL)
    if (direction %in% c("rl", "lr")) {
      p <- p + coord_flip()
    }
    if (direction %in% c("bt", "lr")) {
      p <- p + scale_y_reverse(breaks = ybreaks)
    } else {
      p <- p + scale_y_continuous(breaks = ybreaks)
      nudge.label <- -(nudge.label)
    }
  }
  
  # labels
  labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
  hcdata$labels$angle <- labelParams$angle
  
  p <- p +
    geom_text(data        =  label(hcdata),
              aes(x       =  x,
                  y       =  y,
                  label   =  label,
                  colour  =  factor(clust),
                  angle   =  angle),
              vjust       =  labelParams$vjust,
              hjust       =  labelParams$hjust,
              nudge_y     =  ymax * nudge.label,
              size        =  label.size,
              show.legend =  FALSE)
  
  # theme
    # p <- p + theme_pubr() +
    #   theme(axis.text.x=element_blank())
  
  # colors and limits
  if (!is.null(scale.color)) {
    scale.color <- c("#000000", scale.color) #my addition
    p <- p + scale_color_manual(values = scale.color)
  }
  
  ylim <- -round(ymax * expand.y, 1)
  p    <- p + expand_limits(y = ylim)

  
  p
}
get_tree <- function(geobike_subset,
                  y_cols,
                  scale_it = TRUE,
                  center_it = TRUE,
                  hclust_method = "ward.D2"
){
  # dd <- dist(scale(geobike_subset[, .SD, .SDcols = y_cols],
  #                  center = center_it,
  #                  scale = scale_it),
  #            method = "euclidean")
  # dendro <- hclust(dd, method = hclust_method) %>%
  #   as.dendrogram() %>%
  #   place_labels(paste(geobike_subset[, model],
  #                      geobike_subset[, frame_size],
  #                      sep = ", "))
  
  cluster_data <- geobike_subset[, .SD, .SDcols = y_cols] %>%
    data.frame
  row.names(cluster_data) <- paste(geobike_subset[, model],
                                   geobike_subset[, frame_size],
                                   sep = ", ")
  d_matrix <- dist(scale(cluster_data,
                         center = center_it,
                         scale = scale_it),
                   method = "euclidean")
  hc <- hclust(d_matrix, method = hclust_method)
  return(hc)
  
}

1.3 Bike Geometry

get_axle_crown <- function(){
  
}

get_chainstay_h <- function(chainstay_length, 
                            bottom_bracket_drop){
  # the horizontal component of chainstay length 
  # bbd = bottom bracket drop
  # csl = chainstay length
  chainstay_h <- sqrt(chainstay_length^2 - bottom_bracket_drop^2)
  return(chainstay_h)
}


get_rake_h <- function(offset, hta){
  # the horizontal component of fork offset
  rake_h <- offset/sin(deg_2_rad(hta))
  return(rake_h)
}

get_ht_h <- function(hta, htl){
  # the horizontal component of head_tube
  # hta = head tube angle
  # htl = head tube length
  ht_h <- htl*cos(deg_2_rad(hta))
  return(ht_h)
}
get_ht_v <- function(hta, htl){
  # the vertical component of head_tube
  # hta = head tube angle
  # htl = head tube length
  ht_v <- htl*sin(deg_2_rad(hta))
  return(ht_v)
}


get_fork_angle <- function(offset, axle_crown, head_tube_angle){
  # angle of fork axle-crown axis to horizontal
  # beta is angle of fork axle-crow to offset line
  beta <- acos(offset/axle_crown)*180/pi
  # delta is angle from offset line to horizontal
  delta <- 90 - head_tube_angle
  fork_angle <- beta - delta
  return(rake_h)
}
# Solace OM3 does not specify head tube length. This can be
# computed using specs of Whisky MCX fork assuming this is
# the fork used to spec wheelbase
head_tube_length <- function(axle_crown, rake, stack, wheelbase){
  rake_h <- get_rake_h(geobike[, fork_offset_rake],
                       geobike[, head_tube_angle])
  fork_angle <- get_fork_angle(geobike[, fork_offset_rake],
                               geobike[, fork_axle_crown],
                               geobike[, head_tube_angle])
}
# Vagabond Genesis does not specify chainstay length.
get_chainstay_length <- function(rake, reach, stack, wheelbase,
                                 hta, htl, bbd){
  head_tube_h <- get_ht_h(hta, htl)
  head_tube_v <- get_ht_v(hta, htl)
  fork_v <- stack -  bbd -
    head_tube_v
  
  fork_h1 = fork_v/tan(deg_2_rad(hta))
  rake_h <- get_rake_h(rake,
                       hta)
  chainstay_h <- wheelbase - reach - head_tube_h - fork_h1 - 
    rake_h
  
  chainstay <- sqrt(chainstay_h^2 + bbd^2)
  
  return(chainstay)
}
get_fork_offset <- function(stack, reach, head_tube_angle, chainstay_length, bottom_bracket_drop, wheelbase){
  # steer_axis_h is base of triangle from top-head-tube to vertex created by steering axis and wheelbase.
  # tan hta <- stack/steer_axis_h
  steer_axis_v <- stack - bottom_bracket_drop
  steer_axis_h <- steer_axis_v /
    tan(deg_2_rad(head_tube_angle))
  chainstay_h <- get_chainstay_h(chainstay_length,
                                 bottom_bracket_drop)
  rake_h <- wheelbase - chainstay_h - reach - steer_axis_h
  rake <- rake_h * sin(deg_2_rad(head_tube_angle))
  return(rake)
}
get_effective_top_tube_length <- function(stack,
                                          reach,
                                          seat_tube_angle){
  # amigo bug out is missing this
  #
  seat_h <- stack/tan(deg_2_rad(seat_tube_angle))
  effective_top_tube_length <- seat_h + reach
  return(effective_top_tube_length)
}
geom_checker <- function(chainstay_length, # chainstay length
                         bottom_bracket_drop, # bottom bracket drop
                         reach,
                         stack,
                         head_tube_angle, # head tube angle
                         rake, # head tube length
                         wheelbase){ # wheelbase
  # do all the horizontal components add to wheelbase?
  chainstay_length_h <- get_chainstay_h(chainstay_length,
                                        bottom_bracket_drop)
  steer_axis_v <- stack - bottom_bracket_drop
  steer_axis_h <- steer_axis_v /
    tan(deg_2_rad(head_tube_angle))

  rake_h <- get_rake_h(rake,
                       head_tube_angle)
  wheelbase_computed <- chainstay_length_h + reach +
    steer_axis_h + rake_h

  }

1.4 Importer

# data_path <- here(data_folder, "ghost_grappler.txt")
# dt <- fread(data_path)
# bike_label = "Tumbleweed Stargazer 2022"
# bike_range = "b1:h21"

read_bike <- function(bike_label = "Breezer Radar X Pro 2022",
                      bike_range = "B1:I19"){
  data_file <- "bikes.xlsx"
  data_path <- here(data_folder, data_file)
  bike_wide <- read_excel(data_path,
                          sheet = bike_label,
                          range = bike_range) %>%
    data.table
  # re-read with coltype = numeric
  # col_type_list <- c("text", "text", rep("numeric", ncol(bike_wide)-2))
  # bike_wide <- read_excel(data_path,
  #                         sheet = bike_label,
  #                         range = bike_range,
  #                         col_types = col_type_list) %>%
  #   data.table
  
  
  bike_model <- substr(bike_label, 1, nchar(bike_label) - 5)
  model_year <- substr(bike_label,
                       nchar(bike_label) - 4,
                       nchar(bike_label))
  bike_wide <- bike_wide[, -2]
  bike <- data.table(
    model = bike_model,
    year = model_year,
    transpose(bike_wide,
              keep.names = "frame_size",
              make.names = 1)
  )
  keep_names <- c("model","frame_size", "seat_tube_length", "top_tube_effective_length", "head_tube_length", "seat_tube_angle", "head_tube_angle", "chainstay_length", "wheelbase", "bottom_bracket_drop", "fork_offset_rake", "stack", "reach", "standover", "stem_length", "handlebar_width", "crank_length", "wheel_size", "tire_width_spec", "tire_width_max")
  bike <- bike[, .SD, .SDcols = keep_names]
  
  # fill in missing
    # chainstay_length
  bike[, chainstay_length := 
         ifelse(is.na(chainstay_length),
                get_chainstay_length(fork_offset_rake,
                                     reach,
                                     stack,
                                     wheelbase,
                                     head_tube_angle,
                                     head_tube_length,
                                     bottom_bracket_drop),
                chainstay_length)]
    # fork_offset_rake
  bike[, fork_offset_rake := 
         ifelse(is.na(fork_offset_rake),
                get_fork_offset(stack,
                                reach,
                                head_tube_angle,
                                chainstay_length,
                                bottom_bracket_drop,
                                wheelbase),
                fork_offset_rake)]
  # top_tube_effective_length
  bike[, top_tube_effective_length := 
         ifelse(is.na(top_tube_effective_length),
                get_effective_top_tube_length(stack,
                                              reach,
                                              seat_tube_angle),
                top_tube_effective_length)] 
  
  # constructed measures
  radius <- (ifelse(bike$wheel_size == 700 | bike$wheel_size == 29, 622, 584) + bike$tire_width_spec*2)/2
  bike[, trail := radius/tan(head_tube_angle*pi/180) - 
         get_rake_h(fork_offset_rake, head_tube_angle)]
# from wikipedia
# bike[, trail := ((diameter + tire_width_spec*2)/2 * cos(head_tube_angle*pi/180) -
#                     fork_offset_rake) / sin(head_tube_angle*pi/180)]
  bike[, model_size := paste(model, frame_size)]
  bike[, rear_center := sqrt(chainstay_length^2 - bottom_bracket_drop^2)] # horizontal
  bike[, front_center := wheelbase - rear_center] # horizontal
  bike[, seat_center := stack/tan(deg_2_rad(seat_tube_angle))]
  
  # ratios
  bike[, stack_reach := stack/reach]
  bike[, front_rear := front_center/rear_center]
  bike[, rear_wheelbase := rear_center/wheelbase]
  bike[, front_wheelbase := front_center/wheelbase]
  bike[, sta_hta := seat_tube_angle/head_tube_angle]

  # decompositions
  # seat_tube_v and seat_tube_h are decomp of seat tube
  bike[, seat_tube_v := seat_tube_length *
         sin(deg_2_rad(seat_tube_angle))]
  bike[, seat_tube_h := seat_tube_length *
         cos(deg_2_rad(seat_tube_angle))]
  # seat_v and seat_h are decomp of seat positioned at stack height
  # tan(STA) = seat_h/seat_v
  bike[, seat_v := stack]
  bike[, seat_h := stack /
         tan(deg_2_rad(seat_tube_angle))]
  # head_v and head_h are decomp of head tube
  bike[, head_v := head_tube_length * sin(deg_2_rad(head_tube_angle))]
  bike[, head_h := head_tube_length * cos(deg_2_rad(head_tube_angle))]

  # landmarks with rear axle as origin
  bike[, x1 := 0] # rear axle
  bike[, y1 := 0]
  bike[, x2 := rear_center - seat_h] # seat at stack height
  bike[, y2 := stack - bottom_bracket_drop]
  bike[, x3 := rear_center + reach] # head tube top
  bike[, y3 := stack - bottom_bracket_drop]
  bike[, x4 := x3 + head_h] # head tube base
  bike[, y4 := y3 - head_v]
  bike[, x5 := wheelbase] # front axle
  bike[, y5 := 0]
  bike[, x6 := rear_center] # bottom bracket
  bike[, y6 := -bottom_bracket_drop]
  bike[, x7 := rear_center - seat_tube_h] # seat tube
  bike[, y7 := seat_tube_v]
  
  # landmarks_named
  bike[, rear_x := x1]
  bike[, rear_y := y1]
  bike[, seat_x := x2]
  bike[, seat_y := y2]
  bike[, head_x := x3]
  bike[, head_y := y3]
  bike[, crown_x := x4]
  bike[, crown_y := y4]
  bike[, front_x := x5]
  bike[, front_y := y5]
  bike[, bottom_x := x6]
  bike[, bottom_y := y6]
  bike[, seattube_x := x7]
  bike[, seattube_y := y7]
  
  return(bike)
}

1.5 Import

data_path <- here(data_folder, "bike_list.txt")
bike_list <- fread(data_path)
geobike <- data.table(NULL)
for(i in 1:nrow(bike_list)){
  bike_label_i <- as.character(bike_list[i, "model"])
  bike_range_i <- as.character(bike_list[i, "data_range"])
  bike_i <- read_bike(bike_label = bike_label_i,
                      bike_range = bike_range_i)
  bike_i[, my_fit := ifelse(frame_size == c(bike_list[i, "my_fit"]), TRUE, FALSE)]
  geobike <- rbind(geobike, bike_i)
}

# my_fit: use 176 cm (I am 175.5)
# add Breezer small to my_fit
# geobike[model == "Breezer Radar X Pro" & frame_size == "48cm (S)", my_fit := TRUE]
# add Boone 54 to my_fit
# geobike[model == "Trek Boone 6" & frame_size == "54 cm", my_fit := TRUE]


# add column of shape id for plots
shape_list <- c(15,17,19,0,2)
n_shapes <- length(shape_list)
n_models <- length(unique(geobike[, model]))
n_recycles <- floor(n_models/n_shapes)
left_over <- n_models - n_recycles*n_shapes
model_2_shape_map <- c(rep(shape_list, n_recycles), shape_list[1:left_over])
geobike[, shape_id := model_2_shape_map[as.integer(as.factor(model))]]

1.6 Center landmarks at bottom bracket

y_cols <- c("rear_x", "rear_y",
            "seat_x", "seat_y",
            "head_x", "head_y",
            "crown_x", "crown_y",
            "front_x", "front_y",
            "bottom_x", "bottom_y",
            "seattube_x", "seattube_y")

# center X at bottom bracket
geobike[, rear_x := rear_x - bottom_x]
geobike[, seat_x := seat_x - bottom_x]
geobike[, head_x := head_x - bottom_x]
geobike[, crown_x := crown_x - bottom_x]
geobike[, front_x := front_x - bottom_x]
geobike[, bottom_x := bottom_x - bottom_x]
geobike[, seattube_x := seattube_x - bottom_x]

2 Frame size classification – Initial

The goal here was to create an objective measure of frame size relevant to a rider based on measures related to the virtual front triangle (with a horizontal top-tube) but this turned out to be a fool’s errand because more progressive geometry bikes have extended top tubes and/or head tubes to increase stack and/or reach. So the frequent advice to use stack and reach is useless if using your road bike measures when purchasing many gravel bike frames.

Three measures of frame size are computed

  1. \(\texttt{stack_reach_size_geomean}\) is the geometric mean of stack and reach.
  2. \(\texttt{rider_size}\) is the geometric mean of \(\texttt{seat_tube_effective_length}\) and \(\texttt{top_tube_effective_length}\). \(\texttt{seat_tube_effective_length}\) is the size component related to the rider’s leg length. \(\texttt{top_tube_effective_length}\) is the size component related to the rider’s torso and arm length.
  3. \(\texttt{centroid_size}\) of the three vertices of the front triangle created by the top of the virtual seat tube, the top of the head tube, and the bottom bracket.
# stack + reach size
geobike[, stack_reach_size_euclid := sqrt(stack^2 + reach^2)]
geobike[, stack_reach_size_geomean := sqrt(stack * reach)]

# effective seat tube + effective top tube size
geobike[, seat_tube_effective_length :=
          sqrt((seat_x - bottom_x)^2 + (seat_y - bottom_y)^2)]
geobike[, rider_size := sqrt(seat_tube_effective_length * 
                               top_tube_effective_length)]

# upper triangle centroid size
geobike[, centroid_x := (seat_x + bottom_x + head_x)/3]
geobike[, centroid_y := (seat_y + bottom_y + head_y)/3]
geobike[, centroid_size := 
          sqrt((seat_x - centroid_x)^2 +
          (seat_y - centroid_y)^2 +
          (bottom_x - centroid_x)^2 +
          (bottom_y - centroid_y)^2 +
          (head_x - centroid_x)^2 +
          (head_y - centroid_y)^2)]

# bike centroid size
geobike[, bike_centroid_x := (rear_x + seat_x + head_x + crown_x + front_x + bottom_x)/3]
geobike[, bike_centroid_y := (rear_y + seat_y + head_y + crown_y + front_y + bottom_y)/3]
geobike[, bike_centroid_size := 
          sqrt(
            (rear_x - bike_centroid_x)^2 +
              (rear_y - bike_centroid_y)^2 +
              (seat_x - bike_centroid_x)^2 +
              (seat_y - bike_centroid_y)^2 +
              (head_x - bike_centroid_x)^2 +
              (head_y - bike_centroid_y)^2 +
              (crown_x - bike_centroid_x)^2 +
              (crown_y - bike_centroid_y)^2 +
              (front_x - bike_centroid_x)^2 +
              (front_y - bike_centroid_y)^2 +
              (bottom_x - bike_centroid_x)^2 +
              (bottom_y - bike_centroid_y)^2
          )]
size <- "bike_centroid_size"
size <- geobike[, get(size)]
c.x <- geobike[, bike_centroid_x]
c.y <- geobike[, bike_centroid_y]

# do not scale
# size <- 1
# c.x <- 0
# c.y <- 0

# centroid size based on seat/headtube/bottom bracket triangle
geobike[, rear_xs := (rear_x - c.x)/size]
geobike[, rear_ys := (rear_y - c.y)/size]
geobike[, seat_xs := (seat_x - c.x)/size]
geobike[, seat_ys := (seat_y - c.y)/size]
geobike[, head_xs := (head_x - c.x)/size]
geobike[, head_ys := (head_y - c.y)/size]
geobike[, crown_xs := (crown_x - c.x)/size]
geobike[, crown_ys := (crown_y - c.y)/size]
geobike[, front_xs := (front_x - c.x)/size]
geobike[, front_ys := (front_y - c.y)/size]
geobike[, bottom_xs := (bottom_x - c.x)/size]
geobike[, bottom_ys := (bottom_y - c.y)/size]
geobike[, seattube_xs := (seattube_x - c.x)/size]
geobike[, seattube_ys := (seattube_y - c.y)/size]
my_fit <- geobike[my_fit == TRUE,]

shape_map <- setNames(geobike$shape_id, geobike$model)

nudge_percent <- 0.01
gg1 <- ggplot(data = geobike,
             aes(x = centroid_size,
                 y = rider_size,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent*(max(my_fit$stack_reach_size_geomean) - min(my_fit$stack_reach_size_geomean))

gg2 <- ggplot(data = geobike,
             aes(x = stack_reach_size_geomean,
                 y = centroid_size,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent*(max(my_fit$stack_reach_size_geomean) - min(my_fit$stack_reach_size_geomean))

gg3 <- ggplot(data = my_fit,
             aes(x = stack_reach_size_geomean,
                 y = rider_size,
                 color = model,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)

Figure 2.1: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 2.2: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 2.3: Hover over points to identify model and frame size

The goal here is to use the frame size measures to classify the bikes into size classes. First, here is the number of bike models that offer a specific number of frame sizes.

frame_sizes_per_model <- geobike[, .(n_sizes = .N), by = .(model)]
size_dist <- frame_sizes_per_model[, .(n_models = .N), by = .(n_sizes)]
ggplot(data = size_dist,
       aes(x = n_sizes,
           y = n_models)) +
  geom_col() +
  ylab("Number of models") +
  xlab("Number of frame sizes") +
  theme_pubr()
Distribution of bike models that offer a specific number of frame sizes

Figure 2.4: Distribution of bike models that offer a specific number of frame sizes

Use k-means clustering to classify into five size classes and seven size classes. The three frame size variables are the inputs.

y_cols <- c("stack_reach_size_geomean", "rider_size", "centroid_size")

y_cols <- "centroid_size"

# 5 sizes
sizes <- c("extra-small", "small", "medium", "large", "extra-large")
n_sizes <- length(sizes)
size_groups <- kmeans(x = geobike[, .SD, .SDcols = y_cols],
                                  centers = n_sizes)
sizing <- size_groups$cluster
geobike[, size_cluster_5 := sizing]
cluster_means <- geobike[, .(cluster_mean = mean(stack_reach_size_geomean)),
                         by = .(size_cluster_5)] %>%
  dplyr::arrange(cluster_mean) %>%
  data.table()
cluster_means[, sizes := sizes]
cluster_means <- dplyr::arrange(cluster_means, size_cluster_5)
geobike[, frame_size_5 := cluster_means$sizes[size_cluster_5]]
geobike[, frame_size_5 := factor(frame_size_5,
                                 levels = sizes)]

# 7 sizes
sizes <- c("extra-small", "small", "small-medium", "medium", "medium-large", "large", "extra-large")
n_sizes <- length(sizes)
size_groups <- kmeans(x = geobike[, .SD, .SDcols = y_cols],
                                  centers = n_sizes) 
sizing <- size_groups$cluster
geobike[, size_cluster_7 := sizing]
cluster_means <- geobike[, .(cluster_mean = mean(stack_reach_size_geomean)),
                         by = .(size_cluster_7)] %>%
  dplyr::arrange(cluster_mean) %>%
  data.table()
cluster_means[, sizes := sizes]
cluster_means <- dplyr::arrange(cluster_means, size_cluster_7)
geobike[, frame_size_7 := cluster_means$sizes[size_cluster_7]]
geobike[, frame_size_7 := factor(frame_size_7,
                                 levels = sizes)]
y_cols <- c("model", "frame_size", "frame_size_5", "frame_size_7")
#y_cols <- c("model", "frame_size", "frame_size_7")
# View(geobike[, .SD, .SDcols = y_cols])
gg1 <- ggplot(data = geobike,
              aes(x = frame_size_5,
                  y = top_tube_effective_length,
                  color = model,
                  shape = model)) + 
  geom_jitter_interactive(aes(tooltip = model_size,
                              data_id = model_size),
                          width = 0.2,
                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  ylab("Top Tube, Effective Length (mm)")


gg2 <- ggplot(data = geobike,
              aes(x = frame_size_7,
                  y = top_tube_effective_length,
                  color = model,
                  shape = model)) + 
  geom_jitter_interactive(aes(tooltip = model_size,
                              data_id = model_size),
                          width = 0.2,
                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  ylab("Top Tube, Effective Length (mm)")

girafe(ggobj = gg1)

Figure 2.5: Hover over points to identify model and frame size

# girafe(ggobj = gg2)

Notes

  1. Because the bikepacking/off-road bikes have extra high stack and/or extra long reach, the only extra-large bikes are bikepacking/off-road models and all of the all-road/race gravel bikes are classified into smaller bins then their specified size.
  2. This suggests re-classifying within style classifications.

3 Style classification

3.1 Geometric frame shape

var_labels <- c("Rear wheel X", "Rear wheel Y",
                "Seat at stack height, X",
                "Head tube X", "Head tube Y",
                "Fork crown X", "Fork crown Y",
                "Front wheel X", "Front wheel Y",
                "Bottom bracket X", "Bottom bracket Y")
data.table(
  Coordinates = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Coordinates
Rear wheel X
Rear wheel Y
Seat at stack height, X
Head tube X
Head tube Y
Fork crown X
Fork crown Y
Front wheel X
Front wheel Y
Bottom bracket X
Bottom bracket Y
y_cols <- c("rear_xs", "rear_ys",
            # seat_ys is redundant with head_ys
            "seat_xs",
            "head_xs", "head_ys",
            "crown_xs", "crown_ys",
            "front_xs", "front_ys",
            "bottom_xs", "bottom_ys")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- FALSE
center_it <- FALSE
tree_geom <- get_tree(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "average") %>%
  as.dendrogram
gg <- ggdendrogram(tree_geom, rotate = TRUE)

gg

Notes

  1. Method – UPGMA method using landmark coordinates centered at the frame centroid and scaled by frame centroid size, for frames spec’d to my size.

3.2 Traditional measures

y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
var_labels <- c("Stack", "Reach",
                "Front-center horizontal",
                "Rear-center horizontal",
                "Bottom bracket drop",
                "Fork offset",
                "Head tube angle",
                "Seat tube angle")
data.table(
  Variables = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Variables
Stack
Reach
Front-center horizontal
Rear-center horizontal
Bottom bracket drop
Fork offset
Head tube angle
Seat tube angle
y_cols <- c("stack", "reach", "front_center", "rear_center", "head_tube_angle", "seat_tube_angle", "bottom_bracket_drop", "fork_offset_rake")

geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE

# old code
# tree_v1 <- get_tree(geobike_subset,
#                 y_cols,
#                 scale_it,
#                 center_it,
#                 hclust_method = "ward.D2") %>%
#   as.dendrogram()
# gg <- ggdendrogram(tree_v1, rotate = TRUE)
# gg

tree_v1 <- get_tree(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "ward.D2")
tree_v1_color <- dendro_data_k(tree_v1, k = 3)
gg <- plot_ggdendro(tree_v1_color,
                      direction   = "rl",
                      expand.y    = 0.2,
                      scale.color = pal_okabe_ito)
 
gg

Notes

  1. Method – Ward’s method using centered/scaled measures of frames spec’d for my height
  2. Three major clusters, from left to right
  • trail: drop-bar mtn bikes and flat-bar gravel bikes
  • all-road and race gravel
  • bikepacking

3.3 Style classification table

Using the traditional-measures tree above, the frames spec’d to my size can be classified into the three styles: All-road, Bikepacking, Trail

options(knitr.kable.NA = '')

style_class <- tree_v1_color$labels %>%
  data.table()
style_class[, model := tstrsplit(label, ",", keep = 1)]

cluster_labels <- numeric(3)
trail <- "Breezer Radar X Pro"
cluster_labels[style_class[model == trail, clust]] <- "Trail"
all_road <- "OPEN U.P."
cluster_labels[style_class[model == all_road, clust]] <- "All-Road"
endurance <- "Mason InSearchOf"
cluster_labels[style_class[model == endurance, clust]] <- "Endurance"

style_class[, style := cluster_labels[clust]]
style_class[, style := factor(style,
                              levels = cluster_labels)]

# add style to geobike
geobike <- plyr::join(geobike,
                      style_class[, .SD, .SDcols = c("model",
                                                     "style")],
                      by = "model")
my_fit <- geobike[my_fit == TRUE,]

# dcast(setDT(DF), rowid(ID) ~ ID, value.var = "total")
# cluster_labels <- c("All-road", "Bikepacking", "Trail")

style_table <-dcast(setDT(style_class), rowid(style) ~ style, value.var = "model")[, .SD, .SDcols = cluster_labels]

style_table %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Trail Endurance All-Road
Surly Ghost Grappler Tout Terrain Scrambler 28 Canyon Grizl 7 1by
Otso Fenrir Ritchey Outback frameset Lauf Siegla
Nordest Kutxo Tumbleweed Stargazer Why R+ V4
Cotic Cascade Reeb Sams Pants Wilier Rave SLR
Chumba Yaupon Genesis Vagabond Trek Checkpoint SL5
Amigo Bug Out Noble GX 5 Wilier Jena
Rondo MYLC CF Hi BlackMtnCy Monstercross V5 Trek Boone 6
Evil Chamois Hagar GRX Bearclaw Beaux Jaxon Santa Cruz Stigmata
Specialized Diverge Evo Salsa Vaya Niner RLT 9 RDO
Hudski Doggler Gravel Bombtrack Beyond 2 Otso Warakin Stainless
Breezer Radar X Pro Light Blue Darwin All-City Cosmic Stallion
Bombtrack Beyond+ Adv Salsa Fargo rear dropout Cervelo Aspero
Enigma Escape Flat-bar BlackMtnCy La Cabra Rose Backroad XPLR
Revel Rover Salsa Fargo front dropout Obed Boundary
Rondo MYLC CF Lo Cinelli Hobootleg Geo Ribble Gravel SL
Fustle Causway GR1 Panorama Taiga EXP Chumba Terlingua steel fdo
BMC URS One Kona Sutra ULTD All-City Gorilla Monsoon
BMC URS AL Mosaic GT-1X Shand Stooshie
Fiftyone Assassin long-low Salsa Cutthroat Bombtrack Hook
BMC URS AL SUS Mason InSearchOf Solace OM-3 Short
Whyte Friston Gravel Moots Routt ESC Squid Gravtron
Knolly Cache Steel Chiru Kegeti Thesis OB1
Merida Silex Open WI.DE
Fiftyone Assassin short-hi OPEN U.P.
Sonder Camino AL Blackheart All Road TI
Fezzari Shafer Pinarello Grevil F
Marin DSX 2 Cannondale SuperSix Evo
Kanzo Adventure New Salsa Warbird
Alchemy Rogue
Devinci Hatchet
No22 Drifter X
Scott Addict Gravel 10
Canyon Grail 7 1by
Specialized Diverge

4 Pairwise

4.1 Stack and Reach

Notes

  1. Stack and reach are the most common quick & dirty measure of frame size. These are imperfect measures of frame size because both measures are confounded by bike style – more bike-packing and mountain-bike inspired (“trail”) gravel bikes have high stack or long reach, or both, for their specified size class relative to all-road gravel bikes of the same size class.
gg1 <- ggplot(data = geobike,
             aes(x = reach,
                 y = stack,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
   scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent*(max(my_fit$reach) - min(my_fit$reach))

gg2 <- ggplot(data = my_fit,
             aes(x = reach,
                 y = stack,
                 color = model,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = my_fit,
             aes(x = reach,
                 y = stack,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)

Figure 4.1: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.2: Hover over points to identify model and frame size

4.2 Rear-center and Front-center

Notes

  1. Rear-center and front-center here are the horizontal components. Combined, the two sum to the wheelbase.
gg1 <- ggplot(data = geobike,
             aes(x = front_center,
                 y = rear_center,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$front_center) -
                                min(my_fit$front_center))
gg2 <- ggplot(data = my_fit,
             aes(x = front_center,
                 y = rear_center,
                 color = model,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = my_fit,
             aes(x = front_center,
                 y = rear_center,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)

Figure 4.3: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.4: Hover over points to identify model and frame size

nudge_pos <- nudge_percent * (max(my_fit$front_wheelbase) -
                                min(my_fit$front_wheelbase))
gg4 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = stack_reach,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg4)

4.3 Seat Tube Angle and Head Tube Angle

y_cols <- c("seat_tube_angle", "stack", "reach", "rear_center", "front_center", "head_tube_angle")

ggpairs(geobike[, .SD, .SDcols = y_cols])

gghistogram(data = my_fit,
            x = "seat_tube_angle",
            color = "style",
            fill = "style")
gg1 <- ggplot(data = geobike,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
                                min(my_fit$head_tube_angle))
gg2 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = model,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg4 <- ggplot(data = my_fit,
             aes(x = rear_wheelbase,
                 y = seat_tube_angle,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)

Figure 4.5: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.6: Hover over points to identify model and frame size

girafe(ggobj = gg4)

Figure 4.7: Hover over points to identify model and frame size

4.4 Rear Center vs. Trail

gg1 <- ggplot(data = geobike,
             aes(x = rear_center,
                 y = trail,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$rear_center) -
                                min(my_fit$rear_center))
gg2 <- ggplot(data = my_fit,
             aes(x = rear_center,
                 y = trail,
                 color = model,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = my_fit,
             aes(x = rear_center,
                 y = trail,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)

Figure 4.8: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.9: Hover over points to identify model and frame size

4.5 ratios

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg1 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = stack_reach,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg2 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = seat_tube_angle/head_tube_angle,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$stack_reach) -
                                min(my_fit$stack_reach))
gg3 <- ggplot(data = my_fit,
             aes(x = stack_reach,
                 y = sta_hta,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)

4.6 Head Tube Angle vs. Fork Offset

Notes

  1. Head Tube Angle, Fork Offset, and Head Tube length are frame geometry contributions to trail but also affect toe-overlap in small bikes, especially with wide tires. I didn’t include trail in these analysis because it is a function of wheel plus tire diameter. I could use the spec’d wheel and tire and add this.
gg1 <- ggplot(data = geobike,
             aes(x = head_tube_angle,
                 y = fork_offset_rake,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
                                min(my_fit$head_tube_angle))
gg2 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = fork_offset_rake,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = fork_offset_rake,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2,
            show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

girafe(ggobj = gg1)

Figure 4.10: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 4.10: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.10: Hover over points to identify model and frame size

5 PCA

Principal Component Analysis is a cheap way of exploring similarity of bike frames through different 2D views of a multidimensional space.

5.1 Coordinates

Coordinates are unscaled and centered at the intersection of the bottom bracket chord and the wheelbase chord.

y_cols <- c("rear_xs", "rear_ys",
            # seat_ys is redundant with head_ys
            "seat_xs",
            "head_xs", "head_ys",
            "crown_xs", "crown_ys",
            # front_ys is redundant with rear_ys
            "front_xs",
            "bottom_xs", "bottom_ys")
y_labs <- c("Rear wheel X", "Rear wheel Y",
            "Seat X",
            "Head tube X", "Head tube Y",
            "Fork Crown X", "Fork Crown Y",
            "Front wheel X",
            "Bottom Bracket X", "Bottom Bracket Y")

y_cols <- c("rear_x",
            "seat_x",
            "head_x", "head_y",
            "crown_x", "crown_y",
            "front_x",
            "bottom_y")
y_labs <- c("Rear wheel X",
            "Seat X",
            "Head tube X", "Head tube Y",
            "Fork Crown X", "Fork Crown Y",
            "Front wheel X",
            "Bottom Bracket Y")


geobike_subset <- geobike[my_fit == TRUE]
X <- geobike_subset[, .SD, .SDcols = y_cols] %>%
  scale(center = TRUE, scale = FALSE) %>%
  as.matrix()

S <- cov(X)

geo_eigen <- eigen(S)

L <- geo_eigen$values
E <- geo_eigen$vector
scores <- X %*% E
pc1 <- scores[, 1]
pc2 <- scores[, 2]
pc3 <- scores[, 3]
geobike_subset[, pc1 := pc1]
geobike_subset[, pc2 := pc2]
geobike_subset[, pc3 := pc3]

coord_loadings <- cor(cbind(scores[,1:3], X))[-(1:3), 1:3]
row.names(coord_loadings) <- y_labs
table_cap <- "Correlations (or loadings) between PCs and coordinates centered at the bottom bracket with bike facing in positive X direction (right)."
coord_loadings %>%
  kable(digits = 2,
        caption = table_cap) %>%
  kable_styling(full_width = FALSE)
Table 5.1: Correlations (or loadings) between PCs and coordinates centered at the bottom bracket with bike facing in positive X direction (right).
Rear wheel X 0.47 -0.38 -0.15
Seat X 0.59 -0.38 -0.49
Head tube X -0.38 -0.88 -0.13
Head tube Y -0.87 0.33 0.35
Fork Crown X -0.38 -0.91 0.15
Fork Crown Y -0.83 0.49 -0.25
Front wheel X -0.85 -0.50 -0.08
Bottom Bracket Y -0.18 0.28 0.06
gg1 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc2,
                  color = model,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg1b <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc2,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed() +
  scale_color_manual(values = pal_okabe_ito)

gg2 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc3,
                  color = model,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg2b <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed() +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = geobike_subset,
              aes(x = pc2,
                  y = pc3,
                  color = model,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg3b <- ggplot(data = geobike_subset,
              aes(x = pc2,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed() +
  scale_color_manual(values = pal_okabe_ito)

Notes

  1. High PC1 describes a frame with a short front center and short stack.
  2. High PC2 describes a frame with short reach.
girafe(ggobj = gg1b)

Figure 5.1: Hover over points to identify model and frame size

Notes

  1. High PC2 describes a frame with small reach.
  2. PC3 describes noise
girafe(ggobj = gg2b)

Figure 5.2: Hover over points to identify model and frame size

girafe(ggobj = gg3b)

Figure 5.3: Hover over points to identify model and frame size

5.2 Traditional measures and angles

Notes:

  1. PCA using the centered and scaled measures used to compute the dendrogram above and the classification.
y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
y_labs <- c("stack", "reach", "front center", "rear center", "bottom bracket drop", "fork offset", "head tube angle", "seat tube angle")

geobike_subset <- geobike[my_fit == TRUE]
X <- geobike_subset[, .SD, .SDcols = y_cols] %>%
  scale()

S <- cov(X)

geo_eigen <- eigen(S)

L <- geo_eigen$values
E <- geo_eigen$vector
scores <- X %*% E
geobike_subset[, pc1 := scores[, 1]]
geobike_subset[, pc2 := scores[, 2]]
geobike_subset[, pc3 := scores[, 3]]


coord_loadings <- cor(cbind(scores[,1:3], X))[-(1:3), 1:3]
row.names(coord_loadings) <- y_labs
table_cap <- "Correlations (or loadings) between PCs and traditional frame measures."

coord_loadings %>%
  kable(digits = 2,
        caption = table_cap) %>%
  kable_styling(full_width = FALSE)
Table 5.2: Correlations (or loadings) between PCs and traditional frame measures.
stack -0.65 -0.53 0.13
reach -0.64 0.64 0.07
front center -0.97 0.12 0.04
rear center -0.39 -0.70 -0.02
bottom bracket drop -0.03 0.31 -0.75
fork offset -0.08 -0.42 -0.72
head tube angle 0.90 0.02 0.07
seat tube angle -0.23 0.55 -0.13
gg1 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc2,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed() +
  scale_color_manual(values = pal_okabe_ito)

gg2 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed() +
  scale_color_manual(values = pal_okabe_ito)

gg3 <- ggplot(data = geobike_subset,
              aes(x = pc2,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed() +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)

Figure 5.4: Hover over points to identify model and frame size

Notes

  1. High PC1 describes a frame with a short front-center, steep head-angle, and short stack and reach.
  2. High PC2 describes a frame with long reach, a small rear-center, and high seat tube angle
girafe(ggobj = gg2)

Figure 5.5: Hover over points to identify model and frame size

Notes

  1. High PC1 describes a frame with a long front-center, slack head-angle, and high stack.
  2. High PC3 describes a frame with a large bottom bracket drop (a low bottom bracket)
girafe(ggobj = gg3)

Figure 5.6: Hover over points to identify model and frame size

Notes

  1. High PC2 describes a frame with long reach, a small rear-center, and high seat tube angle
  2. High PC3 describes a frame with a small bottom bracket drop (a high bottom bracket)

6 Style Re-classification

6.1 Traditional measures – reduced set

y_cols <- c("stack", "reach", "front_center", "rear_center", "head_tube_angle", "seat_tube_angle")
var_labels <- c("Stack", "Reach",
                "Front-center horizontal",
                "Rear-center horizontal",
                "Head tube angle", "Seat tube angle")
data.table(
  Variables = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Variables
Stack
Reach
Front-center horizontal
Rear-center horizontal
Head tube angle
Seat tube angle
y_cols <- c("stack", "reach", "front_center", "rear_center", "head_tube_angle", "seat_tube_angle")

geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE

tree_v2 <- get_tree(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "ward.D2")
tree_v2_color <- dendro_data_k(tree_v2, k = 3)
gg <- plot_ggdendro(tree_v2_color,
                      direction   = "lr",
                      expand.y    = 0.2,
                      scale.color = pal_okabe_ito)
 
gg

Notes

  1. Method – Ward’s method using centered/scaled measures of frames spec’d for my height
  2. Three major clusters, from left to right
  • trail: drop-bar mtn bikes and flat-bar gravel bikes
  • all-road and race gravel
  • bikepacking

6.2 Style re-classification table

options(knitr.kable.NA = '')

style_class <- tree_v2_color$labels %>%
  data.table()
style_class[, model := tstrsplit(label, ",", keep = 1)]

cluster_labels <- numeric(3)
trail <- "Breezer Radar X Pro"
cluster_labels[style_class[model == trail, clust]] <- "Trail"
all_road <- "OPEN U.P."
cluster_labels[style_class[model == all_road, clust]] <- "All-Road"
endurance <- "Mason InSearchOf"
cluster_labels[style_class[model == endurance, clust]] <- "Endurance"

style_class[, restyle := cluster_labels[clust]]
style_class[, restyle := factor(restyle,
                              levels = cluster_labels)]

# add style to geobike
geobike <- plyr::join(geobike,
                      style_class[, .SD, .SDcols = c("model", "restyle")],
                      by = "model")
my_fit <- geobike[my_fit == TRUE,]

style_table <-dcast(setDT(style_class), rowid(restyle) ~ restyle, value.var = "model")[, .SD, .SDcols = cluster_labels]

style_table %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Trail Endurance All-Road
Enigma Escape Flat-bar Tout Terrain Scrambler 28 Bombtrack Hook
Revel Rover Ritchey Outback frameset Rose Backroad XPLR
Merida Silex Bombtrack Beyond 2 All-City Cosmic Stallion
Knolly Cache Steel Light Blue Darwin Ribble Gravel SL
Whyte Friston Gravel Bearclaw Beaux Jaxon All-City Gorilla Monsoon
Fiftyone Assassin short-hi Salsa Vaya Cervelo Aspero
BMC URS One Genesis Vagabond Cannondale SuperSix Evo
Marin DSX 2 Otso Warakin Stainless No22 Drifter X
BMC URS AL Salsa Cutthroat Chumba Terlingua steel fdo
Fiftyone Assassin long-low Mason InSearchOf Shand Stooshie
BMC URS AL SUS Moots Routt ESC Pinarello Grevil F
Sonder Camino AL Chiru Kegeti Open WI.DE
Fezzari Shafer Salsa Fargo rear dropout Solace OM-3 Short
Kanzo Adventure New Salsa Fargo front dropout Squid Gravtron
Mosaic GT-1X Cinelli Hobootleg Geo OPEN U.P.
Cotic Cascade Panorama Taiga EXP Thesis OB1
Chumba Yaupon Tumbleweed Stargazer Blackheart All Road TI
Bombtrack Beyond+ Adv Reeb Sams Pants Wilier Jena
Breezer Radar X Pro Kona Sutra ULTD Specialized Diverge
Surly Ghost Grappler BlackMtnCy La Cabra Trek Boone 6
Specialized Diverge Evo Santa Cruz Stigmata
Fustle Causway GR1 Noble GX 5
Otso Fenrir Obed Boundary
Amigo Bug Out Salsa Warbird
Nordest Kutxo Niner RLT 9 RDO
Rondo MYLC CF Lo BlackMtnCy Monstercross V5
Rondo MYLC CF Hi Trek Checkpoint SL5
Evil Chamois Hagar GRX Canyon Grail 7 1by
Hudski Doggler Gravel Canyon Grizl 7 1by
Wilier Rave SLR
Scott Addict Gravel 10
Alchemy Rogue
Devinci Hatchet
Lauf Siegla
Why R+ V4

6.3 Pairwise V2

nudge_pos <- nudge_percent * (max(my_fit$reach) -
                                min(my_fit$reach))
gg1 <- ggplot(data = my_fit,
             aes(x = reach,
                 y = stack,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

gg2 <- ggplot(data = my_fit,
             aes(x = front_center,
                 y = rear_center,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
                                min(my_fit$head_tube_angle))
gg3 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$rear_center) -
                                min(my_fit$rear_center))
gg4 <- ggplot(data = my_fit,
             aes(x = rear_center,
                 y = trail,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)
girafe(ggobj = gg4)

6.4 Ratio measures

y_cols <- c("stack_reach", "front_wheelbase", "sta_hta")
var_labels <- c("Stack:Reach",
                "Front-center:Wheelbase",
                "STA:HTA")
data.table(
  Variables = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Variables
Stack:Reach
Front-center:Wheelbase
STA:HTA
y_cols <- c("stack_reach", "front_wheelbase", "sta_hta")

geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
dendro_v2_ratios <- get_tree(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "ward.D2")
dendro_v2_ratios_color <- dendro_data_k(dendro_v2_ratios, k = 3)
gg <- plot_ggdendro(dendro_v2_ratios_color,
                      direction   = "lr",
                      expand.y    = 0.2,
                      scale.color = pal_okabe_ito)
 
gg

6.5 ratios v2

front_wheelbase is the ratio \(\frac{frontcenter}{wheelbase}\), where frontcenter is the horizontal component of the bottom-bracket to front-wheel-axle chord.

nudge_pos <- nudge_percent * (max(my_fit$front_wheelbase) -
                                min(my_fit$front_wheelbase))
gg1 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = stack_reach,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$front_wheelbase) -
                                min(my_fit$front_wheelbase))
gg2 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = seat_tube_angle/head_tube_angle,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)

nudge_pos <- nudge_percent * (max(my_fit$stack_reach) -
                                min(my_fit$stack_reach))
gg3 <- ggplot(data = my_fit,
             aes(x = stack_reach,
                 y = sta_hta,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_color_manual(values = pal_okabe_ito)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)