Hyperparameter tuning with tidymodels

Source: MLOps with vetiver

Hyperparameter tuning is an important skill to πŸš€ boost model performance πŸš€


but…


you'll waste time 😴 without an intentional & systematic approach.

From last time

Load Packages & Set Options
library(tidyverse)      
library(tidymodels)     
library(palmerpenguins) # penguin dataset
library(gt)             # better tables
library(bonsai)         # tree-based models
library(conflicted)     # function conflicts
tidymodels_prefer()     # handle conflicts
conflict_prefer("penguins", "palmerpenguins")
options(tidymodels.dark = TRUE) # dark mode
theme_set(theme_bw()) # set default ggplot2 theme
Exploratory Data Analysis
penguins |>
  filter(!is.na(sex)) |>
  ggplot(aes(x     = flipper_length_mm,
             y     = bill_length_mm,
             color = sex,
             size  = body_mass_g)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~species)
Prepare & Split Data
# remove rows with missing sex
# exclude year and island
penguins_df <-
  penguins |>
  drop_na(sex) |>
  select(-year, -island)

# set the seed for reproducibility
set.seed(1234)

# Split the data into train and test sets
# stratify by sex
penguin_split <- initial_split(penguins_df,
                               strata = sex)
penguin_train <- training(penguin_split)
penguin_test  <- testing(penguin_split)

# create folds for cross validation
penguin_folds <- vfold_cv(penguin_train, v = 10, strata = sex)

Create Recipe

For more on creating recipes, see the Getting Started page from the {recipes} pkgdown site. You can also learn more about the recommended ordering of steps. For recommended preprocessing steps based on model type, see the Appendix of Tidy Modeling with R.
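As a hedged illustration of that recommended ordering (not the recipe used below), a fuller preprocessing pipeline for this data might look like the following sketch; every step beyond step_dummy() is optional here and shown only for reference.

# illustrative only: steps arranged in the commonly recommended order
recipe(sex ~ ., data = penguin_train) |>
  step_impute_median(all_numeric_predictors()) |>  # impute missing values first
  step_dummy(all_nominal_predictors()) |>          # then create dummy variables
  step_zv(all_predictors()) |>                     # drop zero-variance columns
  step_normalize(all_numeric_predictors())         # center & scale numeric predictors

For the penguins data (and a tree-based model), the simple recipe below is enough.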


penguin_rec <-
  recipe(sex ~ ., data = penguin_train) |>    
  step_dummy(species)

Specify Model

Let’s specify a boosted tree model with boost_tree(), using the "lightgbm" engine that {bonsai} provides.

rlang::check_installed("lightgbm")
bt_bonsai_spec <-
  boost_tree(learn_rate     = tune(),
             stop_iter      = tune(),
             trees          = 1000) |>
  set_engine(engine     = "lightgbm",
             num_leaves = tune()) |>
  set_mode("classification")

Build Grid for Tuning

bt_bonsai_spec |> 
  extract_parameter_set_dials()
Collection of 3 parameters for tuning

 identifier       type    object
 learn_rate learn_rate nparam[+]
  stop_iter  stop_iter nparam[+]
 num_leaves num_leaves nparam[+]
# default range for the num_leaves() parameter from {dials}
num_leaves()
Number of Leaves (quantitative)
Range: [5, 100]
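Those default ranges can be changed before a grid is built. A minimal sketch using update() from {dials}; the narrower num_leaves range is an arbitrary choice for illustration, and bt_params is a name introduced only for this example.

# illustration: override a default parameter range before generating a grid
bt_params <-
  bt_bonsai_spec |>
  extract_parameter_set_dials() |>
  update(num_leaves = num_leaves(range = c(10, 60)))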

Build Grid for Tuning

grid_tune <- 
  bt_bonsai_spec |> 
  extract_parameter_set_dials() |> 
  grid_latin_hypercube(size = 50)


grid_tune |> glimpse(width = 50)
Rows: 50
Columns: 3
$ learn_rate <dbl> 4.499336e-02, 3.174162e-05, 9…
$ stop_iter  <int> 7, 17, 11, 8, 8, 13, 5, 20, 9…
$ num_leaves <int> 81, 39, 34, 74, 22, 79, 46, 8…

Fit Models & Tune Hyperparameters

Construct our workflow

bt_bonsai_wf <-
  workflow() |> 
  add_recipe(penguin_rec) |> 
  add_model(bt_bonsai_spec)

Specify the grid control parameters

cntl   <- control_grid(save_pred     = TRUE,
                       save_workflow = TRUE)

Fit Models & Tune Hyperparameters

bt_tune_grid <- 
  bt_bonsai_wf |> 
  tune_grid(
    resamples = penguin_folds,
    grid      = grid_tune,
    control   = cntl
  )
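tune_grid() computes a default set of classification metrics when none are supplied. To control which metrics are tracked, a yardstick metric set can be passed explicitly; the sketch below is optional and was not part of the tuning run above.

# optional: request specific metrics during tuning
class_metrics <- metric_set(roc_auc, accuracy, mn_log_loss)

bt_bonsai_wf |>
  tune_grid(
    resamples = penguin_folds,
    grid      = grid_tune,
    metrics   = class_metrics,
    control   = cntl
  )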

Tuning Results

autoplot(bt_tune_grid)
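autoplot() gives a visual overview; to rank the candidates numerically, show_best() (or collect_metrics()) works on the same tuning result.

# top 5 candidates ranked by ROC AUC
bt_tune_grid |>
  show_best(metric = "roc_auc", n = 5)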

Racing with {finetune}

library(finetune)
race_cntl <- control_race(save_pred     = TRUE,
                          save_workflow = TRUE)

bt_tune_race <- 
  bt_bonsai_wf |> 
  tune_race_anova(
    resamples = penguin_folds,
    grid      = grid_tune,
    control   = race_cntl
  )

Racing Results

autoplot(bt_tune_race)

Racing Results

plot_race(bt_tune_race)

Faster πŸŽοΈπŸ’¨

# a larger candidate grid: racing + parallel processing keep it affordable
big_grid <-
  bt_bonsai_spec |> 
  extract_parameter_set_dials() |> 
  grid_latin_hypercube(size = 250)

Faster πŸŽοΈπŸ’¨

# tune in parallel
library(doMC)
registerDoMC(cores = 10)
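{doMC} only works on Unix-alikes; on Windows, or for a cross-platform script, {doParallel} is a common substitute. A minimal sketch, assuming 10 workers are available on your machine:

# cross-platform alternative to doMC
library(doParallel)
cl <- makePSOCKcluster(10)
registerDoParallel(cl)
# ... run the tuning ...
# stopCluster(cl)  # release the workers when finished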

Faster πŸŽοΈπŸ’¨

# tune in parallel
library(doMC)
registerDoMC(cores = 10)

bt_tune_fast <- 
  bt_bonsai_wf |> 
  tune_race_anova(
    resamples  = penguin_folds,
    grid       = big_grid,
    control    = race_cntl
  )

Faster πŸŽοΈπŸ’¨

autoplot(bt_tune_fast)

Faster πŸŽοΈπŸ’¨

plot_race(bt_tune_fast)

πŸš€ Finalize Model πŸš€

bt_best_id <-
  bt_tune_fast |>
  select_best(metric = "roc_auc")

# finalize the workflow with the best hyperparameters,
# then fit on the training set and evaluate on the test set
best_bt_race <-
  bt_tune_fast |>
  extract_workflow() |>
  finalize_workflow(bt_best_id) |>
  last_fit(penguin_split)

πŸš€ Finalize Model πŸš€

# collect the test set metrics for the final model
best_bt_race |>
  collect_metrics()
# A tibble: 2 Γ— 4
  .metric  .estimator .estimate .config             
  <chr>    <chr>          <dbl> <chr>               
1 accuracy binary         0.905 Preprocessor1_Model1
2 roc_auc  binary         0.973 Preprocessor1_Model1

πŸš€ Finalize Model πŸš€

# plot results of test set fit
best_bt_race |>
  collect_predictions() |>
  roc_curve(sex, .pred_female) |>
  autoplot()
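Once last_fit() has run, the fitted workflow can be pulled out and used for prediction; a minimal sketch, here scoring the held-out test set (final_bt_wf is a name introduced for this example).

# pull the fitted workflow out of the last_fit() result
final_bt_wf <- extract_workflow(best_bt_race)

# class probabilities for new observations (here, the test set)
predict(final_bt_wf, new_data = penguin_test, type = "prob")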

Hyperparameter Tuning Summary

  1. Use {tune} and {finetune} to perform hyperparameter tuning
  2. Use race functions (e.g., tune_race_anova()) to quickly screen out candidate hyperparameters that are unlikely to perform well
  3. Use parallel processing to speed up tuning

Where to Next?

Iterative hyperparameter tuning πŸŒ€
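A taste of what that looks like: tune_bayes() from {tune} proposes new candidate values iteratively, based on the results seen so far. A minimal sketch that reuses the earlier grid results as a warm start; the iteration count and control settings are illustrative, not tuned recommendations.

# iterative (Bayesian) tuning, seeded with the earlier grid results
bt_tune_bayes <-
  bt_bonsai_wf |>
  tune_bayes(
    resamples = penguin_folds,
    initial   = bt_tune_grid,   # warm start from the grid search
    iter      = 25,
    control   = control_bayes(save_pred = TRUE, save_workflow = TRUE)
  )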