Regularization for linear models in R

Day 7

Felix Nößler

Freie Universität Berlin @ Theoretical Ecology

January 19, 2024

Reproduce slides

1 A note on the library glmnetUtils

  • glmnet is a package for fitting regularized linear models
  • glmnetUtils is a package that extends glmnet with some functions as the formula interface

A tutorial on glmnet can be found here

2 Introduction to the dataset

  • 20 years of data on the population size of the Rocky Mountain Apollo butterfly (Parnassius smintheus)
  • What are wheather covariates that influence the population size of the butterfly?
  • Rt is the change in population size from year \(t\) to year \(t+1\)
  • meada is the letter of the meadow where the butterfly was observed
  • 72 wheather covariates
butterfly <- read_csv("data/07_butterfly.csv")

df <- butterfly %>%
  filter(! is.na(Rt)) %>%
  select(-year, -Nt, -logNt) %>%
  select_if(~ all(!is.na(.)))  

J. Roland and S. F. Matter, “Pivotal effect of early‐winter temperatures and snowfall on population growth of alpine Parnassius smintheus butterflies,” Ecological Monographs, 2016. doi: 10.1002/ecm.1225.

the dataset is also used in:
A. T. Tredennick, G. Hooker, S. P. Ellner, and P. B. Adler, “A practical guide to selecting models for exploration, inference, and prediction in ecology,” Ecology, 2021. doi: 10.1002/ecy.3336.

3 Find a good value for \(\alpha\)

lm_cva <- cva.glmnet(Rt ~ ., data = df, 
                     alpha = c(0, 0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1))
plot(lm_cva)
  • look for the colour of the curve with the lowest minimum and then look in the legendbox for the corresponding value of \(\alpha\)
  • here: \(\alpha > 0.7\) are good values

4 Find a good value for \(\lambda\)

lm_cv <- cv.glmnet(Rt ~ ., data = df, alpha = 0.9, nfolds = 30)
plot(lm_cv)
  • mean squared error (MSE) with standard error (SE) for different values of \(\lambda\)
  • the left vertical line is the value of \(\lambda\) with the lowest cross-validated error (lambda.min)
  • the right vertical line is the value of \(\lambda\) with the lowest cross-validated error + 1 standard error (lambda.1se)

5 Coefficient value as a function of \(\lambda\)

lm1 <- glmnet(Rt ~ ., data = df, alpha = 0.9) 
plot(lm1, xvar = "lambda")

6 Extract coefficients

coef(lm_cv, s = "lambda.1se") # "lambda.1se", "lambda.min"
84 x 1 sparse Matrix of class "dgCMatrix"
                        s1
(Intercept)  -0.1859470136
meadaF        .           
meadag        .           
meadaG        .           
meadaJ        .           
meadaL        .           
meadaM        .           
meadaN        .           
meadaO        .           
meadaR        .           
meadaS        .           
meadaY        .           
maxsnow       .           
Datesnowmelt  .           
jansnow       .           
jansnowfall   .           
janmean       .           
janmeanmax    .           
janmeanmin    .           
janextmax     .           
janextmin     .           
febsnow       .           
febsnowfall   .           
febmeanmin    .           
febextmin     .           
marsnow       .           
marsnowfall   .           
marmean       .           
marmeanmax    0.0001697007
marmeanmin    .           
marextmax     .           
marextmin     .           
aprsnow       .           
aprsnowfall   .           
maysnow       .           
maymean       0.1131836622
maymeanmax    .           
maymeanmin    0.0078090816
mayextmax     .           
mayextmin     .           
junsnow       .           
junmean       .           
junmeanmax    .           
junmeanmin    0.0071916610
junextmax     .           
junextmin     .           
julsnow       .           
julmean       .           
julmeanmax    .           
julmeanmin    .           
julextmax     .           
julextmin     .           
augsnow       0.0124387278
augmean       .           
augmeanmax    .           
augmeanmin    .           
augextmax    -0.0060987645
augextmin     0.0196316088
sepsnow       .           
sepsnowfall   .           
sepmean       .           
sepmeanmax    .           
sepmeanmin    .           
sepextmax     .           
sepextmin    -0.0245992877
octmean       .           
octmeanmax    .           
octmeanmin    0.0335255661
octextmax     .           
octextmin     .           
novsnow       .           
novsnowfall   .           
novmean       .           
novmeanmax    .           
novmeanmin   -0.0302745006
novextmax    -0.0474869699
novextmin     .           
decsnow       .           
decsnowfall   .           
decmean       .           
decmeanmax    .           
decmeanmin    .           
decextmax     .           
decextmin    -0.0064699643

7 Visualize the standardized coefficients

Code
df_std <- df %>% 
  mutate_at(vars(-Rt, -meada), scale) 
  
lm_std <- glmnet(Rt ~ ., data = df_std, 
       alpha = 0.9, 
       lambda = lm_cv$lambda.1se,
       standardize = F) 

c1 <- coef(lm_std)
coef_df <- tibble(name = rownames(c1), value = as.numeric(c1))

coef_continous <- coef_df %>%
  filter(abs(value) > 1e-8) %>%
  filter(name  != "(Intercept)" & ! str_starts(name, "meda")) 

ggplot(coef_continous, aes(x = value, y = fct_reorder(name, value))) +
  geom_col() +
  labs(y = "parameter name", x = "standardized coefficient value") 

8 Calculate the \(R^2\)

df_pred <- df %>%
  mutate(Rt_pred = predict(lm_cv, newdata = ., s = "lambda.1se"),
         residual = Rt - Rt_pred,
         residual_total = Rt - mean(Rt))
  
RSS <- sum(df_pred$residual ^ 2)
RSS_tot <- sum(df_pred$residual_total ^ 2)

RSS / RSS_tot
[1] 0.5034184

9 Marginal effect plots

Code
df_extrema <- df %>%
  select(-Rt, -meada) %>%
  summarise_all(list(min = min, max = max)) %>%
  pivot_longer(everything(), 
               names_to = c("name", "stat"), 
               names_sep = "_") %>%
  pivot_wider(names_from = stat, values_from = value) 

df_variables <- tibble(name = colnames(df)) %>%
  filter(name != "Rt") %>% 
  left_join(df_extrema, by = "name") %>%
  right_join(coef_continous, by = "name") %>%
  mutate(name = fct_reorder(name, value, .desc = T))

df_pred_mean <- df %>% 
  select(-Rt, -meada) %>%
  mutate_all(mean)  %>%
  slice_head(n = 1) %>%
  mutate(meada = df$meada[1])

df1 <- df_variables %>%
  rowwise() %>%
  mutate(df = 
      list(expand_grid(
        variable_varied = name,
        "{name}" := seq(min, max, length = 30),
        select(df_pred_mean, -name))))

df_pred_marginal <- bind_rows(df1$df) %>%
  mutate(Rt = predict(lm_cv, newdata = ., s = "lambda.1se")[ , 1]) %>%
  select(-meada) %>%
  pivot_longer(c(-Rt, -variable_varied), values_to = "x") %>%
  filter(name == variable_varied) 

df_plot_marginal <- df %>%
  select(-meada) %>%
  pivot_longer(c(-Rt), values_to = "x", names_to = "name") %>%
  right_join(coef_continous, by = "name") %>%
  mutate(variable_varied = fct_reorder(name, value, .desc = T))

ggplot(df_pred_marginal, aes(x = x, y = Rt)) +
  geom_point(data = df_plot_marginal,
             alpha = 0.3) +
  geom_line(color = "red", linewidth = 1) +
  facet_wrap(~ variable_varied, scales = "free_x") +
  theme_light() +
  theme(text = element_text(size = 14)) +
  labs(x = "")