library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(stringr)
library(readr)
library(purrr)
library(glmnetUtils)
## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))R
Day 7
Freie Universität Berlin @ Theoretical Ecology
January 19, 2024
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(stringr)
library(readr)
library(purrr)
library(glmnetUtils)
## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))glmnetUtils
glmnet is a package for fitting regularized linear modelsglmnetUtils is a package that extends glmnet with some functions as the formula interfaceA tutorial on glmnet can be found here
Rt is the change in population size from year \(t\) to year \(t+1\)
meada is the letter of the meadow where the butterfly was observedJ. Roland and S. F. Matter, “Pivotal effect of early‐winter temperatures and snowfall on population growth of alpine Parnassius smintheus butterflies,” Ecological Monographs, 2016. doi: 10.1002/ecm.1225.
the dataset is also used in:
A. T. Tredennick, G. Hooker, S. P. Ellner, and P. B. Adler, “A practical guide to selecting models for exploration, inference, and prediction in ecology,” Ecology, 2021. doi: 10.1002/ecy.3336.
lm_cva <- cva.glmnet(Rt ~ ., data = df,
alpha = c(0, 0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1))
plot(lm_cva)lm_cv <- cv.glmnet(Rt ~ ., data = df, alpha = 0.9, nfolds = 30)
plot(lm_cv)lambda.min)lambda.1se)lm1 <- glmnet(Rt ~ ., data = df, alpha = 0.9)
plot(lm1, xvar = "lambda")coef(lm_cv, s = "lambda.1se") # "lambda.1se", "lambda.min"84 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -0.1859470136
meadaF .
meadag .
meadaG .
meadaJ .
meadaL .
meadaM .
meadaN .
meadaO .
meadaR .
meadaS .
meadaY .
maxsnow .
Datesnowmelt .
jansnow .
jansnowfall .
janmean .
janmeanmax .
janmeanmin .
janextmax .
janextmin .
febsnow .
febsnowfall .
febmeanmin .
febextmin .
marsnow .
marsnowfall .
marmean .
marmeanmax 0.0001697007
marmeanmin .
marextmax .
marextmin .
aprsnow .
aprsnowfall .
maysnow .
maymean 0.1131836622
maymeanmax .
maymeanmin 0.0078090816
mayextmax .
mayextmin .
junsnow .
junmean .
junmeanmax .
junmeanmin 0.0071916610
junextmax .
junextmin .
julsnow .
julmean .
julmeanmax .
julmeanmin .
julextmax .
julextmin .
augsnow 0.0124387278
augmean .
augmeanmax .
augmeanmin .
augextmax -0.0060987645
augextmin 0.0196316088
sepsnow .
sepsnowfall .
sepmean .
sepmeanmax .
sepmeanmin .
sepextmax .
sepextmin -0.0245992877
octmean .
octmeanmax .
octmeanmin 0.0335255661
octextmax .
octextmin .
novsnow .
novsnowfall .
novmean .
novmeanmax .
novmeanmin -0.0302745006
novextmax -0.0474869699
novextmin .
decsnow .
decsnowfall .
decmean .
decmeanmax .
decmeanmin .
decextmax .
decextmin -0.0064699643
df_std <- df %>%
mutate_at(vars(-Rt, -meada), scale)
lm_std <- glmnet(Rt ~ ., data = df_std,
alpha = 0.9,
lambda = lm_cv$lambda.1se,
standardize = F)
c1 <- coef(lm_std)
coef_df <- tibble(name = rownames(c1), value = as.numeric(c1))
coef_continous <- coef_df %>%
filter(abs(value) > 1e-8) %>%
filter(name != "(Intercept)" & ! str_starts(name, "meda"))
ggplot(coef_continous, aes(x = value, y = fct_reorder(name, value))) +
geom_col() +
labs(y = "parameter name", x = "standardized coefficient value") df_extrema <- df %>%
select(-Rt, -meada) %>%
summarise_all(list(min = min, max = max)) %>%
pivot_longer(everything(),
names_to = c("name", "stat"),
names_sep = "_") %>%
pivot_wider(names_from = stat, values_from = value)
df_variables <- tibble(name = colnames(df)) %>%
filter(name != "Rt") %>%
left_join(df_extrema, by = "name") %>%
right_join(coef_continous, by = "name") %>%
mutate(name = fct_reorder(name, value, .desc = T))
df_pred_mean <- df %>%
select(-Rt, -meada) %>%
mutate_all(mean) %>%
slice_head(n = 1) %>%
mutate(meada = df$meada[1])
df1 <- df_variables %>%
rowwise() %>%
mutate(df =
list(expand_grid(
variable_varied = name,
"{name}" := seq(min, max, length = 30),
select(df_pred_mean, -name))))
df_pred_marginal <- bind_rows(df1$df) %>%
mutate(Rt = predict(lm_cv, newdata = ., s = "lambda.1se")[ , 1]) %>%
select(-meada) %>%
pivot_longer(c(-Rt, -variable_varied), values_to = "x") %>%
filter(name == variable_varied)
df_plot_marginal <- df %>%
select(-meada) %>%
pivot_longer(c(-Rt), values_to = "x", names_to = "name") %>%
right_join(coef_continous, by = "name") %>%
mutate(variable_varied = fct_reorder(name, value, .desc = T))
ggplot(df_pred_marginal, aes(x = x, y = Rt)) +
geom_point(data = df_plot_marginal,
alpha = 0.3) +
geom_line(color = "red", linewidth = 1) +
facet_wrap(~ variable_varied, scales = "free_x") +
theme_light() +
theme(text = element_text(size = 14)) +
labs(x = "")Regularization for linear models