Solution to the crab exercise

1 Solution with output

library(dplyr)
library(readr)
library(tidyr)
library(ggplot2)
library(ggfortify)

## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))

## load data set
df_crabs <- read_csv("data/06_crab_size.csv")

## plot latitude vs air temperature
ggplot(df_crabs, aes(latitude, water_temp)) +
  geom_point() +
  labs(x = "Latitude [degree]", y = "Water temperature [°C]") 

## fit linear model
lm1 <- lm(size ~ latitude + water_temp, data = df_crabs)

## check model assumptions
autoplot(lm1, which = 1:2)

## make and plot marginal effects
# create tibbles with predictions
df_lat <- tibble(
  latitude = seq(min(df_crabs$latitude), max(df_crabs$latitude), 
                 length.out = 30),
  water_temp = mean(df_crabs$water_temp)) %>%
  mutate(size = predict(lm1, newdata = .)) %>%
  select(-water_temp) %>%
  mutate(name = "Latitude [degree]") %>%
  rename(x = latitude)

df_watertemp <- tibble(
  latitude = mean(df_crabs$latitude),
  water_temp = seq(min(df_crabs$water_temp), max(df_crabs$water_temp), 
                   length.out = 30)) %>%
  mutate(size = predict(lm1, newdata = .)) %>%
  select(-latitude) %>%
  mutate(name = "Water temperature [°C]") %>%
  rename(x = water_temp)

df_pred <- bind_rows(df_lat, df_watertemp) 

# bring original data into long data format
df_plot <- df_crabs %>%
  select(size, latitude, water_temp) %>%
  pivot_longer(c(latitude, water_temp), 
               values_to = "x") %>%
  mutate(name = factor(name, levels = c("latitude", "water_temp"),
                       labels = c("Latitude [degree]", "Water temperature [°C]")))

# plot marginal effects
ggplot(df_pred, aes(x = x, y = size)) +
  geom_point(data = df_plot, size = 2, alpha = 0.5) +
  geom_line(color = "red", linewidth = 1.5) +  
  facet_wrap(~ name, scales = "free_x") +
  labs(x = "", y = "Carapace width [mm]") +
  theme_light() +
  theme(text = element_text(size = 14)) 

## Bonus: plot both effects together
df_raster <- expand_grid(
  water_temp = seq(0.9 * min(df_crabs$water_temp), 1.1 * max(df_crabs$water_temp), 
                   length.out = 200),
  latitude = seq(0.9 * min(df_crabs$latitude), 1.1 * max(df_crabs$latitude),
                  length.out = 200)) %>%
  mutate(size = predict(lm1, newdata = .)) 

ggplot(df_raster, aes(water_temp, latitude)) +
  geom_contour_filled(aes(z = size)) +
  geom_point(data = df_crabs, aes(water_temp, latitude, size = size), 
             color = "red") +
  scale_size_continuous(range = c(0.1, 5)) +
  labs(x = "Water temperature [°C]", y = "Latitude [degree]", 
       size = "Carapace width [mm]",
       fill = "Predicted\ncarapace width [mm]") +
  theme(text = element_text(size = 12))

2 Equations

lm1

Call:
lm(formula = size ~ latitude + water_temp, data = df_crabs)

Coefficients:
(Intercept)     latitude   water_temp  
   -22.9352       0.8042       0.4128  
sigma(lm1)
[1] 2.804983

The linear model is given by:

\[ \begin{align} \text{size} &= -22.94 + 0.80 \cdot \text{latitude} + 0.42 \cdot \text{watertemp} + \epsilon \\ \epsilon &\sim \mathcal{N}(0,\,2.80) \end{align} \]

  • interestingly, the carapace width increase with higher water temperature, this changes if the latitude is omitted from the model
  • we have repeated measurements from the sites, therefore the observations are not independent and it would be better to use a linear mixed model

3 Script for copy-pasting

library(dplyr)
library(readr)
library(tidyr)
library(ggplot2)
library(ggfortify)

## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))

## load data set
df_crabs <- read_csv("data/06_crab_size.csv")

## plot latitude vs air temperature
ggplot(df_crabs, aes(latitude, water_temp)) +
  geom_point() +
  labs(x = "Latitude [degree]", y = "Water temperature [°C]") 
  
## fit linear model
lm1 <- lm(size ~ latitude + water_temp, data = df_crabs)

## check model assumptions
autoplot(lm1, which = 1:2)

## make and plot marginal effects
# create tibbles with predictions
df_lat <- tibble(
  latitude = seq(min(df_crabs$latitude), max(df_crabs$latitude), 
                 length.out = 30),
  water_temp = mean(df_crabs$water_temp)) %>%
  mutate(size = predict(lm1, newdata = .)) %>%
  select(-water_temp) %>%
  mutate(name = "Latitude [degree]") %>%
  rename(x = latitude)

df_watertemp <- tibble(
  latitude = mean(df_crabs$latitude),
  water_temp = seq(min(df_crabs$water_temp), max(df_crabs$water_temp), 
                   length.out = 30)) %>%
  mutate(size = predict(lm1, newdata = .)) %>%
  select(-latitude) %>%
  mutate(name = "Water temperature [°C]") %>%
  rename(x = water_temp)

df_pred <- bind_rows(df_lat, df_watertemp) 

# bring original data into long data format
df_plot <- df_crabs %>%
  select(size, latitude, water_temp) %>%
  pivot_longer(c(latitude, water_temp), 
               values_to = "x") %>%
  mutate(name = factor(name, levels = c("latitude", "water_temp"),
                       labels = c("Latitude [degree]", "Water temperature [°C]")))

# plot marginal effects
ggplot(df_pred, aes(x = x, y = size)) +
  geom_point(data = df_plot, size = 2, alpha = 0.5) +
  geom_line(color = "red", linewidth = 1.5) +  
  facet_wrap(~ name, scales = "free_x") +
  labs(x = "", y = "Carapace width [mm]") +
  theme_light() +
  theme(text = element_text(size = 14)) 
  
## Bonus: plot both effects together
df_raster <- expand_grid(
  water_temp = seq(0.9 * min(df_crabs$water_temp), 1.1 * max(df_crabs$water_temp), 
                   length.out = 200),
  latitude = seq(0.9 * min(df_crabs$latitude), 1.1 * max(df_crabs$latitude),
                  length.out = 200)) %>%
  mutate(size = predict(lm1, newdata = .)) 

ggplot(df_raster, aes(water_temp, latitude)) +
  geom_contour_filled(aes(z = size)) +
  geom_point(data = df_crabs, aes(water_temp, latitude, size = size), 
             color = "red") +
  scale_size_continuous(range = c(0.1, 5)) +
  labs(x = "Water temperature [°C]", y = "Latitude [degree]", 
       size = "Carapace width [mm]",
       fill = "Predicted\ncarapace width [mm]") +
  theme(text = element_text(size = 12))