Solution to the bacteria exercise

1 Script with output

library(dplyr)
library(readr)
library(ggplot2)
library(ggfortify)

## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))

df_bacteria <- read_csv("data/06_bacteria.csv")
df_bacteria
# A tibble: 150 × 3
   species  temperature density
   <chr>          <dbl>   <dbl>
 1 species1        28.9    3.7 
 2 species1        28      3.25
 3 species1        31.6    4.17
 4 species1        23.7    2.46
 5 species1        24.2    1.78
 6 species1        31.9    4.45
 7 species1        23.9    2.47
 8 species1        29.4    3.85
 9 species1        23.3    2.07
10 species1        30.5    4.95
# ℹ 140 more rows
## fit model without interaction
lm1 <- lm(density ~ species + temperature, data = df_bacteria)

autoplot(lm1, which = 1:2)

df_pred1 <- mutate(df_bacteria, density_pred = predict(lm1)) 

ggplot(df_pred1, aes(x = temperature, y = density, color = species)) +
    geom_point() +
    geom_line(aes(y = density_pred)) 

## fit model with interaction
lm2 <- lm(density ~ species * temperature, data = df_bacteria)

autoplot(lm2, which = 1:2)

df_pred2 <- mutate(df_bacteria, density_pred = predict(lm2)) 

ggplot(df_pred2, aes(x = temperature, y = density, color = species)) +
    geom_point() +
    geom_line(aes(y = density_pred)) 

## or with facets
ggplot(df_pred2, aes(x = temperature, y = density)) +
    geom_point() +
    geom_line(aes(y = density_pred)) +
    facet_wrap(~ species) +
    theme_light() +
    theme(text = element_text(size = 14))

2 Explanation and equations

Model without interaction

lm1

Call:
lm(formula = density ~ species + temperature, data = df_bacteria)

Coefficients:
    (Intercept)  speciesspecies2  speciesspecies3      temperature  
        -6.4072           0.8829          -2.6237           0.3560  
sigma(lm1)
[1] 0.7871561

\[ \begin{align} \text{density} &= \cases { -6.41 + 0.36 \cdot \text{temperature} + \epsilon, & \text{if species = 1} \\ -6.41 + 0.88 + 0.36 \cdot \text{temperature} + \epsilon, & \text{if species = 2} \\ -6.41 - 2.62 + 0.36 \cdot \text{temperature} + \epsilon, & \text{if species = 3} \\ } \\ \epsilon &\sim N(0, 0.79) \end{align} \]

Model with interaction

lm2

Call:
lm(formula = density ~ species * temperature, data = df_bacteria)

Coefficients:
                (Intercept)              speciesspecies2  
                    -5.2822                     -10.1938  
            speciesspecies3                  temperature  
                     3.9040                       0.3153  
speciesspecies2:temperature  speciesspecies3:temperature  
                     0.4030                      -0.2401  
sigma(lm2)
[1] 0.2856908

\[ \begin{align} \text{density} &= \cases { -5.28 + 0.32 \cdot \text{temperature} + \epsilon, & \text{if species = 1} \\ -5.28 - 10.19 + (0.32 + 0.40) \cdot \text{temperature} + \epsilon, & \text{if species = 2} \\ -5.28 + 3.90 + (0.32 - 0.24) \cdot \text{temperature} + \epsilon, & \text{if species = 3} \\ } \\ \epsilon &\sim N(0, 0.29) \end{align} \]

The model with interaction fits much better to the data. For the model without interaction, the residuals are not normally distributed.

3 Script for copy-paste

library(dplyr)
library(readr)
library(ggplot2)
library(ggfortify)

## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))

df_bacteria <- read_csv("data/06_bacteria.csv")
df_bacteria

## fit model without interaction
lm1 <- lm(density ~ species + temperature, data = df_bacteria)

autoplot(lm1, which = 1:2)

df_pred1 <- mutate(df_bacteria, density_pred = predict(lm1)) 

ggplot(df_pred1, aes(x = temperature, y = density, color = species)) +
    geom_point() +
    geom_line(aes(y = density_pred)) 


## fit model with interaction
lm2 <- lm(density ~ species * temperature, data = df_bacteria)

autoplot(lm2, which = 1:2)

df_pred2 <- mutate(df_bacteria, density_pred = predict(lm2)) 

ggplot(df_pred2, aes(x = temperature, y = density, color = species)) +
    geom_point() +
    geom_line(aes(y = density_pred)) 
    
## or with facets
ggplot(df_pred2, aes(x = temperature, y = density)) +
    geom_point() +
    geom_line(aes(y = density_pred)) +
    facet_wrap(~ species) +
    theme_light() +
    theme(text = element_text(size = 14))