Solution to the population growth exercise

Data source:

Hannah Ritchie, Lucas Rodés-Guirao, Edouard Mathieu, Marcel Gerber, Esteban Ortiz-Ospina, Joe Hasell and Max Roser (2023) - “Population Growth”. Published online at OurWorldInData.org. Retrieved from: https://ourworldindata.org/population-growth [Online Resource]

1 Load packages

library(dplyr)
library(readr)
library(ggplot2)
library(ggfortify)

## Global ggplot theme: classic layout with a base text size of 14,
## installed in one step for all plots that follow
theme_set(theme_classic() + theme(text = element_text(size = 14)))

2 Load data

## Read the population table (one row per entity and year) from the
## local data folder and show the first rows
df <- read_csv(file = "data/07_population-and-demography.csv")
print(df)
# A tibble: 18,288 × 24
   `Country name`  Year Population Population of childr…¹ Population of childr…²
   <chr>          <dbl>      <dbl>                  <dbl>                  <dbl>
 1 Afghanistan     1950    7480464                 301735                1248282
 2 Afghanistan     1951    7571542                 299368                1246857
 3 Afghanistan     1952    7667534                 305393                1248220
 4 Afghanistan     1953    7764549                 311574                1254725
 5 Afghanistan     1954    7864289                 317584                1267817
 6 Afghanistan     1955    7971933                 323910                1291129
 7 Afghanistan     1956    8087730                 330888                1322342
 8 Afghanistan     1957    8210207                 337874                1354752
 9 Afghanistan     1958    8333827                 344796                1387274
10 Afghanistan     1959    8468220                 352235                1421808
# ℹ 18,278 more rows
# ℹ abbreviated names: ¹​`Population of children under the age of 1`,
#   ²​`Population of children under the age of 5`
# ℹ 19 more variables: `Population of children under the age of 15` <dbl>,
#   `Population under the age of 25` <dbl>,
#   `Population aged 15 to 64 years` <dbl>,
#   `Population older than 15 years` <dbl>, …

3 Calculate world population

## World population per year.
## Do not sum over all rows: that yields about 15.5 billion for 1950, several
## times the actual world population, which indicates that the table holds
## aggregate entities (e.g. continents, income groups, "World") in addition
## to the individual countries, so people are counted more than once.
## Use the ready-made "World" entity instead.
## NOTE(review): the label "World" follows the Our World in Data naming --
## confirm with distinct(df, `Country name`). The table printed below this
## chunk was produced by the former all-rows sum and needs re-rendering.
df_world <- filter(df, `Country name` == "World")
df_world <- select(df_world, Year, Population)
df_world <- arrange(df_world, Year)

## Fail loudly if the label is missing or a year occurs more than once
stopifnot(nrow(df_world) > 0, anyDuplicated(df_world$Year) == 0)
df_world
# A tibble: 72 × 2
    Year  Population
   <dbl>       <dbl>
 1  1950 15450675880
 2  1951 15731647959
 3  1952 16033936161
 4  1953 16354360176
 5  1954 16685947167
 6  1955 17033211987
 7  1956 17386864191
 8  1957 17753428483
 9  1958 18128530668
10  1959 18480719829
# ℹ 62 more rows

4 Visualize data

## Time series of the world population: one point per year, joined by a line
ggplot(data = df_world, mapping = aes(x = Year, y = Population)) +
    geom_point() +
    geom_line()

5 Check whether a linear or an exponential model fits better

## Diagnostic panels to draw for each model (the first two of autoplot())
diag_panels <- c(1L, 2L)

## Linear model: population changes by a constant amount per year
lm1 <- lm(formula = Population ~ Year, data = df_world)
autoplot(lm1, which = diag_panels)

## Exponential model: log(population) is linear in time, i.e. the
## population changes by a constant factor per year
lm2 <- lm(formula = log(Population) ~ Year, data = df_world)
autoplot(lm2, which = diag_panels)

Explanation

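Neither model fits the data well. The growth is constant over time neither in absolute terms (as the linear model assumes) nor in relative terms (as the exponential model assumes).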

6 Calculate the growth rate and visualize it

## Annual growth rate = relative change compared with the previous year.
## Population / lag(Population) alone is the growth *factor* (a value
## slightly above 1); subtracting 1 turns it into the rate (0.02 = 2 % per
## year). order_by = Year makes the lag independent of the current row order.
df_world <- mutate(
    df_world,
    growth_rate = Population / lag(Population, order_by = Year) - 1
)

## The first year has no predecessor, so its growth rate is NA;
## na.rm = TRUE drops that row without a warning.
ggplot(df_world, aes(Year, growth_rate)) +
    geom_line(na.rm = TRUE) +
    geom_point(na.rm = TRUE) +
    labs(y = "Growth rate")

Link

See also the “Population Growth” page of the Our World in Data project: https://ourworldindata.org/population-growth

7 Complete script

library(dplyr)
library(readr)
library(ggplot2)
library(ggfortify)

## theme for ggplot
theme_set(theme_classic())
theme_update(text = element_text(size = 14))

## Load data
df <- read_csv("data/07_population-and-demography.csv")

## World population per year and its visualization.
## Do not sum over all rows: that yields about 15.5 billion for 1950, several
## times the actual world population, which indicates that the table holds
## aggregate entities (e.g. continents, income groups, "World") in addition
## to the individual countries. Use the ready-made "World" entity instead.
## NOTE(review): the label "World" follows the Our World in Data naming --
## confirm with distinct(df, `Country name`).
df_world <- filter(df, `Country name` == "World")
df_world <- select(df_world, Year, Population)
df_world <- arrange(df_world, Year)

## Fail loudly if the label is missing or a year occurs more than once
stopifnot(nrow(df_world) > 0, anyDuplicated(df_world$Year) == 0)

ggplot(df_world, aes(Year, Population)) +
    geom_line() +
    geom_point()

## Check whether a linear or an exponential model fits better
## (diagnostic panels 1 and 2 of autoplot() for each model)
lm1 <- lm(Population ~ Year, data = df_world)
autoplot(lm1, which = 1:2)

lm2 <- lm(log(Population) ~ Year, data = df_world)
autoplot(lm2, which = 1:2)

## Calculate the growth rate and visualize it.
## Population / lag(Population) alone is the growth *factor* (a value
## slightly above 1); subtracting 1 turns it into the rate (0.02 = 2 % per
## year). order_by = Year makes the lag independent of the current row order.
df_world <- mutate(
    df_world,
    growth_rate = Population / lag(Population, order_by = Year) - 1
)

## The first year has no predecessor, so its growth rate is NA;
## na.rm = TRUE drops that row without a warning.
ggplot(df_world, aes(Year, growth_rate)) +
    geom_line(na.rm = TRUE) +
    geom_point(na.rm = TRUE) +
    labs(y = "Growth rate")