Extending the Normal Regression

Dr. Mine Dogucu

The notes for this lecture are derived from Chapter 11 of the Bayes Rules! book

weather_WU <- weather_australia %>% 
  filter(location %in% c("Wollongong", "Uluru")) %>%
  mutate(location = droplevels(as.factor(location))) %>% 
  select(location, windspeed9am, humidity9am, 
    pressure9am, temp9am, temp3pm)

glimpse(weather_WU)

Rows: 200
Columns: 6
$ location     <fct> Uluru, Uluru, Uluru, Uluru, Uluru, Uluru, Uluru, Uluru, U…
$ windspeed9am <dbl> 20, 9, 7, 28, 24, 22, 22, 4, 2, 9, 20, 20, 9, 22, 9, 24, …
$ humidity9am  <int> 23, 71, 15, 29, 10, 32, 43, 57, 64, 40, 28, 30, 95, 47, 7…
$ pressure9am  <dbl> 1023.3, 1012.9, 1012.3, 1016.0, 1010.5, 1012.2, 1025.7, 1…
$ temp9am      <dbl> 20.9, 23.4, 24.1, 26.4, 36.7, 25.1, 14.9, 15.9, 24.6, 15.…
$ temp3pm      <dbl> 29.7, 33.9, 39.7, 34.2, 43.3, 33.5, 24.0, 22.6, 33.2, 24.…

ggplot(weather_WU, aes(x = temp9am, y = temp3pm)) +
  geom_point()

\(\text{likelihood model:} \; \; \; Y_i | \beta_0, \beta_1, \sigma \;\;\;\stackrel{ind}{\sim} N\left(\mu_i, \sigma^2\right)\text{ with } \mu_i = \beta_0 + \beta_1X_{i1}\)

\(\text{prior models:}\)

\(\beta_0\sim N(\ldots, \ldots )\)
\(\beta_1\sim N(\ldots, \ldots )\)
\(\sigma \sim \text{Exp}(\ldots)\)

weather_model_1 <- stan_glm(temp3pm ~ temp9am, 
                            data = weather_WU,
                            family = gaussian,
                            chains = 4, 
                            iter = 5000*2, 
                            seed = 84735)

mcmc_dens_overlay(weather_model_1)

posterior_interval(weather_model_1, prob = 0.80)

                  10%      90%
(Intercept) 2.9197355 5.490149
temp9am     0.9785207 1.104129
sigma       3.8778471 4.407116

pp_check(weather_model_1)

Considering a categorical predictor

ggplot(weather_WU, aes(x = temp3pm, fill = location)) +
  geom_density(alpha = 0.5)

\[X_{i2} = \begin{cases} 1 & \text{ Wollongong} \\ 0 & \text{ otherwise (ie. Uluru)} \\ \end{cases}\]

\(\text{likelihood model:} \; \; \; Y_i | \beta_0, \beta_1, \sigma \;\;\;\stackrel{ind}{\sim} N\left(\mu_i, \sigma^2\right)\text{ with } \mu_i = \beta_0 + \beta_1X_{i2}\)

\(\text{prior models:}\)

\(\beta_0\sim N(\ldots, \ldots )\)
\(\beta_1\sim N(\ldots, \ldots )\)
\(\sigma \sim \text{Exp}(\ldots)\)

For Uluru, \(X_{i2} = 0\) and the trend in 3pm temperature simplifies to

\[\beta_0 + \beta_1 \cdot 0 = \beta_0 \; .\] For Wollongong, \(X_{i2} = 1\) and the trend in 3pm temperature is

\[\beta_0 + \beta_1 \cdot 1 = \beta_0 + \beta_1 \; .\]

Simulating the Posterior

weather_model_2 <- stan_glm(temp3pm ~ location, 
                            data = weather_WU, 
                            family = gaussian, 
                            chains = 4, 
                            iter = 5000*2, 
                            seed = 84735)

mcmc_dens_overlay(weather_model_2)

# Posterior summary statistics
model_summary <- summary(weather_model_2)
head(as.data.frame(model_summary), -2) %>% 
  select(mean, "10%", "90%", Rhat)

                         mean        10%       90%      Rhat
(Intercept)         29.720510  29.016209 30.432417 0.9999003
locationWollongong -10.321964 -11.322672 -9.321884 0.9999647
sigma                5.495842   5.153241  5.855416 0.9999023

b0 <- model_summary[1,1]
b1 <- model_summary[2,1]

ggplot(weather_WU, aes(x = temp3pm, fill = location)) + 
  geom_density(alpha = 0.5) + 
  geom_vline(xintercept = c(b0, b0 + b1), 
    linetype = "dashed")

Two Predictors

ggplot(weather_WU, 
    aes(y = temp3pm, x = temp9am, color = location)) + 
  geom_point() + 
  geom_smooth(method = "lm", se = FALSE)

\(\text{likelihood model:}\) \(Y_i | \beta_0, \beta_1, \beta_2 \sigma \;\;\;\stackrel{ind}{\sim} N\left(\mu_i, \sigma^2\right)\text{ with } \mu_i = \beta_0 + \beta_1X_{i1} + \beta_2X_{i2}\)

\(\text{prior models:}\)

\(\beta_0\sim N(m_0, s_0 )\)
\(\beta_1\sim N(m_1, s_1 )\)
\(\beta_2\sim N(m_2, s_2 )\)
\(\sigma \sim \text{Exp}(l)\)

In Uluru, \(X_{i2} = 0\) and the trend in the relationship between 3pm and 9am temperature simplifies to

\[\beta_0 + \beta_1 X_{i1} + \beta_2 \cdot 0 = \beta_0 + \beta_1 X_{i1} \; .\]

In Wollongong, \(X_{i2} = 1\) and the trend in the relationship between 3pm and 9am temperature simplifies to

\[\beta_0 + \beta_1 X_{i1} + \beta_2 \cdot 1 = (\beta_0 + \beta_2) + \beta_1 X_{i1} \; .\]

weather_model_3 <- stan_glm(temp3pm ~ temp9am + location, 
                            data = weather_WU, 
  
                            family = gaussian, 
                            chains = 4, 
                            iter = 5000*2, 
                            seed = 84735)

weather_model_3_df <- as.data.frame(weather_model_3)
head(weather_model_3_df, 3)

  (Intercept)   temp9am locationWollongong    sigma
1    11.21958 0.8988170          -7.932687 2.588490
2    10.35556 0.9015759          -6.722425 2.559874
3    10.55716 0.8917270          -6.731531 2.556703

first_50 <- head(weather_model_3_df, 50)

ggplot(weather_WU, aes(x = temp9am, y = temp3pm)) + 
  geom_point(size = 0.01) + 
  geom_abline(data = first_50, size = 0.1,
    aes(intercept = `(Intercept)`, slope = temp9am)) + 
  geom_abline(data = first_50, size = 0.1,
    aes(intercept = `(Intercept)` + locationWollongong, 
    slope = temp9am), color = "blue")

# Simulate a set of predictions
set.seed(84735)
temp3pm_prediction <- posterior_predict(
  weather_model_3, 
  newdata = data.frame(
    temp9am = c(10, 10), location = c("Uluru", "Wollongong")))

Posterior Predictive Model

shortcut_df <- data.frame(uluru = temp3pm_prediction[,1],
                          woll = temp3pm_prediction[,2])
ggplot(shortcut_df, aes(x = uluru)) +
  geom_density() +
  geom_density(aes(x = woll), color = "blue")

ggplot(weather_WU,
       aes(y = temp3pm, x = humidity9am, color = location)) + 
  geom_point(size = 0.5) +
  geom_smooth(method = "lm", se = FALSE)

\(\text{likelihood model:}\) \(Y_i | \beta_0, \beta_1, \beta_2, \beta_3 \sigma \;\;\;\stackrel{ind}{\sim} N\left(\mu_i, \sigma^2\right)\text{ with }\) \(\mu_i = \beta_0 + \beta_1X_{i2} + \beta_2X_{i3} + \beta_3X_{i2}X_{i3}\)

\(\text{prior models:}\)

\(\beta_0\sim N(m_0, s_0 )\)
\(\beta_1\sim N(m_1, s_1 )\)
\(\beta_2\sim N(m_2, s_2 )\)
\(\beta_3\sim N(m_3, s_3 )\)
\(\sigma \sim \text{Exp}(l)\)

In Uluru, \(X_{2} = 0\) and the trend in the relationship between temperature and humidity simplifies to

\[\mu = \beta_0 + \beta_2 X_{3} \; .\]

In Wollongong, \(X_{2} = 1\) and the trend in the relationship between temperature and humidity simplifies to

\[\mu = \beta_0 + \beta_1 + \beta_2 X_{3} + \beta_3 X_{3} = (\beta_0 + \beta_1) + (\beta_2 + \beta_3) X_3 \; .\]

interaction_model <- stan_glm(temp3pm ~ location + humidity9am + 
                                location:humidity9am,
                              data = weather_WU, 
                              family = gaussian,
                              chains = 4, 
                              iter = 5000*2, 
                              seed = 84735)

model_summary <- summary(interaction_model)
head(as.data.frame(model_summary), -2) %>%
select(`10%`, `50%`, `90%`) %>%
round(3)

                                   10%     50%     90%
(Intercept)                     36.439  37.594  38.769
locationWollongong             -24.817 -21.890 -18.939
humidity9am                     -0.215  -0.190  -0.166
locationWollongong:humidity9am   0.199   0.246   0.293
sigma                            4.196   4.466   4.771

\[\begin{array}{lrl} \text{Uluru:} & \mu & = 37.594 - 0.19 \text{ humidity9am} \\ \text{Wollongong:} & \mu & = (37.594 - 21.89) + (-0.19 + 0.246) \text{ humidity9am}\\ && = 15.704 + 0.056 \text{ humidity9am}\\ \end{array}\]

Do you need an interaction term?

Context.
Visualizations.
Hypothesis tests.

More than two predictors

\(\text{likelihood model:} \; \; \; Y_i | \beta_0, \beta_1,\beta_2,...\beta_p, \sigma \;\;\;\stackrel{ind}{\sim} N\left(\mu_i, \sigma^2\right)\text{ with }\) \(\mu_i = \beta_0 + \beta_1X_{i1} + \beta_2X_{i2} + \ldots +\beta_pX_{ip}\)

\(\text{prior models:}\)

\(\beta_0, \beta_1,\beta_2, ...,\beta_p\sim N(\ldots, \ldots )\)

\(\sigma \sim \text{Exp}(\ldots)\)

weather_model_4 <- stan_glm(temp3pm ~ ., 
                            data = weather_WU, 
                            family = gaussian, 
                            chains = 4, 
                            iter = 5000*2, 
                            seed = 84735)

Model evaluation and comparison

Model	Formula
`weather_model_1`	`temp3pm ~ temp9am`
`weather_model_2`	`temp3pm ~ location`
`weather_model_3`	`temp3pm ~ temp9am + location`
`weather_model_4`	`temp3pm ~ .`

set.seed(84735)
predictions_1 <- posterior_predict(weather_model_1, 
  newdata = weather_WU)

# Posterior predictive models for weather_model_1
ppc_intervals(weather_WU$temp3pm, 
  yrep = predictions_1, 
  x = weather_WU$temp9am, 
  prob = 0.5, prob_outer = 0.95)

# Posterior predictive models for weather_model_2
ppc_violin_grouped(weather_WU$temp3pm, 
  yrep = predictions_2, 
  group = weather_WU$location,
  y_draw = "points")

prediction_summary_cv(data = weather_WU, 
                      model = weather_model_1, 
                      k = 10)

model	mae	mae scaled	within 50	within 95
weather model 1	3.285	0.789	0.4	0.97
weather model 2	3.652	0.661	0.49	0.94
weather model 3	1.142	0.483	0.67	0.96
weather model 4	1.207	0.522	0.63	0.95