NMSA407 Linear Regression: Tutorial

Checking uncorrelated errors

Data Olympic




Introduction

Load used data and calculate basic summaries

data(Olympic, package = "mffSM")
head(Olympic)
##   t year high.jump discus.throw long.jump
## 1 0 1896       181         2915       634
## 2 1 1900       190         3604       719
## 3 2 1904       180         3928       734
## 4 3 1908       190         4089       748
## 5 4 1912       193         4521       760
## 6 5 1920       194         4468       715
dim(Olympic)
## [1] 27  5
summary(Olympic)
##        t             year        high.jump      discus.throw    long.jump    
##  Min.   : 0.0   Min.   :1896   Min.   :180.0   Min.   :2915   Min.   :634.0  
##  1st Qu.: 6.5   1st Qu.:1926   1st Qu.:195.5   1st Qu.:4674   1st Qu.:758.5  
##  Median :13.0   Median :1960   Median :216.0   Median :5918   Median :807.0  
##  Mean   :13.0   Mean   :1956   Mean   :213.6   Mean   :5639   Mean   :798.7  
##  3rd Qu.:19.5   3rd Qu.:1986   3rd Qu.:235.0   3rd Qu.:6708   3rd Qu.:852.0  
##  Max.   :26.0   Max.   :2012   Max.   :239.0   Max.   :6989   Max.   :890.0




Performance of the high jump winner over the years

Scatterplot

par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1)
plot(high.jump ~ t, data = Olympic, xlab = "Olympic index", ylab = "High jump [cm]",
     pch = PCH, col = COL, bg = BGC, cex = 1.5)

plot of chunk fig-CheckModelAssumpt-05-01

Initial plot, check influence of first and last few games

print(subset(Olympic, select = c("t", "year", "high.jump")))
##     t year high.jump
## 1   0 1896       181
## 2   1 1900       190
## 3   2 1904       180
## 4   3 1908       190
## 5   4 1912       193
## 6   5 1920       194
## 7   6 1924       198
## 8   7 1928       194
## 9   8 1932       197
## 10  9 1936       203
## 11 10 1948       198
## 12 11 1952       204
## 13 12 1956       211
## 14 13 1960       216
## 15 14 1964       218
## 16 15 1968       224
## 17 16 1972       223
## 18 17 1976       225
## 19 18 1980       236
## 20 19 1984       235
## 21 20 1988       238
## 22 21 1992       234
## 23 22 1996       239
## 24 23 2000       235
## 25 24 2004       236
## 26 25 2008       236
## 27 26 2012       238
yvar <- "high.jump"
YLAB <- "High jump [cm]"
plot(Olympic[,"t"], Olympic[, yvar], xlab = "Year", ylab = YLAB, pch = PCH, col = COL, bg = BGC)
abline(lm(paste(yvar, "~ t"), data = Olympic), col = "blue4")
abline(lm(paste(yvar, "~ t"), data = Olympic, subset = (year >= 1920 & year <= 1996)), col = "darkgreen")

plot of chunk fig-CheckModelAssumpt-05-02




All Olympic games (1896 - 2012)

Regression line

m0 <- lm(high.jump ~ t, data = Olympic)

Basic residual plots

library("mffSM")
plotLM(m0)

plot of chunk fig-CheckModelAssumpt-05-03

Fitted regression function

par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1)
plot(high.jump ~ t, data = Olympic, xlab = "Olympic index", ylab = "High jump [cm]",
     pch = PCH, col = COL2, bg = BGC2, cex = 1.5)
abline(m0, col = "red3", lwd = 2)

plot of chunk fig-CheckModelAssumpt-05-04




Only 1920 - 1996 Olympics

Regression line

OlympSub <- subset(Olympic, year >= 1920 & year <= 1996)
m1 <- lm(high.jump ~ t, data = OlympSub)

Basic residual plots

library("mffSM")
plotLM(m1)

plot of chunk fig-CheckModelAssumpt-05-05

Fitted regression function

par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1)
plot(high.jump ~ t, data = OlympSub, xlab = "Olympic index", ylab = "High jump [cm]",
     pch = PCH, col = COL2, bg = BGC2, cex = 1.5)
abline(m1, col = "blue3", lwd = 2)

plot of chunk fig-CheckModelAssumpt-05-06

Plot of delayed residuals

u <- residuals(m1)
uy <- u[2:length(u)]
ux <- u[1:(length(u) - 1)]
par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1)
plot(ux, uy, pch = PCH3, col = COL3, bg = BGC3, cex = 1.5, xlab = expression(u[i-1]), ylab = expression(u[i]))
abline(h = 0, col = "grey60")
lines(lowess(ux, uy), col = "red3", lwd = 2)

plot of chunk fig-CheckModelAssumpt-05-07




Durbin-Watson test

P-value calculated by Farebrother's approximations

Two-sided alternative

library("lmtest")      
dwtest(m1, alternative = "two.sided")
## 
##  Durbin-Watson test
## 
## data:  m1
## DW = 1.4162, p-value = 0.113
## alternative hypothesis: true autocorrelation is not 0

One-sided alternative

dwtest(m1)
## 
##  Durbin-Watson test
## 
## data:  m1
## DW = 1.4162, p-value = 0.05651
## alternative hypothesis: true autocorrelation is greater than 0

P-value calculated by bootstrap

Two-sided alternative

library("car")
set.seed(20010911)
durbinWatsonTest(m1)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.2472284      1.416224   0.126
##  Alternative hypothesis: rho != 0

One-sided alternative

set.seed(20010911)
durbinWatsonTest(m1, alternative = "positive")
##  lag Autocorrelation D-W Statistic p-value
##    1       0.2472284      1.416224   0.063
##  Alternative hypothesis: rho > 0