NMSA407 Linear Regression: Tutorial

Checking homoscedasticity

Data Draha




Introduction

Load used data and calculate basic summaries

data(Draha, package = "mffSM")
head(Draha)
##   veloc breakd
## 1     8     13
## 2    12     11
## 3    13     18
## 4    12     21
## 5    13     15
## 6    27     57
dim(Draha)
## [1] 63  2
summary(Draha)
##      veloc           breakd      
##  Min.   : 4.00   Min.   :  2.00  
##  1st Qu.:10.00   1st Qu.: 13.50  
##  Median :18.00   Median : 30.00  
##  Mean   :18.97   Mean   : 39.22  
##  3rd Qu.:26.50   3rd Qu.: 56.50  
##  Max.   :40.00   Max.   :138.00




Dependence of breakd (breaking distance) on veloc (velocity)

Scatterplot

par(mfrow = c(1, 1), bty = BTY, mar = c(5, 4, 1, 2) + 0.1)
plot(breakd ~ veloc, data = Draha, xlab = "Velocity [km/h]", ylab = "Breaking distance [m]", pch = PCH, col = COL, bg = BGC)

plot of chunk fig-CheckModelAssumpt-04-01

Quadratic dependence of breaking distance on velocity

m1 <- lm(breakd ~ veloc + I(veloc^2) - 1, data = Draha, x = TRUE)
summary(m1)
## 
## Call:
## lm(formula = breakd ~ veloc + I(veloc^2) - 1, data = Draha, x = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.2378  -5.0753  -0.3436   4.1195  27.9197 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## veloc      0.555256   0.198663   2.795  0.00693 ** 
## I(veloc^2) 0.062692   0.006855   9.145 4.85e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.821 on 61 degrees of freedom
## Multiple R-squared:  0.9643, Adjusted R-squared:  0.9632 
## F-statistic: 824.6 on 2 and 61 DF,  p-value: < 2.2e-16
vpred <- seq(0, 45, length = 250)
p1 <- predict(m1, newdata = data.frame(veloc = vpred))

Scatterplot and the fitted regression function

par(mfrow = c(1, 1), bty = BTY, mar = c(5, 4, 1, 2) + 0.1)
plot(breakd ~ veloc, data = Draha, xlim = range(vpred), xlab = "Velocity [km/h]", ylab = "Breaking distance [m]",
     pch = PCH, col = COL2, bg = BGC2)
lines(vpred, p1, col = "red3", lwd = 2)

plot of chunk fig-CheckModelAssumpt-04-02

Residuals versus fitted values or the covariate

par(mfrow = c(1, 2), bty = BTY, mar = c(5, 4, 3, 2) + 0.1)
plot(m1, which = 1, pch = 21, col = "blue4", bg = "skyblue", lwd = 2)
#
plot(m1[["x"]][, 1], residuals(m1), pch = 21, col = "blue4", bg = "skyblue", xlab = "Velocity [km/h]", ylab = "Residuals",
     main = "Residuals vs Covariate")
lines(lowess(m1[["x"]][, 1], residuals(m1)), col = "red3", lwd = 2)

plot of chunk fig-CheckModelAssumpt-04-03

Scale-location plot

par(mfrow = c(1, 2), bty = BTY, mar = c(5, 4, 3, 2) + 0.1)
plot(m1, which = 3, pch = 21, col = "blue4", bg = "skyblue", lwd = 2)
#
plot(m1[["x"]][, 1], sqrt(abs(rstandard(m1))), pch = 21, col = "blue4", bg = "skyblue",
     xlab = "Velocity [km/h]", ylab = as.expression(substitute(sqrt(abs(yL)), list(yL = as.name("Standardized residuals")))),
     main = "Scale-Location (Velocity)")
lines(lowess(m1[["x"]][, 1], sqrt(abs(rstandard(m1)))), col = "red3", lwd = 2)

plot of chunk fig-CheckModelAssumpt-04-04




Tests of homoscedasticity

Alternative of a dependence of the residual variance on the response expectation

Breusch-Pagan test

library("car")
ncvTest(m1)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 23.33658    Df = 1     p = 1.359891e-06

Breusch-Pagan test again

library("lmtest")
bptest(m1, varformula = ~fitted(m1), studentize = FALSE)
## 
##  Breusch-Pagan test
## 
## data:  m1
## BP = 23.337, df = 1, p-value = 1.36e-06

Koenker's studentized version of the Breusch-Pagan test

bptest(m1, varformula = ~fitted(m1))
## 
##  studentized Breusch-Pagan test
## 
## data:  m1
## BP = 18.253, df = 1, p-value = 1.934e-05

Alternative of a dependence of the residual variance on the covariate (velocity)

Breusch-Pagan test

ncvTest(m1, var.formula = ~veloc, data = Draha)
## Non-constant Variance Score Test 
## Variance formula: ~ veloc 
## Chisquare = 23.52651    Df = 1     p = 1.232046e-06

Breusch-Pagan test again

bptest(m1, varformula = ~veloc, studentize = FALSE, data = Draha)
## 
##  Breusch-Pagan test
## 
## data:  m1
## BP = 23.527, df = 1, p-value = 1.232e-06

Koenker's studentized version of the Breusch-Pagan test

bptest(m1, varformula = ~veloc, data = Draha)
## 
##  studentized Breusch-Pagan test
## 
## data:  m1
## BP = 18.402, df = 1, p-value = 1.789e-05