NMSA407 Linear Regression: Tutorial

Numeric covariate: regression splines

Data Motorcycle




Introduction

Load used data and calculate basic summaries

data(Motorcycle, package = "mffSM")
head(Motorcycle)
##   time haccel
## 1  2.4    0.0
## 2  2.6   -1.3
## 3  3.2   -2.7
## 4  3.6    0.0
## 5  4.0   -2.7
## 6  6.2   -2.7
dim(Motorcycle)
## [1] 133   2
summary(Motorcycle)
##       time           haccel       
##  Min.   : 2.40   Min.   :-134.00  
##  1st Qu.:15.80   1st Qu.: -54.90  
##  Median :24.00   Median : -13.30  
##  Mean   :25.55   Mean   : -25.55  
##  3rd Qu.:35.20   3rd Qu.:   0.00  
##  Max.   :65.60   Max.   :  75.00




Dependence of haccel on time

Basis scatterplot

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "orange", col = "red3",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")

plot of chunk fig-ParamCovar-02-01

Basic scatterplot equipped with LOWESS

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(with(Motorcycle, lowess(time, haccel)), col = "red3", lwd = 2)

plot of chunk fig-ParamCovar-02-02




Regression spline (cubic B-spline)

Knots of the spline

knots <- c(0, 11, 12, 13, 20, 30, 32, 34, 40, 50, 60)
inner <- knots[-c(1, length(knots))]
bound <- knots[c(1, length(knots))]

B-spline basis used here

library("splines")
xgrid <- seq(knots[1], knots[length(knots)], length = 500)
Bgrid <- bs(xgrid, knots = inner, Boundary.knots = bound, degree = 3, intercept = TRUE)
COL <- rainbow_hcl(ncol(Bgrid), c = 80)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(xgrid, Bgrid[,1], type = "l", col = COL[1], lwd = 2, xlab = "z", ylab = "B(z)",
     xlim = range(knots), ylim = range(Bgrid, na.rm = TRUE))
for (j in 2:ncol(Bgrid)) lines(xgrid, Bgrid[,j], col = COL[j], lwd = 2)
points(knots, rep(0, length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)

plot of chunk fig-ParamCovar-02-03

B-spline model matrix

Data <- Motorcycle
#Data <- subset(Motorcycle, time <= knots[length(knots)])    ## remove 1 observation with time = 65.6
B <- bs(Data[, "time"], knots = inner, Boundary.knots = bound, degree = 3, intercept = TRUE)
## Warning in bs(Data[, "time"], knots = inner, Boundary.knots = bound, degree = 3, : some 'x' values
## beyond boundary knots may cause ill-conditioned bases
round(B[1:5,], 3)
##          1     2     3     4 5 6 7 8 9 10 11 12 13
## [1,] 0.478 0.409 0.105 0.008 0 0 0 0 0  0  0  0  0
## [2,] 0.445 0.424 0.120 0.010 0 0 0 0 0  0  0  0  0
## [3,] 0.357 0.454 0.170 0.019 0 0 0 0 0  0  0  0  0
## [4,] 0.304 0.463 0.206 0.027 0 0 0 0 0  0  0  0  0
## [5,] 0.258 0.463 0.242 0.037 0 0 0 0 0  0  0  0  0

Fit

m1 <- lm(haccel ~ B - 1, data = Data)
summary(m1)
## 
## Call:
## lm(formula = haccel ~ B - 1, data = Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -71.857 -11.740  -0.268  10.356  55.249 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)    
## B1   -11.614     83.252  -0.140    0.889    
## B2    12.450     81.282   0.153    0.879    
## B3   -13.988     45.345  -0.308    0.758    
## B4     2.984     18.894   0.158    0.875    
## B5     6.115     12.139   0.504    0.615    
## B6  -237.283     18.850 -12.588  < 2e-16 ***
## B7    17.346     14.094   1.231    0.221    
## B8    53.248     12.635   4.214 4.88e-05 ***
## B9     5.102     13.051   0.391    0.697    
## B10   12.609     17.146   0.735    0.464    
## B11  -21.738     23.825  -0.912    0.363    
## B12   10.879     16.284   0.668    0.505    
## B13    7.885     17.624   0.447    0.655    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.61 on 120 degrees of freedom
## Multiple R-squared:  0.8447, Adjusted R-squared:  0.8278 
## F-statistic: 50.19 on 13 and 120 DF,  p-value: < 2.2e-16

Coefficient of determination

m0 <- lm(haccel ~ 1, data = Data)
SSe <- deviance(m1)
SST <- deviance(m0)
R2 <- 1 - SSe/SST
R2adj <- 1 - (SSe/m1$df.residual) / (SST/m0$df.residual)
R2B <- c(R2 = R2, R2adj = R2adj)
print(R2B)
##        R2     R2adj 
## 0.8009163 0.7810079

Fitted regression function

pm1 <- as.numeric(Bgrid %*% coef(m1))
pm1[1:10]
##  [1] -11.613930 -10.842009 -10.104058  -9.399422  -8.727445  -8.087471  -7.478845  -6.900910
##  [9]  -6.353011  -5.834492

Scatterplot equiped by the fitted regression function

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(xgrid, pm1, col = "red3", lwd = 2)
points(knots, rep(min(Motorcycle[, "haccel"]), length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)

plot of chunk fig-ParamCovar-02-04

Basic residual plots

library("mffSM")
plotLM(m1)

plot of chunk fig-ParamCovar-02-05

Residuals versus the covariate

lwsRes <- lowess(Motorcycle[, "time"], residuals(m1))
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(residuals(m1) ~ Motorcycle[, "time"], pch = 21, col = "blue4", bg = "skyblue",
     xlab = "Time", ylab = "Residuals")
lines(lwsRes[["x"]], lwsRes[["y"]], col = "red", lwd = 2)

plot of chunk fig-ParamCovar-02-06