### -------------------------------------------------------------------------###
### NMSA407 - Linear Regression                                              ###
### Winter Term 2021/2022                                                    ###
###                                                                          ###
### EXERCISE #4: Regression model diagnostics                                ###
### -------------------------------------------------------------------------###

### -------------------------------------------------------------------------###
### Block 1: TEACHING MATERIAL                                               ###
### -------------------------------------------------------------------------###

library("mffSM")
library("splines")   # B-splines modelling
library("MASS")      # function stdres for standardized residuals

### Working data
data(Dris, package = "mffSM")
help("Dris")

head(Dris)
summary(Dris)

### A simple linear model fitted in the last session
aLogMg <- lm(yield ~ log2(Mg), data = Dris)
summary(aLogMg)

### Basic model checking
###
### EXPLANATION OF THE THREE BASIC RESIDUAL PLOTS
### (see Section 3.2 of the lecture notes)
###
### Raw residuals U_1, ..., U_n satisfy
###   E(U_i | X) = 0,   var(U_i | X) = sigma^2 * m_{i,i},
###   U_i ~ NORMAL in a normal LM,
###   but with different variances for different i.
###
### Standardized residuals V_1, ..., V_n are defined as
###   V_i = U_i / sqrt(MS_e * m_{i,i})
### and satisfy
###   E(V_i | X) = 0,   var(V_i | X) = 1   (in a normal model).
###
### The V_i's are not normally distributed (not even in a normal model),
### but normality is commonly checked by applying normality diagnostic
### tools to the standardized residuals.

### Using the functionality of the standard R function plot.lm
layout(matrix(c(0,1,1,0, 2,2,3,3), nrow = 2, byrow = TRUE))
plot(aLogMg, which = 1:3, ask = FALSE)
par(mfrow = c(1, 1))

### Equivalently, with the plotLM() function from package mffSM
plotLM(aLogMg)

### QUESTIONS
### Can we reconstruct these plots?

### First plot - fitted values vs. residuals
plot(aLogMg, which = 1)

### Manual computation
points(fitted(aLogMg), residuals(aLogMg), lwd = 2, col = "blue4", bg = "skyblue", pch = 21)
lines(lowess(fitted(aLogMg), residuals(aLogMg)), col = 2, lwd = 3)
### lowess is a nonparametric robust regression estimator;
### more on this function in NMST434: Modern Statistical Methods

### Second plot - QQ plot of the standardized residuals
plot(aLogMg, which = 2, ask = FALSE)

### Standardized residuals
H = qr.Q(aLogMg[["qr"]]) %*% t(qr.Q(aLogMg[["qr"]]))
M = diag(nrow(H)) - H
stdres = residuals(aLogMg) / sqrt(deviance(aLogMg) / aLogMg$df.residual * diag(M))
qq = qqnorm(stdres, plot.it = FALSE)
points(qq$x, qq$y, lwd = 2, col = "blue4", bg = "skyblue", pch = 21)
all.equal(qq$y, stdres)
qqline(stdres, lwd = 2, lty = 3)

### Or using the function stdres from package MASS
all.equal(stdres(aLogMg), stdres)

### Third plot - fitted values vs. sqrt(|standardized residuals|)
plot(aLogMg, which = 3, ask = FALSE)
points(fitted(aLogMg), sqrt(abs(stdres)), lwd = 2, col = "blue4", bg = "skyblue", pch = 21)
lines(lowess(fitted(aLogMg), sqrt(abs(stdres))), col = 2, lwd = 3)

### Overall, all three diagnostic plots
plotLM(aLogMg)

### Is there possibly something wrong with model aLogMg?

### Note that there are infinitely many ways to check that E(U_i | X) = 0.
### (A scatterplot of fitted values vs. residuals is only one of them;
### another common possibility is to plot the covariate values vs. the
### residuals.)
plot(log2(Dris$Mg), residuals(aLogMg), xlab = expression(log[2](Mg)),
     ylab = "Residuals", main = "Residuals vs. covariate",
     pch = 21, col = "blue4", bg = "skyblue")
lines(lowess(residuals(aLogMg) ~ log2(Dris$Mg)), col = "red")

### Is the following surprising?
abline(mean(aLogMg$residuals), 0, col = "green", lwd = 2)

### This shouldn't surprise us anymore
sum(aLogMg$residuals)
sum(aLogMg$residuals * log2(Dris$Mg))

### ======================================================================== ###
### Three types of confidence intervals                                      ###
### ======================================================================== ###

### Suppose for the moment that the model aLogMg is a USEFUL model
summary(aLogMg)

### 1. Pointwise confidence intervals (region/band) AROUND the regression line
###    (a sequence of confidence intervals for E(Y|x) is constructed as x
###    varies along a grid of values)

### What is the purpose of the predict command?
help(predict)
help(predict.lm)

### What is the difference between the outputs of "predict" and "fitted"?
### Only the first 10 values are printed here
data.frame(Predict = predict(aLogMg), Fitted = fitted(aLogMg))[1:10, ]
all.equal(predict(aLogMg), fitted(aLogMg))

### Point estimate of E(Y | Mg = 10)
predict(aLogMg, newdata = data.frame(Mg = 10))
(point.fit <- aLogMg$coef[1] + aLogMg$coef[2] * log2(10))

### Now, with the standard error
predict(aLogMg, newdata = data.frame(Mg = 10), se.fit = TRUE)

### Calculation of "se.fit"
lvec <- c(1, log2(10))
(se.fit <- c(sqrt(t(lvec) %*% vcov(aLogMg) %*% lvec)))
all.equal(predict(aLogMg, newdata = data.frame(Mg = 10), se.fit = TRUE)$se.fit, se.fit)

# Recall - residual degrees of freedom, i.e. $df.residual
aLogMg$df.residual
# sqrt(MS_e) = estimate of sigma, i.e. the residual standard error
sqrt(deviance(aLogMg) / aLogMg$df.residual)
summary(aLogMg)$sigma

## Or, using a function from mffSM
LSest(aLogMg, L = c(1, log2(10)))

### And now with the confidence interval for E(Y|x) (using "predict")
predict(aLogMg, newdata = data.frame(Mg = 10), interval = "confidence", level = 0.95)

### Manual computation of the confidence interval
crossprod(c(1, log2(10)), coef(aLogMg))   ## What is this?
as.numeric(crossprod(c(1, log2(10)), coef(aLogMg))) +
  c(-1, 1) * se.fit * qt(0.975, df = aLogMg[["df.residual"]])

### We can calculate a sequence of confidence intervals for E(Y|x)
### for different values of x
predict(aLogMg, newdata = data.frame(Mg = 10:20), interval = "confidence", level = 0.95)
predict(aLogMg, newdata = data.frame(Mg = 10:20), interval = "confidence", level = 0.95,
        se.fit = TRUE)

### A denser grid: preparation of a grid of points for the pointwise confidence
### band around the regression line
x.Mg <- seq(min(Dris[, "Mg"]) - 1, max(Dris[, "Mg"]) + 1, length = 101)

### Calculate the confidence intervals
ciEY.aLogMg <- predict(aLogMg, data.frame(Mg = x.Mg), interval = "confidence", level = 0.95)

### Observations together with the estimated regression line
### and a sequence of confidence intervals for E(Y|x) visualized
### as a band --> confidence region (band) AROUND the regression line
plot(yield ~ log2(Mg), pch = 23, col = "orange", bg = "beige",
     xlab = "Log2(Mg)", ylab = "Yield", data = Dris)
lines(log2(x.Mg), ciEY.aLogMg[, "fit"], lwd = 2, col = "red3")
lines(log2(x.Mg), ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(log2(x.Mg), ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)

### Where is the band narrowest?
lines(rep(mean(log2(Dris$Mg)), 2), c(min(Dris$yield), max(Dris$yield)))
lines(c(min(log2(Dris$Mg)), max(log2(Dris$Mg))), rep(mean(Dris$yield), 2))

### We can also plot everything on the original scale of Mg
plot(yield ~ Mg, pch = 23, col = "orange", bg = "beige",
     xlab = "Mg", ylab = "Yield", data = Dris)
lines(x.Mg, ciEY.aLogMg[, "fit"], lwd = 2, col = "red3")
lines(x.Mg, ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(x.Mg, ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)

### What about now?
lines(rep(mean(Dris$Mg), 2), c(min(Dris$yield), max(Dris$yield)))
lines(c(min(Dris$Mg), max(Dris$Mg)), rep(mean(Dris$yield), 2))

### 2. Confidence region (band) FOR the (whole) regression line
###    (confidence bands FOR the regression line will be discussed in detail
###    later in the lecture).
###
### The code towards the band FOR the regression line should for now be taken
### as purely illustrative. At this point it is only important to understand
### the difference between "AROUND" and "FOR".
ciEY2.aLogMg <- predict(aLogMg, data.frame(Mg = x.Mg), se.fit = TRUE)
forEY.aLogMg <- data.frame(
  fit = ciEY2.aLogMg[["fit"]],
  lwr = ciEY2.aLogMg[["fit"]] -
        ciEY2.aLogMg[["se.fit"]] * sqrt(qf(0.95, 2, ciEY2.aLogMg[["df"]]) * 2),
  upr = ciEY2.aLogMg[["fit"]] +
        ciEY2.aLogMg[["se.fit"]] * sqrt(qf(0.95, 2, ciEY2.aLogMg[["df"]]) * 2))
head(forEY.aLogMg)

### The computation of the confidence band is based on the confidence ellipse
### for the regression parameters (beta_0, beta_1)
### (confidenceEllipse is provided, e.g., by package car)
confidenceEllipse(aLogMg)

### x-axis: Log(Mg)
plot(yield ~ log2(Mg), pch = 23, col = "orange", bg = "beige",
     xlab = "Log(Mg)", ylab = "Yield", data = Dris)
lines(log2(x.Mg), ciEY.aLogMg[, "fit"], lwd = 2, col = "darkgreen")
### Band AROUND
lines(log2(x.Mg), ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(log2(x.Mg), ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)
### Band FOR
lines(log2(x.Mg), forEY.aLogMg[, "lwr"], lwd = 2, col = "blue2", lty = 4)
lines(log2(x.Mg), forEY.aLogMg[, "upr"], lwd = 2, col = "blue2", lty = 4)
legend(4, 8.5, legend = c("For", "Around"), col = c("blue2", "red3"),
       lty = c(4, 2), lwd = 2)

### x-axis: Mg
plot(yield ~ Mg, pch = 23, col = "orange", bg = "beige",
     xlab = "Mg", ylab = "Yield", data = Dris)
lines(x.Mg, ciEY.aLogMg[, "fit"], lwd = 2, col = "darkgreen")
### Band AROUND
lines(x.Mg, ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(x.Mg, ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)
### Band FOR
lines(x.Mg, forEY.aLogMg[, "lwr"], lwd = 2, col = "blue2", lty = 4)
lines(x.Mg, forEY.aLogMg[, "upr"], lwd = 2, col = "blue2", lty = 4)
legend(18, 8.5, legend = c("For", "Around"), col = c("blue2", "red3"),
       lty = c(4, 2), lwd = 2)

### 3. Prediction interval (for a given x)
###    (an interval which covers, with probability (1 - alpha),
###    a NEW value of the response obtained for X set to x)
### The expression behind the prediction interval will be derived in one of
### the later lectures. For now, concentrate on its correct interpretation
### and on understanding how it differs from a confidence interval for E(Y|x).

### Calculation in R
predict(aLogMg, newdata = data.frame(Mg = 10), interval = "prediction")
## Compare with the confidence interval for E(Y|x)
predict(aLogMg, newdata = data.frame(Mg = 10), interval = "confidence")

### Manual computation of the prediction interval
s = summary(aLogMg)$sigma
se.fit2 = sqrt(s^2 + se.fit^2)
as.numeric(crossprod(c(1, log2(10)), coef(aLogMg))) +
  c(-1, 1) * se.fit2 * qt(0.975, df = aLogMg[["df.residual"]])

### Prediction band - a sequence of prediction intervals for a dense grid of
### x-values
pred.aLogMg <- predict(aLogMg, data.frame(Mg = x.Mg), interval = "prediction", level = 0.95)
head(pred.aLogMg)

### x-axis: Log(Mg)
plot(yield ~ log2(Mg), pch = 23, col = "orange", bg = "beige",
     xlab = "Log(Mg)", ylab = "Yield", data = Dris)
lines(log2(x.Mg), ciEY.aLogMg[, "fit"], lwd = 2, col = "red3")
### Band AROUND the regression line
lines(log2(x.Mg), ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(log2(x.Mg), ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)
### Prediction band
lines(log2(x.Mg), pred.aLogMg[, "lwr"], lwd = 2, col = "blue2", lty = 4)
lines(log2(x.Mg), pred.aLogMg[, "upr"], lwd = 2, col = "blue2", lty = 4)
legend("topleft", legend = c("Prediction", "Around the regression line"),
       col = c("blue2", "red3"), lty = c(4, 2), lwd = 2)

### x-axis: Mg
plot(yield ~ Mg, pch = 23, col = "orange", bg = "beige",
     xlab = "Mg", ylab = "Yield", data = Dris)
lines(x.Mg, ciEY.aLogMg[, "fit"], lwd = 2, col = "red3")
### Band AROUND the regression line
lines(x.Mg, ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(x.Mg, ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)
### Prediction band
lines(x.Mg, pred.aLogMg[, "lwr"], lwd = 2, col = "blue2", lty = 4)
lines(x.Mg, pred.aLogMg[, "upr"], lwd = 2, col = "blue2", lty = 4)
legend("bottomright", legend = c("Prediction", "Around the regression line"),
       col = c("blue2", "red3"), lty = c(4, 2), lwd = 2)

###
### Polynomials and splines as regressors
###

### EXAMPLE
### A data frame giving a series of measurements of head acceleration
### in a simulated motorcycle accident, used to test crash helmets.
data(Motorcycle, package = "mffSM")
help(Motorcycle)

head(Motorcycle)
dim(Motorcycle)
summary(Motorcycle)

### We are interested in how the head acceleration [haccel] depends on the
### time after the accident in milliseconds [time].
BTY <- "o"
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "orange", col = "red3",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
fit0 = lm(haccel ~ time, data = Motorcycle)
abline(fit0)

### The diagnostic plots now clearly show that model fit0
### is hardly appropriate
plotLM(fit0)

### Quite a flexible shape is observed in the plot =>
### we use POLYNOMIALS for fitting the model
### (still staying within the linear regression framework)

### Fitting polynomials of various degrees
### (raw polynomials, i.e. the "usual" ones)
fit6 <- lm(haccel ~ poly(time, 6, raw = TRUE), data = Motorcycle)
fit12 <- lm(haccel ~ poly(time, 12, raw = TRUE), data = Motorcycle)
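### A quick submodel comparison (a sketch added for illustration): with raw
### polynomials, the degree-6 model is nested within the degree-12 model, so
### the F-test of the six additional terms can be obtained with anova().
anova(fit6, fit12)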
### try this with 70 instead of 60
xgrid <- seq(0, 60, length = 500)

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "orange", col = "red3",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(xgrid, predict(fit6, newdata = data.frame(time = xgrid)), lwd = 2, col = "blue")
lines(xgrid, predict(fit12, newdata = data.frame(time = xgrid)), lwd = 2, col = "darkgreen")
legend(37, -70, lty = rep(1, 2), col = c("blue", "darkgreen"),
       legend = paste("p=", c(6, 12), sep = ""), lwd = c(2, 2), cex = 1.2)

### Using SPLINES

### Choice of knots
knots <- c(0, 11, 12, 13, 20, 30, 32, 34, 40, 50, 60)
(inner <- knots[-c(1, length(knots))])   # inner knots
(bound <- knots[c(1, length(knots))])    # boundary knots

### B-splines of degree 3
DEGREE <- 3   # piecewise cubic
Bgrid <- bs(xgrid, knots = inner, Boundary.knots = bound, degree = DEGREE, intercept = TRUE)
dim(Bgrid)    # 13-dimensional linear space of piecewise polynomials

COL <- rainbow_hcl(ncol(Bgrid), c = 80)   # rainbow_hcl() comes from package colorspace
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(xgrid, Bgrid[, 1], type = "l", col = COL[1], lwd = 2, xlab = "x", ylab = "B(x)",
     xlim = range(knots), ylim = range(Bgrid, na.rm = TRUE))
for (j in 2:ncol(Bgrid)) lines(xgrid, Bgrid[, j], col = COL[j], lwd = 2)
points(knots, rep(0, length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)

### An animation
for (j in 1:ncol(Bgrid)) {
  plot(xgrid, Bgrid[, j], col = COL[j], ylim = c(0, 1), lwd = 2, type = "l",
       xlab = "x", ylab = "B(x)")
  points(knots, rep(0, length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)
  Sys.sleep(.3)
}

### Fitting the model with splines
### http://www.chebfun.org/examples/approx/BSplineConv.html
m1 <- lm(haccel ~ bs(time, knots = inner, Boundary.knots = bound,
                     degree = DEGREE, intercept = TRUE) - 1, data = Motorcycle)
names(m1$coefficients) <- paste("B", 1:13)
summary(m1)

### Note that, as the intercept is not formally included in m1 (because of
### the -1 in the model formula), neither the 'Multiple R-squared' nor the
### 'F-statistic' reported in the output is a meaningful number.

# R-squared from the model
summary(m1)$r.squared
# manual computation: two ways
1 - deviance(m1) / sum(Motorcycle$haccel^2)
sum(fitted(m1)^2) / sum(Motorcycle$haccel^2)
# why are all these numbers the same?

# interpretation of the F-test from summary(m1) as a test of a submodel
summary(m1)$fstatistic
# the usual F-test (against the intercept-only model) is the following
m2 <- lm(haccel ~ 1, data = Motorcycle)
anova(m2, m1)
# in a model fitted without intercept, summary() reports this one instead
(m0 <- lm(haccel ~ -1, data = Motorcycle))
anova(m0, m1)

### Nevertheless, as the intercept is in the linear span of the columns of X,
### it makes sense to calculate the (usual) multiple R-squared.
### One can do it 'manually' as
var(fitted(m1)) / var(Motorcycle$haccel)
cor(Motorcycle$haccel, fitted(m1))^2
1 - deviance(m1) / deviance(m2)

### Alternatively, one can use B-splines (the basis without intercept)
### together with an intercept in the model to get meaningful numbers
### in the output
m0 <- lm(haccel ~ bs(time, knots = inner, Boundary.knots = bound,
                     degree = DEGREE, intercept = FALSE), data = Motorcycle)
names(m0$coefficients) <- c("(Intercept)", paste("B", 1:12))
summary(m0)
anova(m2, m0)

### The fitted values given by the models m0 and m1 are the same
summary(fitted(m0) - fitted(m1))

### Predicted values
pm1 <- predict(m1, newdata = data.frame(time = xgrid))
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(xgrid, pm1, col = "red", lwd = 3)
points(knots, rep(min(Motorcycle[, "haccel"]), length(knots)),
       pch = 21, bg = "blue", col = "skyblue", cex = 1.5)
lines(xgrid, predict(fit12, newdata = data.frame(time = xgrid)), lwd = 2, col = "darkgreen")
legend("topright", col = c("red", "darkgreen"), legend = c("splines", "p=12"),
       lwd = c(3, 2), cex = 1.2)

### Add a prediction interval
ci.pred.spline <- predict(m1, newdata = data.frame(time = xgrid),
                          interval = "prediction", level = 0.95)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
     xlab = "Time [ms]", ylab = "Head acceleration [g]", ylim = range(ci.pred.spline))
lines(xgrid, pm1, col = "blue", lwd = 2)
lines(xgrid, ci.pred.spline[, 2], col = "blue", lwd = 2, lty = "dashed")
lines(xgrid, ci.pred.spline[, 3], col = "blue", lwd = 2, lty = "dashed")

### Do you expect any problem with this prediction interval?
### Think about the homoscedasticity assumption.
### Is this problem obvious from any of the following figures?
plotLM(m1)

### And what about the following figures?

### Residuals vs. covariate plot
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(residuals(m1) ~ Motorcycle[, "time"], pch = 21, col = "blue4", bg = "skyblue",
     xlab = "Time", ylab = "Residuals")
lines(lowess(residuals(m1) ~ Motorcycle[, "time"]), col = "red")

### Scale vs. covariate plot
plot(abs(rstandard(m1)) ~ Motorcycle[, "time"], pch = 21, col = "blue4", bg = "skyblue",
     xlab = "Time", ylab = "|standardized residuals|")
lines(lowess(abs(rstandard(m1)) ~ Motorcycle[, "time"]), col = "red")

### -------------------------------------------------------------------------###
### Block 2: INDIVIDUAL WORK                                                 ###
### -------------------------------------------------------------------------###

### plotLM essentially just calls plot.lm and displays all plots in a single figure
plotLM

### QQ-plot of the standardized residuals in model aLogMg
plot(aLogMg, which = 2, ask = FALSE)

### For comparison, the QQ plot of the raw residuals
qqnorm(residuals(aLogMg))
qqline(residuals(aLogMg))
### Is such a diagnostic plot useful?

### Confidence intervals for E(Y|x) again
plot(yield ~ log2(Mg), pch = 23, col = "orange", bg = "beige",
     xlab = "Log2(Mg)", ylab = "Yield", data = Dris)
lines(log2(x.Mg), ciEY.aLogMg[, "fit"], lwd = 2, col = "red3")
lines(log2(x.Mg), ciEY.aLogMg[, "lwr"], lwd = 2, col = "red3", lty = 2)
lines(log2(x.Mg), ciEY.aLogMg[, "upr"], lwd = 2, col = "red3", lty = 2)

### At which point x is the confidence interval narrowest?
f = Vectorize(function(x) sqrt(t(c(1, x)) %*% vcov(aLogMg) %*% c(1, x)))
t = seq(3, 4, length = 101)
plot(t, f(t), type = "l", main = "Estimated std. dev. of the predicted E(Y|x)",
     ylab = "std. dev.", xlab = "log2(Mg)", lwd = 2, col = 2)
abline(v = mean(log2(Dris$Mg)), lty = 2)
legend("bottomleft", "average lMg", lty = 2)
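### A quick numerical check (a sketch): provided the grid t covers the sample
### mean of log2(Mg), the grid point minimizing the estimated standard error
### should (up to the grid resolution) coincide with that mean.
t[which.min(f(t))]
mean(log2(Dris$Mg))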
### Verify by computation that the confidence interval is narrowest at the
### average of the x-values. Is this true only in simple regression models,
### i.e. models of the form E(Y|x) = beta_0 + beta_1*x?
### Can we say something analogous for more general regression models?

### Fitting orthogonal polynomials to the Motorcycle data

### Fitting polynomials of various degrees
fit6 <- lm(haccel ~ poly(time, 6, raw = TRUE), data = Motorcycle)    ### raw polynomials, i.e. the "usual" ones
fit6.orto <- lm(haccel ~ poly(time, 6), data = Motorcycle)           ### orthogonal polynomials
fit12 <- lm(haccel ~ poly(time, 12, raw = TRUE), data = Motorcycle)  # raw
fit12.orto <- lm(haccel ~ poly(time, 12), data = Motorcycle)         # orthogonal

xgrid <- seq(0, 60, length = 500)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "orange", col = "red3",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(xgrid, predict(fit6, newdata = data.frame(time = xgrid)), lwd = 2, col = "blue")
# lines(xgrid, predict(fit6.orto, newdata = data.frame(time = xgrid)), lwd = 3, col = 2, lty = "dotted")
lines(xgrid, predict(fit12, newdata = data.frame(time = xgrid)), lwd = 2, col = "darkgreen")
lines(xgrid, predict(fit12.orto, newdata = data.frame(time = xgrid)), lwd = 2, col = 2, lty = "dotted")
legend(37, -70, lty = rep(1, 2), col = c("blue", "darkgreen"),
       legend = paste("p=", c(6, 12), sep = ""), lwd = c(2, 2), cex = 1.2)

### The fitted lines using orthogonal polynomials appear to be the same as
### those for the usual raw polynomials. Is this always true? What is
### different in fitting orthogonal polynomials, as opposed to the raw ones?
### Which model do you prefer, and why?

### QUESTION:
### What is the form of the model matrix used for the two fits in the
### previous example?
fit6x = lm(haccel ~ poly(time, 6, raw = TRUE), data = Motorcycle, x = TRUE)
unname(head(fit6x$x))

### the first row of the matrix X
unname(fit6x$x[1, ])
Motorcycle$time[1]^c(0:6)
poly(Motorcycle$time, 6, raw = TRUE)[1, ]

### Difference between raw and orthogonal polynomials

### Difference in fitted values
summary(predict(fit12.orto, newdata = data.frame(time = xgrid)) -
        predict(fit12, newdata = data.frame(time = xgrid)))

#### Difference in regression coefficients
coef12 = cbind(fit12$coef, fit12.orto$coef)
rownames(coef12) = paste("degree ", 0:12)
colnames(coef12) = c("Raw", "Orthogonal")
signif(coef12, digits = 3)

### -------------------------------------------------------------------------###
### Block 3: ADDITIONAL MATERIAL                                             ###
### -------------------------------------------------------------------------###

### plot.lm also produces additional diagnostic plots
### These will be discussed (much) later in the course
plot(aLogMg, which = 4)
plot(aLogMg, which = 5)
plot(aLogMg, which = 6)

### Neither the residuals nor the standardized residuals are, in general,
### independent or identically distributed. This is a consequence of the
### matrix M not being diagonal. For the purpose of diagnostics this is
### usually ignored, but it should be kept in mind.
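### A quick numerical illustration (a sketch): the diagonal elements
### m_{i,i} = 1 - h_{i,i} of M, and hence the conditional variances
### var(U_i | X) = sigma^2 * m_{i,i} of the raw residuals, differ across
### observations; hatvalues() returns the diagonal elements h_{i,i} of H.
summary(1 - hatvalues(aLogMg))
summary(summary(aLogMg)$sigma^2 * (1 - hatvalues(aLogMg)))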
### H matrix: projection onto the columns of X
### M matrix: projection onto the orthogonal complement of the columns of X
H = qr.Q(aLogMg[["qr"]]) %*% t(qr.Q(aLogMg[["qr"]]))
M = diag(nrow(H)) - H

### hat(Y) = H * Y
### U      = M * Y
### Y      = hat(Y) + U
all.equal(unname(fitted(aLogMg)), c(H %*% Dris$yield))
all.equal(unname(residuals(aLogMg)), c(M %*% Dris$yield))
all.equal(unname(fitted(aLogMg) + residuals(aLogMg)), Dris$yield)

# H and M are symmetric
all.equal(H, t(H))
all.equal(M, t(M))

# H and M are idempotent
all.equal(H %*% H, H)
all.equal(M %*% M, M)

# but they are not diagonal
H[1:5, 1:5]
M[1:5, 1:5]

# visualisation of the two matrices
image(H)
image(M)

### Motorcycle data
### Try different choices of knots and different degrees of smoothness of the
### splines
knots <- c(0, 10, 14.5, 21, 31, 40, 50, 60)
inner <- knots[-c(1, length(knots))]   # inner knots
bound <- knots[c(1, length(knots))]    # boundary knots
DEGREE <- 1   # 1 for piecewise linear functions

### B-splines
Bgrid2 <- bs(xgrid, knots = inner, Boundary.knots = bound, degree = DEGREE, intercept = TRUE)

COL <- rainbow_hcl(ncol(Bgrid2), c = 80)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(xgrid, Bgrid2[, 1], type = "l", col = COL[1], lwd = 2, xlab = "x", ylab = "B(x)",
     xlim = range(knots), ylim = range(Bgrid2, na.rm = TRUE))
for (j in 2:ncol(Bgrid2)) lines(xgrid, Bgrid2[, j], col = COL[j], lwd = 2)
points(knots, rep(0, length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)

### Fitting the model with splines
B2 <- bs(Motorcycle[, "time"], knots = inner, Boundary.knots = bound,
         degree = DEGREE, intercept = TRUE)
m2 <- lm(haccel ~ B2 - 1, data = Motorcycle)
summary(m2)

### Predicted values
pm2 <- as.numeric(Bgrid2 %*% coef(m2))
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
     xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(xgrid, pm2, col = "red3", lwd = 2)
points(knots, rep(min(Motorcycle[, "haccel"]), length(knots)),
       pch = 21, bg = "blue", col = "skyblue", cex = 1.5)
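### A small check (a sketch): because the knots and boundary knots are fixed
### explicitly, the same degree-1 spline model can also be fitted with bs()
### placed directly inside the model formula (as was done for m1 above), in
### which case predict() rebuilds the basis for new data itself. The model
### name m2.formula is introduced here only for this illustration; its fitted
### curve should agree with pm2 computed above from Bgrid2 %*% coef(m2).
m2.formula <- lm(haccel ~ bs(time, knots = inner, Boundary.knots = bound,
                             degree = DEGREE, intercept = TRUE) - 1,
                 data = Motorcycle)
summary(as.numeric(predict(m2.formula, newdata = data.frame(time = xgrid))) - pm2)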