### -------------------------------------------------------------------------###
### NMSA407 - Linear Regression                                              ###
### Winter Term 2020/2021 | Online                                           ###
###                                                                          ###
### EXERCISE #12 Multiple Comparisons and Sandwich Estimator                 ###
### -------------------------------------------------------------------------###

### -------------------------------------------------------------------------###
###  Block 1: TEACHING MATERIAL                                              ###
### -------------------------------------------------------------------------### 

library("mffSM")
library("lmtest")
library("multcomp")


### working data file
load(url("http://msekce.karlin.mff.cuni.cz/~maciak/NMSA407/mana.RData"))


### B R I E F   D A T A   D E S C R I P T I O N
### Data comes from the project MANA (Maturita na necisto) in launched in 
### the Czech Republic in 2005. This project was a part  of a bigger project 
### whose goal was to prepare a new form of graduation exam ("statni maturita").  
### This dataset contains the results in English language at grammar schools. 
###
### VARIABLES
###    region:  Region of the Czech Republic
###                 1 = Praha
###                 2 = Stredocesky
###                 3 = Jihocesky
###                 4 = Plzensky
###                 5 = Karlovarsky
###                 6 = Ustecky
###                 7 = Liberecky
###                 8 = Kralovehradecky
###                 9 = Pardubicky
###                10 = Vysocina
###                11 = Jihomoravsky
###                12 = Olomoucky
###                13 = Zlinsky
###                14 = Moravskoslezsky
###
###    specialization: The specialization of the intended university
###                 1 = education
###                 2 = social science
###                 3 = languages
###                 4 = law
###                 5 = math-physics
###                 6 = engineering
###                 7 = science
###                 8 = medicine
###                 9 = economics
###                10 = agriculture
###                11 = art
###
###    gender:     Gender of the student,
###                 0 = female, 1 = male
###
###    graduation:  Is the student going to graduate in English?
###                 0 = no, 1 = yes
###
###    entry.exam: Is an exam on English knowledge part of an entry exam at the
###                   university (where the student is going to apply)?
###                 0 = no, 1 = yes
###
###    score:         The score in English language in MANA test.
###
###    avg.mark:  Average mark (grade) from all subjects at the last report card
###                   (vysvedceni).
###
###    mark.eng:      Average mark (grade) from the English language at the last
###                   5 report cards (vysvedceni).

head(mana)
dim(mana)
summary(mana)

with(mana, table(region))
with(mana, table(specialization))
with(mana, table(gender))
with(mana, table(graduation))
with(mana, table(entry.exam))

### Instead of the numbers appropriate codes are used 
### A - Capital City Prague (Praha)
### B - South Moravia - Jihomoravsky kraj (Brno) 
### C - South Bohemia - Jihocesky kraj (Ceske Budejovice) 
### E - Pardubice - Pardubicky kraj 
### H - Hradec Kralove - Kralovehradecky kraj 
### J - Highland (Vysocina) Region - Kraj Vysocina (Jihlava) 
### K - Karlovy Vary - Karlovarsky kraj 
### L - Liberec - Liberecky kraj 
### M - Olomouc - Olomoucky kraj 
### P - Plzen - Plzensky kraj 
### S - Central Bohemia (Prague) - Stredocesky kraj 
### T - Moravia-Silesia - Moravskoslezsky kraj (Ostrava) 
### U - Usti nad Labem - Ustecky kraj 
### Z - Zlin - Zlinsky kraj 

mana <- transform(mana, fregion = factor(region, levels = 1:14, 
                         labels = c("A", "S", "C", "P", "K", "U", "L", "H", "E",
                                    "J", "B", "M", "Z", "T")),
                        fspec   = factor(specialization, levels = 1:11, 
                                         labels = c("Educ", "Social", "Lang", 
                                                    "Law", "Mat-Phys", "Engin", 
                                                    "Science", "Medic", 
                                                    "Econom", "Agric", "Art")),
                        fgender = factor(gender, levels = 0:1, 
                                        labels = c("Female", "Male")),
                        fgrad   = factor(graduation, levels = 0:1,
                                         labels = c("No", "Yes")),
                        fentry  = factor(entry.exam, levels = 0:1, 
                                         labels = c("No", "Yes")))

summary(mana)
with(mana, table(fregion))
with(mana, table(fspec))

### We are interested in possible differences in grading among Czech regions. 
### The quality of education is (at least for our purposes) expexted to be 
### expressed by an average mark on the last certificate [avg.mark]. 
### The prospective specializations are taken into account.

### Exploratory analysis
lSPEC <- levels(mana[, "fspec"])
colSPEC <- diverge_hcl(length(lSPEC))
names(colSPEC) <- lSPEC

lREG <- levels(mana[, "fregion"])
colREG <- diverge_hcl(length(lREG))
names(colREG) <- lREG

with(mana,
   interaction.plot(fregion, fspec, avg.mark, col = colSPEC, type = "b",
                   lwd = 2, lty = 1, xlab = "Region",
                   trace.label = "Specialization", ylab = "Mean of avg.mark"))

with(mana,
     interaction.plot(fspec, fregion, avg.mark, col = colREG, type = "b",
                      lwd = 2, lty = 1, xlab = "Specialization",
                      trace.label = "Region", ylab = "Mean of avg.mark"))

### Not really insightful with so many groups... But still, do you think that 
### an additive model will be a "maybe wrong but useful" model?

### Absolute counts for the given categories
addmargins(with(mana, table(fregion, fspec)))

### Note, there are some zeros in the Agric column. Are there any consequences 
### for the two-way ANOVA linear model with interactions?

mInter <- lm(avg.mark ~ fregion*fspec, data = mana)
summary(mInter)

### Do you have an idea why those NA's occur in the output?
### Is this a usefull model or it is worth of going for some simplification?

Anova(mInter)
Anova(mInter, type = "II")
Anova(mInter, type = "III")

with(mana, tapply(avg.mark,list(fregion, fspec),mean)) # cellwise averages

### Interaction just not significant, but with our (quite huge) sample size,
### can we assume that the interaction terms are practically unimportant?

### Additive model
mAddit <- lm(avg.mark ~ fregion + fspec, data = mana)
summary(mAddit)

### Why there are no NA's observations now?

### Basic diagnostics
plotLM(mAddit)

### Breusch-Pagan test for homoscedasticity

ncvTest(mAddit, var.formula = ~fitted(mAddit))     
bptest(mAddit, varformula = ~fitted(mAddit))   # Koenker's studentized version 
bptest(mAddit)

### Shapiro-Wilk test for normality of errors based on raw residuals
shapiro.test(residuals(mAddit))          

## The implemented algorithm does not allow for more than 5000 observations.
## Are we actually really interested in testing normality?
                 
###   Another commonly used test of normality is D'Agostino test                                   
library(fBasics);
dagoTest(residuals(mAddit))

### for more information see 
### https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Normality_Tests.pdf

### Tests for partial effects of region and specialization
Anova(mAddit, type = "II")
Anova(mAddit, type = "III")


### 1. Multiple comparisons: TukeyHSD

aAddit <- aov(avg.mark ~ fregion + fspec, data = mana)
TukeyHSD(aAddit)

all.equal(coef(aAddit),coef(mAddit))

### Which regions are different (given specialization)?
(treg <- TukeyHSD(aAddit)[["fregion"]])
(treg <- treg[order(treg[, "p adj"]),])
treg[treg[, "p adj"] < 0.05,]

### Compare the results with 
treg2 <- TukeyHSD(aov(avg.mark ~ fregion, data = mana))[["fregion"]]
treg2 <- treg2[order(treg2[, "p adj"]),]
treg2[treg2[, "p adj"] < 0.05,]

sort(reg.means <- tapply(mana$avg.mark, mana$fregion, mean));
(diff.reg.means <- outer(reg.means, reg.means, "-"));

diff.reg.means["E","C"]
treg[treg[, "p adj"] < 0.05,][1,1]
treg2[treg2[, "p adj"] < 0.05,][1,1]


### Tukey HSD procedure in a one-way ANOVA model
G = nrow(diff.reg.means)
n = nrow(mana)
nG = table(mana$fregion)
s2 = sum(aov(avg.mark ~ fregion, data = mana)$res^2)/(n-G)

diff.reg.means["E", "C"] + c(-1,1) * qtukey(0.95,G,n-G) *
                                     sqrt(1/2*(1/nG["E"]+1/nG["C"])*s2)
treg2["E-C",2:3]
  

### Tukey HSD procedure in a two-way ANOVA model
H = length(levels(mana$fspec))
s2 = sum(aAddit$res^2)/(n-G-H+1)

diff.reg.means["E", "C"] + c(-1,1) * qtukey(0.95,G,n-G-H+1) * 
                                     sqrt(1/2*(1/nG["E"]+1/nG["C"])*s2)
treg["E-C",2:3]
  

### 2. Multiple comparisons: Hothorn-Bretz-Westfall MCP

### Now, we provide the corresponding estimates of the partial effects
### (differences in means of avg.mark between regions given specialization).

coef(mAddit)

### Contrast matrix for the regions 
CX <- contr.treatment(length(lREG))
rownames(CX) <- lREG
colnames(CX) <- names(coef(mAddit))[2:length(lREG)]
print(CX)

### Contrast matrix for the specializations
CZ <- contr.treatment(length(lSPEC))
rownames(CZ) <- lSPEC
colnames(CZ) <- names(coef(mAddit))[(length(lREG) + 1):(length(coef(mAddit)))]
print(CZ)


### Estimating the group means under the assumption of the additive model
LmuAddit <- cbind(1, kronecker(rep(1, length(lSPEC)), CX), 
                     kronecker(CZ, rep(1, length(lREG))))

colnames(LmuAddit) <- names(coef(mAddit))
rownames(LmuAddit) <- outer(lREG, lSPEC, paste, sep = ":")
print(LmuAddit[1:3,])
print(LmuAddit[15:17,])
LSest(mAddit, L = LmuAddit)

### The estimated group means reported in a table
muAddit <- matrix(LSest(mAddit, L = LmuAddit)[["Estimate"]], 
                  nrow = length(lREG), ncol = length(lSPEC))

rownames(muAddit) <- lREG
colnames(muAddit) <- lSPEC
print(muAddit)


### This is not the same as the cell-wise averages. Why?
with(mana, tapply(avg.mark,list(fregion, fspec),mean))


### Observed group means in a plot
with(mana, interaction.plot(fspec, fregion, avg.mark, col = colREG, type = "b",
                      lwd = 2, lty = 1, xlab = "Specialization",
                      trace.label = "Region", ylab = "Mean of avg.mark"))

### Model based group means in a plot
dfrAddit <- data.frame(avg.mark = as.numeric(muAddit),
                                  fregion = rep(lREG, length(lSPEC)),
                                  fspec = rep(lSPEC, each = length(lREG)))
print(dfrAddit)

dfrAddit$fspec = factor(dfrAddit$fspec, levels=levels(mana$fspec))

with(dfrAddit, interaction.plot(fspec, fregion, avg.mark, col = colREG, 
                      type = "b", lwd = 2, lty = 1, xlab = "Specialization", 
                      pch=levels(fregion), cex=0.5,  trace.label = "Region", 
                      ylab = "Model based mean avg.mark"))

### What is the link between the observed and the model based group means?
### What are we trying to do when estimating pairwise differences with respect 
### to region (partial effects of region given specialization)?


### The "L" matrix to calculate all pairwise differences with respect to regions
Lcore <- matrix(nrow = 0, ncol = ncol(CX)) ## the core part
colnames(Lcore) <- colnames(CX)
print(Lcore)                               ## matrix with 0 rows
rname <- character(0)
for (g1 in 1:(length(lREG) - 1)){
  for (g2 in (g1 + 1):length(lREG)){
    Lcore <- rbind(Lcore, CX[g2,] - CX[g1,])
    rname <- c(rname, paste(lREG[g2], "-", lREG[g1]))
  }    
}
rownames(Lcore) <- rname

dim(Lcore)
print(Lcore)

L <- cbind(0, Lcore, matrix(0, nrow = nrow(Lcore),  ## the whole matrix
                               ncol = length(lSPEC) - 1))
colnames(L) <- names(coef(mAddit))
rownames(L) <- rownames(Lcore)

dim(L)
print(L)


### Unadjusted estimation of the partial effects | based on the additive model
temp <- LSest(mAddit, L = L)
class(temp) <- c("data.frame")
temp <- round(temp,4)
temp <- temp[order(temp[["P value"]]),];
temp[temp[["P value"]] <= 0.05,]

temp["T - Z",]  ## for example, the difference T-Z not significant


### Bretz-Hothorn-Westfall procedure - all functions from package multcomp
mcp <- glht(mAddit, linfct = L, rhs = 0)
print(mcp)


### The above can be done more easily with the help of the built-in option 
mcp <- glht(mAddit, linfct = mcp(fregion = "Tukey"), rhs = 0)
print(mcp)


### Note, that the output is the same. The word "Tukey" in the function call 
### is just a keyword to ask for all pairwise differences with respect to the 
### factor covariate fregion.

### With respect to the underlying theory, the Tukey procedure is not involved 
### in the calculations above. 


### Sandwich estimator to adjust also for possible heteroscedasticity 
library(sandwich)


### Estimate of the variance matrix of the regression coefficients 
(VHC <- vcovHC(mAddit, type = "HC3"));  

### The option 'type = "HC3"' uses residuals(mFin)^2/(1 - hatvalues(mFin))^2
### on the diagonal of the "meat" matrix of the sandwich.             


### Manual calculation
X     <- model.matrix(mAddit);                                               
Bread <- solve(t(X) %*% X) %*% t(X)
Meat  <- diag(residuals(mAddit)^2 / (1 - hatvalues(mAddit))^2)
Bread %*% Meat %*% t(Bread)

all.equal(Bread %*% Meat %*% t(Bread), VHC)


### Summary table using the standard error estimator under the homoscedasticity 
summary(mAddit)

### Summary table using the sandwich estimator to adjust for heteroscedasticity 
coeftest(mAddit, vcov = VHC)


### Hothorn-Bretz-Westfall MCP with the sandwich estimator
mcp_sandwich <- glht(mAddit, linfct = mcp(fregion = "Tukey"), 
                             rhs = 0, vcov=sandwich)


### compare the partial effects under the homoscedasticity/heteroscedasticity
print(mcp)
print(mcp_sandwich)


### P-values adjusted for MCP will now be calculated.
### Multivariate integrals now must be computed by a Monte Carlo method.
### This may take some time... 

(load(url("http://msekce.karlin.mff.cuni.cz/~maciak/NMSA407/nmsa407-mult-comparison.RData"))) ## path to this file might need some adjustment 

### set.seed(20010911) ## (cca 15 sec. @ 3.20GH, 2 x 8 GB RAM)
### system.time(smcp <- summary(mcp)) 

### set.seed(20010911) ## (cca 15 sec.  @ 3.20GH, 2 x 8 GB RAM)
### system.time(smcp_sandwich <- summary(mcp_sandwich)) 

smcp # == summary(mcp)
smcp_sandwich # == summary(mcp_sandwich)

### Manual computation of std. errors in summary(mcp)
X = model.matrix(mAddit)
V = L %*% solve(t(X) %*% X) %*% t(L)
D = diag(V)
s = summary(mAddit)$sigma
(std1 = s*sqrt(D))
(std2 = smcp$test$sigma)
all.equal(std1, std2)


### Simultaneous confidence intervals will now be calculated.
### For this, only one quantile of the max-abs-t distribution
### must be calculated (again by Monte Carlo method).
### Again, some time (but less) is needed.

### set.seed(20010911) ## (cca 5 sec. @ 3.20GH, 2 x 8 GB RAM)
### system.time(cimcp <- confint(mcp, level = 0.95)) 

### set.seed(20010911) ## (cca 5 sec. @ 3.20GH, 2 x 8 GB RAM)
### system.time(cimcp_sandwich <- confint(mcp_sandwich, level = 0.95)) 

### print(cimcp)
### print(cimcp_sandwich)


### Put both intervals and P-values into one data.frame
MCP <- data.frame(Estimate = cimcp[["confint"]][, "Estimate"],
                  Lower    = cimcp[["confint"]][, "lwr"],
                  Upper    = cimcp[["confint"]][, "upr"],
                  Pvalue   = smcp[["test"]][["pvalues"]])
MCP <- MCP[order(MCP[, "Pvalue"]),]
round(MCP[MCP[, "Pvalue"] < 0.05,], digits=4)

### For comparison when the sandwich estimator is used 
MCP_sandwich <- data.frame(Estimate = cimcp_sandwich[["confint"]][, "Estimate"],
                  Lower    = cimcp_sandwich[["confint"]][, "lwr"],
                  Upper    = cimcp_sandwich[["confint"]][, "upr"],
                  Pvalue   = smcp_sandwich[["test"]][["pvalues"]])
MCP_sandwich <- MCP_sandwich[order(MCP_sandwich[, "Pvalue"]),]
round(MCP_sandwich[MCP_sandwich[, "Pvalue"] < 0.05,], digits=4)


### Compare with model based means of regions given specialisation == Educ
round(sort(muAddit[,1]), 3)

### For comparison Tukey HSD 
round(treg[treg[, "p adj"] < 0.05,], 4)


### -------------------------------------------------------------------------###
###  Block 2: INDIVIDUAL WORK                                                ###
### -------------------------------------------------------------------------###


### How would the calculations of the TukeyHSD and Hothorn-Bretz-Westfall MCP 
### change if some different parametrization of the factor covariates would be 
### used? For instance, consider the contrast sum parametrization. 

mAdditSum <- lm(avg.mark ~ fregion + fspec, data = mana, 
                   contrasts = list(fregion = "contr.sum", fspec = "contr.sum"))

summary(mAdditSum)


### Interpret the estimated parameters - for instance, for intercept: 
newdata = with(mana,data.frame(expand.grid(levels(fregion),levels(fspec))))
colnames(newdata) = c("fregion", "fspec")
mean(predict(mAdditSum,newdata=newdata))
mAdditSum$coef[1]


### Slightly more complicated model, now with [score] as the response
m11Sum <- lm(score ~ fgender + avg.mark + fspec*mark.eng, data = mana, 
                               contrasts = list(fspec = "contr.sum"))

summary(m11Sum)
drop1(m11Sum, test = "F")

### Specialization is an effect modifier of the relationship between [score] 
### and [mark.eng]. Which specializations differ with respect to the size of the 
### effect of [mark.eng] on [score]?

### => Pairwise comparisons of slopes pertaining to the [mark.eng] regressor 
### for different specializations.

### THe underlying ontrast matrix to parameterize categorical covariate "fspec"
CZ <- contr.sum(lSPEC)
rownames(CZ) <- lSPEC
colnames(CZ) <- names(coef(m11Sum))[4:13]
print(CZ)

### "L" matrix to calculate [mark.eng] slopes for different specializations
Lslope <- cbind(matrix(0, nrow = nrow(CZ), ncol = 13), 1, CZ)
rownames(Lslope) <- lSPEC
colnames(Lslope) <- names(coef(m11Sum))
print(Lslope)

LSest(m11Sum, L = Lslope)
(slps <- LSest(m11Sum, L = Lslope)[["Estimate"]])

### Is there any specialization for which there is no significant relationship
### between [mark.eng] (English at school) and [score] (English at MANA test)
### given gender and [avg.mark]?

mean(slps)
coef(m11Sum)["mark.eng"]      ## the same as mean(slps)

### Core part of the "L" matrix to calculate all pairwise differences between 
### the slope parameter estimates 
Lcore2 <- matrix(nrow = 0, ncol = ncol(CZ))
colnames(Lcore2) <- colnames(CZ)
print(Lcore2)                               ## matrix with 0 rows
rname <- character(0)
for (h1 in 1:(length(lSPEC) - 1)){
  for (h2 in (h1 + 1):length(lSPEC)){
    Lcore2 <- rbind(Lcore2, CZ[h2,] - CZ[h1,])
    rname <- c(rname, paste(lSPEC[h2], "-", lSPEC[h1]))
  }    
}
rownames(Lcore2) <- rname
print(Lcore2)                             

### Complete "L" matrix to calculate all pairwise differences in slopes
### Can you explain why is the "L" matrix constructed in this way?
coef(m11Sum)

LslopeDiff <- cbind(matrix(0, nrow = nrow(Lcore2), ncol = 14), Lcore2)
colnames(LslopeDiff) <- names(coef(m11Sum))
rownames(LslopeDiff) <- rownames(Lcore2)

print(LslopeDiff)


### Unadjusted inference
temp <- LSest(m11Sum, LslopeDiff);
class(temp) <- c("data.frame")
temp <- round(temp,4)
temp <- temp[order(temp[["P value"]]),];
temp[temp[["P value"]] <= 0.05,]

### Bretz-Hothorn-Westfall MCP
mcpSlp <- glht(m11Sum, linfct = LslopeDiff, rhs = 0)
mcpSlp_sandwich <- glht(m11Sum, linfct = LslopeDiff, rhs = 0, vcov=sandwich)

### With the manual calculations
### set.seed(20010911)
### smcpSlp <- summary(mcpSlp)                              ## 7 sec.
### cimcpSlp <- confint(mcpSlp, level = 0.95)               ## 4 sec.

### set.seed(20010911)
### smcpSlp_sandwich <- summary(mcpSlp_sandwich)                   ## 7 sec.
### cimcpSlp_sandwich <- confint(mcpSlp_sandwich, level = 0.95)    ## 4 sec.


print(smcpSlp, 4)
print(smcpSlp_sandwich, 4)

print(cimcpSlp, 2)
print(cimcpSlp_sandwich, 2)

### When MCP tis aken into account, are there any two specializations
### with significantly different [mark.eng] slopes?
### Which specializations differ with respect to the mean [score]
### when [mark.eng] is equal to 2 (median of [mark.eng]) while comparing 
### students of the same gender and the same value of [avg.mark].
###
### => Pairwise comparisons of specializations for a particular value
###    of [mark.eng] adjusted for a possible effect of gender and [avg.mark].

median(mana[, "mark.eng"])

### Complete "L" matrix to calculate all requested pairwise differences from the
### fitted linear model.

coef(m11Sum)
mmark.eng <- 2

LspecDiff <- cbind(0, 0, 0, Lcore2, 0, mmark.eng * Lcore2)
colnames(LspecDiff) <- names(coef(m11Sum))
rownames(LspecDiff) <- rownames(Lcore2)

print(LspecDiff)
print(LspecDiff[1:5,])


### Unadjusted inference
temp <- LSest(m11Sum, LspecDiff)
class(temp) <- c("data.frame")
temp <- round(temp,4)
temp <- temp[order(temp[["P value"]]),];
temp[temp[["P value"]] <= 0.05,]


### Bretz-Hothorn-Westfall MCP
mcpSpec <- glht(m11Sum, linfct = LspecDiff, rhs = 0)
mcpSpec_sandwich <- glht(m11Sum, linfct = LspecDiff, rhs = 0, vcov=sandwich)

set.seed(20010911)
smcpSpec <- summary(mcpSpec)  
cimcpSpec <- confint(mcpSpec, level = 0.95) 

set.seed(20010911)
smcpSpec_sandwich <- summary(mcpSpec_sandwich)  
cimcpSpec_sandwich <- confint(mcpSpec_sandwich, level = 0.95) 

print(smcpSpec)
print(smcpSpec_sandwich)

print(cimcpSpec)
print(cimcpSpec_sandwich)


### Put both intervals and P-values into one data.frame
MCPSpec <- data.frame(Estimate = cimcpSpec[["confint"]][, "Estimate"],
                      Lower    = cimcpSpec[["confint"]][, "lwr"],
                      Upper    = cimcpSpec[["confint"]][, "upr"],
                      Pvalue   = smcpSpec[["test"]][["pvalues"]])
MCPSpec <- MCPSpec[order(MCPSpec[, "Pvalue"]),]
MCPSpec[, "PvalueF"] <- format.pval(MCPSpec[, "Pvalue"], digits = 2, eps = 1e-4)
MCPSpec[MCPSpec[, "Pvalue"] < 0.05, c("Estimate", "Lower", "Upper", "PvalueF")]


MCPSpec_sandwich <- data.frame(
                        Estimate = cimcpSpec_sandwich[["confint"]][,"Estimate"],
                        Lower    = cimcpSpec_sandwich[["confint"]][, "lwr"],
                        Upper    = cimcpSpec_sandwich[["confint"]][, "upr"],
                        Pvalue   = smcpSpec_sandwich[["test"]][["pvalues"]])
MCPSpec_sandwich <- MCPSpec_sandwich[order(MCPSpec_sandwich[, "Pvalue"]),]
MCPSpec_sandwich[, "PvalueF"] <- format.pval(MCPSpec_sandwich[, "Pvalue"], 
                                             digits = 2, eps = 1e-4)
MCPSpec_sandwich[MCPSpec_sandwich[, "Pvalue"] < 0.05, 
                 c("Estimate", "Lower", "Upper", "PvalueF")]


### -------------------------------------------------------------------------###
###  Block 3: ADDITIONAL MATERIAL                                            ###
### -------------------------------------------------------------------------###

### Consider different options for the 'type' parameter in the call of 
### the vcovHC function and try to understand  the underlying model. 

(vcovHC(mAddit, type = "const"))

### For instance, compare the following: 
all.equal(vcov(mAddit), vcovHC(mAddit, type = "const"))

### Use the help for the vcovHC function to get an idea about different options.