Multiple comparison procedures (Hothorn–Bretz–Westfall)
Data Cars2004nh
We partly continue with the analysis started here and there.
data(Cars2004nh, package = "mffSM")
head(Cars2004nh)
## vname type drive price.retail price.dealer price cons.city cons.highway
## 1 Chevrolet.Aveo.4dr 1 1 11690 10965 11327.5 8.4 6.9
## 2 Chevrolet.Aveo.LS.4dr.hatch 1 1 12585 11802 12193.5 8.4 6.9
## 3 Chevrolet.Cavalier.2dr 1 1 14610 13697 14153.5 9.0 6.4
## 4 Chevrolet.Cavalier.4dr 1 1 14810 13884 14347.0 9.0 6.4
## 5 Chevrolet.Cavalier.LS.2dr 1 1 16385 15357 15871.0 9.0 6.4
## 6 Dodge.Neon.SE.4dr 1 1 13670 12849 13259.5 8.1 6.5
## consumption engine.size ncylinder horsepower weight iweight lweight wheel.base length width
## 1 7.65 1.6 4 103 1075 0.0009302326 6.980076 249 424 168
## 2 7.65 1.6 4 103 1065 0.0009389671 6.970730 249 389 168
## 3 7.70 2.2 4 140 1187 0.0008424600 7.079184 264 465 175
## 4 7.70 2.2 4 140 1214 0.0008237232 7.101676 264 465 173
## 5 7.70 2.2 4 140 1187 0.0008424600 7.079184 264 465 175
## 6 7.30 2.0 4 132 1171 0.0008539710 7.065613 267 442 170
## ftype fdrive
## 1 personal front
## 2 personal front
## 3 personal front
## 4 personal front
## 5 personal front
## 6 personal front
dim(Cars2004nh)
## [1] 425 20
summary(Cars2004nh)
## vname type drive price.retail price.dealer
## Length:425 Min. :1.000 Min. :1.000 Min. : 10280 Min. : 9875
## Class :character 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 20370 1st Qu.: 18973
## Mode :character Median :1.000 Median :1.000 Median : 27905 Median : 25672
## Mean :2.219 Mean :1.692 Mean : 32866 Mean : 30096
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.: 39235 3rd Qu.: 35777
## Max. :6.000 Max. :3.000 Max. :192465 Max. :173560
##
## price cons.city cons.highway consumption engine.size ncylinder
## Min. : 10078 Min. : 6.20 Min. : 5.100 Min. : 5.65 Min. :1.300 Min. :-1.000
## 1st Qu.: 19600 1st Qu.:11.20 1st Qu.: 8.100 1st Qu.: 9.65 1st Qu.:2.400 1st Qu.: 4.000
## Median : 26656 Median :12.40 Median : 9.000 Median :10.70 Median :3.000 Median : 6.000
## Mean : 31481 Mean :12.36 Mean : 9.142 Mean :10.75 Mean :3.208 Mean : 5.791
## 3rd Qu.: 37514 3rd Qu.:13.80 3rd Qu.: 9.800 3rd Qu.:11.65 3rd Qu.:3.900 3rd Qu.: 6.000
## Max. :183012 Max. :23.50 Max. :19.600 Max. :21.55 Max. :8.300 Max. :12.000
## NA's :14 NA's :14 NA's :14
## horsepower weight iweight lweight wheel.base length
## Min. :100.0 Min. : 923 Min. :0.0003067 Min. :6.828 Min. :226.0 Min. :363.0
## 1st Qu.:165.0 1st Qu.:1412 1st Qu.:0.0005542 1st Qu.:7.253 1st Qu.:262.0 1st Qu.:450.0
## Median :210.0 Median :1577 Median :0.0006341 Median :7.363 Median :272.0 Median :472.0
## Mean :216.8 Mean :1626 Mean :0.0006412 Mean :7.373 Mean :274.9 Mean :470.6
## 3rd Qu.:255.0 3rd Qu.:1804 3rd Qu.:0.0007082 3rd Qu.:7.498 3rd Qu.:284.0 3rd Qu.:490.0
## Max. :500.0 Max. :3261 Max. :0.0010834 Max. :8.090 Max. :366.0 Max. :577.0
## NA's :2 NA's :2 NA's :2 NA's :2 NA's :26
## width ftype fdrive
## Min. :163.0 personal:242 front:223
## 1st Qu.:175.0 wagon : 30 rear :110
## Median :180.0 SUV : 60 4x4 : 92
## Mean :181.1 pickup : 24
## 3rd Qu.:185.0 sport : 49
## Max. :206.0 minivan : 20
## NA's :28
To be able to compare a model fitted here with other models where also other covariates will be included, we restrict ourselves to a subset of the dataset where all variables consumption
, lweight
and engine.size
are known.
isComplete <- complete.cases(Cars2004nh[, c("consumption", "lweight", "engine.size")])
sum(!isComplete)
## [1] 16
CarsNow <- subset(Cars2004nh, isComplete, select = c("consumption", "drive", "fdrive", "weight", "lweight", "engine.size"))
dim(CarsNow)
## [1] 409 6
summary(CarsNow)
## consumption drive fdrive weight lweight engine.size
## Min. : 5.65 Min. :1.000 front:212 Min. : 923 Min. :6.828 Min. :1.300
## 1st Qu.: 9.65 1st Qu.:1.000 rear :108 1st Qu.:1415 1st Qu.:7.255 1st Qu.:2.400
## Median :10.70 Median :1.000 4x4 : 89 Median :1577 Median :7.363 Median :3.000
## Mean :10.75 Mean :1.699 Mean :1622 Mean :7.371 Mean :3.178
## 3rd Qu.:11.65 3rd Qu.:2.000 3rd Qu.:1804 3rd Qu.:7.498 3rd Qu.:3.800
## Max. :21.55 Max. :3.000 Max. :2903 Max. :7.973 Max. :6.000
consumption
on lweight
and fdrive
mInter <- lm(consumption ~ fdrive + lweight + fdrive:lweight, data = CarsNow)
summary(mInter)
##
## Call:
## lm(formula = consumption ~ fdrive + lweight + fdrive:lweight,
## data = CarsNow)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4038 -0.6438 -0.1021 0.5672 4.3237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -52.8047 2.5266 -20.900 < 2e-16 ***
## fdriverear 19.8445 5.1297 3.869 0.000128 ***
## fdrive4x4 -12.5366 4.6506 -2.696 0.007319 **
## lweight 8.5716 0.3461 24.763 < 2e-16 ***
## fdriverear:lweight -2.5890 0.6956 -3.722 0.000226 ***
## fdrive4x4:lweight 1.7837 0.6240 2.858 0.004480 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9404 on 403 degrees of freedom
## Multiple R-squared: 0.8081, Adjusted R-squared: 0.8057
## F-statistic: 339.4 on 5 and 403 DF, p-value: < 2.2e-16
anova(mInter)
## Analysis of Variance Table
##
## Response: consumption
## Df Sum Sq Mean Sq F value Pr(>F)
## fdrive 2 519.89 259.94 293.935 < 2.2e-16 ***
## lweight 1 954.26 954.26 1079.040 < 2.2e-16 ***
## fdrive:lweight 2 26.70 13.35 15.097 4.758e-07 ***
## Residuals 403 356.40 0.88
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ldrive <- levels(CarsNow[, "fdrive"])
CZ <- contr.treatment(3)
rownames(CZ) <- ldrive
print(CZ)
## 2 3
## front 0 0
## rear 1 0
## 4x4 0 1
L.slope <- cbind(0, 0, 0, 1, CZ)
colnames(L.slope) <- names(coef(mInter))
rownames(L.slope) <- ldrive
print(L.slope)
## (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## front 0 0 0 1 0 0
## rear 0 0 0 1 1 0
## 4x4 0 0 0 1 0 1
library("mffSM")
LSest(mInter, L = L.slope)
## Estimate Std. Error t value P value Lower Upper
## front 8.571564 0.3461413 24.763196 < 2.22e-16 7.891096 9.252032
## rear 5.982551 0.6033947 9.914822 < 2.22e-16 4.796357 7.168745
## 4x4 10.355263 0.5192488 19.942776 < 2.22e-16 9.334488 11.376037
print(LSest(mInter, L = L.slope), digits = 3)
## Estimate Std. Error t value P value Lower Upper
## front 8.57 0.346 24.76 < 2.22e-16 7.89 9.25
## rear 5.98 0.603 9.91 < 2.22e-16 4.80 7.17
## 4x4 10.36 0.519 19.94 < 2.22e-16 9.33 11.38
L.diff.slope <- cbind(0, 0, 0, 0, rbind(CZ[2, ] - CZ[1, ],
CZ[3, ] - CZ[1, ],
CZ[3, ] - CZ[2, ]))
colnames(L.diff.slope) <- names(coef(mInter))
rownames(L.diff.slope) <- paste(ldrive[c(2, 3, 3)], "-", ldrive[c(1, 1, 2)], sep = "")
print(L.diff.slope)
## (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## rear-front 0 0 0 0 1 0
## 4x4-front 0 0 0 0 0 1
## 4x4-rear 0 0 0 0 -1 1
LSunadj <- LSest(mInter, L = L.diff.slope)
print(LSunadj, digits = 3)
## Estimate Std. Error t value P value Lower Upper
## rear-front -2.59 0.696 -3.72 0.00022589 -3.957 -1.22
## 4x4-front 1.78 0.624 2.86 0.00448045 0.557 3.01
## 4x4-rear 4.37 0.796 5.49 7.0143e-08 2.808 5.94
LSBonf <- LSest(mInter, L = L.diff.slope, conf.level = 1 - 0.05/3)
LSBonf[["P value"]] <- 3 * LSBonf[["P value"]]
print(LSBonf, digits = 3)
## Estimate Std. Error t value P value Lower Upper
## rear-front -2.59 0.696 -3.72 0.00067767 -4.261 -0.917
## 4x4-front 1.78 0.624 2.86 0.01344135 0.283 3.284
## 4x4-rear 4.37 0.796 5.49 2.1043e-07 2.459 6.286
library("multcomp")
mcp <- glht(mInter, linfct = L.diff.slope, rhs = 0)
print(mcp)
##
## General Linear Hypotheses
##
## Linear Hypotheses:
## Estimate
## rear-front == 0 -2.589
## 4x4-front == 0 1.784
## 4x4-rear == 0 4.373
set.seed(20010911)
smcp <- summary(mcp)
print(smcp)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lm(formula = consumption ~ fdrive + lweight + fdrive:lweight,
## data = CarsNow)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## rear-front == 0 -2.5890 0.6956 -3.722 0.000613 ***
## 4x4-front == 0 1.7837 0.6240 2.858 0.012110 *
## 4x4-rear == 0 4.3727 0.7961 5.493 < 1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
set.seed(20010911)
cimcp <- confint(mcp, level = 0.95)
print(cimcp)
##
## Simultaneous Confidence Intervals
##
## Fit: lm(formula = consumption ~ fdrive + lweight + fdrive:lweight,
## data = CarsNow)
##
## Quantile = 2.3449
## 95% family-wise confidence level
##
##
## Linear Hypotheses:
## Estimate lwr upr
## rear-front == 0 -2.5890 -4.2202 -0.9578
## 4x4-front == 0 1.7837 0.3204 3.2470
## 4x4-rear == 0 4.3727 2.5061 6.2394
MCP <- data.frame(Estimate = LSunadj[["Estimate"]],
PvalUnadj = format.pval(LSunadj[["P value"]], digits = 2, eps = 1e-4),
PvalBonf = format.pval(LSBonf[["P value"]], digits = 2, eps = 1e-4),
PvalHBW = format.pval(smcp[["test"]][["pvalues"]], digits = 2, eps = 1e-4),
LowerUnadj = LSunadj[["Lower"]],
UpperUnadj = LSunadj[["Upper"]],
LowerBonf = LSBonf[["Lower"]],
UpperBonf = LSBonf[["Upper"]],
LowerHBW = cimcp[["confint"]][, "lwr"],
UpperHBW = cimcp[["confint"]][, "upr"],
stringsAsFactors = FALSE)
print(MCP[, 1:4], digits = 3)
## Estimate PvalUnadj PvalBonf PvalHBW
## rear-front -2.59 0.00023 0.00068 0.00061
## 4x4-front 1.78 0.00448 0.01344 0.01211
## 4x4-rear 4.37 < 1e-04 < 1e-04 < 1e-04
print(MCP[, 5:10], digits = 3)
## LowerUnadj UpperUnadj LowerBonf UpperBonf LowerHBW UpperHBW
## rear-front -3.957 -1.22 -4.261 -0.917 -4.22 -0.958
## 4x4-front 0.557 3.01 0.283 3.284 0.32 3.247
## 4x4-rear 2.808 5.94 2.459 6.286 2.51 6.239