NMSA407 Linear Regression: Tutorial

Multiple comparison procedures (Hothorn–Bretz–Westfall)

Data Cars2004nh

Introduction

We partly continue with the analysis started here and there.

Load used data and calculate basic summaries

data(Cars2004nh, package = "mffSM")
head(Cars2004nh)

##                         vname type drive price.retail price.dealer   price cons.city cons.highway
## 1          Chevrolet.Aveo.4dr    1     1        11690        10965 11327.5       8.4          6.9
## 2 Chevrolet.Aveo.LS.4dr.hatch    1     1        12585        11802 12193.5       8.4          6.9
## 3      Chevrolet.Cavalier.2dr    1     1        14610        13697 14153.5       9.0          6.4
## 4      Chevrolet.Cavalier.4dr    1     1        14810        13884 14347.0       9.0          6.4
## 5   Chevrolet.Cavalier.LS.2dr    1     1        16385        15357 15871.0       9.0          6.4
## 6           Dodge.Neon.SE.4dr    1     1        13670        12849 13259.5       8.1          6.5
##   consumption engine.size ncylinder horsepower weight      iweight  lweight wheel.base length width
## 1        7.65         1.6         4        103   1075 0.0009302326 6.980076        249    424   168
## 2        7.65         1.6         4        103   1065 0.0009389671 6.970730        249    389   168
## 3        7.70         2.2         4        140   1187 0.0008424600 7.079184        264    465   175
## 4        7.70         2.2         4        140   1214 0.0008237232 7.101676        264    465   173
## 5        7.70         2.2         4        140   1187 0.0008424600 7.079184        264    465   175
## 6        7.30         2.0         4        132   1171 0.0008539710 7.065613        267    442   170
##      ftype fdrive
## 1 personal  front
## 2 personal  front
## 3 personal  front
## 4 personal  front
## 5 personal  front
## 6 personal  front

dim(Cars2004nh)

## [1] 425  20

summary(Cars2004nh)

##     vname                type           drive        price.retail     price.dealer   
##  Length:425         Min.   :1.000   Min.   :1.000   Min.   : 10280   Min.   :  9875  
##  Class :character   1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 20370   1st Qu.: 18973  
##  Mode  :character   Median :1.000   Median :1.000   Median : 27905   Median : 25672  
##                     Mean   :2.219   Mean   :1.692   Mean   : 32866   Mean   : 30096  
##                     3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.: 39235   3rd Qu.: 35777  
##                     Max.   :6.000   Max.   :3.000   Max.   :192465   Max.   :173560  
##                                                                                      
##      price          cons.city      cons.highway     consumption     engine.size      ncylinder     
##  Min.   : 10078   Min.   : 6.20   Min.   : 5.100   Min.   : 5.65   Min.   :1.300   Min.   :-1.000  
##  1st Qu.: 19600   1st Qu.:11.20   1st Qu.: 8.100   1st Qu.: 9.65   1st Qu.:2.400   1st Qu.: 4.000  
##  Median : 26656   Median :12.40   Median : 9.000   Median :10.70   Median :3.000   Median : 6.000  
##  Mean   : 31481   Mean   :12.36   Mean   : 9.142   Mean   :10.75   Mean   :3.208   Mean   : 5.791  
##  3rd Qu.: 37514   3rd Qu.:13.80   3rd Qu.: 9.800   3rd Qu.:11.65   3rd Qu.:3.900   3rd Qu.: 6.000  
##  Max.   :183012   Max.   :23.50   Max.   :19.600   Max.   :21.55   Max.   :8.300   Max.   :12.000  
##                   NA's   :14      NA's   :14       NA's   :14                                      
##    horsepower        weight        iweight             lweight        wheel.base        length     
##  Min.   :100.0   Min.   : 923   Min.   :0.0003067   Min.   :6.828   Min.   :226.0   Min.   :363.0  
##  1st Qu.:165.0   1st Qu.:1412   1st Qu.:0.0005542   1st Qu.:7.253   1st Qu.:262.0   1st Qu.:450.0  
##  Median :210.0   Median :1577   Median :0.0006341   Median :7.363   Median :272.0   Median :472.0  
##  Mean   :216.8   Mean   :1626   Mean   :0.0006412   Mean   :7.373   Mean   :274.9   Mean   :470.6  
##  3rd Qu.:255.0   3rd Qu.:1804   3rd Qu.:0.0007082   3rd Qu.:7.498   3rd Qu.:284.0   3rd Qu.:490.0  
##  Max.   :500.0   Max.   :3261   Max.   :0.0010834   Max.   :8.090   Max.   :366.0   Max.   :577.0  
##                  NA's   :2      NA's   :2           NA's   :2       NA's   :2       NA's   :26     
##      width            ftype       fdrive   
##  Min.   :163.0   personal:242   front:223  
##  1st Qu.:175.0   wagon   : 30   rear :110  
##  Median :180.0   SUV     : 60   4x4  : 92  
##  Mean   :181.1   pickup  : 24              
##  3rd Qu.:185.0   sport   : 49              
##  Max.   :206.0   minivan : 20              
##  NA's   :28

Complete cases subset used here

To be able to compare a model fitted here with other models where also other covariates will be included, we restrict ourselves to a subset of the dataset where all variables consumption, lweight and engine.size are known.

isComplete <- complete.cases(Cars2004nh[, c("consumption", "lweight", "engine.size")])
sum(!isComplete)

## [1] 16

CarsNow <- subset(Cars2004nh, isComplete, select = c("consumption", "drive", "fdrive", "weight", "lweight", "engine.size"))
dim(CarsNow)

## [1] 409   6

summary(CarsNow)

##   consumption        drive         fdrive        weight        lweight       engine.size   
##  Min.   : 5.65   Min.   :1.000   front:212   Min.   : 923   Min.   :6.828   Min.   :1.300  
##  1st Qu.: 9.65   1st Qu.:1.000   rear :108   1st Qu.:1415   1st Qu.:7.255   1st Qu.:2.400  
##  Median :10.70   Median :1.000   4x4  : 89   Median :1577   Median :7.363   Median :3.000  
##  Mean   :10.75   Mean   :1.699               Mean   :1622   Mean   :7.371   Mean   :3.178  
##  3rd Qu.:11.65   3rd Qu.:2.000               3rd Qu.:1804   3rd Qu.:7.498   3rd Qu.:3.800  
##  Max.   :21.55   Max.   :3.000               Max.   :2903   Max.   :7.973   Max.   :6.000

Dependence of `consumption` on `lweight` and `fdrive`

Interaction model

mInter  <- lm(consumption ~ fdrive + lweight + fdrive:lweight, data = CarsNow)
summary(mInter)

## 
## Call:
## lm(formula = consumption ~ fdrive + lweight + fdrive:lweight, 
##     data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4038 -0.6438 -0.1021  0.5672  4.3237 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -52.8047     2.5266 -20.900  < 2e-16 ***
## fdriverear          19.8445     5.1297   3.869 0.000128 ***
## fdrive4x4          -12.5366     4.6506  -2.696 0.007319 ** 
## lweight              8.5716     0.3461  24.763  < 2e-16 ***
## fdriverear:lweight  -2.5890     0.6956  -3.722 0.000226 ***
## fdrive4x4:lweight    1.7837     0.6240   2.858 0.004480 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9404 on 403 degrees of freedom
## Multiple R-squared:  0.8081, Adjusted R-squared:  0.8057 
## F-statistic: 339.4 on 5 and 403 DF,  p-value: < 2.2e-16

Significant interaction?

anova(mInter)

## Analysis of Variance Table
## 
## Response: consumption
##                 Df Sum Sq Mean Sq  F value    Pr(>F)    
## fdrive           2 519.89  259.94  293.935 < 2.2e-16 ***
## lweight          1 954.26  954.26 1079.040 < 2.2e-16 ***
## fdrive:lweight   2  26.70   13.35   15.097 4.758e-07 ***
## Residuals      403 356.40    0.88                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimated slopes (for each drive group)

ldrive <- levels(CarsNow[, "fdrive"])

CZ <- contr.treatment(3)
rownames(CZ) <- ldrive
print(CZ)

##       2 3
## front 0 0
## rear  1 0
## 4x4   0 1

L.slope <- cbind(0, 0, 0, 1, CZ)
colnames(L.slope) <- names(coef(mInter))
rownames(L.slope) <- ldrive
print(L.slope)

##       (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## front           0          0         0       1                  0                 0
## rear            0          0         0       1                  1                 0
## 4x4             0          0         0       1                  0                 1

library("mffSM")
LSest(mInter, L = L.slope)

##        Estimate Std. Error   t value    P value    Lower     Upper
## front  8.571564  0.3461413 24.763196 < 2.22e-16 7.891096  9.252032
## rear   5.982551  0.6033947  9.914822 < 2.22e-16 4.796357  7.168745
## 4x4   10.355263  0.5192488 19.942776 < 2.22e-16 9.334488 11.376037

print(LSest(mInter, L = L.slope), digits = 3)

##       Estimate Std. Error t value    P value Lower Upper
## front     8.57      0.346   24.76 < 2.22e-16  7.89  9.25
## rear      5.98      0.603    9.91 < 2.22e-16  4.80  7.17
## 4x4      10.36      0.519   19.94 < 2.22e-16  9.33 11.38

Inference for differences in slopes

No adjustment for multiple testing

L.diff.slope <- cbind(0, 0, 0, 0, rbind(CZ[2, ] - CZ[1, ],
                                        CZ[3, ] - CZ[1, ],
                                        CZ[3, ] - CZ[2, ]))
colnames(L.diff.slope) <- names(coef(mInter))
rownames(L.diff.slope) <- paste(ldrive[c(2, 3, 3)], "-", ldrive[c(1, 1, 2)], sep = "")
print(L.diff.slope)

##            (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## rear-front           0          0         0       0                  1                 0
## 4x4-front            0          0         0       0                  0                 1
## 4x4-rear             0          0         0       0                 -1                 1

LSunadj <- LSest(mInter, L = L.diff.slope)
print(LSunadj, digits = 3)

##            Estimate Std. Error t value    P value  Lower Upper
## rear-front    -2.59      0.696   -3.72 0.00022589 -3.957 -1.22
## 4x4-front      1.78      0.624    2.86 0.00448045  0.557  3.01
## 4x4-rear       4.37      0.796    5.49 7.0143e-08  2.808  5.94

Bonferroni MCP

Simultaneous confidence intervals with a coverage of 95% and Bonferroni corrected P-values

LSBonf <- LSest(mInter, L = L.diff.slope, conf.level = 1 - 0.05/3)
LSBonf[["P value"]] <- 3 * LSBonf[["P value"]] 
print(LSBonf, digits = 3)

##            Estimate Std. Error t value    P value  Lower  Upper
## rear-front    -2.59      0.696   -3.72 0.00067767 -4.261 -0.917
## 4x4-front      1.78      0.624    2.86 0.01344135  0.283  3.284
## 4x4-rear       4.37      0.796    5.49 2.1043e-07  2.459  6.286

Hothorn-Bretz-Westfall MCP

library("multcomp")

Basic results (only point estimates of differences)

mcp <- glht(mInter, linfct = L.diff.slope, rhs = 0)
print(mcp)

## 
##   General Linear Hypotheses
## 
## Linear Hypotheses:
##                 Estimate
## rear-front == 0   -2.589
## 4x4-front == 0     1.784
## 4x4-rear == 0      4.373

Calculation of P-values adjusted for multiple comparison

set.seed(20010911)
smcp <- summary(mcp)
print(smcp)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lm(formula = consumption ~ fdrive + lweight + fdrive:lweight, 
##     data = CarsNow)
## 
## Linear Hypotheses:
##                 Estimate Std. Error t value Pr(>|t|)    
## rear-front == 0  -2.5890     0.6956  -3.722 0.000613 ***
## 4x4-front == 0    1.7837     0.6240   2.858 0.012110 *  
## 4x4-rear == 0     4.3727     0.7961   5.493  < 1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Simultaneous confidence intervals with a coverage of 95%

set.seed(20010911)
cimcp <- confint(mcp, level = 0.95)
print(cimcp)

## 
##   Simultaneous Confidence Intervals
## 
## Fit: lm(formula = consumption ~ fdrive + lweight + fdrive:lweight, 
##     data = CarsNow)
## 
## Quantile = 2.3449
## 95% family-wise confidence level
##  
## 
## Linear Hypotheses:
##                 Estimate lwr     upr    
## rear-front == 0 -2.5890  -4.2202 -0.9578
## 4x4-front == 0   1.7837   0.3204  3.2470
## 4x4-rear == 0    4.3727   2.5061  6.2394

All results in one data frame

MCP <- data.frame(Estimate   = LSunadj[["Estimate"]],
                  PvalUnadj  = format.pval(LSunadj[["P value"]], digits = 2, eps = 1e-4),
                  PvalBonf   = format.pval(LSBonf[["P value"]], digits = 2, eps = 1e-4),
                  PvalHBW    = format.pval(smcp[["test"]][["pvalues"]], digits = 2, eps = 1e-4),
                  LowerUnadj = LSunadj[["Lower"]],
                  UpperUnadj = LSunadj[["Upper"]],                 
                  LowerBonf  = LSBonf[["Lower"]],
                  UpperBonf  = LSBonf[["Upper"]],
                  LowerHBW   = cimcp[["confint"]][, "lwr"],
                  UpperHBW   = cimcp[["confint"]][, "upr"],                  
                  stringsAsFactors = FALSE)
print(MCP[, 1:4], digits = 3)

##            Estimate PvalUnadj PvalBonf PvalHBW
## rear-front    -2.59   0.00023  0.00068 0.00061
## 4x4-front      1.78   0.00448  0.01344 0.01211
## 4x4-rear       4.37   < 1e-04  < 1e-04 < 1e-04

print(MCP[, 5:10], digits = 3)

##            LowerUnadj UpperUnadj LowerBonf UpperBonf LowerHBW UpperHBW
## rear-front     -3.957      -1.22    -4.261    -0.917    -4.22   -0.958
## 4x4-front       0.557       3.01     0.283     3.284     0.32    3.247
## 4x4-rear        2.808       5.94     2.459     6.286     2.51    6.239