NMSA407 Linear Regression: Tutorial

Numeric and categorical covariate

Data Cars2004nh




Introduction

Load used data and calculate basic summaries

data(Cars2004nh, package = "mffSM")
head(Cars2004nh)
##                         vname type drive price.retail price.dealer   price cons.city cons.highway
## 1          Chevrolet.Aveo.4dr    1     1        11690        10965 11327.5       8.4          6.9
## 2 Chevrolet.Aveo.LS.4dr.hatch    1     1        12585        11802 12193.5       8.4          6.9
## 3      Chevrolet.Cavalier.2dr    1     1        14610        13697 14153.5       9.0          6.4
## 4      Chevrolet.Cavalier.4dr    1     1        14810        13884 14347.0       9.0          6.4
## 5   Chevrolet.Cavalier.LS.2dr    1     1        16385        15357 15871.0       9.0          6.4
## 6           Dodge.Neon.SE.4dr    1     1        13670        12849 13259.5       8.1          6.5
##   consumption engine.size ncylinder horsepower weight      iweight  lweight wheel.base length width
## 1        7.65         1.6         4        103   1075 0.0009302326 6.980076        249    424   168
## 2        7.65         1.6         4        103   1065 0.0009389671 6.970730        249    389   168
## 3        7.70         2.2         4        140   1187 0.0008424600 7.079184        264    465   175
## 4        7.70         2.2         4        140   1214 0.0008237232 7.101676        264    465   173
## 5        7.70         2.2         4        140   1187 0.0008424600 7.079184        264    465   175
## 6        7.30         2.0         4        132   1171 0.0008539710 7.065613        267    442   170
##      ftype fdrive
## 1 personal  front
## 2 personal  front
## 3 personal  front
## 4 personal  front
## 5 personal  front
## 6 personal  front
dim(Cars2004nh)
## [1] 425  20
summary(Cars2004nh)
##     vname                type           drive        price.retail     price.dealer   
##  Length:425         Min.   :1.000   Min.   :1.000   Min.   : 10280   Min.   :  9875  
##  Class :character   1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 20370   1st Qu.: 18973  
##  Mode  :character   Median :1.000   Median :1.000   Median : 27905   Median : 25672  
##                     Mean   :2.219   Mean   :1.692   Mean   : 32866   Mean   : 30096  
##                     3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.: 39235   3rd Qu.: 35777  
##                     Max.   :6.000   Max.   :3.000   Max.   :192465   Max.   :173560  
##                                                                                      
##      price          cons.city      cons.highway     consumption     engine.size      ncylinder     
##  Min.   : 10078   Min.   : 6.20   Min.   : 5.100   Min.   : 5.65   Min.   :1.300   Min.   :-1.000  
##  1st Qu.: 19600   1st Qu.:11.20   1st Qu.: 8.100   1st Qu.: 9.65   1st Qu.:2.400   1st Qu.: 4.000  
##  Median : 26656   Median :12.40   Median : 9.000   Median :10.70   Median :3.000   Median : 6.000  
##  Mean   : 31481   Mean   :12.36   Mean   : 9.142   Mean   :10.75   Mean   :3.208   Mean   : 5.791  
##  3rd Qu.: 37514   3rd Qu.:13.80   3rd Qu.: 9.800   3rd Qu.:11.65   3rd Qu.:3.900   3rd Qu.: 6.000  
##  Max.   :183012   Max.   :23.50   Max.   :19.600   Max.   :21.55   Max.   :8.300   Max.   :12.000  
##                   NA's   :14      NA's   :14       NA's   :14                                      
##    horsepower        weight        iweight             lweight        wheel.base        length     
##  Min.   :100.0   Min.   : 923   Min.   :0.0003067   Min.   :6.828   Min.   :226.0   Min.   :363.0  
##  1st Qu.:165.0   1st Qu.:1412   1st Qu.:0.0005542   1st Qu.:7.253   1st Qu.:262.0   1st Qu.:450.0  
##  Median :210.0   Median :1577   Median :0.0006341   Median :7.363   Median :272.0   Median :472.0  
##  Mean   :216.8   Mean   :1626   Mean   :0.0006412   Mean   :7.373   Mean   :274.9   Mean   :470.6  
##  3rd Qu.:255.0   3rd Qu.:1804   3rd Qu.:0.0007082   3rd Qu.:7.498   3rd Qu.:284.0   3rd Qu.:490.0  
##  Max.   :500.0   Max.   :3261   Max.   :0.0010834   Max.   :8.090   Max.   :366.0   Max.   :577.0  
##                  NA's   :2      NA's   :2           NA's   :2       NA's   :2       NA's   :26     
##      width            ftype       fdrive   
##  Min.   :163.0   personal:242   front:223  
##  1st Qu.:175.0   wagon   : 30   rear :110  
##  Median :180.0   SUV     : 60   4x4  : 92  
##  Mean   :181.1   pickup  : 24              
##  3rd Qu.:185.0   sport   : 49              
##  Max.   :206.0   minivan : 20              
##  NA's   :28

Complete cases subset used here

To be able to compare a model fitted here with other models where also other covariates will be included, we restrict ourselves to a subset of the dataset where all variables consumption, lweight and engine.size are known.

isComplete <- complete.cases(Cars2004nh[, c("consumption", "lweight", "engine.size")])
sum(!isComplete)
## [1] 16
CarsNow <- subset(Cars2004nh, isComplete, select = c("consumption", "drive", "fdrive", "weight", "lweight", "engine.size"))
dim(CarsNow)
## [1] 409   6
summary(CarsNow)
##   consumption        drive         fdrive        weight        lweight       engine.size   
##  Min.   : 5.65   Min.   :1.000   front:212   Min.   : 923   Min.   :6.828   Min.   :1.300  
##  1st Qu.: 9.65   1st Qu.:1.000   rear :108   1st Qu.:1415   1st Qu.:7.255   1st Qu.:2.400  
##  Median :10.70   Median :1.000   4x4  : 89   Median :1577   Median :7.363   Median :3.000  
##  Mean   :10.75   Mean   :1.699               Mean   :1622   Mean   :7.371   Mean   :3.178  
##  3rd Qu.:11.65   3rd Qu.:2.000               3rd Qu.:1804   3rd Qu.:7.498   3rd Qu.:3.800  
##  Max.   :21.55   Max.   :3.000               Max.   :2903   Max.   :7.973   Max.   :6.000

Some color and symbol vectors used in plots below

FCOL <- rainbow_hcl(3)
FCOL2 <- c("red3", "darkgreen", "darkblue")
FPCH <- c(21, 23, 24)
names(FCOL) <- names(FCOL2) <- names(FPCH) <- levels(CarsNow[, "fdrive"])




Dependence of consumption on weight or lweight and fdrive

Scatterplots consumption on weight

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ weight, data = CarsNow, pch = PCH, col = COL, bg = BGC, xlab = "Weight [kg]", ylab = "Consumption [l/100 km]")

plot of chunk fig-AdditInter-02-00

Scatterplots consumption on weight by fdrive

par(mfrow = c(2, 2), bty = BTY, mar = c(5, 4, 3, 1) + 0.1)
for (dr in levels(CarsNow[, "fdrive"])){
    plot(consumption ~ weight, data = subset(CarsNow, fdrive == dr), pch = PCH, col = COL, bg = BGC,
         xlab = "Weight [kg]", ylab = "Consumption [l/100 km]", main = dr,
         xlim = range(CarsNow[, "weight"]), ylim = range(CarsNow[, "consumption"]))
}

plot of chunk fig-AdditInter-02-01

Scatterplots consumption on weight by fdrive in one plot

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ weight, data = CarsNow, pch = FPCH[fdrive], col = FCOL2[fdrive], bg = FCOL[fdrive],
     xlab = "Weight [kg]", ylab = "Consumption [l/100 km]")
legend(1000, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL)

plot of chunk fig-AdditInter-02-02

Scatterplots consumption on lweight by fdrive

par(mfrow = c(2, 2), bty = BTY, mar = c(5, 4, 3, 1) + 0.1)
for (dr in levels(CarsNow[, "fdrive"])){
    plot(consumption ~ lweight, data = subset(CarsNow, fdrive == dr), pch = PCH, col = COL, bg = BGC,
         xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", main = dr,
         xlim = range(CarsNow[, "lweight"]), ylim = range(CarsNow[, "consumption"]))
}

plot of chunk fig-AdditInter-02-03

Scatterplots consumption on lweight by fdrive in one plot

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = FCOL2[fdrive], bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
legend(6.9, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL)

plot of chunk fig-AdditInter-02-04




Model with common line for all three groups (no effect of fdrive)

mOneLine <- lm(consumption ~ lweight, data = CarsNow)
summary(mOneLine)
## 
## Call:
## lm(formula = consumption ~ lweight, data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6544 -0.7442 -0.1526  0.5160  5.1616 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -58.2480     1.8941  -30.75   <2e-16 ***
## lweight       9.3606     0.2569   36.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.035 on 407 degrees of freedom
## Multiple R-squared:  0.7654, Adjusted R-squared:  0.7648 
## F-statistic:  1328 on 1 and 407 DF,  p-value: < 2.2e-16




Model with three lines, not necessarily parallel (interaction model)

Intercept not explicitely included in the model

mNoInt <- lm(consumption ~ -1 + fdrive + fdrive:lweight, data = CarsNow)
summary(mNoInt)
## 
## Call:
## lm(formula = consumption ~ -1 + fdrive + fdrive:lweight, data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4038 -0.6438 -0.1021  0.5672  4.3237 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## fdrivefront         -52.8047     2.5266 -20.900  < 2e-16 ***
## fdriverear          -32.9602     4.4644  -7.383 8.95e-13 ***
## fdrive4x4           -65.3414     3.9045 -16.735  < 2e-16 ***
## fdrivefront:lweight   8.5716     0.3461  24.763  < 2e-16 ***
## fdriverear:lweight    5.9826     0.6034   9.915  < 2e-16 ***
## fdrive4x4:lweight    10.3553     0.5192  19.943  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9404 on 403 degrees of freedom
## Multiple R-squared:  0.9927, Adjusted R-squared:  0.9926 
## F-statistic:  9193 on 6 and 403 DF,  p-value: < 2.2e-16
coef(mNoInt)
##         fdrivefront          fdriverear           fdrive4x4 fdrivefront:lweight  fdriverear:lweight 
##          -52.804723          -32.960224          -65.341359            8.571564            5.982551 
##   fdrive4x4:lweight 
##           10.355263

Plot data and the fitted lines

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
for (g in 1:3) abline(a = coef(mNoInt)[g], b = coef(mNoInt)[3 + g], col = FCOL2[g], lwd = 2)    
legend(6.9, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL, lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-05

Plot data and the fitted lines (original weight on the \(x\)-axis)

wgrid <- seq(900, 3000, length = 500)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ weight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Weight [kg]", ylab = "Consumption [l/100 km]")
for (g in 1:3) lines(wgrid, coef(mNoInt)[g] + coef(mNoInt)[3 + g] * log(wgrid), col = FCOL2[g], lwd = 2)    
legend(1000, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL, lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-06

Separate fits per group

summary(mFront <- lm(consumption ~ lweight, data = subset(CarsNow, fdrive == "front")))
## 
## Call:
## lm(formula = consumption ~ lweight, data = subset(CarsNow, fdrive == 
##     "front"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4038 -0.5157 -0.0845  0.5466  2.8956 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -52.8047     2.1369  -24.71   <2e-16 ***
## lweight       8.5716     0.2928   29.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7954 on 210 degrees of freedom
## Multiple R-squared:  0.8032, Adjusted R-squared:  0.8023 
## F-statistic: 857.3 on 1 and 210 DF,  p-value: < 2.2e-16
summary(mRear <- lm(consumption ~ lweight, data = subset(CarsNow, fdrive == "rear")))
## 
## Call:
## lm(formula = consumption ~ lweight, data = subset(CarsNow, fdrive == 
##     "rear"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.27988 -0.68070 -0.09102  0.58671  2.71058 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -32.9602     4.4254  -7.448 2.66e-11 ***
## lweight       5.9826     0.5981  10.002  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9322 on 106 degrees of freedom
## Multiple R-squared:  0.4855, Adjusted R-squared:  0.4807 
## F-statistic:   100 on 1 and 106 DF,  p-value: < 2.2e-16
summary(m4x4 <- lm(consumption ~ lweight, data = subset(CarsNow, fdrive == "4x4")))
## 
## Call:
## lm(formula = consumption ~ lweight, data = subset(CarsNow, fdrive == 
##     "4x4"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1902 -0.8615 -0.1934  0.5017  4.3237 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -65.3414     5.1033  -12.80   <2e-16 ***
## lweight      10.3553     0.6787   15.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.229 on 87 degrees of freedom
## Multiple R-squared:  0.728,  Adjusted R-squared:  0.7248 
## F-statistic: 232.8 on 1 and 87 DF,  p-value: < 2.2e-16

Reference group pseudocontrast parameterization of a categorical covariate fdrive

mInter <- lm(consumption ~ fdrive + lweight + fdrive:lweight, data = CarsNow)
#mInter <- lm(consumption ~ fdrive*lweight, data = CarsNow)                    ## the same
summary(mInter)
## 
## Call:
## lm(formula = consumption ~ fdrive + lweight + fdrive:lweight, 
##     data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4038 -0.6438 -0.1021  0.5672  4.3237 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -52.8047     2.5266 -20.900  < 2e-16 ***
## fdriverear          19.8445     5.1297   3.869 0.000128 ***
## fdrive4x4          -12.5366     4.6506  -2.696 0.007319 ** 
## lweight              8.5716     0.3461  24.763  < 2e-16 ***
## fdriverear:lweight  -2.5890     0.6956  -3.722 0.000226 ***
## fdrive4x4:lweight    1.7837     0.6240   2.858 0.004480 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9404 on 403 degrees of freedom
## Multiple R-squared:  0.8081, Adjusted R-squared:  0.8057 
## F-statistic: 339.4 on 5 and 403 DF,  p-value: < 2.2e-16

Estimated intercepts in the three groups

Lint <- cbind(1, contr.treatment(3), 0, 0, 0)
rownames(Lint) <- levels(CarsNow[, "fdrive"])
colnames(Lint) <- names(coef(mInter))
print(Lint)
##       (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## front           1          0         0       0                  0                 0
## rear            1          1         0       0                  0                 0
## 4x4             1          0         1       0                  0                 0
library("mffSM")
LSest(mInter, L = Lint)
##        Estimate Std. Error    t value    P value     Lower     Upper
## front -52.80472   2.526590 -20.899601 < 2.22e-16 -57.77167 -47.83778
## rear  -32.96022   4.464356  -7.382973 8.9529e-13 -41.73656 -24.18389
## 4x4   -65.34136   3.904452 -16.735090 < 2.22e-16 -73.01700 -57.66572

Estimated slopes in the three groups

Lslope <- cbind(0, 0, 0, 1, contr.treatment(3))
rownames(Lslope) <- levels(CarsNow[, "fdrive"])
colnames(Lslope) <- names(coef(mInter))
print(Lslope)
##       (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## front           0          0         0       1                  0                 0
## rear            0          0         0       1                  1                 0
## 4x4             0          0         0       1                  0                 1
LSest(mInter, L = Lslope)
##        Estimate Std. Error   t value    P value    Lower     Upper
## front  8.571564  0.3461413 24.763196 < 2.22e-16 7.891096  9.252032
## rear   5.982551  0.6033947  9.914822 < 2.22e-16 4.796357  7.168745
## 4x4   10.355263  0.5192488 19.942776 < 2.22e-16 9.334488 11.376037

Estimated differences between the slopes

LdiffSlope <- cbind(0, 0, 0, matrix(c(0,  1, 0,
                                      0,  0, 1,
                                      0, -1, 1), ncol = 3, byrow = TRUE))
rownames(LdiffSlope) <- c("rear - front", "4x4 - front", "4x4 - rear")
colnames(LdiffSlope) <- names(coef(mInter))
print(LdiffSlope)
##              (Intercept) fdriverear fdrive4x4 lweight fdriverear:lweight fdrive4x4:lweight
## rear - front           0          0         0       0                  1                 0
## 4x4 - front            0          0         0       0                  0                 1
## 4x4 - rear             0          0         0       0                 -1                 1
LSest(mInter, L = LdiffSlope)
##               Estimate Std. Error   t value    P value      Lower     Upper
## rear - front -2.589013  0.6956285 -3.721833 0.00022589 -3.9565266 -1.221499
## 4x4 - front   1.783699  0.6240458  2.858282 0.00448045  0.5569072  3.010490
## 4x4 - rear    4.372712  0.7960556  5.492973 7.0143e-08  2.8077715  5.937652

Sum contrast parameterization of a categorical covariate fdrive

levels(CarsNow[, "fdrive"])
## [1] "front" "rear"  "4x4"
mInterSum <- lm(consumption ~ fdrive + lweight + fdrive:lweight, data = CarsNow, contrasts = list(fdrive = contr.sum))
summary(mInterSum)
## 
## Call:
## lm(formula = consumption ~ fdrive + lweight + fdrive:lweight, 
##     data = CarsNow, contrasts = list(fdrive = contr.sum))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4038 -0.6438 -0.1021  0.5672  4.3237 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -50.3688     2.1489 -23.440  < 2e-16 ***
## fdrive1          -2.4360     2.5972  -0.938    0.349    
## fdrive2          17.4085     3.3558   5.188 3.38e-07 ***
## lweight           8.3031     0.2894  28.696  < 2e-16 ***
## fdrive1:lweight   0.2684     0.3517   0.763    0.446    
## fdrive2:lweight  -2.3206     0.4529  -5.124 4.64e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9404 on 403 degrees of freedom
## Multiple R-squared:  0.8081, Adjusted R-squared:  0.8057 
## F-statistic: 339.4 on 5 and 403 DF,  p-value: < 2.2e-16

Intercept and slope effect (“\(\alpha\)” parameters) for the third group

Leff3 <- matrix(c(0, contr.sum(3)[3,], rep(0, 3),
                  rep(0, 4), contr.sum(3)[3,]),
                nrow = 2, byrow = TRUE)
rownames(Leff3) <- c("fdrive3", "fdrive3:lweight")
colnames(Leff3) <- names(coef(mInterSum))
print(Leff3)
##                 (Intercept) fdrive1 fdrive2 lweight fdrive1:lweight fdrive2:lweight
## fdrive3                   0      -1      -1       0               0               0
## fdrive3:lweight           0       0       0       0              -1              -1

Estimated intercepts in the three groups

LintSum <- cbind(1, contr.sum(3), 0, 0, 0)
rownames(LintSum) <- levels(CarsNow[, "fdrive"])
colnames(LintSum) <- names(coef(mInterSum))
print(LintSum)
##       (Intercept) fdrive1 fdrive2 lweight fdrive1:lweight fdrive2:lweight
## front           1       1       0       0               0               0
## rear            1       0       1       0               0               0
## 4x4             1      -1      -1       0               0               0
LSest(mInterSum, L = LintSum)
##        Estimate Std. Error    t value    P value     Lower     Upper
## front -52.80472   2.526590 -20.899601 < 2.22e-16 -57.77167 -47.83778
## rear  -32.96022   4.464356  -7.382973 8.9529e-13 -41.73656 -24.18389
## 4x4   -65.34136   3.904452 -16.735090 < 2.22e-16 -73.01700 -57.66572

Estimated slopes in the three groups

LslopeSum <- cbind(0, 0, 0, 1, contr.sum(3))
rownames(LslopeSum) <- levels(CarsNow[, "fdrive"])
colnames(LslopeSum) <- names(coef(mInterSum))
print(LslopeSum)
##       (Intercept) fdrive1 fdrive2 lweight fdrive1:lweight fdrive2:lweight
## front           0       0       0       1               1               0
## rear            0       0       0       1               0               1
## 4x4             0       0       0       1              -1              -1
LSest(mInterSum, L = LslopeSum)
##        Estimate Std. Error   t value    P value    Lower     Upper
## front  8.571564  0.3461413 24.763196 < 2.22e-16 7.891096  9.252032
## rear   5.982551  0.6033947  9.914822 < 2.22e-16 4.796357  7.168745
## 4x4   10.355263  0.5192488 19.942776 < 2.22e-16 9.334488 11.376037




Model with three parallel lines (additive model)

Model fit (reference group pseudocontrasts for a categorical covariate fdrive)

mAddit <- lm(consumption ~ fdrive + lweight, data = CarsNow)
summary(mAddit)
## 
## Call:
## lm(formula = consumption ~ fdrive + lweight, data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4064 -0.6649 -0.1323  0.5747  5.1533 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -52.5605     1.9627 -26.780  < 2e-16 ***
## fdriverear    0.6964     0.1181   5.897 7.83e-09 ***
## fdrive4x4     0.8787     0.1363   6.445 3.29e-10 ***
## lweight       8.5381     0.2688  31.762  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9726 on 405 degrees of freedom
## Multiple R-squared:  0.7937, Adjusted R-squared:  0.7922 
## F-statistic: 519.5 on 3 and 405 DF,  p-value: < 2.2e-16

Estimated intercepts in the three groups

LintAddit <- cbind(1, contr.treatment(3), 0)
rownames(LintAddit) <- levels(CarsNow[, "fdrive"])
colnames(LintAddit) <- names(coef(mAddit))
print(LintAddit)
##       (Intercept) fdriverear fdrive4x4 lweight
## front           1          0         0       0
## rear            1          1         0       0
## 4x4             1          0         1       0
LSest(mAddit, L = LintAddit)
##        Estimate Std. Error   t value    P value     Lower     Upper
## front -52.56051   1.962670 -26.78011 < 2.22e-16 -56.41880 -48.70222
## rear  -51.86413   1.990695 -26.05328 < 2.22e-16 -55.77752 -47.95075
## 4x4   -51.68176   2.023316 -25.54311 < 2.22e-16 -55.65928 -47.70425

Confidence intervals for the model parameters

coef(mAddit)
## (Intercept)  fdriverear   fdrive4x4     lweight 
## -52.5605090   0.6963756   0.8787454   8.5380959
confint(mAddit, conf.level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) -56.4188011 -48.7022168
## fdriverear    0.4642124   0.9285389
## fdrive4x4     0.6107164   1.1467744
## lweight       8.0096458   9.0665461

Plot data and the fitted lines

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
for (gr in levels(CarsNow[, "fdrive"])){
  abline(a = coef(mAddit)["(Intercept)"] +
             ifelse(gr == "front", 0, coef(mAddit)[paste("fdrive", gr, sep = "")]),
         b = coef(mAddit)["lweight"], col = FCOL2[gr], lwd = 2)
}  
legend(6.9, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL, lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-07

Plot data and the fitted lines (original weight on the \(x\)-axis)

wgrid <- seq(900, 3000, length = 500)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ weight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Weight [kg]", ylab = "Consumption [l/100 km]")
for (gr in levels(CarsNow[, "fdrive"])){
  lines(wgrid, coef(mAddit)["(Intercept)"] +
               ifelse(gr == "front", 0, coef(mAddit)[paste("fdrive", gr, sep = "")]) + coef(mAddit)["lweight"] * log(wgrid),
        col = FCOL2[gr], lwd = 2)
}    
legend(1000, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL, lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-08

Model fit (sum contrasts for a categorical covariate fdrive)

mAdditSum <- lm(consumption ~ fdrive + lweight, data = CarsNow, contrasts = list(fdrive = "contr.sum"))
summary(mAdditSum)
## 
## Call:
## lm(formula = consumption ~ fdrive + lweight, data = CarsNow, 
##     contrasts = list(fdrive = "contr.sum"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4064 -0.6649 -0.1323  0.5747  5.1533 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -52.03547    1.99090 -26.137  < 2e-16 ***
## fdrive1      -0.52504    0.07044  -7.454 5.53e-13 ***
## fdrive2       0.17134    0.07465   2.295   0.0222 *  
## lweight       8.53810    0.26882  31.762  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9726 on 405 degrees of freedom
## Multiple R-squared:  0.7937, Adjusted R-squared:  0.7922 
## F-statistic: 519.5 on 3 and 405 DF,  p-value: < 2.2e-16

Estimates of all \(\alpha\) parameters

Csum <- contr.sum(3)
Lalpha <- cbind(0, Csum, 0)
colnames(Lalpha) <- names(coef(mAdditSum))
rownames(Lalpha) <- levels(CarsNow[, "fdrive"])
LSest(mAdditSum, L = Lalpha)
##         Estimate Std. Error   t value    P value       Lower      Upper
## front -0.5250404 0.07043545 -7.454206 5.5325e-13 -0.66350509 -0.3865756
## rear   0.1713353 0.07464863  2.295224   0.022231  0.02458813  0.3180824
## 4x4    0.3537051 0.08437896  4.191864 3.3999e-05  0.18782965  0.5195805




Some submodel F-tests

Interaction versus additive model

mInter <- lm(consumption ~ fdrive + lweight + fdrive:lweight, data = CarsNow)
mAddit <- lm(consumption ~ fdrive + lweight, data = CarsNow)
anova(mAddit, mInter)
## Analysis of Variance Table
## 
## Model 1: consumption ~ fdrive + lweight
## Model 2: consumption ~ fdrive + lweight + fdrive:lweight
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    405 383.1                                  
## 2    403 356.4  2    26.702 15.097 4.758e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The two models on a plot

par(mfrow = c(1, 2), bty = BTY, mar = c(4, 4, 3, 1) + 0.1)
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", main = "Interaction")
for (gr in levels(CarsNow[, "fdrive"])){
    abline(a = coef(mInter)["(Intercept)"] + ifelse(gr == "front", 0, coef(mInter)[paste("fdrive", gr, sep = "")]),
           b = coef(mInter)["lweight"] + ifelse(gr == "front", 0, coef(mInter)[paste("fdrive", gr, ":lweight", sep = "")]),
           col = FCOL2[gr], lwd = 2)
}    
legend(6.9, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL, lty = 1, lwd = 2)
#
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", main = "Additive")
for (gr in levels(CarsNow[, "fdrive"])){
  abline(a = coef(mAddit)["(Intercept)"] + ifelse(gr == "front", 0, coef(mAddit)[paste("fdrive", gr, sep = "")]),
         b = coef(mAddit)["lweight"],
         col = FCOL2[gr], lwd = 2)
}    
legend(6.9, 21, legend = levels(CarsNow[, "fdrive"]), title = "Drive", pch = FPCH, col = FCOL2, pt.bg = FCOL, lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-09

Interaction versus one line model

mInter <- lm(consumption ~ fdrive + lweight + fdrive:lweight, data = CarsNow)
mOneLine <- lm(consumption ~ lweight, data = CarsNow)
anova(mOneLine, mInter)
## Analysis of Variance Table
## 
## Model 1: consumption ~ lweight
## Model 2: consumption ~ fdrive + lweight + fdrive:lweight
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    407 435.68                                  
## 2    403 356.40  4    79.279 22.412 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The two models on a plot

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
for (gr in levels(CarsNow[, "fdrive"])){
  abline(a = coef(mInter)["(Intercept)"] + ifelse(gr == "front", 0, coef(mInter)[paste("fdrive", gr, sep = "")]),
         b = coef(mInter)["lweight"] + ifelse(gr == "front", 0, coef(mInter)[paste("fdrive", gr, ":lweight", sep = "")]),
         col = FCOL2[gr], lwd = 2)
}  
#
abline(mOneLine, col = "orange", lwd = 2)
#
legend(6.9, 21, legend = c(levels(CarsNow[, "fdrive"]), "All drive types"), title = "Drive", pch = c(FPCH, NULL),
       col = c(FCOL2, "orange"), pt.bg = c(FCOL, "white"), lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-10




Analysis of covariance to evaluate effect of drive given log(weight)

This part assumes that the additive model (mAddit) is a useful model for dependence of the mean consumption on fdrive and lweight (which is not really true given quite significant fdrive:lweight interaction in the above model mInter.

ANCOVA F-test

mAddit <- lm(consumption ~ fdrive + lweight, data = CarsNow)
mOneLine <- lm(consumption ~ lweight, data = CarsNow)
anova(mOneLine, mAddit)
## Analysis of Variance Table
## 
## Model 1: consumption ~ lweight
## Model 2: consumption ~ fdrive + lweight
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    407 435.68                                  
## 2    405 383.10  2    52.577 27.791 4.896e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The two models on a plot

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsNow, pch = FPCH[fdrive], col = "grey80", bg = FCOL[fdrive],
     xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
for (gr in levels(CarsNow[, "fdrive"])){
  abline(a = coef(mAddit)["(Intercept)"] + ifelse(gr == "front", 0, coef(mAddit)[paste("fdrive", gr, sep = "")]),
         b = coef(mAddit)["lweight"], col = FCOL2[gr], lwd = 2)
}  
#
abline(mOneLine, col = "orange", lwd = 2)
#
legend(6.9, 21, legend = c(levels(CarsNow[, "fdrive"]), "All drive types"), title = "Drive", pch = c(FPCH, NULL),
       col = c(FCOL2, "orange"), pt.bg = c(FCOL, "white"), lty = 1, lwd = 2)

plot of chunk fig-AdditInter-02-11

Estimated differences between drives given log(weight)

LdiffAddit <- matrix(c(0,  1, 0, 0,
                       0,  0, 1, 0,
                       0, -1, 1, 0), ncol = 4, byrow = TRUE)
rownames(LdiffAddit) <- c("rear - front", "4x4 - front", "4x4 - rear")
colnames(LdiffAddit) <- names(coef(mAddit))
print(LdiffAddit)
##              (Intercept) fdriverear fdrive4x4 lweight
## rear - front           0          1         0       0
## 4x4 - front            0          0         1       0
## 4x4 - rear             0         -1         1       0
LSest(mAddit, L = LdiffAddit)
##               Estimate Std. Error  t value    P value       Lower     Upper
## rear - front 0.6963756  0.1180988 5.896550 7.8326e-09  0.46421240 0.9285389
## 4x4 - front  0.8787454  0.1363433 6.445093 3.2875e-10  0.61071642 1.1467744
## 4x4 - rear   0.1823698  0.1429101 1.276115    0.20265 -0.09856843 0.4633080