Multiple regression model
Data Cars2004nh
data(Cars2004nh, package = "mffSM")
head(Cars2004nh)
## vname type drive price.retail price.dealer price cons.city cons.highway
## 1 Chevrolet.Aveo.4dr 1 1 11690 10965 11327.5 8.4 6.9
## 2 Chevrolet.Aveo.LS.4dr.hatch 1 1 12585 11802 12193.5 8.4 6.9
## 3 Chevrolet.Cavalier.2dr 1 1 14610 13697 14153.5 9.0 6.4
## 4 Chevrolet.Cavalier.4dr 1 1 14810 13884 14347.0 9.0 6.4
## 5 Chevrolet.Cavalier.LS.2dr 1 1 16385 15357 15871.0 9.0 6.4
## 6 Dodge.Neon.SE.4dr 1 1 13670 12849 13259.5 8.1 6.5
## consumption engine.size ncylinder horsepower weight iweight lweight wheel.base length width
## 1 7.65 1.6 4 103 1075 0.0009302326 6.980076 249 424 168
## 2 7.65 1.6 4 103 1065 0.0009389671 6.970730 249 389 168
## 3 7.70 2.2 4 140 1187 0.0008424600 7.079184 264 465 175
## 4 7.70 2.2 4 140 1214 0.0008237232 7.101676 264 465 173
## 5 7.70 2.2 4 140 1187 0.0008424600 7.079184 264 465 175
## 6 7.30 2.0 4 132 1171 0.0008539710 7.065613 267 442 170
## ftype fdrive
## 1 personal front
## 2 personal front
## 3 personal front
## 4 personal front
## 5 personal front
## 6 personal front
dim(Cars2004nh)
## [1] 425 20
summary(Cars2004nh)
## vname type drive price.retail price.dealer
## Length:425 Min. :1.000 Min. :1.000 Min. : 10280 Min. : 9875
## Class :character 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 20370 1st Qu.: 18973
## Mode :character Median :1.000 Median :1.000 Median : 27905 Median : 25672
## Mean :2.219 Mean :1.692 Mean : 32866 Mean : 30096
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.: 39235 3rd Qu.: 35777
## Max. :6.000 Max. :3.000 Max. :192465 Max. :173560
##
## price cons.city cons.highway consumption engine.size ncylinder
## Min. : 10078 Min. : 6.20 Min. : 5.100 Min. : 5.65 Min. :1.300 Min. :-1.000
## 1st Qu.: 19600 1st Qu.:11.20 1st Qu.: 8.100 1st Qu.: 9.65 1st Qu.:2.400 1st Qu.: 4.000
## Median : 26656 Median :12.40 Median : 9.000 Median :10.70 Median :3.000 Median : 6.000
## Mean : 31481 Mean :12.36 Mean : 9.142 Mean :10.75 Mean :3.208 Mean : 5.791
## 3rd Qu.: 37514 3rd Qu.:13.80 3rd Qu.: 9.800 3rd Qu.:11.65 3rd Qu.:3.900 3rd Qu.: 6.000
## Max. :183012 Max. :23.50 Max. :19.600 Max. :21.55 Max. :8.300 Max. :12.000
## NA's :14 NA's :14 NA's :14
## horsepower weight iweight lweight wheel.base length
## Min. :100.0 Min. : 923 Min. :0.0003067 Min. :6.828 Min. :226.0 Min. :363.0
## 1st Qu.:165.0 1st Qu.:1412 1st Qu.:0.0005542 1st Qu.:7.253 1st Qu.:262.0 1st Qu.:450.0
## Median :210.0 Median :1577 Median :0.0006341 Median :7.363 Median :272.0 Median :472.0
## Mean :216.8 Mean :1626 Mean :0.0006412 Mean :7.373 Mean :274.9 Mean :470.6
## 3rd Qu.:255.0 3rd Qu.:1804 3rd Qu.:0.0007082 3rd Qu.:7.498 3rd Qu.:284.0 3rd Qu.:490.0
## Max. :500.0 Max. :3261 Max. :0.0010834 Max. :8.090 Max. :366.0 Max. :577.0
## NA's :2 NA's :2 NA's :2 NA's :2 NA's :26
## width ftype fdrive
## Min. :163.0 personal:242 front:223
## 1st Qu.:175.0 wagon : 30 rear :110
## Median :180.0 SUV : 60 4x4 : 92
## Mean :181.1 pickup : 24
## 3rd Qu.:185.0 sport : 49
## Max. :206.0 minivan : 20
## NA's :28
To be able to compare a model fitted here with other models where also other covariates will be included, we restrict ourselves to a subset of the dataset where all variables consumption
, fdrive', and
lweight are known.
isComplete <- complete.cases(Cars2004nh[, c("consumption", "fdrive", "lweight", "engine.size")])
sum(!isComplete)
## [1] 16
CarsNow <- subset(Cars2004nh, isComplete, select = c("consumption", "drive", "fdrive", "weight", "lweight", "engine.size"))
dim(CarsNow)
## [1] 409 6
summary(CarsNow)
## consumption drive fdrive weight lweight engine.size
## Min. : 5.65 Min. :1.000 front:212 Min. : 923 Min. :6.828 Min. :1.300
## 1st Qu.: 9.65 1st Qu.:1.000 rear :108 1st Qu.:1415 1st Qu.:7.255 1st Qu.:2.400
## Median :10.70 Median :1.000 4x4 : 89 Median :1577 Median :7.363 Median :3.000
## Mean :10.75 Mean :1.699 Mean :1622 Mean :7.371 Mean :3.178
## 3rd Qu.:11.65 3rd Qu.:2.000 3rd Qu.:1804 3rd Qu.:7.498 3rd Qu.:3.800
## Max. :21.55 Max. :3.000 Max. :2903 Max. :7.973 Max. :6.000
consumption
on fdrive',
lweightand
engine.sizelibrary("car")
## Loading required package: carData
palette(c("darkblue", "red3", "olivedrab", rainbow_hcl(5)))
scatterplotMatrix(~consumption + fdrive + lweight + engine.size, pch = 16, col = "olivedrab",
regLine = list(method = lm, lty = 1, lwd = 2, col = "red3"), smooth = FALSE,
diagonal = list(method = "histogram"), data = CarsNow)
mAddit <- lm(consumption ~ fdrive + engine.size + lweight, data = CarsNow)
summary(mAddit)
##
## Call:
## lm(formula = consumption ~ fdrive + engine.size + lweight, data = CarsNow)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1058 -0.5892 -0.0899 0.5472 4.9148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -35.84930 3.08092 -11.636 < 2e-16 ***
## fdriverear 0.46260 0.11715 3.949 9.26e-05 ***
## fdrive4x4 0.98198 0.13019 7.543 3.07e-13 ***
## engine.size 0.56908 0.08361 6.807 3.62e-11 ***
## lweight 6.03099 0.44795 13.464 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9223 on 404 degrees of freedom
## Multiple R-squared: 0.8149, Adjusted R-squared: 0.8131
## F-statistic: 444.8 on 4 and 404 DF, p-value: < 2.2e-16
drop1(mAddit, test = "F")
## Single term deletions
##
## Model:
## consumption ~ fdrive + engine.size + lweight
## Df Sum of Sq RSS AIC F value Pr(>F)
## <none> 343.69 -61.161
## fdrive 2 50.574 394.26 -9.012 29.725 9.046e-13 ***
## engine.size 1 39.413 383.10 -18.758 46.330 3.625e-11 ***
## lweight 1 154.205 497.89 88.436 181.267 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mInter <- lm(consumption ~ (fdrive + engine.size + lweight)^2, data = CarsNow)
summary(mInter)
##
## Call:
## lm(formula = consumption ~ (fdrive + engine.size + lweight)^2,
## data = CarsNow)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1489 -0.5982 -0.1019 0.4863 3.5918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -26.124609 5.776121 -4.523 8.06e-06 ***
## fdriverear 26.875936 7.367167 3.648 0.000299 ***
## fdrive4x4 13.308169 8.311915 1.601 0.110147
## engine.size -5.391862 1.746264 -3.088 0.002158 **
## lweight 4.757609 0.817131 5.822 1.19e-08 ***
## fdriverear:engine.size 0.009665 0.182958 0.053 0.957895
## fdrive4x4:engine.size 0.315489 0.216880 1.455 0.146547
## fdriverear:lweight -3.571144 1.061146 -3.365 0.000839 ***
## fdrive4x4:lweight -1.818723 1.189560 -1.529 0.127081
## engine.size:lweight 0.790111 0.233312 3.386 0.000778 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8726 on 399 degrees of freedom
## Multiple R-squared: 0.8364, Adjusted R-squared: 0.8327
## F-statistic: 226.7 on 9 and 399 DF, p-value: < 2.2e-16
anova(mAddit, mInter)
## Analysis of Variance Table
##
## Model 1: consumption ~ fdrive + engine.size + lweight
## Model 2: consumption ~ (fdrive + engine.size + lweight)^2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 404 343.69
## 2 399 303.78 5 39.906 10.483 1.813e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(mInter, test = "F")
## Single term deletions
##
## Model:
## consumption ~ (fdrive + engine.size + lweight)^2
## Df Sum of Sq RSS AIC F value Pr(>F)
## <none> 303.78 -101.642
## fdrive:engine.size 2 1.9215 305.70 -103.064 1.2619 0.2842440
## fdrive:lweight 2 8.6863 312.46 -94.112 5.7045 0.0036085 **
## engine.size:lweight 1 8.7315 312.51 -92.052 11.4684 0.0007782 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mInter2 <- lm(consumption ~ fdrive + engine.size + lweight + fdrive:lweight + engine.size:lweight, data = CarsNow)
summary(mInter2)
##
## Call:
## lm(formula = consumption ~ fdrive + engine.size + lweight + fdrive:lweight +
## engine.size:lweight, data = CarsNow)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1094 -0.6157 -0.0946 0.4900 3.5782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.8398 4.9687 -4.597 5.76e-06 ***
## fdriverear 27.3567 4.9219 5.558 4.98e-08 ***
## fdrive4x4 4.3904 5.5249 0.795 0.427287
## engine.size -5.8845 1.6945 -3.473 0.000571 ***
## lweight 4.2821 0.6873 6.230 1.18e-09 ***
## fdriverear:lweight -3.6356 0.6675 -5.446 8.98e-08 ***
## fdrive4x4:lweight -0.4836 0.7425 -0.651 0.515241
## engine.size:lweight 0.8662 0.2270 3.817 0.000157 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8731 on 401 degrees of freedom
## Multiple R-squared: 0.8354, Adjusted R-squared: 0.8325
## F-statistic: 290.7 on 7 and 401 DF, p-value: < 2.2e-16
drop1(mInter2, test = "F")
## Single term deletions
##
## Model:
## consumption ~ fdrive + engine.size + lweight + fdrive:lweight +
## engine.size:lweight
## Df Sum of Sq RSS AIC F value Pr(>F)
## <none> 305.70 -103.064
## fdrive:lweight 2 24.150 329.85 -75.966 15.839 2.395e-07 ***
## engine.size:lweight 1 11.105 316.81 -90.469 14.567 0.0001566 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mInter1 <- lm(consumption ~ fdrive + engine.size + lweight + fdrive:lweight, data = CarsNow)
summary(mInter1)
##
## Call:
## lm(formula = consumption ~ fdrive + engine.size + lweight + fdrive:lweight,
## data = CarsNow)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0825 -0.6328 -0.0868 0.4760 4.2210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -37.44459 3.22260 -11.619 < 2e-16 ***
## fdriverear 22.90273 4.86163 4.711 3.40e-06 ***
## fdrive4x4 -8.59853 4.42520 -1.943 0.0527 .
## engine.size 0.57588 0.08125 7.088 6.16e-12 ***
## lweight 6.24702 0.46296 13.494 < 2e-16 ***
## fdriverear:lweight -3.03731 0.65971 -4.604 5.57e-06 ***
## fdrive4x4:lweight 1.26748 0.59358 2.135 0.0333 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8877 on 402 degrees of freedom
## Multiple R-squared: 0.8294, Adjusted R-squared: 0.8269
## F-statistic: 325.8 on 6 and 402 DF, p-value: < 2.2e-16
drop1(mInter1, test = "F")
## Single term deletions
##
## Model:
## consumption ~ fdrive + engine.size + lweight + fdrive:lweight
## Df Sum of Sq RSS AIC F value Pr(>F)
## <none> 316.81 -90.469
## engine.size 1 39.590 356.40 -44.308 50.236 6.159e-12 ***
## fdrive:lweight 2 26.879 343.69 -61.161 17.054 7.782e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mAddit, mInter)
## Analysis of Variance Table
##
## Model 1: consumption ~ fdrive + engine.size + lweight
## Model 2: consumption ~ (fdrive + engine.size + lweight)^2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 404 343.69
## 2 399 303.78 5 39.906 10.483 1.813e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mInter1, mInter)
## Analysis of Variance Table
##
## Model 1: consumption ~ fdrive + engine.size + lweight + fdrive:lweight
## Model 2: consumption ~ (fdrive + engine.size + lweight)^2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 402 316.81
## 2 399 303.78 3 13.027 5.7034 0.0007864 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mInter2, mInter)
## Analysis of Variance Table
##
## Model 1: consumption ~ fdrive + engine.size + lweight + fdrive:lweight +
## engine.size:lweight
## Model 2: consumption ~ (fdrive + engine.size + lweight)^2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 401 305.70
## 2 399 303.78 2 1.9215 1.2619 0.2842
par(mfrow = c(2, 2), mar = c(4, 4, 2, 1) + 0.1, bty = BTY)
plot(mAddit, which = 1, caption = "Additive", pch = 21, col = "blue4", bg = "skyblue")
plot(mInter1, which = 1, caption = "One Interaction", pch = 21, col = "blue4", bg = "skyblue")
plot(mInter2, which = 1, caption = "Two Interactions", pch = 21, col = "blue4", bg = "skyblue")
plot(mInter, which = 1, caption = "All Interactions", pch = 21, col = "blue4", bg = "skyblue")
par(mfrow = c(1, 1), mar = c(4, 4, 4, 1) + 0.1)
funDiag <- function(fit){
FCOL <- rainbow_hcl(3)
FCOL2 <- c("red3", "darkgreen", "darkblue")
FPCH <- c(21, 23, 24)
names(FCOL) <- names(FCOL2) <- names(FPCH) <- levels(CarsNow[, "fdrive"])
resid <- residuals(fit)
PAR <- par(mfrow = c(2, 3), bty = "n", mar = c(4, 4, 4, 1) + 0.1)
on.exit(par(PAR))
for (XV in c("engine.size", "lweight")){
for (DRV in levels(CarsNow[, "fdrive"])){
y <- resid[CarsNow[, "fdrive"] == DRV]
x <- CarsNow[CarsNow[, "fdrive"] == DRV, XV]
plot(y ~ x, xlab = XV, ylab = "Residuals", main = DRV, type = "n")
abline(h = 0, col = "grey80")
points(x, y, pch = FPCH[DRV], col = FCOL[DRV], bg = FCOL2[DRV], )
lines(lowess(x, y), col = "orange", lwd = 2)
}
}
return(invisible(fit))
}
funDiag(mAddit)
funDiag(mInter1)
funDiag(mInter2)
funDiag(mInter)