Partial residuals, Simpson’s paradox
Data Policie
data(Policie, package = "mffSM")
head(Policie)
## react height weight fat pulse diast
## 1 0.284 189.1 87.36 16.98 82.9 66
## 2 0.300 178.2 117.60 27.60 50.4 87
## 3 0.250 186.0 82.85 6.61 50.7 85
## 4 0.370 183.1 62.32 3.26 70.6 59
## 5 0.351 180.0 82.00 19.00 80.0 76
## 6 0.264 195.0 102.00 27.00 83.0 77
dim(Policie)
## [1] 50 6
summary(Policie)
## react height weight fat pulse
## Min. :0.2300 Min. :165.4 Min. : 51.76 Min. : 2.860 Min. : 44.90
## 1st Qu.:0.2818 1st Qu.:172.7 1st Qu.: 70.04 1st Qu.: 7.195 1st Qu.: 67.33
## Median :0.3130 Median :178.8 Median : 79.72 Median :12.665 Median : 72.90
## Mean :0.3214 Mean :178.3 Mean : 79.18 Mean :13.036 Mean : 75.66
## 3rd Qu.:0.3643 3rd Qu.:183.0 3rd Qu.: 87.06 3rd Qu.:18.730 3rd Qu.: 82.97
## Max. :0.4120 Max. :195.0 Max. :117.60 Max. :27.600 Max. :112.20
## diast
## Min. :59.00
## 1st Qu.:68.00
## Median :76.00
## Mean :75.10
## 3rd Qu.:79.75
## Max. :95.00
fat
on height
and weight
scatterplotMatrix
comes from package car
.library("car")
palette(c("darkblue", "red3", "olivedrab", rainbow_hcl(5)))
scatterplotMatrix(~fat + height + weight, reg.line = lm, smooth = FALSE, spread = TRUE, diagonal = "histogram", data = Policie, pch = 16)
mHeight <- lm(fat ~ height, data = Policie)
summary(mHeight)
##
## Call:
## lm(formula = fat ~ height, data = Policie)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.4119 -5.2152 -0.6149 3.3587 14.5967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -47.6791 23.9707 -1.989 0.0524 .
## height 0.3405 0.1343 2.535 0.0146 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.483 on 48 degrees of freedom
## Multiple R-squared: 0.1181, Adjusted R-squared: 0.09968
## F-statistic: 6.425 on 1 and 48 DF, p-value: 0.01457
mWeight <- lm(fat ~ weight, data = Policie)
summary(mWeight)
##
## Call:
## lm(formula = fat ~ weight, data = Policie)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9935 -2.5654 -0.3985 2.0597 7.9323
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -20.75217 3.42327 -6.062 2.02e-07 ***
## weight 0.42674 0.04266 10.003 2.51e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.931 on 48 degrees of freedom
## Multiple R-squared: 0.6758, Adjusted R-squared: 0.669
## F-statistic: 100.1 on 1 and 48 DF, p-value: 2.509e-13
mHeWe <- lm(fat ~ height + weight, data = Policie)
summary(mHeWe)
##
## Call:
## lm(formula = fat ~ height + weight, data = Policie)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4011 -2.9482 -0.0211 2.3072 7.2968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.55309 15.24621 1.086 0.2831
## height -0.24362 0.09728 -2.504 0.0158 *
## weight 0.50418 0.05095 9.896 4.49e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.731 on 47 degrees of freedom
## Multiple R-squared: 0.714, Adjusted R-squared: 0.7018
## F-statistic: 58.66 on 2 and 47 DF, p-value: 1.681e-13
mHeWe
.(ybar <- with(Policie, mean(fat)))
## [1] 13.036
prY <- residuals(mHeWe, type = "partial") + ybar
height
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1) + 0.1, bty = BTY)
#
YLIM <- range(c(Policie[, "fat"], prY[, "height"]))
bex <- round(coef(mHeWe)["height"], 2)
bexCI <- round(confint(mHeWe)["height", ], 2)
beMarg <- round(coef(mHeight)[2], 2)
beMargCI <- round(confint(mHeight)[2, ], 2)
#
plot(fat ~ height, data = Policie, ylim = YLIM, main = "Marginal",
xlab = "Height [cm]", ylab = "Fat [%]", pch = PCH, col = COL, bg = BGC)
abline(mHeight, col = "blue3", lwd = 2)
text(165, 25, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
list(be = beMarg, beCI1 = beMargCI[1], beCI2 = beMargCI[2]))),
pos = 4, cex = 1.7)
#
plot(prY[, "height"] ~ height, data = Policie, ylim = YLIM, main = "Partial",
xlab = "Height [cm]", ylab = "Response-mean partial residuals [%]", pch = PCH3, col = COL3, bg = BGC3)
abline(lm(prY[, "height"] ~ height, data = Policie), col = "red2", lwd = 2)
text(165, 25, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
list(be = bex, beCI1 = bexCI[1], beCI2 = bexCI[2]))),
pos = 4, cex = 1.7)
#
par(mfrow = c(1, 1), mar = c(5, 4, 4, 1) + 0.1)
weight
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1) + 0.1, bty = BTY)
#
YLIM <- range(c(Policie[, "fat"], prY[, "weight"]))
bex <- format(round(coef(mHeWe)["weight"], 2), nsmall = 2)
bexCI <- format(round(confint(mHeWe)["weight", ], 2), nsmall = 2)
beMarg <- format(round(coef(mWeight)[2], 2), nsmall = 2)
beMargCI <- format(round(confint(mWeight)[2, ], 2), nsmall = 2)
#
plot(fat ~ weight, data = Policie, ylim = YLIM, main = "Marginal",
xlab = "Weight [kg]", ylab = "Fat [%]", pch = PCH, col = COL, bg = BGC)
abline(mWeight, col = "blue3", lwd = 2)
text(55, 29, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
list(be = beMarg, beCI1 = beMargCI[1], beCI2 = beMargCI[2]))),
pos = 4, cex = 1.7)
#
plot(prY[, "weight"] ~ weight, data = Policie, ylim = YLIM, main = "Partial",
xlab = "Weight [kg]", ylab = "Response-mean partial residuals [%]", pch = PCH3, col = COL3, bg = BGC3)
abline(lm(prY[, "weight"] ~ weight, data = Policie), col = "red2", lwd = 2)
text(55, 29, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
list(be = bex, beCI1 = bexCI[1], beCI2 = bexCI[2]))),
pos = 4, cex = 1.7)
#
par(mfrow = c(1, 1), mar = c(5, 4, 4, 1) + 0.1)