NMSA407 Linear Regression: Tutorial

Partial residuals, Simpson’s paradox

Data Policie




Introduction

Load used data and calculate basic summaries

data(Policie, package = "mffSM")
head(Policie)
##   react height weight   fat pulse diast
## 1 0.284  189.1  87.36 16.98  82.9    66
## 2 0.300  178.2 117.60 27.60  50.4    87
## 3 0.250  186.0  82.85  6.61  50.7    85
## 4 0.370  183.1  62.32  3.26  70.6    59
## 5 0.351  180.0  82.00 19.00  80.0    76
## 6 0.264  195.0 102.00 27.00  83.0    77
dim(Policie)
## [1] 50  6
summary(Policie)
##      react            height          weight            fat             pulse       
##  Min.   :0.2300   Min.   :165.4   Min.   : 51.76   Min.   : 2.860   Min.   : 44.90  
##  1st Qu.:0.2818   1st Qu.:172.7   1st Qu.: 70.04   1st Qu.: 7.195   1st Qu.: 67.33  
##  Median :0.3130   Median :178.8   Median : 79.72   Median :12.665   Median : 72.90  
##  Mean   :0.3214   Mean   :178.3   Mean   : 79.18   Mean   :13.036   Mean   : 75.66  
##  3rd Qu.:0.3643   3rd Qu.:183.0   3rd Qu.: 87.06   3rd Qu.:18.730   3rd Qu.: 82.97  
##  Max.   :0.4120   Max.   :195.0   Max.   :117.60   Max.   :27.600   Max.   :112.20  
##      diast      
##  Min.   :59.00  
##  1st Qu.:68.00  
##  Median :76.00  
##  Mean   :75.10  
##  3rd Qu.:79.75  
##  Max.   :95.00




Dependence of fat on height and weight

Basic scatterplots to start

library("car")
palette(c("darkblue", "red3", "olivedrab", rainbow_hcl(5)))
scatterplotMatrix(~fat + height + weight, reg.line = lm, smooth = FALSE, spread = TRUE, diagonal = "histogram", data = Policie, pch = 16)

plot of chunk fig-CheckModelAssumpt-01-01

Three linear models

mHeight <- lm(fat ~ height, data = Policie)
summary(mHeight)
## 
## Call:
## lm(formula = fat ~ height, data = Policie)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4119  -5.2152  -0.6149   3.3587  14.5967 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -47.6791    23.9707  -1.989   0.0524 .
## height        0.3405     0.1343   2.535   0.0146 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.483 on 48 degrees of freedom
## Multiple R-squared:  0.1181, Adjusted R-squared:  0.09968 
## F-statistic: 6.425 on 1 and 48 DF,  p-value: 0.01457
mWeight <- lm(fat ~ weight, data = Policie)
summary(mWeight)
## 
## Call:
## lm(formula = fat ~ weight, data = Policie)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9935 -2.5654 -0.3985  2.0597  7.9323 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -20.75217    3.42327  -6.062 2.02e-07 ***
## weight        0.42674    0.04266  10.003 2.51e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.931 on 48 degrees of freedom
## Multiple R-squared:  0.6758, Adjusted R-squared:  0.669 
## F-statistic: 100.1 on 1 and 48 DF,  p-value: 2.509e-13
mHeWe <- lm(fat ~ height + weight, data = Policie)
summary(mHeWe)
## 
## Call:
## lm(formula = fat ~ height + weight, data = Policie)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4011 -2.9482 -0.0211  2.3072  7.2968 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 16.55309   15.24621   1.086   0.2831    
## height      -0.24362    0.09728  -2.504   0.0158 *  
## weight       0.50418    0.05095   9.896 4.49e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.731 on 47 degrees of freedom
## Multiple R-squared:  0.714,  Adjusted R-squared:  0.7018 
## F-statistic: 58.66 on 2 and 47 DF,  p-value: 1.681e-13

Response mean partial residuals

(ybar <- with(Policie, mean(fat)))
## [1] 13.036
prY <- residuals(mHeWe, type = "partial") + ybar

Plot for height

par(mfrow = c(1, 2), mar = c(4, 4, 2, 1) + 0.1, bty = BTY)
#
YLIM <- range(c(Policie[, "fat"], prY[, "height"]))
bex <- round(coef(mHeWe)["height"], 2)
bexCI <- round(confint(mHeWe)["height", ], 2)
beMarg <- round(coef(mHeight)[2], 2)
beMargCI <- round(confint(mHeight)[2, ], 2)
#
plot(fat ~ height, data = Policie, ylim = YLIM, main = "Marginal",
     xlab = "Height [cm]", ylab = "Fat [%]", pch = PCH, col = COL, bg = BGC)
abline(mHeight, col = "blue3", lwd = 2)
text(165, 25, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
                            list(be = beMarg, beCI1 = beMargCI[1], beCI2 = beMargCI[2]))),
     pos = 4, cex = 1.7)
#
plot(prY[, "height"] ~ height, data = Policie, ylim = YLIM, main = "Partial",
     xlab = "Height [cm]", ylab = "Response-mean partial residuals [%]", pch = PCH3, col = COL3, bg = BGC3)
abline(lm(prY[, "height"] ~ height, data = Policie), col = "red2", lwd = 2)
text(165, 25, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
                            list(be = bex, beCI1 = bexCI[1], beCI2 = bexCI[2]))),
     pos = 4, cex = 1.7)

plot of chunk fig-CheckModelAssumpt-01-02

#
par(mfrow = c(1, 1), mar = c(5, 4, 4, 1) + 0.1)

Plots for weight

par(mfrow = c(1, 2), mar = c(4, 4, 2, 1) + 0.1, bty = BTY)
#
YLIM <- range(c(Policie[, "fat"], prY[, "weight"]))
bex <- format(round(coef(mHeWe)["weight"], 2), nsmall = 2)
bexCI <- format(round(confint(mHeWe)["weight", ], 2), nsmall = 2)
beMarg <- format(round(coef(mWeight)[2], 2), nsmall = 2)
beMargCI <- format(round(confint(mWeight)[2, ], 2), nsmall = 2)
#
plot(fat ~ weight, data = Policie, ylim = YLIM, main = "Marginal",
     xlab = "Weight [kg]", ylab = "Fat [%]", pch = PCH, col = COL, bg = BGC)
abline(mWeight, col = "blue3", lwd = 2)
text(55, 29, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
                            list(be = beMarg, beCI1 = beMargCI[1], beCI2 = beMargCI[2]))),
     pos = 4, cex = 1.7)
#
plot(prY[, "weight"] ~ weight, data = Policie, ylim = YLIM, main = "Partial",
     xlab = "Weight [kg]", ylab = "Response-mean partial residuals [%]", pch = PCH3, col = COL3, bg = BGC3)
abline(lm(prY[, "weight"] ~ weight, data = Policie), col = "red2", lwd = 2)
text(55, 29, labels = eval(substitute(expression(paste(hat(beta), " = ", be, " (", beCI1, ", ", beCI2, ")", sep="")),
                            list(be = bex, beCI1 = bexCI[1], beCI2 = bexCI[2]))),
     pos = 4, cex = 1.7)

plot of chunk fig-CheckModelAssumpt-01-03

#
par(mfrow = c(1, 1), mar = c(5, 4, 4, 1) + 0.1)