Multicollinearity
Data IQ
data(IQ, package = "mffSM")
head(IQ)
## gender fgender iq zn7 zn8
## 1 0 girl 94 1.84 1.81
## 2 0 girl 134 1.00 1.00
## 3 0 girl 108 1.69 1.90
## 4 1 boy 96 2.60 2.90
## 5 0 girl 101 1.92 1.72
## 6 1 boy 97 2.00 2.25
dim(IQ)
## [1] 111 5
summary(IQ)
## gender fgender iq zn7 zn8
## Min. :0.0000 girl:54 Min. : 72.0 Min. :1.000 Min. :1.000
## 1st Qu.:0.0000 boy :57 1st Qu.:101.0 1st Qu.:1.300 1st Qu.:1.270
## Median :1.0000 Median :108.0 Median :1.620 Median :1.720
## Mean :0.5135 Mean :109.2 Mean :1.746 Mean :1.782
## 3rd Qu.:1.0000 3rd Qu.:118.0 3rd Qu.:2.070 3rd Qu.:2.090
## Max. :1.0000 Max. :141.0 Max. :3.300 Max. :3.360
Boys
summary(subset(IQ, fgender == "boy", select = c("iq", "zn7", "zn8")))
## iq zn7 zn8
## Min. : 72.0 Min. :1.000 Min. :1.000
## 1st Qu.: 97.0 1st Qu.:1.380 1st Qu.:1.270
## Median :106.0 Median :1.840 Median :1.900
## Mean :107.5 Mean :1.968 Mean :2.012
## 3rd Qu.:117.0 3rd Qu.:2.610 3rd Qu.:2.550
## Max. :141.0 Max. :3.300 Max. :3.360
Girls
summary(subset(IQ, fgender == "girl", select = c("iq", "zn7", "zn8")))
## iq zn7 zn8
## Min. : 80.0 Min. :1.000 Min. :1.000
## 1st Qu.:102.2 1st Qu.:1.208 1st Qu.:1.270
## Median :111.0 Median :1.460 Median :1.540
## Mean :111.1 Mean :1.511 Mean :1.538
## 3rd Qu.:118.5 3rd Qu.:1.830 3rd Qu.:1.810
## Max. :141.0 Max. :2.230 Max. :2.450
pairs.panels
from package psych
is used here. It also shows pairwise correlation coefficients.library("psych")
pairs.panels(subset(IQ, select = c("iq", "gender", "zn7", "zn8")), bg = "palegoldenrod", col = "red3", pch = 21,
ellipses = FALSE, smooth = FALSE, lm = TRUE, hist.col = "lightblue", rug = FALSE)
round(cor(IQ[, c("iq", "gender", "zn7", "zn8")]), 2)
## iq gender zn7 zn8
## iq 1.00 -0.12 -0.69 -0.66
## gender -0.12 1.00 0.37 0.38
## zn7 -0.69 0.37 1.00 0.95
## zn8 -0.66 0.38 0.95 1.00
COLS <- c("red3", "darkblue")
BGCOLS <- rainbow_hcl(2, start = 30, end = 230)
PCH <- c(25, 24)
names(COLS) <- names(BGCOLS) <- names(PCH) <- c("girl", "boy")
layout(matrix(c(0,1,1,0, 2,2,3,3), nrow = 2, byrow = TRUE))
par(mar = c(5, 4, 1, 1) + 0.1)
plot(iq ~ fgender, col = BGCOLS, data=IQ)
plot(iq ~ zn7, col = COLS[fgender], bg = BGCOLS[fgender], pch = PCH[fgender], data = IQ)
plot(iq ~ zn8, col = COLS[fgender], bg = BGCOLS[fgender], pch = PCH[fgender], data = IQ)
legend(2.5, 140, legend = c("Girl", "Boy"), col = COLS, pt.bg = BGCOLS, pch = PCH)
#par(mfrow = c(1, 1))
(sm1 <- summary(m1 <- lm(iq ~ gender + zn7 + zn8, data = IQ)))
##
## Call:
## lm(formula = iq ~ gender + zn7 + zn8, data = IQ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.1677 -7.5243 -0.4338 7.1780 26.4095
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 138.222 3.119 44.314 < 2e-16 ***
## gender 4.563 2.221 2.055 0.04232 *
## zn7 -16.767 5.536 -3.029 0.00308 **
## zn8 -1.149 5.557 -0.207 0.83658
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.81 on 107 degrees of freedom
## Multiple R-squared: 0.4943, Adjusted R-squared: 0.4801
## F-statistic: 34.87 on 3 and 107 DF, p-value: 8.472e-16
(sm1st <- summary(m1st <- lm(scale(iq) ~ scale(gender) + scale(zn7) + scale(zn8), data = IQ)))
##
## Call:
## lm(formula = scale(iq) ~ scale(gender) + scale(zn7) + scale(zn8),
## data = IQ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47790 -0.50164 -0.02892 0.47855 1.76069
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.169e-16 6.844e-02 0.000 1.00000
## scale(gender) 1.528e-01 7.434e-02 2.055 0.04232 *
## scale(zn7) -6.989e-01 2.308e-01 -3.029 0.00308 **
## scale(zn8) -4.800e-02 2.321e-01 -0.207 0.83658
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.721 on 107 degrees of freedom
## Multiple R-squared: 0.4943, Adjusted R-squared: 0.4801
## F-statistic: 34.87 on 3 and 107 DF, p-value: 8.472e-16
n <- nrow(IQ)
IQ <- transform(IQ, siq = scale(iq) / sqrt(n - 1),
sgender = scale(gender) / sqrt(n - 1),
szn7 = scale(zn7) / sqrt(n - 1),
szn8 = scale(zn8) / sqrt(n - 1))
(sm1st2 <- summary(m1st2 <- lm(siq ~ sgender + szn7 + szn8, data = IQ)))
##
## Call:
## lm(formula = siq ~ sgender + szn7 + szn8, data = IQ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.140912 -0.047829 -0.002757 0.045628 0.167875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.731e-18 6.525e-03 0.000 1.00000
## sgender 1.528e-01 7.434e-02 2.055 0.04232 *
## szn7 -6.989e-01 2.308e-01 -3.029 0.00308 **
## szn8 -4.800e-02 2.321e-01 -0.207 0.83658
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06875 on 107 degrees of freedom
## Multiple R-squared: 0.4943, Adjusted R-squared: 0.4801
## F-statistic: 34.87 on 3 and 107 DF, p-value: 8.472e-16
vif
is from package car
.detach("package:psych")
library("car")
vif(m1)
## gender zn7 zn8
## 1.16923 11.26866 11.40240
vif(lm(iq ~ fgender + zn7 + zn8, data = IQ))
## fgender zn7 zn8
## 1.16923 11.26866 11.40240
(sm27 <- summary(m27 <- lm(iq ~ gender + zn7, data = IQ)))
##
## Call:
## lm(formula = iq ~ gender + zn7, data = IQ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.9606 -7.4290 -0.1927 7.0047 26.5244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 138.093 3.043 45.376 <2e-16 ***
## gender 4.513 2.198 2.054 0.0424 *
## zn7 -17.852 1.765 -10.116 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.77 on 108 degrees of freedom
## Multiple R-squared: 0.4941, Adjusted R-squared: 0.4848
## F-statistic: 52.74 on 2 and 108 DF, p-value: < 2.2e-16
vif(m27)
## gender zn7
## 1.15531 1.15531
(sm28 <- summary(m28 <- lm(iq ~ gender + zn8, data = IQ)))
##
## Call:
## lm(formula = iq ~ gender + zn8, data = IQ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5378 -7.9585 -0.0763 7.1273 31.0778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.402 3.223 42.634 < 2e-16 ***
## gender 4.474 2.303 1.943 0.0547 .
## zn8 -17.095 1.846 -9.263 2.21e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.22 on 108 degrees of freedom
## Multiple R-squared: 0.451, Adjusted R-squared: 0.4408
## F-statistic: 44.36 on 2 and 108 DF, p-value: 8.673e-15
vif(m28)
## gender zn8
## 1.169022 1.169022
pzn <- c(1, 3.4)
pdata <- data.frame(zn7 = rep(pzn, 2), zn8 = rep(pzn, 2), gender = rep(c(0, 1), each = 2))
pm27 <- predict(m27, newdata = pdata)
pm28 <- predict(m28, newdata = pdata)
par(mfrow = c(1, 2), mar = c(5, 4, 1, 1) + 0.1)
plot(iq ~ zn7, col = COLS[fgender], bg = BGCOLS[fgender], pch = PCH[fgender], data = IQ)
lines(pzn, pm27[1:2], col = COLS["girl"], lwd = 2)
lines(pzn, pm27[3:4], col = COLS["boy"], lwd = 2)
legend(2.5, 140, legend = c("Girl", "Boy"), col = COLS, pt.bg = BGCOLS, pch = PCH, lty = 1, lwd = 2)
plot(iq ~ zn8, col = COLS[fgender], bg = BGCOLS[fgender], pch = PCH[fgender], data = IQ)
lines(pzn, pm28[1:2], col = COLS["girl"], lwd = 2)
lines(pzn, pm28[3:4], col = COLS["boy"], lwd = 2)
legend(2.5, 140, legend = c("Girl", "Boy"), col = COLS, pt.bg = BGCOLS, pch = PCH, lty = 1, lwd = 2)
par(mfrow = c(1, 1))