## ----echo=FALSE, results='hide'------------------------------------------ source(".Rprofile") ## ------------------------------------------------------------------------ data(Cars2004, package = "mffSM") head(Cars2004) dim(Cars2004) summary(Cars2004) ## ------------------------------------------------------------------------ isComplete <- complete.cases(Cars2004[, c("consumption", "lweight", "engine.size")]) sum(!isComplete) CarsUsed <- subset(Cars2004, isComplete, select = c("vname", "fhybrid", "consumption", "weight", "lweight")) dim(CarsUsed) summary(CarsUsed) ## ----fig-Unusual-01-01, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL, bg = BGC, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]") #lines(lowess(CarsUsed[, "lweight"], CarsUsed[, "consumption"]), col = "blue", lwd = 2) ## ------------------------------------------------------------------------ m1 <- lm(consumption ~ lweight, data = CarsUsed) summary(m1) ## ------------------------------------------------------------------------ (Ybar <- round(with(CarsUsed, mean(consumption)), 2)) (be0 <- round(coef(m1)[1], 2)) (be1 <- round(coef(m1)[2], 4)) ## ----fig-Unusual-01-02, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL2, bg = BGC2, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]") abline(m1, col = "red2", lwd = 2) ## ------------------------------------------------------------------------ h <- hatvalues(m1) print(h[1:10]) ## ------------------------------------------------------------------------ m <- 1 - hatvalues(m1) print(m[1:10]) ## ------------------------------------------------------------------------ u <- residuals(m1) print(u[1:10]) ## ------------------------------------------------------------------------ ustd <- rstandard(m1) print(ustd[1:10]) ## ------------------------------------------------------------------------ Tt <- rstudent(m1) print(Tt[1:10]) ## ------------------------------------------------------------------------ gamma <- residuals(m1) / (1 - hatvalues(m1)) print(gamma[1:10]) ## ------------------------------------------------------------------------ CarsUsed[, "h"] <- hatvalues(m1) CarsUsed[, "m"] <- 1 - hatvalues(m1) CarsUsed[, "u"] <- residuals(m1) CarsUsed[, "ustd"] <- rstandard(m1) CarsUsed[, "gamma"] <- CarsUsed[, "u"] / CarsUsed[, "m"] CarsUsed[, "Tt"] <- rstudent(m1) # head(CarsUsed) ## ------------------------------------------------------------------------ ind <- order(abs(CarsUsed[, "Tt"]), decreasing = TRUE) print(CarsUsed[ind,][1:5,]) # print(CarsUsed[ind, 1:5][1:5,]) print(CarsUsed[ind, -(1:5)][1:5,]) ## ----fig-Unusual-01-03, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- xshow <- CarsUsed[ind,][1:5, "lweight"] yshow <- CarsUsed[ind,][1:5, "consumption"] tshow <- rownames(CarsUsed[ind,])[1:5] # par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL2, bg = BGC2, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22)) abline(m1, col = "red2", lwd = 2) text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")), list(be0 = be0, be1 = be1))), pos = 4) text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")), list(Ybar = Ybar))), pos = 4) text(xshow, yshow, labels = tshow, pos = 3) ## ------------------------------------------------------------------------ PvalUnadj <- 2 * pt(-abs(rstudent(m1)), df = m1[["df.residual"]] - 1) min(PvalUnadj) sum(PvalUnadj < 0.05) ## ------------------------------------------------------------------------ PvalBonf <- p.adjust(PvalUnadj, method = "bonferroni") min(PvalBonf) sum(PvalBonf < 0.05) ## ------------------------------------------------------------------------ CarsUsed <- transform(CarsUsed, PvalUnadj = PvalUnadj, PvalBonf = PvalBonf) ## ------------------------------------------------------------------------ print(subset(CarsUsed, PvalBonf < 0.05, select = c("vname", "fhybrid", "consumption", "gamma", "Tt", "PvalUnadj", "PvalBonf"))) ## ------------------------------------------------------------------------ print(CarsUsed[ind, c("vname", "fhybrid", "consumption", "gamma", "Tt", "PvalUnadj", "PvalBonf")][1:5,]) ## ----fig-Unusual-01-04, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- xout <- subset(CarsUsed, PvalBonf < 0.05)[, "lweight"] yout <- subset(CarsUsed, PvalBonf < 0.05)[, "consumption"] # par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22)) abline(m1, col = "red2", lwd = 2) text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")), list(be0 = be0, be1 = be1))), pos = 4) text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")), list(Ybar = Ybar))), pos = 4) text(xshow, yshow, labels = tshow, pos = 3) points(xout, yout, pch = PCH, col = "orange", bg = "red", cex = 2) ## ------------------------------------------------------------------------ inflMeas <- influence.measures(m1) #print(inflMeas) ## full table with some stars print(inflMeas$infmat[1:10,]) ## the first 10 rows without stars ## ------------------------------------------------------------------------ summary(influence.measures(m1)) ## ------------------------------------------------------------------------ (r <- m1[["rank"]]) (n <- m1[["df.residual"]] + r) ## ------------------------------------------------------------------------ h <- hatvalues(m1) print(h[1:10]) ## ------------------------------------------------------------------------ 3 * r / n ## ------------------------------------------------------------------------ sum(hatvalues(m1) > 3 * r / n) ## ------------------------------------------------------------------------ isLeverage <- (hatvalues(m1) > 3 * r /n) subset(CarsUsed, isLeverage, select = c("vname", "consumption", "weight", "lweight", "h")) ## ----fig-Unusual-01-05, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- xlev <- subset(CarsUsed, isLeverage)[, "lweight"] ylev <- subset(CarsUsed, isLeverage)[, "consumption"] tlev <- rownames(subset(CarsUsed, isLeverage)) # par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22)) abline(m1, col = "red2", lwd = 2) text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")), list(be0 = be0, be1 = be1))), pos = 4) text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")), list(Ybar = Ybar))), pos = 4) text(xlev, ylev, labels = tlev, pos = 3) points(xlev, ylev, pch = PCH, col = "orange", bg = "red", cex = 2) ## ------------------------------------------------------------------------ dfbts <- dfbetas(m1) print(dfbts[1:10,]) ## ------------------------------------------------------------------------ sum(abs(dfbetas(m1)[, 1]) > 1) sum(abs(dfbetas(m1)[, 2]) > 1) ## ------------------------------------------------------------------------ apply(abs(dfbetas(m1)), 2, max) ## ------------------------------------------------------------------------ dffts <- dffits(m1) print(dffts[1:10]) ## ------------------------------------------------------------------------ 3 * sqrt(r / (n-r)) ## ------------------------------------------------------------------------ sum(abs(dffits(m1)) > 3 * sqrt(r / (n-r))) ## ------------------------------------------------------------------------ CarsUsed[, "dffits"] <- dffits(m1) isDFFITS <- (abs(dffits(m1)) > 3 * sqrt(r / (n-r))) subset(CarsUsed, isDFFITS, select = c("vname", "consumption", "weight", "lweight", "dffits")) ## ----fig-Unusual-01-06, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- xdffits <- subset(CarsUsed, isDFFITS)[, "lweight"] ydffits <- subset(CarsUsed, isDFFITS)[, "consumption"] tdffits <- rownames(subset(CarsUsed, isDFFITS)) # m1without305 <- lm(consumption ~ lweight, data = subset(CarsUsed, vname != "Hummer.H2")) # par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22)) abline(m1, col = "red2", lwd = 2) text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")), list(be0 = be0, be1 = be1))), pos = 4) text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")), list(Ybar = Ybar))), pos = 4) text(xdffits, ydffits, labels = tdffits, pos = 3) points(xdffits, ydffits, pch = PCH, col = "orange", bg = "red", cex = 2) # abline(m1without305, col = "darkgreen", lwd = 2) legend(6.7, 22, legend = paste("Fit without obs. 305 with the highest DFFITS value", sep = ""), lty = 1, col = "darkgreen", lwd = 2) ## ------------------------------------------------------------------------ cookd <- cooks.distance(m1) print(cookd[1:10]) ## ------------------------------------------------------------------------ qf(0.50, r, n-r) ## ------------------------------------------------------------------------ sum(cooks.distance(m1) > qf(0.50, r, n-r)) ## ------------------------------------------------------------------------ max(cooks.distance(m1)) ## ------------------------------------------------------------------------ covrt <- covratio(m1) print(covrt[1:10]) ## ------------------------------------------------------------------------ 3 * (r / (n-r)) ## ------------------------------------------------------------------------ sum(abs(1 - covratio(m1)) > 3 * (r / (n-r))) ## ------------------------------------------------------------------------ sum(abs(1 - covratio(m1)) > 3 * (r / (n-r))) ## ------------------------------------------------------------------------ CarsUsed[, "covratio"] <- covratio(m1) isCOVRATIO <- (abs(1 - covratio(m1)) > 3 * (r / (n-r))) subset(CarsUsed, isCOVRATIO, select = c("vname", "consumption", "weight", "lweight", "covratio")) ## ----fig-Unusual-01-07, fig.ext='png', fig.path=FIGDIRPNG, fig.width=9, fig.height=6---- xcovratio <- subset(CarsUsed, isCOVRATIO)[, "lweight"] ycovratio <- subset(CarsUsed, isCOVRATIO)[, "consumption"] tcovratio <- rownames(subset(CarsUsed, isCOVRATIO)) # par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1) plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3, xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22)) abline(m1, col = "red2", lwd = 2) text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")), list(be0 = be0, be1 = be1))), pos = 4) text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")), list(Ybar = Ybar))), pos = 4) text(xcovratio, ycovratio, labels = tcovratio, pos = 3) points(xcovratio, ycovratio, pch = PCH, col = "orange", bg = "red", cex = 2) ## ----fig-Unusual-01-08, fig.ext='png', fig.path=FIGDIRPNG, fig.width=7, fig.height=6---- par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1) plot(m1, which = 4, col = "blue4") ## ----fig-Unusual-01-09, fig.ext='png', fig.path=FIGDIRPNG, fig.width=7, fig.height=6---- par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1) plot(m1, which = 5, pch = 21, col = "blue4", bg = "skyblue") ## ----fig-Unusual-01-10, fig.ext='png', fig.path=FIGDIRPNG, fig.width=7, fig.height=6---- par(mfrow = c(1, 1), mar = c(5, 4, 2, 1) + 0.1) plot(m1, which = 6, pch = 21, col = "blue4", bg = "skyblue")