NMSA407 Linear Regression: Tutorial

Confidence band around and for the regression function

Data Kojeni




Introduction

Load used data and calculate basic summaries

data(Kojeni, package = "mffSM")
head(Kojeni)
##   bplace n.child educ bfeed.dur bfeed24 gender bweight blength weight24 length24 breast father teat
## 1      1       3    2        18       0      1    3200      50     8000       75      1      0    1
## 2      1       2    3         5       0      1    3720      52     8110       68      0      0    1
## 3      1       1    2        24       1      1    4100      53     8750       64      1      0    1
## 4      1       1    3        14       0      1    3960      53     8350       72      1      1    1
## 5      1       1    1        24       1      0    3700      52     6700       68      0      0    1
## 6      1       1    2        16       0      0    3700      51     7990       68      1      0    1
##   plan mheight fheight mage fage fbplace     feduc fbfeed24 fgender fbreast ffather fteat fplan
## 1    1     160     180   26   30  Prague Secondary       No     Boy     Yes      No   Yes   Yes
## 2    1     160     187   35   38  Prague  Tertiary       No     Boy      No      No   Yes   Yes
## 3    1     173     186   26   28  Prague Secondary      Yes     Boy     Yes      No   Yes   Yes
## 4    1     159     177   24   26  Prague  Tertiary       No     Boy     Yes     Yes   Yes   Yes
## 5    0     168     183   22   28  Prague   Primary      Yes    Girl      No      No   Yes    No
## 6    1     173     186   24   29  Prague Secondary       No    Girl     Yes      No   Yes   Yes
dim(Kojeni)
## [1] 99 26
summary(Kojeni)
##      bplace         n.child           educ         bfeed.dur        bfeed24           gender      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   : 0.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 7.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.000   Median :1.000   Median :2.000   Median :16.00   Median :0.0000   Median :0.0000  
##  Mean   :1.293   Mean   :1.566   Mean   :1.838   Mean   :14.43   Mean   :0.2828   Mean   :0.4949  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:24.00   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :2.000   Max.   :4.000   Max.   :3.000   Max.   :24.00   Max.   :1.0000   Max.   :1.0000  
##     bweight        blength        weight24       length24         breast           father      
##  Min.   :2040   Min.   :46.0   Min.   :5850   Min.   :62.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:3200   1st Qu.:50.0   1st Qu.:7025   1st Qu.:66.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :3480   Median :51.0   Median :7770   Median :68.00   Median :0.0000   Median :0.0000  
##  Mean   :3470   Mean   :50.6   Mean   :7690   Mean   :68.55   Mean   :0.4848   Mean   :0.3636  
##  3rd Qu.:3720   3rd Qu.:52.0   3rd Qu.:8250   3rd Qu.:70.00   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :4400   Max.   :54.0   Max.   :9950   Max.   :78.00   Max.   :1.0000   Max.   :1.0000  
##       teat             plan           mheight       fheight           mage           fage      
##  Min.   :0.0000   Min.   :0.0000   Min.   :153   Min.   :162.0   Min.   :18.0   Min.   :19.00  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:164   1st Qu.:175.0   1st Qu.:23.0   1st Qu.:26.00  
##  Median :1.0000   Median :1.0000   Median :167   Median :179.0   Median :25.0   Median :28.00  
##  Mean   :0.7677   Mean   :0.5859   Mean   :167   Mean   :179.3   Mean   :25.7   Mean   :28.89  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:171   3rd Qu.:185.0   3rd Qu.:28.0   3rd Qu.:32.00  
##  Max.   :1.0000   Max.   :1.0000   Max.   :187   Max.   :195.0   Max.   :38.0   Max.   :43.00  
##      fbplace         feduc    fbfeed24 fgender   fbreast  ffather  fteat    fplan   
##  Prague  :70   Primary  :34   No :71   Girl:50   No :51   No :63   No :23   No :41  
##  Province:29   Secondary:47   Yes:28   Boy :49   Yes:48   Yes:36   Yes:76   Yes:58  
##                Tertiary :18                                                         
##                                                                                     
##                                                                                     
## 

Create a jittered version of blength

set.seed(221913282)
Kojeni <- transform(Kojeni, blength.jitter = blength + runif(nrow(Kojeni), -0.1, 0.1))




Linear model (regression line) for dependence of birth weight on birth length

summary(mKwl <- lm(bweight ~ blength, data = Kojeni))
## 
## Call:
## lm(formula = bweight ~ blength, data = Kojeni)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -685.93 -152.83  -30.76  196.83  664.07 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7905.80     895.45  -8.829 4.52e-14 ***
## blength       224.83      17.69  12.709  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 271.7 on 97 degrees of freedom
## Multiple R-squared:  0.6248, Adjusted R-squared:  0.6209 
## F-statistic: 161.5 on 1 and 97 DF,  p-value: < 2.2e-16




Confidence bands around and for the regression function

Grid of values to calculate the band bounds

blengthNew <- seq(45.8, 54.2, length = 500)
pdata <- data.frame(blength = blengthNew)

Confidence band AROUND the regression function

bandAROUND <- predict(mKwl, newdata = pdata, interval = "confidence", level = 0.95)
print(bandAROUND[1:10,])
##         fit      lwr      upr
## 1  2391.311 2214.423 2568.199
## 2  2395.095 2218.770 2571.421
## 3  2398.880 2223.117 2574.643
## 4  2402.665 2227.464 2577.866
## 5  2406.449 2231.810 2581.088
## 6  2410.234 2236.157 2584.311
## 7  2414.019 2240.503 2587.534
## 8  2417.803 2244.849 2590.758
## 9  2421.588 2249.195 2593.981
## 10 2425.373 2253.540 2597.205

Confidence band FOR the regression function

nue <- mKwl[["df.residual"]]
kmKwl <- 2
XNew <- cbind(1, blengthNew)

Middle of the confidence band

muhat <- predict(mKwl, newdata = pdata)
muhat <- as.numeric(XNew %*% coef(mKwl))    ## the same

Half width of the band

cbFORhalf <- sqrt(diag(XNew %*% vcov(mKwl) %*% t(XNew)) * kmKwl * qf(0.95, kmKwl, nue))

Bounds of the band FOR the regression function

bandFOR <- data.frame(fit = muhat, lwr = muhat - cbFORhalf, upr = muhat + cbFORhalf)
print(bandFOR[1:10,])
##         fit      lwr      upr
## 1  2391.311 2169.743 2612.878
## 2  2395.095 2174.233 2615.958
## 3  2398.880 2178.722 2619.038
## 4  2402.665 2183.210 2622.119
## 5  2406.449 2187.699 2625.200
## 6  2410.234 2192.187 2628.281
## 7  2414.019 2196.675 2631.362
## 8  2417.803 2201.163 2634.444
## 9  2421.588 2205.651 2637.525
## 10 2425.373 2210.138 2640.607

Plot both bands

par(mfrow = c(1, 1), bty = BTY, mar = c(5, 4, 1, 1)+0.1)
plot(bweight ~ blength.jitter, data = Kojeni, col = COL2, bg = BGC2, ylim = c(1750, 5100),
     pch = PCH, xlab = "Birth length [cm]", ylab = "Birth weight [g]")

  ### Estimated means/predictions
lines(blengthNew, muhat, col = "red4", lwd = 2)

  ### Confidence band AROUND the regression function
lines(blengthNew, bandAROUND[, "lwr"], col = "red2", lwd = 2, lty = 2)
lines(blengthNew, bandAROUND[, "upr"], col = "red2", lwd = 2, lty = 2)

  ### Confidence band FOR the regression function
lines(blengthNew, bandFOR[, "lwr"], col = "blue3", lwd = 2, lty = 5)
lines(blengthNew, bandFOR[, "upr"], col = "blue3", lwd = 2, lty = 5)

legend(46, 5000, legend = c("Conf. band FOR", "Conf. band AROUND"), col = c("blue3", "red2"), lwd = 2, lty = c(5, 2))

plot of chunk fig-Simult-03-01