NMSA407 Linear Regression: Tutorial

Confidence interval for the model based mean, prediction interval

Data Kojeni




Introduction

Load used data and calculate basic summaries

data(Kojeni, package = "mffSM")
head(Kojeni)
##   bplace n.child educ bfeed.dur bfeed24 gender bweight blength weight24 length24 breast father teat
## 1      1       3    2        18       0      1    3200      50     8000       75      1      0    1
## 2      1       2    3         5       0      1    3720      52     8110       68      0      0    1
## 3      1       1    2        24       1      1    4100      53     8750       64      1      0    1
## 4      1       1    3        14       0      1    3960      53     8350       72      1      1    1
## 5      1       1    1        24       1      0    3700      52     6700       68      0      0    1
## 6      1       1    2        16       0      0    3700      51     7990       68      1      0    1
##   plan mheight fheight mage fage fbplace     feduc fbfeed24 fgender fbreast ffather fteat fplan
## 1    1     160     180   26   30  Prague Secondary       No     Boy     Yes      No   Yes   Yes
## 2    1     160     187   35   38  Prague  Tertiary       No     Boy      No      No   Yes   Yes
## 3    1     173     186   26   28  Prague Secondary      Yes     Boy     Yes      No   Yes   Yes
## 4    1     159     177   24   26  Prague  Tertiary       No     Boy     Yes     Yes   Yes   Yes
## 5    0     168     183   22   28  Prague   Primary      Yes    Girl      No      No   Yes    No
## 6    1     173     186   24   29  Prague Secondary       No    Girl     Yes      No   Yes   Yes
dim(Kojeni)
## [1] 99 26
summary(Kojeni)
##      bplace         n.child           educ         bfeed.dur        bfeed24           gender      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   : 0.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 7.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.000   Median :1.000   Median :2.000   Median :16.00   Median :0.0000   Median :0.0000  
##  Mean   :1.293   Mean   :1.566   Mean   :1.838   Mean   :14.43   Mean   :0.2828   Mean   :0.4949  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:24.00   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :2.000   Max.   :4.000   Max.   :3.000   Max.   :24.00   Max.   :1.0000   Max.   :1.0000  
##     bweight        blength        weight24       length24         breast           father      
##  Min.   :2040   Min.   :46.0   Min.   :5850   Min.   :62.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:3200   1st Qu.:50.0   1st Qu.:7025   1st Qu.:66.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :3480   Median :51.0   Median :7770   Median :68.00   Median :0.0000   Median :0.0000  
##  Mean   :3470   Mean   :50.6   Mean   :7690   Mean   :68.55   Mean   :0.4848   Mean   :0.3636  
##  3rd Qu.:3720   3rd Qu.:52.0   3rd Qu.:8250   3rd Qu.:70.00   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :4400   Max.   :54.0   Max.   :9950   Max.   :78.00   Max.   :1.0000   Max.   :1.0000  
##       teat             plan           mheight       fheight           mage           fage      
##  Min.   :0.0000   Min.   :0.0000   Min.   :153   Min.   :162.0   Min.   :18.0   Min.   :19.00  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:164   1st Qu.:175.0   1st Qu.:23.0   1st Qu.:26.00  
##  Median :1.0000   Median :1.0000   Median :167   Median :179.0   Median :25.0   Median :28.00  
##  Mean   :0.7677   Mean   :0.5859   Mean   :167   Mean   :179.3   Mean   :25.7   Mean   :28.89  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:171   3rd Qu.:185.0   3rd Qu.:28.0   3rd Qu.:32.00  
##  Max.   :1.0000   Max.   :1.0000   Max.   :187   Max.   :195.0   Max.   :38.0   Max.   :43.00  
##      fbplace         feduc    fbfeed24 fgender   fbreast  ffather  fteat    fplan   
##  Prague  :70   Primary  :34   No :71   Girl:50   No :51   No :63   No :23   No :41  
##  Province:29   Secondary:47   Yes:28   Boy :49   Yes:48   Yes:36   Yes:76   Yes:58  
##                Tertiary :18                                                         
##                                                                                     
##                                                                                     
## 

Create a jittered version of blength

set.seed(221913282)
Kojeni <- transform(Kojeni, blength.jitter = blength + runif(nrow(Kojeni), -0.1, 0.1))




Linear model (regression line) for dependence of birth weight on birth length

summary(mKwl <- lm(bweight ~ blength, data = Kojeni))
## 
## Call:
## lm(formula = bweight ~ blength, data = Kojeni)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -685.93 -152.83  -30.76  196.83  664.07 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7905.80     895.45  -8.829 4.52e-14 ***
## blength       224.83      17.69  12.709  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 271.7 on 97 degrees of freedom
## Multiple R-squared:  0.6248, Adjusted R-squared:  0.6209 
## F-statistic: 161.5 on 1 and 97 DF,  p-value: < 2.2e-16




Confidence and prediction bands

Calculation

blengthNew <- seq(45.8, 54.2, length = 500)
pdata <- data.frame(blength = blengthNew)
pKwlmean <- predict(mKwl, newdata = pdata, interval = "confidence", level = 0.95)
pKwlpred <- predict(mKwl, newdata = pdata, interval = "prediction", level = 0.95)

Plot

par(mfrow = c(1, 1), bty = BTY, mar = c(5, 4, 1, 1)+0.1)
plot(bweight ~ blength.jitter, data = Kojeni, col = COL2, bg = BGC2, ylim = c(1750, 5100),
     pch = PCH, xlab = "Birth length [cm]", ylab = "Birth weight [g]")

  ### Estimated means/predictions
lines(blengthNew, pKwlmean[, "fit"], col = "red4", lwd = 2)

  ### Confidence band around the regression function
lines(blengthNew, pKwlmean[, "lwr"], col = "red2", lwd = 2, lty = 2)
lines(blengthNew, pKwlmean[, "upr"], col = "red2", lwd = 2, lty = 2)

  ### Prediction band
lines(blengthNew, pKwlpred[, "lwr"], col = "blue3", lwd = 2, lty = 5)
lines(blengthNew, pKwlpred[, "upr"], col = "blue3", lwd = 2, lty = 5)

legend(46, 5000, legend = c("Confidence band", "Prediction band"), col = c("red2", "blue3"), lwd = 2, lty = c(2, 5))

plot of chunk fig-NormalLM-04-01