NMSA407 Linear Regression: Tutorial

Simple illustration of a linear model

Data Hosi0




Introduction

Load used packages

library("lattice")

Load used data and calculate basic summaries

data(Hosi0, package = "mffSM")
head(Hosi0)
##   bweight blength weight12 length12 mage fage
## 1    3000      50     8900       78   23   30
## 2    3700      51    11550       77   26   28
## 3    3410      49    12000       80   21   24
## 4    3250      49     9600       73   19   22
## 5    3000      48     9600       74   16   22
## 6    3370      50    11100       73   19   18
dim(Hosi0)
## [1] 4838    6
summary(Hosi0)
##     bweight        blength         weight12        length12         mage            fage      
##  Min.   :1750   Min.   :46.00   Min.   : 6500   Min.   :63.0   Min.   :16.00   Min.   :17.00  
##  1st Qu.:3140   1st Qu.:49.00   1st Qu.: 9700   1st Qu.:74.0   1st Qu.:22.00   1st Qu.:24.00  
##  Median :3450   Median :50.00   Median :10350   Median :76.0   Median :25.00   Median :28.00  
##  Mean   :3429   Mean   :50.28   Mean   :10433   Mean   :76.6   Mean   :25.52   Mean   :28.49  
##  3rd Qu.:3710   3rd Qu.:51.00   3rd Qu.:11100   3rd Qu.:79.0   3rd Qu.:29.00   3rd Qu.:32.00  
##  Max.   :5100   Max.   :54.00   Max.   :16000   Max.   :94.0   Max.   :45.00   Max.   :69.00

Jittered birth length

Additionally, we create a jittered version of a variable blength which will be used on some plots to make them readable.

set.seed(221913282)
Hosi0 <- transform(Hosi0, blength.jitter = blength + runif(nrow(Hosi0), -0.1, 0.1))




Scatterplot

Labels for axes on plots

XLAB <- "Birth length [cm]"
YLAB <- "Birth weight [g]"

Scatterplot (using jittered birth length)

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(bweight ~ blength.jitter, data = Hosi0, col = COL, bg = BGC, pch = PCH, xlab = XLAB, ylab = YLAB)

plot of chunk fig-LinModDef-01-01




Conditional means of birth weight given birth length

Estimated conditional means

bweight.bar <- with(Hosi0, tapply(bweight, blength, mean))
print(bweight.bar, digits = 5)
##     46     47     48     49     50     51     52     53     54 
## 2528.1 2801.3 2979.1 3172.5 3396.1 3577.5 3763.9 3935.8 4072.5
blengths <- as.integer(names(bweight.bar))
print(blengths)
## [1] 46 47 48 49 50 51 52 53 54

Scatterplot (using jittered birth length) with added sample means of birth weight by birth length

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(bweight ~ blength.jitter, data = Hosi0, col = COL, bg = BGC, pch = PCH, xlab = XLAB, ylab = YLAB)
points(blengths, bweight.bar, pch = 22, col = "skyblue", bg = "darkgreen", cex = 1.5)

plot of chunk fig-LinModDef-01-02

Increments in estimated conditional means

diff.bar <- bweight.bar[2:9] - bweight.bar[1:8]
names(diff.bar) <- paste(blengths[2:9], "-", blengths[1:8], sep="")
print(diff.bar, digits = 3)
## 47-46 48-47 49-48 50-49 51-50 52-51 53-52 54-53 
##   273   178   193   224   181   186   172   137

Mean of the increments

mean(diff.bar)
## [1] 193.0533

Scatterplot (using jittered birth length) with added sample means of birth weight by birth length connected by lines

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(bweight ~ blength.jitter, data = Hosi0, col = COL, bg = BGC, pch = PCH, xlab = XLAB, ylab = YLAB)
points(blengths, bweight.bar, pch = 22, col = "skyblue", bg = "darkgreen", cex = 1.5)
lines(blengths, bweight.bar, col = "blue3", lwd = 2)

plot of chunk fig-LinModDef-01-03




Linear model for dependence of birth weight on birth length

Model fit

m1 <- lm(bweight ~ blength, data = Hosi0)
summary(m1)
## 
## Call:
## lm(formula = bweight ~ blength, data = Hosi0)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1520.33  -188.20   -10.33   189.67  1531.80 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6230.146    121.095  -51.45   <2e-16 ***
## blength       192.124      2.407   79.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 291.7 on 4836 degrees of freedom
## Multiple R-squared:  0.5685, Adjusted R-squared:  0.5684 
## F-statistic:  6370 on 1 and 4836 DF,  p-value: < 2.2e-16

Estimated regression coefficients (rounded)

be0.Hosi0 <- round(coef(m1)[1], 1)
be1.Hosi0 <- round(coef(m1)[2], 2)

Fitted line

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(bweight ~ blength.jitter, data = Hosi0, col = COL, bg = BGC, pch = PCH, xlab = XLAB, ylab = YLAB)
points(blengths, bweight.bar, pch = 22, col = "skyblue", bg = "darkgreen", cex = 1.5)
abline(m1, col = "red4", lwd = 2)
text(49, 2000, labels = eval(substitute(expression(paste(hat(E), "(Y; ", x, ") = ", be0.Hosi0, " + ", be1.Hosi0, x, sep="")), 
                                        list(be0.Hosi0 = be0.Hosi0, be1.Hosi0 = be1.Hosi0))), pos = 4)

plot of chunk fig-LinModDef-01-04




Plots exploring conditional distribution of birth weight given birth length

Boxplots of birth weight by birth length

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(bweight ~ factor(blength), data = Hosi0, col = BCOL, xlab = XLAB, ylab = YLAB)

plot of chunk fig-LinModDef-01-05

Histograms of birth weight by birth length (function from package lattice)

histogram(~ bweight | as.factor(blength), data = Hosi0, type = "density", col = HCOL, xlab = YLAB, ylab = "Density")

plot of chunk fig-LinModDef-01-06