NMSA407 Linear Regression: Tutorial

Categorical nominal covariate

Data Cars2004nh




Introduction

Load used data and calculate basic summaries

data(Cars2004nh, package = "mffSM")
head(Cars2004nh)
##                         vname type drive price.retail price.dealer   price cons.city cons.highway
## 1          Chevrolet.Aveo.4dr    1     1        11690        10965 11327.5       8.4          6.9
## 2 Chevrolet.Aveo.LS.4dr.hatch    1     1        12585        11802 12193.5       8.4          6.9
## 3      Chevrolet.Cavalier.2dr    1     1        14610        13697 14153.5       9.0          6.4
## 4      Chevrolet.Cavalier.4dr    1     1        14810        13884 14347.0       9.0          6.4
## 5   Chevrolet.Cavalier.LS.2dr    1     1        16385        15357 15871.0       9.0          6.4
## 6           Dodge.Neon.SE.4dr    1     1        13670        12849 13259.5       8.1          6.5
##   consumption engine.size ncylinder horsepower weight      iweight  lweight wheel.base length width
## 1        7.65         1.6         4        103   1075 0.0009302326 6.980076        249    424   168
## 2        7.65         1.6         4        103   1065 0.0009389671 6.970730        249    389   168
## 3        7.70         2.2         4        140   1187 0.0008424600 7.079184        264    465   175
## 4        7.70         2.2         4        140   1214 0.0008237232 7.101676        264    465   173
## 5        7.70         2.2         4        140   1187 0.0008424600 7.079184        264    465   175
## 6        7.30         2.0         4        132   1171 0.0008539710 7.065613        267    442   170
##      ftype fdrive
## 1 personal  front
## 2 personal  front
## 3 personal  front
## 4 personal  front
## 5 personal  front
## 6 personal  front
dim(Cars2004nh)
## [1] 425  20
summary(Cars2004nh)
##     vname                type           drive        price.retail     price.dealer   
##  Length:425         Min.   :1.000   Min.   :1.000   Min.   : 10280   Min.   :  9875  
##  Class :character   1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 20370   1st Qu.: 18973  
##  Mode  :character   Median :1.000   Median :1.000   Median : 27905   Median : 25672  
##                     Mean   :2.219   Mean   :1.692   Mean   : 32866   Mean   : 30096  
##                     3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.: 39235   3rd Qu.: 35777  
##                     Max.   :6.000   Max.   :3.000   Max.   :192465   Max.   :173560  
##                                                                                      
##      price          cons.city      cons.highway     consumption     engine.size      ncylinder     
##  Min.   : 10078   Min.   : 6.20   Min.   : 5.100   Min.   : 5.65   Min.   :1.300   Min.   :-1.000  
##  1st Qu.: 19600   1st Qu.:11.20   1st Qu.: 8.100   1st Qu.: 9.65   1st Qu.:2.400   1st Qu.: 4.000  
##  Median : 26656   Median :12.40   Median : 9.000   Median :10.70   Median :3.000   Median : 6.000  
##  Mean   : 31481   Mean   :12.36   Mean   : 9.142   Mean   :10.75   Mean   :3.208   Mean   : 5.791  
##  3rd Qu.: 37514   3rd Qu.:13.80   3rd Qu.: 9.800   3rd Qu.:11.65   3rd Qu.:3.900   3rd Qu.: 6.000  
##  Max.   :183012   Max.   :23.50   Max.   :19.600   Max.   :21.55   Max.   :8.300   Max.   :12.000  
##                   NA's   :14      NA's   :14       NA's   :14                                      
##    horsepower        weight        iweight             lweight        wheel.base        length     
##  Min.   :100.0   Min.   : 923   Min.   :0.0003067   Min.   :6.828   Min.   :226.0   Min.   :363.0  
##  1st Qu.:165.0   1st Qu.:1412   1st Qu.:0.0005542   1st Qu.:7.253   1st Qu.:262.0   1st Qu.:450.0  
##  Median :210.0   Median :1577   Median :0.0006341   Median :7.363   Median :272.0   Median :472.0  
##  Mean   :216.8   Mean   :1626   Mean   :0.0006412   Mean   :7.373   Mean   :274.9   Mean   :470.6  
##  3rd Qu.:255.0   3rd Qu.:1804   3rd Qu.:0.0007082   3rd Qu.:7.498   3rd Qu.:284.0   3rd Qu.:490.0  
##  Max.   :500.0   Max.   :3261   Max.   :0.0010834   Max.   :8.090   Max.   :366.0   Max.   :577.0  
##                  NA's   :2      NA's   :2           NA's   :2       NA's   :2       NA's   :26     
##      width            ftype       fdrive   
##  Min.   :163.0   personal:242   front:223  
##  1st Qu.:175.0   wagon   : 30   rear :110  
##  Median :180.0   SUV     : 60   4x4  : 92  
##  Mean   :181.1   pickup  : 24              
##  3rd Qu.:185.0   sport   : 49              
##  Max.   :206.0   minivan : 20              
##  NA's   :28

Complete cases subset used here

To be able to compare a model fitted here with other models where also other covariates will be included, we restrict ourselves to a subset of the dataset where all variables consumption, lweight and engine.size are known.

isComplete <- complete.cases(Cars2004nh[, c("consumption", "lweight", "engine.size")])
sum(!isComplete)
## [1] 16
CarsNow <- subset(Cars2004nh, isComplete, select = c("consumption", "drive", "fdrive", "weight", "lweight", "engine.size"))
dim(CarsNow)
## [1] 409   6
summary(CarsNow)
##   consumption        drive         fdrive        weight        lweight       engine.size   
##  Min.   : 5.65   Min.   :1.000   front:212   Min.   : 923   Min.   :6.828   Min.   :1.300  
##  1st Qu.: 9.65   1st Qu.:1.000   rear :108   1st Qu.:1415   1st Qu.:7.255   1st Qu.:2.400  
##  Median :10.70   Median :1.000   4x4  : 89   Median :1577   Median :7.363   Median :3.000  
##  Mean   :10.75   Mean   :1.699               Mean   :1622   Mean   :7.371   Mean   :3.178  
##  3rd Qu.:11.65   3rd Qu.:2.000               3rd Qu.:1804   3rd Qu.:7.498   3rd Qu.:3.800  
##  Max.   :21.55   Max.   :3.000               Max.   :2903   Max.   :7.973   Max.   :6.000
with(CarsNow, tapply(consumption, fdrive, mean))
##     front      rear       4x4 
##  9.741274 11.293981 12.498876




Dependence of consumption on drive

Scatterplot

par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ drive, data = CarsNow, xaxt = "n", col = COL, bg = BGC, pch = PCH,
     xlab = "Drive", ylab = "Consumption [l/100 km]", cex.lab = 1.2, cex.axis = 1.2)
axis(1, at = 1:3, labels = levels(CarsNow[, "fdrive"]), cex.axis = 1.2)

plot of chunk fig-ParamCovar-03-01

Scatterplot with horizontally jittered values

set.seed(20010911)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(CarsNow[, "drive"] + runif(nrow(CarsNow), -0.2, 0.2), CarsNow[, "consumption"], xaxt = "n", col = COL, bg = BGC, pch = PCH,
     xlab = "Drive", ylab = "Consumption [l/100 km]", cex.lab = 1.2, cex.axis = 1.2)
axis(1, at = 1:3, labels = levels(CarsNow[, "fdrive"]), cex.axis = 1.2)

plot of chunk fig-ParamCovar-03-02

Grand and group sample means of consumption

(ybar <- with(CarsNow, mean(consumption)))
## [1] 10.75134
(ybargr <- with(CarsNow, tapply(consumption, fdrive, mean)))
##     front      rear       4x4 
##  9.741274 11.293981 12.498876

Scatterplot with horizontally jittered values equipped additionally by the sample means

layout(matrix(c(1, 2,2,2,2), nrow = 1))
par(bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
set.seed(20010911)
plot(runif(nrow(CarsNow), -0.1, 0.1), CarsNow[, "consumption"], xaxt = "n", col = "darkblue", bg = "skyblue", pch = PCH,
     xlab = "", ylab = "Consumption [l/100 km]", xlim = c(-0.2, 0.2), cex.lab = 1.2, cex.axis = 1.2)
axis(1, at = 0, labels = "All")
points(0, ybar, pch = 22, col = "red", bg = "yellow", cex = 3)
set.seed(20010911)
plot(CarsNow[, "drive"] + runif(nrow(CarsNow), -0.1, 0.1), CarsNow[, "consumption"], xaxt = "n", col = COL, bg = BGC, pch = PCH,
     xlab = "Drive", ylab = "Consumption [l/100 km]", cex.lab = 1.2, cex.axis = 1.2)
points(1:3, ybargr, pch = 22, col = "darkgreen", bg = "seagreen", cex = 3)
axis(1, at = 1:3, labels = levels(CarsNow[, "fdrive"]), cex.axis = 1.2)

plot of chunk fig-ParamCovar-03-03

Boxplots of consumption by fdrive

FCOL <- rainbow_hcl(3)
names(FCOL) <- levels(CarsNow[, "fdrive"])
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ fdrive, data = CarsNow, col = FCOL, xlab = "Drive", ylab = "Consumption [l/100 km]", cex.lab = 1.2, cex.axis = 1.2)

plot of chunk fig-ParamCovar-03-04

Boxplot of consumption of all datapoints, boxplots of consumption by fdrive

layout(matrix(c(1, 2,2,2,2), nrow = 1))
par(bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
boxplot(CarsNow[, "consumption"], col = "sandybrown", ylab = "Consumption [l/100 km]", cexc.lab = 1.2, cex.axis = 1.2)
plot(consumption ~ fdrive, data = CarsNow, col = FCOL, xlab = "Drive", ylab = "Consumption [l/100 km]", cex.lab = 1.2, cex.axis = 1.2)

plot of chunk fig-ParamCovar-03-05

Histograms of consumption by fdrive

XLIM <- with(CarsNow, range(consumption))
layout(matrix(c(0,1,1,0, 2,2,3,3), nrow = 2, byrow = TRUE))
par(bty = BTY, mar = c(4, 4, 4, 1) + 0.1)
for (fl in levels(CarsNow[, "fdrive"])){
  hist(subset(CarsNow, fdrive == fl)[, "consumption"], prob = TRUE, col = FCOL[fl],
         xlab = "Consumption [l/100 km]", xlim = XLIM, ylim = c(0, 0.42), main = fl, cex.main = 1.6, cex.lab = 1.4, cex.axis = 1.4)
}    

plot of chunk fig-ParamCovar-03-06

par(mfrow = c(1, 1))

Normal QQ plots of consumption by fdrive

layout(matrix(c(0,1,1,0, 2,2,3,3), nrow = 2, byrow = TRUE))
par(bty = BTY, mar = c(4, 4, 4, 2) + 0.1)
for (fl in levels(CarsNow[, "fdrive"])){
  qqnorm(subset(CarsNow, fdrive == fl)[, "consumption"], main = fl, pch = PCH, col = COL, bg = BGC, cex.main = 1.6, cex.lab = 1.4, cex.axis = 1.4)
  qqline(subset(CarsNow, fdrive == fl)[, "consumption"], lwd = 2, col = "blue")
}    

plot of chunk fig-ParamCovar-03-07

par(mfrow = c(1, 1))




Full-rank parameterization without intercept

In this parameterization, regression coefficients provide the estimated group means, i.e., the group sample means.

Fit

mF <- lm(consumption ~ -1 + fdrive, data = CarsNow)
summary(mF)
## 
## Call:
## lm(formula = consumption ~ -1 + fdrive, data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0913 -1.2489 -0.0440  0.9587  9.0511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## fdrivefront   9.7413     0.1247   78.15   <2e-16 ***
## fdriverear   11.2940     0.1746   64.67   <2e-16 ***
## fdrive4x4    12.4989     0.1924   64.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.815 on 406 degrees of freedom
## Multiple R-squared:  0.9728, Adjusted R-squared:  0.9726 
## F-statistic:  4837 on 3 and 406 DF,  p-value: < 2.2e-16

Residual (within groups) sum of squares \(\mbox{SS}_e\)

deviance(mF)         ## SS_e
## [1] 1337.355

Only intercept model

m0 <- lm(consumption ~ 1, data = CarsNow)
summary(m0)
## 
## Call:
## lm(formula = consumption ~ 1, data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1013 -1.1013 -0.0513  0.8987 10.7987 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  10.7513     0.1055   101.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.134 on 408 degrees of freedom

Total sum of squares \(\mbox{SS}_T\)

deviance(m0)         ## SS_T
## [1] 1857.242

Regression (between groups) sum of squares \(\mbox{SS}_R\)

deviance(m0) - deviance(mF)  ## SS_R
## [1] 519.8869

One-way analysis of variance (ANOVA) table

a1 <- aov(consumption ~ fdrive, data = CarsNow)
summary(a1)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## fdrive        2  519.9  259.94   78.92 <2e-16 ***
## Residuals   406 1337.4    3.29                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1




Sample means of consumption

Grand sample mean

(ybar <- with(CarsNow, mean(consumption)))
## [1] 10.75134

Group sample means

(ybargr <- with(CarsNow, tapply(consumption, fdrive, mean)))
##     front      rear       4x4 
##  9.741274 11.293981 12.498876

Mean of the group sample means

(meanybargr <- mean(ybargr))
## [1] 11.17804

Weighted mean of the group sample means

(TAB <- with(CarsNow, table(fdrive)))
## fdrive
## front  rear   4x4 
##   212   108    89
(PTAB <- prop.table(TAB))
## fdrive
##     front      rear       4x4 
## 0.5183374 0.2640587 0.2176039
(wmeanybargr <- sum(ybargr * PTAB))
## [1] 10.75134




Reference group pseudocontrasts, reference = the first group

Levels of a factor fdrive

levels(CarsNow[["fdrive"]])                       ## Which group is the first one?
## [1] "front" "rear"  "4x4"
(G <- length(levels(CarsNow[, "fdrive"])))
## [1] 3

Fit

mTrt <- lm(consumption ~ fdrive, data = CarsNow)
summary(mTrt)
## 
## Call:
## lm(formula = consumption ~ fdrive, data = CarsNow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0913 -1.2489 -0.0440  0.9587  9.0511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.7413     0.1247  78.149  < 2e-16 ***
## fdriverear    1.5527     0.2146   7.237 2.32e-12 ***
## fdrive4x4     2.7576     0.2292  12.030  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.815 on 406 degrees of freedom
## Multiple R-squared:  0.2799, Adjusted R-squared:  0.2764 
## F-statistic: 78.91 on 2 and 406 DF,  p-value: < 2.2e-16

Pseudocontrast matrix

(CTrt <- contr.treatment(G))
##   2 3
## 1 0 0
## 2 1 0
## 3 0 1

Estimated group means from the linear model

coef(mTrt)["(Intercept)"] + CTrt %*% coef(mTrt)[-1]
##        [,1]
## 1  9.741274
## 2 11.293981
## 3 12.498876
muhat <- as.numeric(coef(mTrt)["(Intercept)"] + CTrt %*% coef(mTrt)[-1])
names(muhat) <- levels(CarsNow[, "fdrive"])
print(muhat)
##     front      rear       4x4 
##  9.741274 11.293981 12.498876




Reference group pseudocontrasts, reference = the last group

Fit

mSAS <- lm(consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = contr.SAS))
summary(mSAS)
## 
## Call:
## lm(formula = consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = contr.SAS))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0913 -1.2489 -0.0440  0.9587  9.0511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  12.4989     0.1924  64.969  < 2e-16 ***
## fdrive1      -2.7576     0.2292 -12.030  < 2e-16 ***
## fdrive2      -1.2049     0.2598  -4.637 4.77e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.815 on 406 degrees of freedom
## Multiple R-squared:  0.2799, Adjusted R-squared:  0.2764 
## F-statistic: 78.91 on 2 and 406 DF,  p-value: < 2.2e-16

Pseudocontrast matrix

(CSAS <- contr.SAS(G))
##   1 2
## 1 1 0
## 2 0 1
## 3 0 0

Estimated group means from the linear model

coef(mSAS)["(Intercept)"] + CSAS %*% coef(mSAS)[-1]
##        [,1]
## 1  9.741274
## 2 11.293981
## 3 12.498876
muhat <- as.numeric(coef(mSAS)["(Intercept)"] + CSAS %*% coef(mSAS)[-1])
names(muhat) <- levels(CarsNow[, "fdrive"])
print(muhat)
##     front      rear       4x4 
##  9.741274 11.293981 12.498876




Sum contrasts, reference = the last group

Fit

mSum <- lm(consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = contr.sum))
summary(mSum)
## 
## Call:
## lm(formula = consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = contr.sum))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0913 -1.2489 -0.0440  0.9587  9.0511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.17804    0.09606 116.365   <2e-16 ***
## fdrive1     -1.43677    0.12003 -11.970   <2e-16 ***
## fdrive2      0.11594    0.13926   0.833    0.406    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.815 on 406 degrees of freedom
## Multiple R-squared:  0.2799, Adjusted R-squared:  0.2764 
## F-statistic: 78.91 on 2 and 406 DF,  p-value: < 2.2e-16

Contrast matrix

(CSum <- contr.sum(G))
##   [,1] [,2]
## 1    1    0
## 2    0    1
## 3   -1   -1

Estimated group means from the linear model

coef(mSum)["(Intercept)"] + CSum %*% coef(mSum)[-1]
##        [,1]
## 1  9.741274
## 2 11.293981
## 3 12.498876
muhat <- as.numeric(coef(mSum)["(Intercept)"] + CSum %*% coef(mSum)[-1])
names(muhat) <- levels(CarsNow[, "fdrive"])
print(muhat)
##     front      rear       4x4 
##  9.741274 11.293981 12.498876

Estimated group effects (“\(\alpha\)'s”)

alphaSum <- as.numeric(CSum %*% coef(mSum)[-1])
names(alphaSum) <- levels(CarsNow[, "fdrive"])
print(alphaSum)
##      front       rear        4x4 
## -1.4367702  0.1159377  1.3208326

Estimated group effects (“\(\alpha\)'s”)

library("mffSM")
LSum <- cbind(0, CSum)
rownames(LSum) <- levels(CarsNow[["fdrive"]])
colnames(LSum) <- names(coef(mSum))
print(LSum)
##       (Intercept) fdrive1 fdrive2
## front           0       1       0
## rear            0       0       1
## 4x4             0      -1      -1
LSest(mSum, L = LSum)
##         Estimate Std. Error     t value P value     Lower      Upper
## front -1.4367702  0.1200284 -11.9702552 < 2e-16 -1.672725 -1.2008156
## rear   0.1159377  0.1392631   0.8325084 0.40561 -0.157829  0.3897043
## 4x4    1.3208326  0.1468489   8.9945021 < 2e-16  1.032153  1.6095117




Weighted sum contrasts, reference = the last group

Contrast matrix

(ng <- with(CarsNow, table(fdrive)))
## fdrive
## front  rear   4x4 
##   212   108    89
CwSum <- rbind(diag(G - 1), - ng[-G] / ng[G])
rownames(CwSum) <- levels(CarsNow[["fdrive"]])
print(CwSum)
##           front      rear
## front  1.000000  0.000000
## rear   0.000000  1.000000
## 4x4   -2.382022 -1.213483

Fit

mwSum <- lm(consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = CwSum))
summary(mwSum)
## 
## Call:
## lm(formula = consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = CwSum))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0913 -1.2489 -0.0440  0.9587  9.0511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.75134    0.08974 119.802  < 2e-16 ***
## fdrivefront -1.01007    0.08651 -11.676  < 2e-16 ***
## fdriverear   0.54264    0.14982   3.622 0.000329 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.815 on 406 degrees of freedom
## Multiple R-squared:  0.2799, Adjusted R-squared:  0.2764 
## F-statistic: 78.91 on 2 and 406 DF,  p-value: < 2.2e-16

Estimated group means from the linear model

coef(mwSum)["(Intercept)"] + CwSum %*% coef(mwSum)[-1]
##            [,1]
## front  9.741274
## rear  11.293981
## 4x4   12.498876
muhat <- as.numeric(coef(mwSum)["(Intercept)"] + CwSum %*% coef(mwSum)[-1])
names(muhat) <- levels(CarsNow[, "fdrive"])
print(muhat)
##     front      rear       4x4 
##  9.741274 11.293981 12.498876

Estimated group effects (“\(\alpha\)'s”)

alphawSum <- as.numeric(CwSum %*% coef(mwSum)[-1])
names(alphawSum) <- levels(CarsNow[, "fdrive"])
print(alphawSum)
##      front       rear        4x4 
## -1.0100712  0.5426367  1.7475317

Estimated group effects (“\(\alpha\)'s”)

LwSum <- cbind(0, CwSum)
rownames(LwSum) <- levels(CarsNow[["fdrive"]])
colnames(LwSum) <- names(coef(mwSum))
print(LwSum)
##       (Intercept) fdrivefront fdriverear
## front           0    1.000000   0.000000
## rear            0    0.000000   1.000000
## 4x4             0   -2.382022  -1.213483
LSest(mwSum, L = LwSum)
##         Estimate Std. Error    t value    P value      Lower      Upper
## front -1.0100712 0.08650951 -11.675840 < 2.22e-16 -1.1801336 -0.8400087
## rear   0.5426367 0.14982008   3.621923 0.00032946  0.2481168  0.8371567
## 4x4    1.7475317 0.17016829  10.269432 < 2.22e-16  1.4130107  2.0820526




Helmert contrasts

Fit

mHelmert <- lm(consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = contr.helmert))
summary(mHelmert)
## 
## Call:
## lm(formula = consumption ~ fdrive, data = CarsNow, contrasts = list(fdrive = contr.helmert))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0913 -1.2489 -0.0440  0.9587  9.0511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.17804    0.09606 116.365  < 2e-16 ***
## fdrive1      0.77635    0.10728   7.237 2.32e-12 ***
## fdrive2      0.66042    0.07342   8.995  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.815 on 406 degrees of freedom
## Multiple R-squared:  0.2799, Adjusted R-squared:  0.2764 
## F-statistic: 78.91 on 2 and 406 DF,  p-value: < 2.2e-16

Contrast matrix

(CHelmert <- contr.helmert(G))
##   [,1] [,2]
## 1   -1   -1
## 2    1   -1
## 3    0    2

Estimated group means from the linear model

coef(mHelmert)["(Intercept)"] + CHelmert %*% coef(mHelmert)[-1]
##        [,1]
## 1  9.741274
## 2 11.293981
## 3 12.498876
muhat <- as.numeric(coef(mHelmert)["(Intercept)"] + CHelmert %*% coef(mHelmert)[-1])
names(muhat) <- levels(CarsNow[, "fdrive"])
print(muhat)
##     front      rear       4x4 
##  9.741274 11.293981 12.498876