##
##  PURPOSE:     Create a table with a set of descriptive statistics
##               and inference for difference in means
##
##  PROGRAMMER:  Arnošt Komárek
##
##  REQUIRES:    formatOut.R
##
## ==============================================================

## TESTING
#
#yVar <- "f2.sco.math"
#xVar <- "fam.comp"
#data <- nelsNE2
#digits <- 2
#conf.level <- 0.95
#yLab <- "Score in mathematics"
#xLab <- "Family composition"
#rowNames <- c("All", "Mother \\& father", "Other")
# get("ses.perc", nelsNE2)    ## the same as nelsNE2[, "ses.perc"]

### funTabDescr: descriptive statistics of a numeric variable, overall and
### by the two levels of a factor, together with a two-sample t-test
### of the difference in means, returned as a data.frame and as LaTeX code.
###
### Arguments:
###   yVar        name (character) of the response column in 'data'
###   xVar        name (character) of the grouping column in 'data';
###               must be a factor with exactly two levels
###   yLab, xLab  labels used in the LaTeX table head
###               (default: yVar and xVar themselves)
###   rowNames    character vector of length 3 with the row labels
###               of the LaTeX table (default: "All" and the levels of xVar)
###   data        data.frame holding both variables
###   digits      passed as 'digits' to formatNum() and formatCI()
###   conf.level  confidence level passed to t.test(); also reported
###               in the footnote of the LaTeX table
###
### Value:
###   list with components
###     tab  data.frame with 3 rows (all, group 1, group 2) and columns
###          N, Mean, Std. Dev., Std. Error, Median, Q1, Q3,
###          Diff, Diff.Low, Diff.Upp, P (the last four filled in row 1 only)
###     tex  one character string with the LaTeX code of the table
###
### Errors:
###   stops if xVar is not a factor, does not have two levels,
###   or if 'rowNames' does not have one entry per row of the table.
###
### NOTE: formatNum(), formatCI() and formatPval() are not defined here;
###       they are expected to come from formatOut.R (see REQUIRES above).
funTabDescr <- function(yVar, xVar, yLab, xLab, rowNames, data,
                        digits = 2, conf.level = 0.95) {

  ### Take only non-missing values of the two variables
  ### -----------------------------------------------------
  dataNoNA <- na.omit(data[, c(yVar, xVar)])

  ### Extract the two columns once; [[ ]] always returns the column itself
  ### (also for data.frame-like objects where [, j] does not drop).
  y <- dataNoNA[[yVar]]
  x <- dataNoNA[[xVar]]

  ### Basic input checks
  ### -------------------------
  if (!is.factor(x)) stop("x variable is not factor")
  if (length(levels(x)) != 2) stop("x variable does not have two levels")

  ### Calculate descriptive statistics
  ### --------------------------------------
  N <- table(x)
  #
  grMn  <- mean(y)
  grSD  <- sd(y)
  grSE  <- grSD / sqrt(sum(N))
  grMed <- median(y)
  grQs  <- quantile(y, probs = c(0.25, 0.75))
  #
  Mn  <- tapply(y, x, mean)
  SD  <- tapply(y, x, sd)
  SE  <- SD / sqrt(N)
  Med <- tapply(y, x, median)
  Qs  <- tapply(y, x, quantile, probs = c(0.25, 0.75))

  ### Calculate t-test
  ### -------------------------
  #paste(yVar, "~", xVar)              ## try to run also this row
  #formula(paste(yVar, "~", xVar))     ## and this row as well
  tt <- t.test(formula(paste(yVar, "~", xVar)), data = dataNoNA,
               conf.level = conf.level)

  ### Steps that should be done at some stage manually (e.g., when developing
  ### or debugging the function) but we do not always want to comment
  ### and then again uncomment it.
  ### Simply include it within if() which will never be TRUE when
  ### the function is run automatically.
  ### -------------------------------------------------------------------
  if (FALSE) {
    print(N)
    #
    print(grMn)
    print(grSD)
    print(grSE)
    print(grMed)
    print(grQs)
    #
    print(Mn)
    print(SD)
    print(SE)
    print(Med)
    print(Qs)
    #
    print(tt)
    names(tt)
  }

  ### Put results into one table (data.frame)
  ### -------------------------------------------
  ### vapply() (instead of sapply()) guarantees one numeric value per group.
  Q1 <- vapply(Qs, function(q) q[["25%"]], numeric(1))
  Q3 <- vapply(Qs, function(q) q[["75%"]], numeric(1))

  TAB <- data.frame(N        = c(sum(N), N),
                    Mean     = c(grMn, Mn),
                    SD       = c(grSD, SD),
                    SE       = c(grSE, SE),
                    Median   = c(grMed, Med),
                    Q1       = c(grQs[["25%"]], Q1),
                    Q3       = c(grQs[["75%"]], Q3),
                    Diff     = c(Mn[1] - Mn[2], NA, NA),
                    Diff.Low = c(tt[["conf.int"]][1], NA, NA),
                    Diff.Upp = c(tt[["conf.int"]][2], NA, NA),
                    P        = c(tt[["p.value"]], NA, NA))
  # print(TAB)      ## run it
  rownames(TAB)[1] <- "All"
  colnames(TAB)[c(3, 4)] <- c("Std. Dev.", "Std. Error")
  # print(TAB)      ## run it

  ### Simple structure
  ### ---------------------
  ### The code below (if uncommented) shows how to use the 'xtable'
  ### package to get the LaTeX code of a table created from matrix
  ### or data.frame.
  # library("xtable")
  # help(xtable, package = "xtable")
  # print(xtable(TAB))

  ### Often, we will be happy with the above LaTeX table.
  ### But if we are not happy, we just let R write the LaTeX code
  ### of the table while following our wishes. Of course, this takes
  ### some time and it is often not worth doing so if we are about
  ### to create only one or two small tables. But for example when
  ### preparing (even just one) table with results of a reasonably sized
  ### simulation study, it is usually better to programme everything
  ### as shown below.

  ### The principal idea is to use paste() and to create a (long) character
  ### containing the LaTeX code of the table. The final character will then
  ### be "copy-pasted" into the LaTeX document using the cat() function.
  ### Some specialities when creating the (long) character:
  ###   * backslash (\) must be doubled
  ###   * \n is "carriage return" which provides a new line
  ###
  ### Check what paste(XXX, sep = "") (equivalently paste0(XXX)) is doing
  ### and what paste(XXX, collapse = "&") does (if XXX is a vector of characters).
  ### Below, the long character is being created in the LTAB variable.

  ### LaTeX table
  ### -------------------------
  if (missing(yLab)) yLab <- yVar
  if (missing(xLab)) xLab <- xVar
  if (missing(rowNames)) rowNames <- c("All", levels(x))
  if (length(rowNames) != nrow(TAB)) stop("wrong rowNames supplied.")

  ### Head
  ### +++++++++++++
  NCOL <- 6
  LTAB <- paste0("\\begin{tabular}{lrrrcr}\n",
                 "\\hline\\hline\n",
                 "\\multicolumn{", NCOL, "}{c}{\\textbf{", yLab, "}",
                 " by \\textbf{", xLab, "}} \\\\\n",
                 "\\hline\n",
                 "Group & Mean (S.E.) & Std. Dev. & Median & Q$_1$\\;--\\;Q$_3$ & N \\\\\n",
                 "\\hline\n")

  ### Descriptive statistics
  ### ++++++++++++++++++++++++++
  # rr <- 1      ## if a bigger for loop appears in the code,
                 ## it is always useful to test one of its cycles first
                 ## by explicitly setting the looping variable (rr below)
                 ## to some specific value, e.g., 1
  inChars <- c("Mean", "Std. Error", "Std. Dev.", "Median", "Q1", "Q3")
  for (rr in seq_len(nrow(TAB))) {
    fNums <- formatNum(TAB[rr, inChars], digits = digits)
    names(fNums) <- inChars
    # print(fNums)

    fRow <- c(rowNames[rr],
              paste0(fNums["Mean"], " (", fNums["Std. Error"], ")"),
              fNums["Std. Dev."],
              fNums["Median"],
              paste0(fNums["Q1"], "\\;--\\;", fNums["Q3"]),
              TAB[rr, "N"])
    # print(fRow)
    # cat(paste(fRow, collapse = " & "), " \\\\\n")

    LTAB <- paste0(LTAB, paste(fRow, collapse = " & "), " \\\\\n")
    ### Separate the "All" row from the per-group rows
    if (rr == 1) LTAB <- paste0(LTAB, "\\hline\n")
  }

  ### Inference for difference (results of t.test)
  ### +++++++++++++++++++++++++++++++++++++++++++++
  LTAB <- paste0(LTAB,
                 "\\hline\n",
                 "\\multicolumn{", NCOL, "}{l}{",
                 "Difference in means: ",
                 formatNum(TAB[1, "Diff"], pval = TAB[1, "P"], digits = digits),
                 "\\hspace{1em}",
                 formatCI(TAB[1, c("Diff.Low", "Diff.Upp")], pval = TAB[1, "P"],
                          digits = digits),
                 "$^\\dag$,\\hspace{2em}",
                 "P: ", formatPval(TAB[1, "P"]), "$^\\ddag$",
                 "} \\\\\n")

  ### Foot
  ### ++++++++
  ### The footnote reports the confidence level that was really used
  ### in t.test() (it used to be hard-coded as 95%).
  confPct <- format(100 * conf.level)
  LTAB <- paste0(LTAB,
                 "\\hline\\hline\n",
                 "\\multicolumn{", NCOL, "}{l}{\\footnotesize\\itshape",
                 "$^\\dag$", confPct,
                 "\\% confidence interval\\hfill $^\\ddag$Welch two-sample t-test",
                 "} \\\\\n",
                 "\\end{tabular}\n\n")

  ### We return both original results (data.frame)
  ### as well as the LaTeX code of the table.
  RET <- list(tab = TAB, tex = LTAB)
  #print(RET$tab)
  #cat(RET$tex)

  return(RET)
}