##### ##### NMST440: Computational Environment for Statistical Data Analysis ##### ##### --------------------------------------------------------------------- ##### ##### Functions and programming ##### ##### --------------------------------------------------------------------- ##### ##### Arnošt Komárek ##### http://msekce.karlin.mff.cuni.cz/~komarek ##### komarek@karlin.mff.cuni.cz ##### ##### ====================================================================== rm(list = ls()) ROOT <- "/home/komarek/teach/mff_2020/nmst440_VypocProstr/Tutorial02/" setwd(ROOT) ### Simple function which creates a 2x2 covariance matrix ### from supplied variances, std. deviations and correlations ### ================================================================================= ### ### stop(): protection against errors in input arguments ### warning(): protection against "strange" (but not incorrect) values of input arguments CovMat <- function(sigma1, sigma2, rho){ if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) return(Sigma) } ### Function code CovMat ### Several identical calls to the function CovMat(sigma1 = 2, sigma2 = 3, rho = 0.5) CovMat(2, 3, 0.5) CovMat(sigma2 = 3, rho = 0.5, sigma1 = 2) CovMat(2, rho = 0.5, sigma2 = 3) ### Error will appear CovMat(sigma1 = 1, sigma2 = 1, rho = -2) ### Warning will appear CovMat(sigma1 = 1, sigma2 = 1, rho = 1) ### Error (no sigma2 given) CovMat(sigma1 = 1, rho = 0.5) ### Default values can be provided for some or all arguments ### =========================================================================== CovMat <- function(sigma1 = 2, sigma2 = 3, rho = 0.5){ if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) return(Sigma) } ### Several identical calls to the function CovMat() CovMat(2) CovMat(sigma1 = 2) CovMat(sigma2 = 3) CovMat(rho = 0.5) CovMat(sigma1 = 2, sigma2 = 3) ### Reasonable default value can also be supplied later on ### ### --> missing() ### =========================================================================== CovMat <- function(sigma1 = 2, sigma2, rho = 0.5){ if (missing(sigma2)) sigma2 <- 3 ### supply a default value if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) return(Sigma) } CovMat() ### sigma2 = 3 CovMat(sigma2 = 100) ### sigma2 = 100 ### Debugging ### =========================================================================== debug(CovMat) ### Row-by-row evaluation CovMat(sigma1 = 1, sigma2 = 4, rho = -0.1) undebug(CovMat) ### Normal evaluation again ### Setup a point where the function stops and wait (zarážka) ### ### --> browser() ### ========================================================== CovMat <- function(sigma1 = 2, sigma2 = 3, rho = 0.5){ if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") rho2 <- rho^2 browser() ### it stops here, also local variables can be explored ### more browser()'s can appear within one function Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) return(Sigma) } CovMat(sigma1 = 1, sigma2 = 4, rho = -0.1) ### Frequently used or bigger functions have rarely their code written ### in the script where the statistical analysis or data management is conducted. ### Good practice: one function (or one set of closely related functions) = one file (R script). ### ### Before the first use of the function, it is loaded to the memory by running that R script. ### No need to open it, highlight it and run it as usual. ### ### source(FILE) = run the whole content of FILE ### ### Take function stored in an external file: ### ================================================= source(paste(ROOT, "/CovMat.R", sep = "")) ##### ========================================================= ##### Examples of classical programming constructions in R ##### ========================================================= ### For loop ### ============== set.seed(20110901) x <- matrix(rnorm(1000), nrow = 20) ### Matrix 20 x 50. print(x[, 1:5]) ### Row means by the for loop ### ------------------------------------ rMeans <- numeric(nrow(x)) for (i in 1:nrow(x)){ rMeans[i] <- mean(x[i,]) } print(rMeans) ### apply should be used instead (see also tutorial 3)... rMeans <- apply(x, 1, mean) print(rMeans) ### loop does not have to loop over numbers pres <- c("a", "b", "c") for (pis in pres){ cat(pis, "\n") } ### Some other classical programming constructions ### =============================================== set.seed(20110901) x <- matrix(rnorm(1000), nrow = 20) ### ifelse, if ... else ... ### -------------------------- test.number <- 10 z <- ifelse(test.number < 100, x[1], pi) print(z) z <- ifelse(test.number >= 100, x[1], pi) print(z) ### Full if ... else ... must be used if the result is vector, matrix, ... if (test.number < 100) z <- x[1,] else z <- x[,1] print(z) ### the same z <- if (test.number < 100) x[1,] else x[,1] print(z) ### for loop once more ### -------------------------- rMeans <- numeric(nrow(x)) for (i in 1:nrow(x)){ rMeans[i] <- mean(x[i,]) } print(rMeans) ### while ### ----------- rMeans <- numeric(nrow(x)) i <- 1 while (i <= nrow(x)){ rMeans[i] <- mean(x[i,]) i <- i + 1 } print(rMeans) ### repeat (AK remark: only rarely used) ### -------------------------------------- rMeans <- numeric(nrow(x)) i <- 1 repeat{ rMeans[i] <- mean(x[i,]) i <- i + 1 if (i > nrow(x)) break } print(rMeans) ### next and break ### next: go to the next round ### break: stop the loop completely ### --------------- rMeans <- numeric() for (i in 1:nrow(x)){ prumi <- mean(x[i,]) if (prumi < 0){ rMeans <- c(rMeans, prumi) next }else{ break } } print(rMeans) ### match.arg and switch ### ### match.arg: argument matching ### ### ------------------------------------------------- calculate <- function(data, choice = c("rows", "cols", "all")) { choice <- match.arg(choice) ### evaluates to the first value in choice (by default = "rows") ### uses partial matching, i.e., choice = "c" on input is the same as choice = "cols" ### will stop with error if no match result <- switch(choice, ### alternative to (many) nested if.. else.. rows = {apply(data, 1, mean)}, cols = {apply(data, 2, mean)}, all = {mean(data)} ) return(result) } ### Row means: calculate(x, choice = "rows") calculate(x, choice = "r") ## only first few chars (unique enough) can be given calculate(x) ## the first choice is used ### Column means calculate(x, choice = "cols") calculate(x, choice = "c") ### Overall mean calculate(x, choice = "all") calculate(x, choice = "a") ### Error calculate(x, choice = "b") ### Slightly more complex switch (numerical dispatcher) calculate2 <- function(data, choice = 1) { result <- switch(choice, {x <- as.numeric(data) x <- log(1 + abs(data)) mean(x) }, {x <- exp(data) mean(x) }, {mean(x)} ) return(result) } calculate2(x) calculate2(x, choice = 1) calculate2(x, choice = 2) calculate2(x, choice = 3) ### Function returning more values ### ========================================================== ### CovMat as above, now returning also determinant, eigen values, eigen vectors ### ### Most typical - more results of different type are returned as list CovMatDEV <- function(sigma1, sigma2, rho){ if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) EV <- eigen(Sigma) Det <- prod(EV$values) RET <- list(Sigma = Sigma, Determinant = Det, Eigen.Val = EV$values, Eigen.Vect = EV$vectors) return(RET) } cm <- CovMatDEV(sigma1 = 1, sigma2 = 2, rho = 0.5) print(cm) class(cm) cm$Sigma cm$Determinant cm$Eigen.Val cm$Eigen.Vect ### Some parts can also be returned as attributes ### - this is usually used only in more complex situations attributes(cm) CovMatDEV2 <- function(sigma1, sigma2, rho){ if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) EV <- eigen(Sigma) Det <- prod(EV$values) attr(Sigma, "Determinant") <- Det attr(Sigma, "Eigen.Val") <- EV$values attr(Sigma, "Eigen.Vect") <- EV$vectors return(Sigma) } cm2 <- CovMatDEV2(sigma1 = 1, sigma2 = 2, rho = 0.5) print(cm2) class(cm2) attributes(cm2) attr(cm2, "Determinant") attr(cm2, "Eigen.Val") attr(cm2, "Eigen.Vect") cm2 %*% c(1, 2) ## Usual matrix product ### We can also assign a class to the object being returned CovMatDEV3 <- function(sigma1, sigma2, rho){ if (sigma1 <= 0 | sigma2 <= 0 | rho < -1 | rho > 1) stop("Incorrect input parameters.") if (rho == 1 | rho == -1) warning("Really perfect correlation?") Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) EV <- eigen(Sigma) Det <- prod(EV$values) Sigma <- matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2, sigma2^2), nrow=2) EV <- eigen(Sigma) Det <- prod(EV$values) RET <- list(Sigma=Sigma, Determinant=Det, Eigen.Val=EV$values, Eigen.Vect=EV$vectors) class(RET) <- "CovMat" return(RET) } ### We can now also write methods for objects of this (new) class ### First check prototype of a generic print print ### our print must have at least those two arguments ### Default print (used if no special print for a particular class) print.default print.CovMat <- function(x, ...){ cat("\nCovariance matrix:\n") print(x$Sigma, ...) cat("\n") cat("Determinant: ", x$Determinant, "\n\n") cat("Eigen values: ") cat(x$Eigen.Val, sep=", ") cat("\n\n") cat("Eigen vectors (in columns):\n") print(x$Eigen.Vect, ...) cat("\n") return(invisible(x)) } cm3 <- CovMatDEV3(sigma1=1, sigma2=2, rho=0.5) print(cm3) print.default(cm3)