##### ##### NMST440: Advanced Aspects of the R Environment ##### ##### --------------------------------------------------------------------- ##### ##### Parallelized simulation ##### ##### - simulation settings as in nmst440-simul2.R ##### (regression with error in variables) ##### ##### --------------------------------------------------------------------- ##### ##### Arnošt Komárek ##### https://www2.karlin.mff.cuni.cz/~komarek ##### komarek@karlin.mff.cuni.cz ##### ##### ====================================================================== # args <- commandArgs(TRUE) args <- c(100, 0, 1, 0.1, 0.2) n <- as.numeric(args[1]) beta0 <- as.numeric(args[2]) beta1 <- as.numeric(args[3]) sigma.x <- as.numeric(args[4]) sigma.eps <- as.numeric(args[5]) ROOT <- "/home/komarek/teach/mff_2021/nmst440_AdvRko/Tutorial10" RESDIR <- paste(ROOT, "/SimResult/", sep = "") setwd(ROOT) ### Function to create one dataset and calculate ### LSE ### +++++++++++++++++++++++++++++++++++++++++++++++ sim1 <- function(n = 100, beta0 = 0, beta1 = 1, sigma.x = 0.1, sigma.eps = 0.2, plot = FALSE) { x.true <- runif(n, -1, 1) y.obs <- beta0 + beta1 * x.true + rnorm(n, mean = 0, sd = sigma.eps) x.obs <- x.true + rnorm(n, mean = 0, sd = sigma.x) fit <- try(lm(y.obs ~ x.obs)) if (class(fit)[1] == "try-error") return("Error") if (plot){ plot(y.obs ~ x.obs, pch = 23, col = "red3", bg = "orange", xlab = "x", ylab = "y") abline(a = beta0, b = beta1, col = "red3", lwd = 2) abline(fit, col = "blue2", lwd = 2) legend("topleft", legend = c("Fitted", "True"), col = c("blue2", "red3"), lty = 1, lwd = 2) } RES <- c(coef(fit), summary(fit)[["sigma"]]) names(RES) <- c("beta0", "beta1", "sigma.eps") return(RES) } # sim1(plot = TRUE) ### Function to run M datasets ### - the first argument (number of the run) will become useful ### in few moments ### +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ simLoop <- function(nrun = 1, M = 100, n = 100, beta0 = 0, beta1 = 1, sigma.x = 0.1, sigma.eps = 0.2) { RES <- matrix(nrow = M, ncol = 3) colnames(RES) <- c("beta0", "beta1", "sigma.eps") Mdone <- Mwrong <- 0 while (Mdone < M & Mwrong < 2 * M){ Sim <- sim1(n = n, beta0 = beta0, beta1 = beta1, sigma.x = sigma.x, sigma.eps = sigma.eps) if (Sim[1] == "Error"){ Mwrong <- Mwrong + 1 next } Mdone <- Mdone + 1 RES[Mdone,] <- Sim } return(RES) } # simLoop(M = 10) ### Run batches of 250 loops in parallel on 4 threads ### +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ library("parallel") detectCores() ## how many threads available? myCluster <- makeCluster(4) simRes <- parSapply(myCluster, 1:4, simLoop, M = 250, n = n, beta0 = beta0, beta1 = beta1, sigma.x = sigma.x, sigma.eps = sigma.eps) ### Slaves do not know objects (e.g., sim1 function) known to the Master ### Pass needed objects as arguments to simLoop function ### +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ simLoop2 <- function(nrun = 1, M = 100, n = 100, beta0 = 0, beta1 = 1, sigma.x = 0.1, sigma.eps = 0.2, sim1 = sim1) { RES <- matrix(nrow = M, ncol = 3) colnames(RES) <- c("beta0", "beta1", "sigma.eps") Mdone <- Mwrong <- 0 while (Mdone < M & Mwrong < 2 * M){ Sim <- sim1(n = n, beta0 = beta0, beta1 = beta1, sigma.x = sigma.x, sigma.eps = sigma.eps) if (Sim[1] == "Error"){ Mwrong <- Mwrong + 1 next } Mdone <- Mdone + 1 RES[Mdone,] <- Sim } return(RES) } ### Run batches of 250 loops in parallel on 4 cores ### +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # set.seed(221913282) ### This does not allow for reproducibility. ### Each slave uses its own seed. ### parLapply: clusterSetRNGStream(myCluster, 221913282) ### Ensures reproducibility and also ### different seeds on different slaves. simRes <- parLapply(myCluster, 1:4, simLoop2, M = 250, n = n, beta0 = beta0, beta1 = beta1, sigma.x = sigma.x, sigma.eps = sigma.eps, sim1 = sim1) ### clusterApply (practically the same as above): clusterSetRNGStream(myCluster, 221913282) ### Ensures reproducibility and also ### different seeds on different slaves. simRes <- clusterApply(myCluster, 1:4, simLoop2, M = 250, n = n, beta0 = beta0, beta1 = beta1, sigma.x = sigma.x, sigma.eps = sigma.eps, sim1 = sim1) ### Stop cluster stopCluster(myCluster) ### What are we getting as results ### +++++++++++++++++++++++++++++++++ class(simRes) length(simRes) dim(simRes[[1]]) simRes[[1]][1:10,] simRes[[2]][1:10,] simRes[[3]][1:10,] simRes[[4]][1:10,] ### Put results together ### ++++++++++++++++++++++++++++++++++ simMat <- rbind(simRes[[1]], simRes[[2]], simRes[[3]], simRes[[4]]) dim(simMat) simMat[1:10,] ### Save results ### ++++++++++++++++++++++++++++++++ save(simMat, file = paste(RESDIR, "/errvar-", n, "-", beta0, "-", beta1, "-", sigma.x*10, "-", sigma.eps*10, ".RData", sep = "")) ### Process results ### ++++++++++++++++++++++++++++++++ (load(paste(RESDIR, "/errvar-", n, "-", beta0, "-", beta1, "-", sigma.x*10, "-", sigma.eps*10, ".RData", sep = ""))) apply(simMat, 2, mean) ### etc.