##### ##### NMST440: Advanced Aspects of the R Environment ##### ##### --------------------------------------------------------------------- ##### ##### Work out results of simulations performed on a cluster (or the local PC) ##### ##### --------------------------------------------------------------------- ##### ##### Arnošt Komárek ##### https://www2.karlin.mff.cuni.cz/~komarek ##### komarek@karlin.mff.cuni.cz ##### ##### ====================================================================== rm(list = ls()) setwd("/home/komarek/teach/mff_2021/nmst440_AdvRko/Tutorial11") RESDIR <- "./SimulIndTest_2021/RData/" ### results of the local calculations (2021), M = 1000, total CPU time = about 4 min #RESDIR <- "./SimulIndTest_2017/RData/" ### results of the cluster calculations (2017), M = 10000, total CPU time = about 13 days (FILES <- dir(RESDIR)) ### Result files that are available ### Scenarios that were simulated ### -------------------------------- ### Local (2021) N <- c(10, 50, 100) A <- c(0.1, 0.5, 1) RHO <- c(0, 0.25, 0.50, 0.75, 0.95) Mmax <- 1000 ### Cluster (2017) N <- c(10, 50, 100, 250) A <- c(0.1, 0.5, 1) RHO <- c(0, 0.25, 0.50, 0.75, 0.95) Mmax <- 10000 ### For each a and each rho, we create a matrix ### with columns corresponding to sample sizes. ### Matrices will be stored in a list Tsamp. ### --------------------------------------------- Tsamp <- list() for (a in A){ for (rho in RHO){ LNAME <- paste(a*10, "_", rho*100, sep = "") Tsamp[[LNAME]] <- matrix(NA, nrow = Mmax, ncol = length(N)) colnames(Tsamp[[LNAME]]) <- paste(N) for (n in N){ FNAME <- paste("I_", a*10, "_", rho*100, "_", n, ".RData", sep = "") if (!(FNAME %in% FILES)){ cat("*** ", FNAME, " not (yet) available.\n", sep = "") next } load(paste(RESDIR, FNAME, sep = "")) Tsamp[[LNAME]][, paste(n)] <- T } } } ### Check first few rows of each matrix ### ---------------------------------------------- ### a = 0.1, rho = 0, 0.25, 0.50, 0.75, 0.95 Tsamp[["1_0"]][1:10,] Tsamp[["1_25"]][1:10,] Tsamp[["1_50"]][1:10,] Tsamp[["1_75"]][1:10,] Tsamp[["1_95"]][1:10,] ### a = 0.5, rho = 0, 0.25, 0.50, 0.75, 0.95 Tsamp[["5_0"]][1:10,] Tsamp[["5_25"]][1:10,] Tsamp[["5_50"]][1:10,] Tsamp[["5_75"]][1:10,] Tsamp[["5_95"]][1:10,] ### a = 1.0, rho = 0, 0.25, 0.50, 0.75, 0.95 Tsamp[["10_0"]][1:10,] Tsamp[["10_25"]][1:10,] Tsamp[["10_50"]][1:10,] Tsamp[["10_75"]][1:10,] Tsamp[["10_95"]][1:10,] ### Distribution of the test statistic for given a and sample size n when rho changes ### ---------------------------------------------------------------------------------------- library("colorspace") COL <- rainbow_hcl(5) ## colors for different rho's names(COL) <- paste(RHO) n <- 100 layout(matrix(c(4,4,1,1, 2,2,3,3), nrow = 2, byrow = TRUE)) for (a in A){ ### Vector of all T's (to get its range for plot) Tall <- numeric() for (rho in RHO){ Tall <- c(Tall, Tsamp[[paste(a*10, "_", rho*100, sep = "")]][, paste(n)]) } ### Empirical cdf's plot(ecdf(Tsamp[[paste(a*10, "_", 0, sep = "")]][, paste(n)]), col = COL[1], xlim = range(Tall), xlab = "T", main = paste("a = ", a, sep = "")) for (rho in RHO[-1]){ plot(ecdf(Tsamp[[paste(a*10, "_", rho*100, sep = "")]][, paste(n)]), col = COL[paste(rho)], add = TRUE) } } ### Legend plot(c(0, 100), c(0, 100), type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n") legend(10, 70, legend = RHO, lty = 1, lwd = 2, col = COL) text(10, 80, labels = expression(rho), pos = 4, font = 2) par(mfrow = c(1, 1)) ### Empirical critical values (for a test on 5% significance level) ### ----------------------------------------------------------------- C <- matrix(NA, nrow = length(A), ncol = length(N)) rownames(C) <- paste(A) colnames(C) <- paste(N) print(C) for (a in A){ C[paste(a),] <- apply(Tsamp[[paste(a*10, "_0", sep = "")]], 2, quantile, 0.95, na.rm = TRUE) } print(C) ### Empirical power (for a test on 5% significance level) ### ------------------------------------------------------ Power <- matrix(NA, nrow = length(A) * length(RHO), ncol = length(N)) rownames(Power) <- paste(rep(RHO, each = length(A)), " - ", rep(A, length(RHO)), sep = "") colnames(Power) <- paste(N) print(Power) for (rho in RHO){ for (a in A){ Ms <- apply(!is.na(Tsamp[[paste(a*10, "_", rho*100, sep = "")]]), 2, sum, na.rm = TRUE) Cs <- matrix(rep(C[paste(a),], each = nrow(Tsamp[[paste(a*10, "_", rho*100, sep = "")]])), nrow = nrow(Tsamp[[paste(a*10, "_", rho*100, sep = "")]])) Power[paste(rho, " - ", a, sep = ""),] <- apply(Tsamp[[paste(a*10, "_", rho*100, sep = "")]] >= Cs, 2, sum, na.rm = TRUE) / Ms } } print(Power) ### Which value of a (among those tried) seems to be optimal? ### Surprised by 0.05 in all columns of the first three rows (rho = 0)?