#### # Functions used for analysis of breast cancer data # The functions differ from the general (used in simulations) in the grid, which contains n0 points in 0, if necessary ### library(clue) library(sfsmisc) norma=function(x){ # L2 norm of a vector x return(sqrt(sum(x^2))) } grid=function(d,n.R,n.S){ # used iid uniform grid over the unit sphere for each rank separately n=n.R*n.S target.grid<-matrix(rep(0,d*n),ncol=d) z1=rmvnorm(n,sigma=diag(d)) u=z1/apply(z1,1,norma) z2=(1:n.R)/(n.R+1) for(i in 1:n.R) target.grid[(i-1)*n.S+(1:n.S),]=z2[i]*u[(i-1)*n.S+(1:n.S),] return(target.grid) } dF.data=function(y,n.R,n.S){ # y is given nxd matrix of sample of size n from d-dimensional distribution # grid generated here # used a random grid over the unit ball # the levels (ranks) are equidistant # the directions are chosen randomly on the unit sphere, for each rank level separately -> n directions in total d=dim(y)[2] n=dim(y)[1] # n.R * n.S = n n0=n-n.R*n.S target.grid<-grid(d,n.R,n.S) if(n0>0) target.grid=rbind(target.grid,matrix(0,nrow=n0,ncol=d)) # costes<-matrix(0,ncol=n,nrow=n) for (i in 1:n){for (j in 1:n) costes[i,j]<-sum((y[i,]-target.grid[j,])^2) } # takto odpovida, ze asignacion[i] je bod z gridu, tj. distr. funkce asignacion<-solve_LSAP(costes) return(target.grid[asignacion,]) } LTS.data=function(y,c,n1){ # the same as LTS, but adds n0 points to 0 in the grid # Wilcoxon cores # computes test statistic Ta # c is a vector of scores of size n # n1 corresponds to n.R from the factorization n=n.R*n.S; needs to be specified by the user n=dim(y)[1] n2=floor(n/n1) Femp=dF.data(y,n.R=n1,n.S=n2) Ni=apply(Femp,1,norma) Si=t(apply(Femp,1,devide)) cm=mean(c) cn=1/sqrt(sum((c-mean(c))^2)) Ta=cn*apply((c-cm)*Ni*Si,2,sum) # test statistic -> d-dim vector return(Ta) } devide=function(x){ # x vector n=norma(x) if(n>0) d=x/n else d=0*x return(d) } twosample.test.data=function(y1,y2,n1){ # the same as LTS, but uses n0 points in 0 in the grid # performs Wilcoxon test # two sample test for y1 and y2 # yi is a random sample from a d-dimensional distribution of size nyi, i=1,2 # n1 corresponds to n.R from the factorization n=nR*nS with n=ny1+ny2 ny1=dim(y1)[1] ny2=dim(y2)[1] if(dim(y1)[2]!=dim(y2)[2]){print("wrong dimensions"); break} d=dim(y1)[2] y=rbind(y1,y2) c=c(rep(1,ny1),rep(0,ny2)) Ta=LTS.data(y,c,n1) Ji=1/3 Q2=d/Ji*sum(Ta^2) p=1-pchisq(Q2,df=d) return(list(Q2=Q2,p=p)) # returns the value of test statistic Q^2 and the corresponding p-value p } #### # plot: panel.hist <- function(x,...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col = "gray70", lwd=1) }