##################### # Functions for 2-sample test and MANOVA in d=>2 dimensions # The empirical distribution function is computed using a random grid in R^d # The test statistics are computed directly for the function J(u)=u, i.e. Wilcoxon test # The factorization n=n.R*n.S needs to be specified by the user #################### library(clue) library(sfsmisc) sqdist<-function(a,b){ return(abs(a-b)^2) } grid=function(d,n.R,n.S){ # used iid uniform grid over the unit sphere for each rank separately n=n.R*n.S target.grid<-matrix(rep(0,d*n),ncol=d) z1=rmvnorm(n,sigma=diag(d)) u=z1/apply(z1,1,norma) z2=(1:n.R)/(n.R+1) for(i in 1:n.R) target.grid[(i-1)*n.S+(1:n.S),]=z2[i]*u[(i-1)*n.S+(1:n.S),] return(target.grid) } dF.0=function(y,grid){ # y is a given nxd matrix of sample of size n from d-dimensional distribution # grid specified by user d=dim(y)[2] n=dim(y)[1] if(n!=dim(grid)[1]) print("wrong assignment") else{ # costes<-t(outer(target.grid[,1],y[,1],"sqdist")) for(r in 2:d){ costes<-costes+t(outer(target.grid[,r],y[,r],"sqdist"))} asignacion<-solve_LSAP(costes) return(grid[asignacion,]) } # returns the empirical distribution function } dF=function(y,n.R,n.S){ # y is given nxd matrix of sample of size n from d-dimensional distribution # grid generated here # used a random grid over the unit ball # the levels (ranks) are equidistant # the directions are chosen randomly on the unit sphere, for each rank level separately -> n directions in total d=dim(y)[2] n=dim(y)[1] # n.R * n.S = n if(n!=n.R*n.S) print("wrong assignment") else{ target.grid<-grid(d,n.R,n.S) # costes<-t(outer(target.grid[,1],y[,1],"sqdist")) for(r in 2:d){ costes<-costes+t(outer(target.grid[,r],y[,r],"sqdist"))} asignacion<-solve_LSAP(costes) return(target.grid[asignacion,]) } } norma=function(x){ # L2 norm of a vector x return(sqrt(sum(x^2))) } LTS=function(y,c,n1){ # computes test statistic Ta # c is a vector of scores of size n # n1 corresponds to n.R from the factorization n=n.R*n.S; needs to be specified by the user n=dim(y)[1] n2=n/n1 if(n2!=round(n2)) {print("error"); break } Femp=dF(y,n.R=n1,n.S=n2) Ni=apply(Femp,1,norma) Si=Femp/Ni cm=mean(c) cn=1/sqrt(sum((c-mean(c))^2)) Ta=cn*apply((c-cm)*Ni*Si,2,sum) # test statistic -> d-dim vector return(Ta) } twosample.test=function(y1,y2,n1){ # two sample test for y1 and y2 # yi is a random sample from a d-dimensional distribution of size nyi, i=1,2 # n1 corresponds to n.R from the factorization n=nR*nS with n=ny1+ny2 ny1=dim(y1)[1] ny2=dim(y2)[1] if(dim(y1)[2]!=dim(y2)[2]){print("wrong dimensions"); break} d=dim(y1)[2] y=rbind(y1,y2) c=c(rep(1,ny1),rep(0,ny2)) Ta=LTS(y,c,n1) Ji=1/3 Q2=d/Ji*sum(Ta^2) p=1-pchisq(Q2,df=d) return(list(Q2=Q2,p=p)) # returns the value of test statistic Q^2 and the corresponding p-value p } PM.manova=function(y,f,n1){ # y is a vector of responses (again d=2 dimensional) of size n # f is a vector of the factorial variable (determining the levels) of size n # n1 corresponds to n.R in n=n.R*n.S (number of rank levels) d=dim(y)[2] n=dim(y)[1] n2=n/n1 Femp=dF(y,n.R=n1,n.S=n2) #Femp=dF(y,n.R=n.R,n.S=n/n.R) # empirical df Ni=apply(Femp,1,norma) # norm of Femp Si=Femp/Ni l=levels(f) K=length(l) soucin=(Ni*Si) Ji=1/3 Q=0 for(k in 1:K){ ni=table(f)[k] Q=Q+1/ni*norma(apply(soucin[f==l[k],],2,sum))^2 } Q=Q-1/n*norma(apply(soucin,2,sum))^2 QQ=Q*d/Ji # QQ=Q*d/(Ji) pp=1-pchisq(QQ,df=(K-1)*d) return(list(Q=QQ,p=pp)) # returns value of the test statistic QQ and the corresponding p-value }