####
# Functions used for analysis of breast cancer data
# The functions differ from the general (used in simulations) in the grid, which contains n0 points in 0, if necessary
###

library(clue)
library(sfsmisc)

norma=function(x){
  # L2 norm of a vector x
  return(sqrt(sum(x^2)))
}


grid=function(d,n.R,n.S){
  
  # used iid uniform grid over the unit sphere for each rank separately
  n=n.R*n.S
  
  
  target.grid<-matrix(rep(0,d*n),ncol=d)
  
  z1=rmvnorm(n,sigma=diag(d))
  u=z1/apply(z1,1,norma)
  z2=(1:n.R)/(n.R+1)
  
  for(i in 1:n.R) 
    target.grid[(i-1)*n.S+(1:n.S),]=z2[i]*u[(i-1)*n.S+(1:n.S),]
  return(target.grid)
}


dF.data=function(y,n.R,n.S){
  # y is given nxd matrix of sample of size n from d-dimensional distribution
  # grid generated here 
  # used a random grid over the unit ball
  # the levels (ranks) are equidistant
  # the directions are chosen randomly on the unit sphere, for each rank level separately -> n  directions in total
  
  
  d=dim(y)[2]
  n=dim(y)[1]
  # n.R * n.S = n 
  n0=n-n.R*n.S
  target.grid<-grid(d,n.R,n.S)
  if(n0>0) target.grid=rbind(target.grid,matrix(0,nrow=n0,ncol=d))
  # 
  costes<-matrix(0,ncol=n,nrow=n)
  for (i in 1:n){for (j in 1:n) costes[i,j]<-sum((y[i,]-target.grid[j,])^2)
  }
  # takto odpovida, ze asignacion[i] je bod z gridu, tj. distr. funkce
  asignacion<-solve_LSAP(costes)
  return(target.grid[asignacion,])
}


LTS.data=function(y,c,n1){
  # the same as LTS, but adds n0 points to 0 in the grid
  # Wilcoxon cores
  
  # computes test statistic Ta
  # c is a vector of scores of size n
  # n1 corresponds to n.R from the factorization n=n.R*n.S; needs to be specified by the user 
  
  
  n=dim(y)[1]
  n2=floor(n/n1)
  
  Femp=dF.data(y,n.R=n1,n.S=n2)
  Ni=apply(Femp,1,norma) 
  Si=t(apply(Femp,1,devide))
  cm=mean(c)
  cn=1/sqrt(sum((c-mean(c))^2))  
  
  Ta=cn*apply((c-cm)*Ni*Si,2,sum)    # test statistic -> d-dim vector
  
  return(Ta)  
}


devide=function(x){
  # x vector
  n=norma(x)
  if(n>0) d=x/n else d=0*x
  return(d)
}


twosample.test.data=function(y1,y2,n1){
  # the same as LTS, but uses n0 points in 0 in the grid
  # performs Wilcoxon test
  
  # two sample test for y1 and y2
  # yi is a  random sample from a  d-dimensional distribution of size nyi, i=1,2
  # n1 corresponds to n.R from the factorization n=nR*nS with n=ny1+ny2
  
  ny1=dim(y1)[1]
  ny2=dim(y2)[1]
  if(dim(y1)[2]!=dim(y2)[2]){print("wrong dimensions"); break}
  d=dim(y1)[2]
  y=rbind(y1,y2)
  c=c(rep(1,ny1),rep(0,ny2))
  Ta=LTS.data(y,c,n1)
  Ji=1/3
  Q2=d/Ji*sum(Ta^2)
  p=1-pchisq(Q2,df=d)
  return(list(Q2=Q2,p=p))  
  # returns the value of test statistic Q^2 and the corresponding p-value p
}

#### 
# plot:


panel.hist <- function(x,...)
{
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5) )
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks; nB <- length(breaks)
  y <- h$counts; y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "gray70", lwd=1)
}