.First <- function() {
  options(
    repos = c(CRAN = "https://cran.rstudio.com/"),
    download.file.method = "wget"
  )
}
.First()

#install.packages("ff", dependencies=T)
#install.packages("ffbase",dependencies=TRUE)
#install.packages("rrBLUP",dependencies=TRUE )

#install.packages("rpart",dependencies=TRUE)
#install.packages("EMMREML",dependencies=TRUE)
#install.packages("RColorBrewer",dependencies=TRUE)
#install.packages("SAMM", dependencies=TRUE)
library("rpart")
library("stringr")
library("Matrix")
library("parallel")
library("ff")
library("ffbase")
library("EMMREML")
library("RColorBrewer")
library("rrBLUP")
library("compiler")
#library("SAMM")

setCompilerOptions(suppressUndefined=TRUE)
tr <- function(x)sum(diag(x))
#M is coded as -1,0,1; no missing

emm_reml<-function (y, X, Z, K) 
{
  enableJIT(3)
  q = dim(X)[2]
  n = length(y)
  X<-Matrix(X)
  spI <- Diagonal(n)
  S <- spI - tcrossprod(X %*% solve(crossprod(X)), X)
  if (is.null(K)){ZK=Z}else{ZK <- Z %*% K}
  offset <- log(n)
  ZKZt <- tcrossprod(ZK, Z)
  ZKZtandoffset <- ZKZt + offset * spI
  SZKZtSandoffset <- {
    S %*% ZKZtandoffset
  } %*% S
  svdSZKZtSandspI <- eigen(SZKZtSandoffset, symmetric = TRUE)
  rm(SZKZtSandoffset)
  foo <- gc();rm(foo)
  Ur <- svdSZKZtSandspI$vectors[, 1:(n - q)]
  lambda <- svdSZKZtSandspI$values[1:(n - q)] - offset
  eta <- crossprod(Ur, y)
  minimfunc <- function(delta) {
    (n - q) * log(sum(eta^2/{
      lambda + delta
    })) + sum(log(lambda + delta))
  }
  
  minimfunc <- cmpfun(minimfunc)
  optimout <- optimize(minimfunc, lower = 9^(-10), upper = 9^10, 
                       tol = 1e-10)
  deltahat <- optimout$minimum
  Hinvhat <- solve(ZKZt + deltahat * spI)
  XtHinvhat <- crossprod(X, Hinvhat)
  betahat <- solve(XtHinvhat %*% X, XtHinvhat %*% y)
  ehat <- (y - {
    X %*% betahat
  })
  Hinvhatehat <- Hinvhat %*% ehat
  sigmausqhat <- sum(eta^2/{
    lambda + deltahat
  })/(n - q)
  Vinv <- (1/sigmausqhat) * Hinvhat
  sigmaesqhat <- deltahat * sigmausqhat
  uhat <- crossprod(ZK, Hinvhatehat)
  df <- n - q
  loglik <- -0.5 * (optimout$objective + df + df * log(2 * 
                                                         pi/df))
  return(list(Vu = sigmausqhat, Ve = sigmaesqhat, betahat = betahat, 
              uhat = uhat, loglik = loglik))
}

########M is 0,1,2 coded
Amat.pieces<-function(M, pieces=10, mc.cores=1){
  AmatPieces<-function(M){
    pvec<-matrix(apply(M, 2, function(x){mean(x)/2}), ncol=1)
    MMt<-tcrossprod(M-2*matrix(1, nrow=nrow(M), ncol=1)%*%t(pvec))
    return(list(pvec, MMt))
  }
  CombAmatPieces<-function(Amatpiecesout){
    nparts<-length(Amatpiecesout)
    NumAmat<-Amatpiecesout[[1]][[2]]
    denomAmat<-sum(2*Amatpiecesout[[1]][[1]]*(1-Amatpiecesout[[1]][[1]]))
    for (i in 2:length(Amatpiecesout)){
      NumAmat<-NumAmat+Amatpiecesout[[i]][[2]]
      denomAmat<-denomAmat+sum(2*Amatpiecesout[[i]][[1]]*(1-Amatpiecesout[[i]][[1]]))
      
    }
    return(NumAmat/denomAmat)
  }
  
  x <- 1:ncol(M)
  n <- pieces
  chunk <- function(x,n) split(x, factor(sort(rank(x)%%n)))
  
  listforlapply<-chunk(x,n)
  
  lapplyfunc<-function(x){
    return(AmatPieces(as(M[,x], "sparseMatrix")))
  }
  
  lapplyout<-mclapply(X=listforlapply, FUN=lapplyfunc, mc.cores=mc.cores)
  
  
  A<-as.matrix(CombAmatPieces(lapplyout))
  
  return(A)
  
}

###############Example


loglike<- function(resid, Vinv)
{  
  n<- nrow(resid)
  temp1 <- Vinv%*%resid
  (-.5)*(-determinant(Vinv, logarithm=T)$modulus + crossprod(resid,temp1)+n*log(2*pi))
}

em.mixed <- function(y, X, Z, beta, Ve, Vu,maxiter=2000,tolerance = 1e-0010)
{
  n <- nrow(y)
  q1 <- nrow(Z)
  conv <- 0
  ZZt<- tcrossprod(Z) 
  tolpareig<-log(ncol(ZZt))
  diag(ZZt)<-diag(ZZt)+tolpareig
  eigZZt<-eigen(ZZt, symmetric=T)
  diag(ZZt)<-diag(ZZt)-tolpareig
  
  eigenvals=eigZZt$values-tolpareig
  XtX<-crossprod(X)
  xb=X%*%beta
  resid=y-xb
  lambda=as.numeric(Ve/Vu)
  Vinv <- tcrossprod(eigZZt$vectors%*%diag(1/sqrt(eigenvals+lambda)))/as.numeric(Vu)
  
  L0 <- loglike(resid, Vinv)
  i<-0
  repeat{
    if(i>maxiter){
      break
    }
    
    temp1 <- Vinv %*% resid
    s0 <- as.numeric(Ve)^2 * crossprod(temp1) + as.numeric(Ve) * n - as.numeric(Ve)^2 * tr(Vinv)
    s1 <- as.numeric(Vu)^2 * crossprod(temp1,ZZt%*%temp1)+ as.numeric(Vu)*q1 - as.numeric(Vu)^2 *sum(Z*(Vinv%*%Z))
    w <- xb + as.numeric(Ve) * temp1
    Ve <- s0/n
    Vu <- s1/q1
    beta <- solve(XtX,crossprod(X,w))
    lambda=as.numeric(Ve/Vu)
    xb <- X %*% beta
    resid <- (y-xb)
    Vinv <- tcrossprod(eigZZt$vectors%*%diag(1/sqrt(eigenvals+lambda)))/as.numeric(Vu)
    L1 <- loglike(resid, Vinv)
    if(L1 < L0) { 
      conv <- 0
      break
    }
    i <- i + 1
    if(abs(L1 - L0) < tolerance) {
      conv <- 1
      break
    }  
    L0 <- L1
  }
  list(beta=beta, Ve=Ve,Vu=Vu,Loglikelihood=L1,conv=conv)
}


em.mixed2 <- function(y, X, Z, ZZt, eigZZt, eigenvals, beta, Ve, Vu,maxiter=2000,tolerance = 1e-0010)
{
  n <- nrow(y)
  q1 <- nrow(Z)
  conv <- 0
  XtX<-crossprod(X)
  xb=X%*%beta
  resid=y-xb
  lambda=as.numeric(Ve/Vu)
  Vinv <- tcrossprod(eigZZt$vectors%*%diag(1/sqrt(eigenvals+lambda)))/as.numeric(Vu)
  
  L0 <- loglike(resid, Vinv)
  i<-0
  repeat{
    if(i>maxiter){
      break
    }
    
    temp1 <- Vinv %*% resid
    s0 <- as.numeric(Ve)^2 * crossprod(temp1) + as.numeric(Ve) * n - as.numeric(Ve)^2 * tr(Vinv)
    s1 <- as.numeric(Vu)^2 * crossprod(temp1,ZZt%*%temp1)+ as.numeric(Vu)*q1 - as.numeric(Vu)^2 *sum(Z*(Vinv%*%Z))
    w <- xb + as.numeric(Ve) * temp1
    Ve <- s0/n
    Vu <- s1/q1
    beta <- solve(XtX,crossprod(X,w))
    lambda=as.numeric(Ve/Vu)
    xb <- X %*% beta
    resid <- (y-xb)
    Vinv <- tcrossprod(eigZZt$vectors%*%diag(1/sqrt(eigenvals+lambda)))/as.numeric(Vu)
    L1 <- loglike(resid, Vinv)
    if(L1 < L0) { 
      conv <- 0
      break
    }
    i <- i + 1
    if(abs(L1 - L0) < tolerance) {
      conv <- 1
      break
    }  
    L0 <- L1
  }
 return(list(beta=beta, Ve=Ve,Vu=Vu,Loglikelihood=L1,conv=conv))
}


loglikerpart<- function(resid, Vinv)
{
  n<- nrow(resid)
  temp1 <- Vinv%*%resid
  (-.5)*(-determinant(Vinv, logarithm=T)$modulus + crossprod(resid,temp1)+n*log(2*pi))
  
}

merpart<-function(y, X,  z=z, ZZt, eigZZt, eigenvals, M, var00, var10,LNull=NULL,maxiter=2000,tolerance = 1e-0010, skipfittree=10, rpartcontrol=rpart.control(minsplit = 1, minbucket = 2, cp = 0.01,
                                                                                                                                                              maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, xval = 10,
                                                                                                                                                              surrogatestyle = 0, maxdepth = 3)){
  n=nrow(y)
  if (((!is.null(M))&(!is.null(X)))){  
    x=cbind(X,M)
    colnames(x)<-c(colnames(X), colnames(M))
  } else if (((!is.null(M))&(is.null(X)))){  
    x=M
  } else if (((is.null(M))&(!is.null(X)))){  
    x=X
  } else {print("Either X or M should not be NULL")
    break
  }
  q1 <- nrow(z)
  meanrpart=mean(y)
  var0=var00
  var1=var10
  lambda=as.numeric(var0/var1)
  Vinv <- tcrossprod(eigZZt$vectors%*%diag(1/sqrt(eigenvals+lambda)))/as.numeric(var1)
  resid <- (y-meanrpart)
  conv <- 0
  L0 <- loglikerpart(resid, Vinv)
  i<-0
  repeat{
    if(i>maxiter){
      break
    }
    temp1 <- Vinv %*% resid
    
    s0 <- as.numeric(var0)^2 * crossprod(temp1) + as.numeric(var0) * n - as.numeric(var0)^2 * tr(Vinv)
    s1 <- as.numeric(var1)^2 * crossprod(temp1,ZZt%*%temp1)+ as.numeric(var1)*q1 - as.numeric(var1)^2 *sum(z*(Vinv%*%z))
    w <- meanrpart + as.numeric(var0) * temp1
    var0 <- s0/n
    var1 <- s1/q1
    if ((i %% skipfittree)==0){
      rpartdata<-data.frame(cbind(w,x))
      colnames(rpartdata)<-c("w",colnames(x))
      require(rpart)
      rpartmodel<-rpart(w~., data=rpartdata, control = rpartcontrol)
      meanrpart<-predict(rpartmodel)
    }
    lambda=as.numeric(var0/var1)
    Vinv <- tcrossprod(eigZZt$vectors%*%diag(1/sqrt(eigenvals+lambda)))/as.numeric(var1)
    
    resid <- (y-meanrpart)
    L1 <- loglikerpart(resid, Vinv)
    
    i <- i + 1
    if(abs(L1 - L0) < tolerance) {
      conv=1
      break
    }  
    L0 <- L1
  }
  
  
  getrulesrpart<-function(tree){
    leafnodeRows <- grepl("leaf",tree$frame$var)
    nodevals <- as.numeric(rownames(tree$frame)[leafnodeRows])
    capture.output(rules <- path.rpart(tree,nodevals))
    capture.output(rulesdf <- do.call("rbind",lapply(rules,function(x)paste(x,collapse = " -AND- "))))
    capture.output(rulesdf <- data.frame(nodeNumber=rownames(rulesdf),rule=rulesdf[,1],stringsAsFactors=FALSE))  
    return(rulesdf)
  }
  
  output<- list(mean=getrulesrpart(rpartmodel), var0=var0,var1=var1,Loglikelihood=L1, LNull=LNull, conv=conv, iter=i)
  
  return(output)
  
}


localmarkersampler<-function(Map, nsample, maxlength, keep=NULL){
  nsample=nsample-1
  Map$pos<-as.numeric(Map$pos)
  
  firstmarker<-Map[sample(1:nrow(Map), 1),]
  chrfirstmarker<-firstmarker$chr
  posfirstmarker<-firstmarker$pos
  minonchrom<-min(Map[Map$chr==chrfirstmarker,]$pos)
  maxonchrom<-max(Map[Map$chr==chrfirstmarker,]$pos)
  minpos<-max(minonchrom, posfirstmarker-maxlength)
  maxpos<-min(maxonchrom, posfirstmarker+maxlength)
  Mappart1<-Map[(Map$chr==chrfirstmarker&(((Map$pos<=maxpos)&(Map$pos>=minpos))&Map$pos>posfirstmarker)),]
  #print(dim(Mappart))
  
  Mapsample1<-Mappart1[sample(1:nrow(Mappart1), min(nsample-1,nrow(Mappart1)), replace=T),]
  Mappart2<-Map[(Map$chr==chrfirstmarker&(((Map$pos<=maxpos)&(Map$pos>=minpos))&Map$pos<posfirstmarker)),]
  #print(dim(Mappart))
  
  Mapsample2<-Mappart2[sample(1:nrow(Mappart2), min(nsample-1, nrow(Mappart2)), replace=T),]
  #print(dim(Mapsample))
  
  Mapsample<-rbind(firstmarker,Mapsample1,Mapsample2)
  
  #print(Mapsample)
  if (!is.null(keep)){
    Mapsample<-rbind(Mapsample,Map[(Map$mrk%in%keep),])
    
  }
  Mapsample<-Mapsample[!duplicated(Mapsample$mrk),]
  return(Mapsample)
}

#M(m x n) can be an ff link 


meforest<-function(y, X, Z, K, Mlist,map, nsample, maxlength,ntrees,bagging,proprow, mc.cores=1,var00=1, var10=1,maxiter=200,tolerance = 1e-007, skipfittree=1, rpartcontrol=rpart.control(minsplit = 1, minbucket = 2, cp = 0.01,
                                                                                                                                                                                           maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, xval = 10,
                                                                                                                                                                                           surrogatestyle = 0, maxdepth = 3), cp=0, maxdepth=3){
  nchunks<-length(Mlist)
  rowsinchunks<-lapply(Mlist, function(x){return(ncol(x))})
  cholK<-chol(K)
  mclapplyfunction<-function(dummyvar){
    n=nrow(y)
    samplerows<-sample(1:n, ceiling(proprow*n))
    Z=Z[samplerows,]
    y=matrix(y[samplerows,], ncol=1)
    if (!is.null(X)){colnamesX<-colnames(X)
    X=matrix(X[samplerows,], nrow=length(samplerows))
    colnames(X)<-colnamesX
    }
   
    z=Z%*%cholK
    l=ncol(Z)
    n=nrow(y)
    ZZt=tcrossprod(z)
    tolpareig<-log(ncol(ZZt))
    diag(ZZt)<-diag(ZZt)+tolpareig
    eigZZt<-eigen(ZZt, symmetric=T)
    diag(ZZt)<-diag(ZZt)-tolpareig
    eigenvals=eigZZt$values-tolpareig
    emmixedout<-em.mixed2(y=y, X=matrix(1, nrow=n, ncol=1), Z=z, ZZt=ZZt, eigZZt=eigZZt, eigenvals=eigenvals,  beta=matrix(0,nrow=1, ncol=1), Vu=var(c(y))/2, Ve=var(c(y))/2,maxiter=maxiter,tolerance = tolerance)
    LNull<-emmixedout$Loglikelihood
    #samout1<-SAMM(Y=matrix(y,ncol=1),X=matrix(1, nrow=nrow(y), ncol=1), Zlist=list(Z), Klist=list(K),
    #              lambda=0, W=diag(nrow(y)),R=list(diag(nrow(y))),Siglist=list(),
    #              corfunc=c(F,F), corfuncfixed=c(F,F), sigfunc=c(F),mmalg="emm_ml",
    #              tolparconv=1e-10, tolparinv=1e-10,maxiter=1000,geterrors=F)
    
    # LNull<-samout1$loglik
    #LNull=emm_reml(y, X=matrix(1, nrow=nrow(y), ncol=1),Z=Z, K=K)$loglik
    var00=var(y)/2
    var10=var(y)/2
    lapplyfunc<-function(dummyvar1){
      mapsample<- localmarkersampler(Map=map, nsample=nsample, maxlength=maxlength, keep=NULL)
      mapsample$mrk<-as.character(mapsample$mrk)
      #print(mapsample)
      ####################
      Mpart<-matrix(ncol=length(mapsample$mrk), nrow=l)
      rowno=0
      rownamesforMpart<-c()
      for (i in 1:nchunks){
        Mpart[(rowno+1):(rowno+rowsinchunks[[i]]),]<-t(as.matrix(Mlist[[i]][rownames(Mlist[[i]])%in%mapsample$mrk,]))
        rowno<-rowno+rowsinchunks[[i]]
        rownamesforMpart<-c(rownamesforMpart,colnames(Mlist[[i]]))
      }
      rownames(Mpart)<-rownamesforMpart
      Mpart<-Mpart[match(rownames(K),rownames(Mpart)),]
      
      Mtouse<-matrix(Z%*%Mpart, ncol=length(mapsample$mrk))
      colnames(Mtouse)<-mapsample$mrk
      
      if (is.null(rpartcontrol)){
        rpartcontrol=rpart.control(minsplit = 1, minbucket = 2, cp = cp,
                                   maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, xval = 10, surrogatestyle = 0, maxdepth = sample(1:maxdepth, 1))
      }
      mepartresults=merpart(y=y, X=X, z=z, ZZt=ZZt, eigZZt=eigZZt, eigenvals=eigenvals, M=Mtouse, var00=var00, var10=var10,LNull=LNull,maxiter=maxiter ,tolerance = tolerance*1e+2, skipfittree=skipfittree, rpartcontrol=rpartcontrol)
      
      return(list(mepartresults,mapsample))
    }
    output<-lapply(1:ntrees, lapplyfunc)
    return(output)
    
  }
  #output<-lapply(1:bagging, mclapplyfunction)
  output<-mclapply(X=1:bagging, FUN=mclapplyfunction, mc.cores = mc.cores, mc.set.seed = T, mc.preschedule = F)
  return(output)
}


getLRT<-function(meforestout){
  nbag<-length(meforestout)
  ntree<-length(meforestout[[1]])
  c(unlist(mclapply(1:nbag, function(ibag){
    lapply(1:ntree, function(itree){
      return(2*(meforestout[[ibag]][[itree]][[1]]$Loglikelihood-meforestout[[ibag]][[itree]][[1]]$LNull))
      
    })
  })
  ))
}

getLRTforrules<-function(meforestout){
  nbag<-length(meforestout)
  ntree<-length(meforestout[[1]])
  c(unlist(mclapply(1:nbag, function(ibag){
    lapply(1:ntree, function(itree){
      rep(2*(meforestout[[ibag]][[itree]][[1]]$Loglikelihood-meforestout[[ibag]][[itree]][[1]]$LNull),length(meforestout[[ibag]][[itree]][[1]]$mean[,2]))
    })
  })
  ))
}

getnamesfromrules<-function(meforestout){
  nbag<-length(meforestout)
  ntree<-length(meforestout[[1]])
  c(unlist(mclapply(1:nbag, function(ibag){
    lapply(1:ntree, function(itree){
      meforestout[[ibag]][[itree]][[1]]$mean[,2]
    })
  })
  ))
}


###################
predictruledf<-function(ruledf, newdata){
  nrulesindf<-nrow(ruledf)
  if (nrulesindf>1){
    output<-lapply(1:nrulesindf, function(iruledf){
      
      splitrule1<-unlist(strsplit(ruledf[iruledf,2],split=" -AND- "))
      if (length(splitrule1)==1){
        return(rep(1,ncol(newdata)))
      } else{
        splitrule1<-splitrule1[-1]
        lengthsplitrule1<-length(splitrule1)
        splittype1<-lapply(splitrule1,function(x){grepl("<",x)})
        #print(splitrule1)
        out1<-lapply(1:lengthsplitrule1, function(ix){
          if (splittype1[[ix]]){
            
            splitcond<-unlist(strsplit(splitrule1[ix],split="<"))
            
            return(newdata[splitcond[1],]<as.numeric(splitcond[2]))
            
            
          } else {
            
            splitcond<-unlist(strsplit(splitrule1[ix],split=">="))
            
            return(newdata[splitcond[1],]>=as.numeric(splitcond[2]))
          }
          
        }
        )
        
        
        out2<-Reduce("rbind", out1)
        
        out2<-matrix(out2, ncol=ncol(newdata))
        return(apply(out2,2,prod))
      }})
    
    return(Reduce("cbind",output))
  } else {return(matrix(1,nrow=ncol(newdata), ncol=1))}
  
}


predicrulesmeforest<-function(meforestout, X=NULL,Mlist, mc.cores=1){
  nchunks<-length(Mlist)
  getnamesfromrules<-function(meforestout){
    nbag<-length(meforestout)
    ntree<-length(meforestout[[1]])
    c(unlist(mclapply(1:nbag, function(ibag){
      lapply(1:ntree, function(itree){
        meforestout[[ibag]][[itree]][[1]]$mean[,2]
      })
    })
    ))
  }
  nbag<-length(meforestout)
  ntree<-length(meforestout[[1]])
  
  ntotal<-nbag*ntree
  matrixforntotaltree<-matrix(1:ntree,ntree,nbag)
  matrixforntotalbag<-matrix(1:nbag,ntree,nbag, byrow=T)
  
  output<-Reduce("rbind",lapply(1:nchunks, function(xi){
    
    output<-Reduce("cbind",mclapply(1:ntotal, function(x){
      
      itree<-matrixforntotaltree[x]
      ibag<-matrixforntotalbag[x]
      Matxi<-Mlist[[xi]]
      if(!is.null(X)){
        Xi<-X[rownames(X)%in%colnames(Matxi),]
        Xi<-matrix(Xi, ncol=ncol(X), dimnames=dimnames(Xi))
        
        Xi<-Xi[match(colnames(Matxi),rownames(Xi)),]
        Matxi<-rbind(t(Xi),Matxi)
      }
      
      
      outdf<-predictruledf(meforestout[[ibag]][[itree]][[1]]$mean, newdata=Matxi)
      return(outdf)
    }
    , mc.preschedule = T, mc.cores=mc.cores))
    colnames(output)<-getnamesfromrules(meforestout)
    
    return(output)
  }))
  
  return(output)
}


#######################################


getnamesfromrules<-function(meforestout, mc.cores=1){
  nbag<-length(meforestout)
  ntree<-length(meforestout[[1]])
  c(unlist(mclapply(1:nbag, function(ibag){
    lapply(1:ntree, function(itree){
      meforestout[[ibag]][[itree]][[1]]$mean[,2]
    })
  }, mc.preschedule = T, mc.cores=mc.cores)
  ))
}


getpositionsfromrulesnames<-function(rulesnames, colnamesX=NULL){
  nrules<-length(rulesnames)
  c(unlist(lapply(1:nrules, function(irule){
    rulename<-rulesnames[irule]
    if (rulename!="root"){
      
      splitrule1<-unlist(strsplit(rulename,split=" -AND- "))
      splitrule1<-splitrule1[-1]
      splitrule2<-unlist(strsplit(unlist(strsplit(splitrule1,split=c("<"))) ,split=">="))
      if (!is.null(colnamesX)){splitrule2.2<-splitrule2[!(splitrule2%in%colnamesX)]} else {splitrule2.2<-splitrule2}
      onlylocs<-splitrule2.2[unlist(lapply(splitrule2.2,function(x){grepl("_",x)}))]
      if (length(onlylocs)>0){
        outformatrix<-c(unlist(strsplit(c(onlylocs),split="_")))
        splitrule3<-matrix(c(unlist(strsplit(onlylocs,split="_"))), ncol=2, byrow=T)
        return(paste(splitrule3[1,1], mean(as.numeric(splitrule3[,2])), sep="_"))
      } else {
        splitrule2.2<-splitrule2[(splitrule2%in%colnamesX)]
        return(paste(splitrule2.2, sep="-and-"))
      }
    } else {return(NULL)}
  }
  )
  )
  )
}


modelwithrules<-function(y, X, Z, K=NULL, M=NULL,R, map=NULL, useGblup=F){
  
  modelout<-emm_reml(y=y,X=X,Z=Z%*%R,K=NULL)
  
  return(modelout)
}

modelwithrulesglmnet<-function(y, X, Z, K=NULL, M=NULL,R, map=NULL, alpha=1){
  library(glmnet)
  fit.glmnet.cv <- cv.glmnet(Z%*%R, y,nfold = 5,alpha = alpha)
  uhat<-coef(fit.glmnet.cv, s = fit.glmnet.cv$lambda.min)
  
  modelout<-list(uhat=uhat)
  return(modelout)
}


#################################################################################
variableimportance<-function(outemmremlobj, rulesmat, markernames, GBLUP=F, K=NULL){
  
  if (GBLUP){
    ruleeffects<-crossprod(rulesmat,solve(K,outemmremlobj$uhat))
  } else{ruleeffects<-outemmremlobj$uhat}
  namesrules<-colnames(rulesmat)
  ###########read names and extract variable names
  library("stringr")
  markercount<-sapply(markernames,function(x){return(str_count(namesrules,paste(x, "<", sep="")))}, simplify = "array")
  markercount<-markercount+sapply(markernames,function(x){return(str_count(namesrules,paste(x, ">=", sep="")))}, simplify = "array")
  colnames(markercount)<-markernames
  importances<-apply(markercount, 2, function(x){return(x*abs(ruleeffects))})[[1]]
  colnames(importances)<-markernames
  return(colSums(importances))
}


variableimportancemclapply<-function(outemmremlobj, rulesmat, markernames, GBLUP=F, K=NULL, mc.cores=1){
  return(simplify2array(mclapply(X=markernames, FUN=function(x){return(variableimportance(outemmremlobj, rulesmat, x, GBLUP, K))}, mc.cores = mc.cores, mc.preschedule = T)))
}


###################
variableinteractionmeasures<-function(outemmremlobj, rulesmat, markernames1,markernames2, GBLUP=F, K=NULL){
  
  if (GBLUP){
    ruleeffects<-crossprod(rulesmat,solve(K,outemmremlobj$uhat))
  }
  else{ruleeffects<-outemmremlobj$uhat}
  namesrules<-colnames(rulesmat)
  ###########read names and extract variable names
  library("stringr")
  markercount1<-sapply(markernames1,function(x){return(str_count(namesrules,x))}, simplify = "array")
  colnames(markercount1)<-markernames1
  markercount2<-sapply(markernames2,function(x){return(str_count(namesrules,x))}, simplify = "array")
  colnames(markercount2)<-markernames2
  markercount1[markercount1>0]<-1
  markercount2[markercount2>0]<-1
  markerinteractionsimportancematrix<-matrix(0,nrow=length(markernames1), ncol=length(markernames2))
  rownames(markerinteractionsimportancematrix)<-markernames1
  colnames(markerinteractionsimportancematrix)<-markernames2
  
  for (i in markernames1){
    for (j in markernames2){
      for (k in 1:length(ruleeffects)){
        markerinteractionsimportancematrix[i,j]=markerinteractionsimportancematrix[i,j]+markercount1[k,i]*markercount2[k,j]*ruleeffects[k]
      }
    }
  }
  rownames(markerinteractionsimportancematrix)<-markernames1
  colnames(markerinteractionsimportancematrix)<-markernames2
  return(list(interactionstrangth=markerinteractionsimportancematrix))
}


sameSizeVectorList2Matrix <- function(vectorList){  
  sm_i<-NULL
  sm_j<-NULL
  sm_x<-NULL
  for (k in 1:length(vectorList)) {
    sm_i <- c(sm_i,rep(k,length(vectorList[[k]]@i)))
    sm_j <- c(sm_j,vectorList[[k]]@i)
    sm_x <- c(sm_x,vectorList[[k]]@x)
  }
  return(sparseMatrix(i=sm_i,j=sm_j,x=sm_x,dims=c(length(vectorList),vectorList[[1]]@length)))
}


variableimportance2<-function(outemmremlobj, rulesmat, markernames, GBLUP=F, K=NULL, mc.cores=1){
  if (GBLUP){
    ruleeffects<-crossprod(rulesmat,solve(K,outemmremlobj$uhat))
  }
  else{ruleeffects<-outemmremlobj$uhat}
  namesrules<-colnames(rulesmat)
  ###########read names and extract variable names
  
  
  markercount<-mclapply(X=markernames,FUN=function(x){return(as(str_count(namesrules, paste(x, "<", sep=""))+str_count(namesrules, paste(x, ">=", sep="")),"dsparseVector"))}, mc.cores=mc.cores)
  Markercount<-sameSizeVectorList2Matrix(markercount) 
  dim(Markercount)
  rownames(Markercount)<-markernames
  #importances<-mclapply(1:nrow(Markercount), function(x){return(as(Markercount[x,]*ruleeffects,"dsparseVector"))}, mc.cores=mc.cores)
  
  importances<-mcmapply(function(x){return(as(c(Markercount[x,])*abs(ruleeffects),"sparseMatrix"))}, 1:nrow(Markercount),  mc.cores=mc.cores)
  importances<-Reduce("cBind", importances)
  colnames(importances)<-markernames
  return(importances)
}


variableimportancemclapply2<-function(outemmremlobj, rulesmat, markernames, GBLUP=F, K=NULL, mc.cores=1){
  return(simplify2array(mclapply(X=markernames, FUN=function(x){return(colSums(variableimportance2(outemmremlobj, rulesmat, x, GBLUP, K, mc.cores=1)))}, mc.cores = mc.cores, mc.preschedule = T)))
}


CartProduct = function(CurrentMatrix, NewElement)
{
  
  if (length(dim(NewElement)) != 0 )
  {
    warning("New vector has more than one dimension.")
    return (NULL)
  }
  
  if (length(dim(CurrentMatrix)) == 0)
  {
    CurrentRows = length(CurrentMatrix)
    CurrentMatrix = as.matrix(CurrentMatrix, nrow = CurrentRows, ncol = 1)
  } else {
    CurrentRows = nrow(CurrentMatrix)
  }
  
  var1 = replicate(length(NewElement), CurrentMatrix, simplify=F)
  var1 = do.call("rbind", var1)
  
  var2 = rep(NewElement, CurrentRows)
  var2 = matrix(var2[order(var2)], nrow = length(var2), ncol = 1)
  
  CartProduct = cbind(var1, var2)
  return (CartProduct)
}

variableinteractionmeasures2<-function(outemmremlobj, rulesmat, markernames1,markernames2, GBLUP=F, K=NULL, mc.cores=1){
  if (GBLUP){
    ruleeffects<-crossprod(rulesmat,solve(K,outemmremlobj$uhat))
  }
  else{ruleeffects<-outemmremlobj$uhat}
  namesrules<-colnames(rulesmat)
  ###########read names and extract variable names
  
  markercount1<-mclapply(X=markernames1,FUN=function(xi){return(as(str_count(namesrules, paste(xi, "<", sep=""))+str_count(namesrules, paste(xi, ">=", sep="")),"dsparseVector"))}, mc.cores=mc.cores)
  Markercount1<-sameSizeVectorList2Matrix(markercount1) 
  dim(Markercount1)
  rownames(Markercount1)<-markernames1
  
  markercount2<-mclapply(X=markernames2,FUN=function(xi){return(as(str_count(namesrules, paste(xi, "<", sep=""))+str_count(namesrules, paste(xi, ">=", sep="")),"dsparseVector"))}, mc.cores=mc.cores)
  Markercount2<-sameSizeVectorList2Matrix(markercount2) 
  dim(Markercount2)
  rownames(Markercount2)<-markernames2
  Markercount1[Markercount1>0]<-1
  Markercount2[Markercount2>0]<-1
  dim(Markercount1)
  dim(Markercount2)
  sum(Markercount2>0)
  someFunction<-function(i,j){
    return(sum(abs(Markercount1[i,]*Markercount2[j,]*ruleeffects)))
  }
  cartout = CartProduct(1:length(markernames1), 1:length(markernames2))
  require(parallel)
  aList = mcmapply(someFunction, cartout[,1], cartout[,2],  mc.cores=mc.cores, SIMPLIFY = T)
  
  return(aList)
}

require(compiler)

predictruledf<-cmpfun(predictruledf)
Amat.pieces<-cmpfun(Amat.pieces)
em.mixed<-cmpfun(em.mixed)
localmarkersampler<-cmpfun(localmarkersampler)
loglike<-cmpfun(loglike)
loglikerpart<-cmpfun(loglikerpart)
merpart<-cmpfun(merpart)

meforest<-cmpfun(meforest)
modelwithrules<-cmpfun(modelwithrules)
tr<-cmpfun(tr)

predictruledf<-cmpfun(predictruledf)
predicrulesmeforest <- cmpfun(predicrulesmeforest)
getnamesfromrules<-cmpfun(getnamesfromrules)
getpositionsfromrulesnames<-cmpfun(getpositionsfromrulesnames)
variableimportance<-cmpfun(variableimportance)
variableinteractionmeasures<-cmpfun(variableinteractionmeasures)


###########################################################

LER<-function(y, X=NULL, Z, K, Mlist,map, nsample=10, maxlength=20,ntrees=100,bagging=9,proprow=.1, mc.cores=3,var00=1, var10=1,maxiter=500,tolerance = 1e-6, skipfittree=5, rpartcontrol=NULL, cp=0.03, maxdepth=2){
  
  meforestout<-meforest(y=y, X=X, Z=Z, K=K, Mlist=Mlist,map=map, nsample=nsample, maxlength=maxlength,ntrees=ntrees,bagging=bagging,proprow=proprow, mc.cores=mc.cores,var00=var00, var10=var10,maxiter=maxiter,tolerance = tolerance, skipfittree=skipfittree, rpartcontrol=rpartcontrol, cp=cp, maxdepth=maxdepth)
  getnamesfromrulesout<-getnamesfromrules(meforestout)
  if (is.null(X)){
  predicrulesmeforestout<-predicrulesmeforest(meforestout, Mlist, X=NULL,mc.cores=mc.cores)
  } else {predicrulesmeforestout<-predicrulesmeforest(meforestout, Mlist, X=X,mc.cores=mc.cores)
  }
  R=apply(predicrulesmeforestout, 2, function(y) (y - mean(y)) / sd(y) ^ as.logical(sd(y)))
  modelwithrulesout<-modelwithrules(y=y, X=matrix(1,nrow=length(y)), Z=Z, K=NULL, M=NULL,R=R, map=NULL, useGblup=F)
  if (is.null(X)){
  variableimportancemclapply2out<-variableimportancemclapply2(outemmremlobj=modelwithrulesout, rulesmat=R, markernames=c(rownames(Mlist[[1]])),mc.cores=mc.cores)
  variableinteractionmeasures2out<-variableinteractionmeasures2(outemmremlobj=modelwithrulesout, rulesmat=R, markernames1=c(rownames(Mlist[[1]]))[1:200],markernames2=c(rownames(Mlist[[1]]))[1:200], GBLUP=F, K=NULL, mc.cores=mc.cores)
  } else {
    variableimportancemclapply2out<-variableimportancemclapply2(outemmremlobj=modelwithrulesout, rulesmat=R, markernames=c(colnames(X),rownames(Mlist[[1]])),mc.cores=mc.cores)
    variableinteractionmeasures2out<-variableinteractionmeasures2(outemmremlobj=modelwithrulesout, rulesmat=R, markernames1=c(colnames(X),rownames(Mlist[[1]]))[1:200],markernames2=c(colnames(X),rownames(Mlist[[1]]))[1:200], GBLUP=F, K=NULL, mc.cores=mc.cores)  
  }
  matrixout<-matrix(variableinteractionmeasures2out, nrow=200)
  return(list(meforestout=meforestout,R=R,modelwithrulesout=modelwithrulesout, variableimportancemclapply2out=variableimportancemclapply2out,variableinteractionmeasures2out=matrixout))
}

LER<-cmpfun(LER)


ExtractRules<-function(ytrain,Xtrain,Xtest, nrules,proprow=.2,propcol=.2, maxdepth=3, cp.prune=NULL, alpha=.2){
  require(rpart)  
  
  
  onetree<-function(ytrain,Xtrain,Xtest,proprow=proprow,propcol=propcol, maxdepth=maxdepth){
  	maxdepthin=rpois(1,maxdepth)
while(maxdepthin>maxdepth+1||maxdepthin==0){maxdepthin=rpois(1,maxdepth)}
  maxdepth=maxdepthin
  
    ntt<-sample(1:dim(Xtrain)[[1]],ceiling(dim(Xtrain)[[1]]*proprow))
    ptt<-sample(1:dim(Xtrain)[[2]],ceiling(dim(Xtrain)[[2]]*propcol))
    Xtrain0<-matrix(Xtrain[,ptt], nrow=nrow(Xtrain))
    Xtrain1<-matrix(Xtrain[ntt,ptt], ncol=ncol(Xtrain0))
    Xtest1<-matrix(Xtest[,ptt], ncol=ncol(Xtrain0))
    
    ytrain1<-ytrain[ntt]
  
    data1<-data.frame(p=ytrain1, X0=I(Xtrain1))
    data0<-data.frame(p=ytrain, X0=I(Xtrain0))
    
    
    datatest<-data.frame(X0=I(cbind(Xtest1)))
    dataall<-data.frame(X0=I(rbind(Xtrain0, Xtest1)))

    colnames(data1$X0)<-colnames(data0$X0)<-colnames(datatest$X0)<-colnames(dataall$X0)<-colnames(Xtrain)[ptt]

    tree <- rpart(p~X0, data=data1 , minsplit = 1, minbucket = 2, maxdepth=maxdepth)
    
    if (!is.null(cp.prune)){tree<- prune(tree, cp = cp.prune)}
    #cptarg = sqrt(tree$cptable[3,1]*tree$cptable[4,1])
 
    #tree = prune(tree,cp=cptarg)
    leafnodeRows <- grepl("leaf",tree$frame$var)
    nodevals <- as.numeric(rownames(tree$frame)[leafnodeRows])
    capture.output(rules <- path.rpart(tree,nodevals))
    capture.output(rulesdf <- do.call("rbind",lapply(rules,function(x)paste(x,collapse = " -AND- "))))
    capture.output(rulesdf <- data.frame(nodeNumber=rownames(rulesdf),rule=rulesdf[,1],stringsAsFactors=FALSE))
    #print("Check1")
    ###################
    #############################################################
    # code from Deniz (march 20) to extract the rules
    # also works if the tree has been pruned
    # the current CRAN version of partykit might create an error there (march 2012)
    aaaall<-predict(tree,newdata = dataall)
    
    #print("check2")
    featureall<-as.factor(aaaall)
    aaa<-predict(tree,newdata = data0)
    featuretrain<-factor(aaa, levels(featureall))
    
    #print("check3")
    
    aaatest<-predict(tree,newdata = datatest)
    featuretest<-factor(aaatest,levels(featureall))
    
    
    featuretrain<-model.matrix(~featuretrain-1)
    featuretest<-model.matrix(~featuretest-1)
   
    return(list(ftrain=featuretrain[,1:(dim(featuretrain)[2]-1)],ftest=featuretest[,1:(dim(featuretrain)[2]-1)], rulesdf=rulesdf[1:(dim(featuretrain)[2]-1),]))
  }
  
  rulesdfv<-c()
  ftrainv<-c()
  ftestv<-c()
  ncolrules<-0
  while (ncolrules<nrules){
    outtree<-tryCatch(onetree(ytrain,Xtrain,Xtest,proprow=proprow,propcol=propcol, maxdepth=maxdepth), error=function(e){NULL})
    if(!is.null(outtree)){
      ftrainv<-cbind(ftrainv,outtree$ftrain)
      ftestv<-cbind(ftestv,outtree$ftest)
      rulesdfv<-rbind(rulesdfv, outtree$rulesdf)
      ncolrules<-ncol(ftrainv)
    }
  }
  library("glmnet")
  if (is.null(alpha)){alpha=.2}
  cvglmnetout<-cv.glmnet(as.matrix(ftrainv), ytrain, alpha=alpha)
  glmnetcoefs<-coef(cvglmnetout, s = "lambda.min")
  glmnetcoefsnotzero<-glmnetcoefs[-1]!=0
 print(sum(glmnetcoefsnotzero))
  return(list(ftrainv=ftrainv[,glmnetcoefsnotzero],ftestv=ftestv[,glmnetcoefsnotzero], rulesdfv=rulesdfv[glmnetcoefsnotzero,], glmnetcoefs=glmnetcoefs[-1][glmnetcoefsnotzero]))
}


####################split the genome and extract rules for each region.
splittingfunction<-function(lastsplit,nsplit){
  levelslastsplit<-levels(lastsplit)
  lengthsoflevelslastsplit<-as.numeric(unlist(lapply(levelslastsplit, function(level){length(lastsplit[lastsplit==level])})))
  
  lengthsofnewchunks<-ceiling(lengthsoflevelslastsplit/nsplit)
  newsplit<-c()
  l=0
  for (i in levelslastsplit){
    l=l+1
    lengthused<-0
    for (j in 1:(nsplit-1)){
      newsplit<-c(newsplit, rep(j, lengthsofnewchunks[l]))
      lengthused<-lengthused+lengthsofnewchunks[l]
    }
    newsplit<-c(newsplit, rep(nsplit, (lengthsoflevelslastsplit[l]-lengthused)))
  }
  newsplit<-paste(lastsplit,newsplit,sep="_")
  #print(newsplit)
  return(newsplit)
}

makehierarchy<-function(mrkMap, nsplit=3, depth=3){
  P<-mrkMap
  P$part0<-as.factor(rep("R",dim(P)[1]))
  
  P$part1<-as.factor(P$chrom)
  
  lastsplit<-P$part1
  for (i in 2:depth){
 
    P[,(i+4)]<-splittingfunction(lastsplit=lastsplit,nsplit=nsplit)
    lastsplit<-as.factor(P[,(i+4)])
  }  
  colnamesP<-c("marker", "chrom","loc","part0","part1", paste("part",2:depth,sep=""))
  colnames(P)<-colnamesP
  return(P)
}


extractrulesforpartitionswithpcs<-function(ytrain, Xtrain, PCtrain=NULL, Xtest, PCtest=NULL, Pv, nrules,proprow=.2,propcol=.2, maxdepth=3, cp.prune=NULL, alpha=.2,mc.cores=1){
 
  
  PvU<-unique(Pv)
  lapplyfunc<-function(x){
    Xtrainx<-Xtrain[,Pv%in%x]
    Xtestx<-Xtest[,Pv%in%x]
    if (!is.null(PCtrain)){
    	Xtrainx<-cbind(PCtrain, Xtrainx)
    	    Xtestx<-cbind(PCtest, Xtestx)
        npcs<-ncol(PCtrain)
    pcnames<-paste("pc",1:npcs,sep="")	
   # browser()
    colnames(Xtrainx)<-colnames(Xtestx)<-c(pcnames,colnames(Xtrain)[Pv%in%x])
    	} else {colnames(Xtrainx)<-colnames(Xtestx)<-c(colnames(Xtrain)[Pv%in%x])}

    return(tryCatch(ExtractRules(ytrain=ytrain,Xtrain=Xtrainx,Xtest=Xtestx, nrules=nrules,proprow=proprow,propcol=propcol, maxdepth=maxdepth, cp.prune=cp.prune, alpha=alpha), error=function(e){
      print("encountered errors")
      return(NULL)}))
  } 
  
  if(mc.cores > 1){
  require("parallel")

  out<- mclapply(as.list(PvU), lapplyfunc, mc.cores=mc.cores)
 
  return(out)
}
   if(mc.cores == 1){
  out<-lapply(PvU, lapplyfunc)
  return(out)
  }
}


meanimpute<-function(X){
  imputecol<-function(x){
  x[is.na(x)]<-mean(x[!is.na(x)])
  return(x)
  }
  Ximp<-apply(X,2,imputecol)
  return(as.matrix(Ximp))
  }

sameSizeVectorList2Matrix <- function(vectorList){  
    sm_i<-NULL
    sm_j<-NULL
    sm_x<-NULL
    for (k in 1:length(vectorList)) {
        sm_i <- c(sm_i,rep(k,length(vectorList[[k]]@i)))
        sm_j <- c(sm_j,vectorList[[k]]@i)
        sm_x <- c(sm_x,vectorList[[k]]@x)
    }
return(sparseMatrix(i=sm_i,j=sm_j,x=sm_x,dims=c(length(vectorList),vectorList[[1]]@length)))
}

variableimportance<-function(outemmremlobj, rulesmat, markernames, GBLUP=F, K=NULL, mc.cores=1){
   if (GBLUP){
    ruleeffects<-t(rulesmat)%*%solve(K)%*%outemmremlobj$uhat
  }
  else{ruleeffects<-outemmremlobj$uhat}
  namesrules<-colnames(rulesmat)
  ###########read names and extract variable names
    library(stringr)
    library(Matrix)
    library(parallel)
  
    markercount<-mclapply(markernames,function(x){return(as(str_count(namesrules, paste(x, "<", sep=""))+str_count(namesrules, paste(x, ">", sep="")),"dsparseVector"))}, mc.cores=mc.cores)
    Markercount<-sameSizeVectorList2Matrix(markercount) 
    dim(Markercount)
    rownames(Markercount)<-markernames
     #importances<-mclapply(1:nrow(Markercount), function(x){return(as(Markercount[x,]*ruleeffects,"dsparseVector"))}, mc.cores=mc.cores)
    
    importances<-mcmapply(function(x){return(as(c(Markercount[x,])*ruleeffects,"sparseMatrix"))}, 1:nrow(Markercount),  mc.cores=mc.cores)
    importances<-Reduce("cBind", importances)
  colnames(importances)<-markernames
return(list(counts=Markercount,importances=importances))
}


CartProduct = function(CurrentMatrix, NewElement)
{

if (length(dim(NewElement)) != 0 )
{
warning("New vector has more than one dimension.")
return (NULL)
}

if (length(dim(CurrentMatrix)) == 0)
{
CurrentRows = length(CurrentMatrix)
CurrentMatrix = as.matrix(CurrentMatrix, nrow = CurrentRows, ncol = 1)
} else {
CurrentRows = nrow(CurrentMatrix)
}

var1 = replicate(length(NewElement), CurrentMatrix, simplify=F)
var1 = do.call("rbind", var1)

var2 = rep(NewElement, CurrentRows)
var2 = matrix(var2[order(var2)], nrow = length(var2), ncol = 1)

CartProduct = cbind(var1, var2)
return (CartProduct)
}

variableinteractionmeasures<-function(outemmremlobj, rulesmat, markernames1,markernames2, GBLUP=F, K=NULL, mc.cores=1){
  if (GBLUP){
    ruleeffects<-t(rulesmat)%*%solve(K)%*%outemmremlobj$uhat
  }
  else{ruleeffects<-outemmremlobj$uhat}
  namesrules<-colnames(rulesmat)
  ###########read names and extract variable names
     library(stringr)
    library(Matrix)
     library(parallel)

    markercount1<-mclapply(markernames1,function(x){return(as(str_count(namesrules, paste(x, "<", sep=""))+str_count(namesrules, paste(x, ">", sep="")),"dsparseVector"))}, mc.cores=mc.cores)
    Markercount1<-sameSizeVectorList2Matrix(markercount1) 
    dim(Markercount1)
    rownames(Markercount1)<-markernames1

    markercount2<-mclapply(markernames2,function(x){return(as(str_count(namesrules, paste(x, "<", sep=""))+str_count(namesrules, paste(x, ">", sep="")),"dsparseVector"))}, mc.cores=mc.cores)
    Markercount2<-sameSizeVectorList2Matrix(markercount2) 
    dim(Markercount2)
    rownames(Markercount2)<-markernames2
  Markercount1[Markercount1>0]<-1
  Markercount2[Markercount2>0]<-1
  dim(Markercount1)
  dim(Markercount2)
  
  someFunction<-function(i,j){
return(sum(abs(Markercount1[i,]*Markercount2[j,]*ruleeffects)))
}
cartout = CartProduct(1:length(markernames1), 1:length(markernames2))
require(parallel)
aList = mcmapply(someFunction, cartout[,1], cartout[,2],  mc.cores=mc.cores)

    return(aList)
}


##############
set.seed(123)

topmarkerslistEMMA<-vector(mode="list", length=100)
topmarkerslistLER<-vector(mode="list", length=100)


for (iii in 1:100){
  print(iii)
library(EMMREML)
nrules=4000
maxdepth=3
nsplit=10
alpha=0.001
mc.cores=5
perctrain=.99
nimpmarkers=30
cp.prune=0.001
proprow=.3
propcol=.2
traitcolumn=10

load("MouseData.RData")
genodata01<-genodata01[genodata01$chrom==3,]

####filter
maffilterfunction<-function(x){
  xtable<-table(x[-c(1:3)])
  
  if (length(xtable)==1){return(FALSE)}
  else{
    if (length(xtable)==3){
      f1=2*xtable[names(xtable)=="0"]+xtable[names(xtable)=="1"]
      f2=2*xtable[names(xtable)=="2"]+xtable[names(xtable)=="1"]
      if((f1<.05)||(f2<.05)){return(FALSE)}else{return(TRUE)}
    }
    if (length(xtable)==2){
      if (("0"%in%names(xtable))&&("1"%in%names(xtable))){
        f1=2*xtable[names(xtable)=="0"]+xtable[names(xtable)=="1"]
        if((f1<.05)){return(FALSE)}else{return(TRUE)}
      }
      if (("2"%in%names(xtable))&&("1"%in%names(xtable))){
        f2=2*xtable[names(xtable)=="2"]+xtable[names(xtable)=="1"]
        if((f2<.05)){return(FALSE)}else{return(TRUE)}
      }
      if (("0"%in%names(xtable))&&("2"%in%names(xtable))){
        f1=2*xtable[names(xtable)=="0"]
        f2=2*xtable[names(xtable)=="2"]
        if(((f1<.05)||(f2<.05))){return(FALSE)}else{return(TRUE)}
      }
    }
  }
}


filtermarkers<-c(apply(genodata01,1,maffilterfunction))
genodata01<-genodata01[filtermarkers,]
dim(genodata01)
genodata01[,1]<-paste("M",1:nrow(genodata01), sep="")
phenodataonetrait<-Phenotypes[,c(1,4,traitcolumn)]
colnames(phenodataonetrait)[1]<-"id"

Mmapped<-t(genodata01[,-c(1,2,3)])
rownames(Mmapped)<-colnames(genodata01[,-c(1,2,3)])
indicesmatrix<-mapply(function(x){
  irow<-sample(1:nrow(Mmapped), 1)
  icol<-sample(1:ncol(Mmapped), 1)
  return(c(irow,icol))
},1:10000, SIMPLIFY=T)
dim(indicesmatrix)
#Mmapped[t(indicesmatrix)]<-rbinom(10000,2,.15)

Mmapped<-matrix(rbinom(2000*1000,2,.15), nrow=2000, ncol=1000)
Mmapped[1:50,1:5]
colnames(Mmapped)<-paste("M",1:1000, sep="")
rownames(Mmapped)<-paste("L",1:2000, sep="")


mrkMap<-data.frame(marker=paste("M",1:1000, sep=""), chrom=rep(1,1000), loc=1:1000)
mrkMap1<-makehierarchy(mrkMap=mrkMap, nsplit=5, depth=2)
mrkMap<-makehierarchy(mrkMap=mrkMap, nsplit=nsplit, depth=2)

Phenotype<-data.frame(id=paste("L",1:2000, sep=""), sex=rep(c("M", "F"), each=1000), trait=rep(0,2000))


################################################
####replace the Phenotype[,3] with the simulated phenotype based on mrkMap
dim(Mmapped)
M1<-Mmapped[,mrkMap1[,6]%in%levels(as.factor(mrkMap1[,6]))[1]]
M2<-Mmapped[,mrkMap1[,6]%in%levels(as.factor(mrkMap1[,6]))[2]]
M3<-Mmapped[,mrkMap1[,6]%in%levels(as.factor(mrkMap1[,6]))[3]]
M4<-Mmapped[,mrkMap1[,6]%in%levels(as.factor(mrkMap1[,6]))[4]]
M5<-Mmapped[,mrkMap1[,6]%in%levels(as.factor(mrkMap1[,6]))[5]]
svdMmapped<-svd(Mmapped,nu=3,nv=3)
PCs<-Mmapped%*%svdMmapped$v
PCs<-scale(PCs, center=T, scale=T)
colnames(PCs)<-c("pc1", "pc2", "pc3")
M1<-cbind(PCs, M1)
M2<-cbind(PCs, M2)
M3<-cbind(PCs, M3)
M4<-cbind(PCs, M4)
M5<-cbind(PCs, M5)

M1effect<-apply(M1, 1, function(x){return((.6*x[11]+.5*x[14]-.4*x[17]))})
M1effect<-M1effect/sd(M1effect)
M1markers<-colnames(M1)[c(11,14,17)]
M2effect<-apply(M2, 1, function(x){if(x[1]<0) {return((.6*x[11]-.5*x[14]-.4*x[17]))} else (return(-(.6*x[11]+.5*x[14]+.4*x[17])))})
M2effect<-M2effect/sd(M2effect)
M2markers<-colnames(M2)[c(11,14,17)]
M3effect<-apply(M3, 1, function(x){(.6*x[11]+.5*x[14]-.4*x[17])^2})
M3effect<-M3effect/sd(M3effect)
M3markers<-colnames(M3)[c(11,14,17)]


M4effect<-apply(M4, 1, function(x){if(x[1]<0){return((.6*x[11]+.5*x[14]-.4*x[17])^2)} 
  else {return(-(.6*x[11]-.5*x[14]+.4*x[17])^2)}})
M4effect<-M4effect/sd(M4effect)
M4markers<-colnames(M4)[c(11,14,17)]


M5effect<-apply(M5, 1, function(x){if(x[1]<0){return((.6*x[11]+.5*x[14]-.4*x[17]+.5*x[2])^2)} else {return((-.6*x[11]-.5*x[14]-.4*x[17]+.5*x[2])^2)}})
M5effect<-M5effect/sd(M5effect)
M5markers<-colnames(M5)[c(11,14,17)]
allmarkers<-c(M1markers,M2markers,M3markers,M4markers, M5markers)
Gval<-M1effect+M2effect+M3effect+M4effect+M5effect
Phenotype[,3]<-as.matrix(model.matrix(~-1+Phenotype[,2]))%*%c(50, 55)+Gval+.5*sd(Gval)*rnorm(nrow(Phenotype))

Ntotal<-dim(Phenotype)[1]
trainingsamplesize=ceiling(perctrain*Ntotal)


sampletrain<-sample(rownames(Mmapped), trainingsamplesize, replace=FALSE)

sampletest<-setdiff(rownames(Mmapped), sampletrain)
colnames(Phenotypes)<-c("taxa", "sex","trait")

K<-cov(t(Mmapped))
K<-K/mean(diag(K))
rownames(K)<-colnames(K)<-rownames(Mmapped)
Phenotypestrain<-Phenotype[Phenotype$id%in%sampletrain,]
Phenotypestest<-Phenotype[Phenotype$id%in%sampletest,]
head(Phenotypestrain)

Ztrain<-as.matrix(model.matrix(~factor(Phenotypestrain[,1], levels=rownames(Mmapped))))
Ztest<-as.matrix(model.matrix(~factor(Phenotypestest[,1], levels=rownames(Mmapped))))
colnames(Mmapped)<-mrkMap$marker

Xtrain<-model.matrix(~Phenotypestrain[,2])
Xtest<-model.matrix(~Phenotypestest[,2])


PCtrain<-Ztrain%*%PCs
PCtest<-Ztest%*%PCs
MmappedTrain<-Ztrain%*%Mmapped
MmappedTest<-Ztest%*%Mmapped
rownames(MmappedTrain)<-Phenotypestrain[,1]
#######regular GWAS with rrBLUP
library(rrBLUP)
colnames(Mmapped)<-paste("M",1:1000, sep="")
rownames(Mmapped)<-paste("L",1:2000, sep="")

geno=cbind(data.frame(marker=paste("M",1:1000, sep=""), chrom=rep(1,1000), pos=1:1000),t(MmappedTrain))
Ktrain<-tcrossprod(Ztrain%*%K, Ztrain)

rownames(Ktrain)<-colnames(Ktrain)<-Phenotypestrain[,1]

pheno2 <- Phenotypestrain

colnames(pheno2) <- c("ID","Sex","Trait")


ans.emmax <- GWAS(pheno=pheno2,fixed="Sex",geno=geno,P3D=TRUE,n.core=1,K=Ktrain, plot=F, n.PC=3) 


##########

outemmreml2<-emmreml(y=Phenotypestrain[,3], X=Xtrain, Z=Ztrain, K=K)


yhattrain<-Xtrain%*%outemmreml2$betahat+Ztrain%*%outemmreml2$uhat
Xbetatrain<-Xtrain%*%outemmreml2$betahat


rules<-extractrulesforpartitionswithpcs(ytrain=Phenotypestrain[,3]-Xbetatrain, Xtrain=MmappedTrain, PCtrain=PCtrain,Xtest=MmappedTest, PCtest=PCtest, Pv=mrkMap[,6], nrules=nrules,proprow=proprow,propcol=propcol, maxdepth=maxdepth, cp.prune=cp.prune,mc.cores=mc.cores, alpha=alpha)


Pv<-unique(mrkMap[,6])


rulestrain<-c()
rulestest<-c()
rulesdescdf<-c()
KRuleslist<-vector(mode="list", length=length(rules))
Krulesrestlist<-vector(mode="list", length=length(rules))
for (i in 1:length(rules)){
  rulestrain<-cbind(rulestrain,rules[[i]]$ftrainv)
  KRuleslist[[i]]<-cov(t(rules[[i]]$ftrainv))
  KRuleslist[[i]]<-KRuleslist[[i]]/mean(diag(KRuleslist[[i]]))
  rulestrainnoi<-c()
  for (j in setdiff(1:length(rules), i)){
    rulestrainnoi<-cbind(rulestrainnoi,rules[[j]]$ftrainv)
    
  }
  Krulesrestlist[[i]]<-cov(t(rulestrainnoi))
  Krulesrestlist[[i]]<-Krulesrestlist[[i]]/mean(diag(Krulesrestlist[[i]]))
  rulestest<-cbind(rulestest,rules[[i]]$ftestv)
  rulesdescdf<-rbind(rulesdescdf,cbind(rep(Pv[i],nrow(rules[[i]]$rulesdfv)), rules[[i]]$rulesdfv))
}


rownames(rulestrain)<-Phenotypestrain[,1]
rownames(rulestest)<-Phenotypestest[,1]
colnames(rulestrain)<-colnames(rulestest)<-rulesdescdf[,3]
dim(rulestrain)
rulesmat<-rbind(rulestrain,rulestest)
rulesmat<-rulesmat[match(rownames(K), rownames(rulesmat)),]
Krules<-cov(t(rulesmat))
Krules<-Krules/mean(diag(Krules))
outemmremlKrules<-emmreml(y=Phenotypestrain[,3], X=Xtrain, Z=Ztrain, K=Krules)
outemmremlrules<-emmreml(y=Phenotypestrain[,3], X=Xtrain, Z=rulestrain, K=diag(ncol(rulestrain)))
library(glmnet)
cvglmnetout<-cv.glmnet(rulestrain, Phenotypestrain[,3], alpha=.9)
glmnetcoefs<-coef(cvglmnetout, s = "lambda.min")
outvarimp<-variableimportance(outemmremlobj=outemmremlrules, rulesmat=rulesmat, markernames=paste("X0",c("pc1","pc2","pc3",colnames(Mmapped)),sep=""), GBLUP=F ,ruleeffects=NULL, K=NULL, mc.cores=mc.cores)

outvarimpglmnet<-variableimportance(outemmremlobj=NULL, rulesmat=rulesmat, markernames=paste("X0",c("pc1","pc2","pc3",colnames(Mmapped)),sep=""), GBLUP=F ,ruleeffects=glmnetcoefs[-1], K=NULL, mc.cores=mc.cores)

tablerules<-table(rulesdescdf[,1])
colvecrules<-c()
for (ii in 1:length(tablerules)){
  colvecrules<-c(colvecrules,rep(ii,tablerules[ii]))
}

tablemarkers<-table(mrkMap[,2])
colvecmarkers<-c()
for (ii in 1:length(tablemarkers)){
  colvecmarkers<-c(colvecmarkers,rep(ii,tablemarkers[ii]))
}


#########################
topmarkers<-names(colSums(outvarimp$importances))[order(abs(colSums(outvarimp$importances)), decreasing=T)[1:nimpmarkers]]
####now sort the top markers with respect to the map
topmarkerslistEMMA[[iii]]<-(ans.emmax[order(ans.emmax[,4], decreasing=T),1])[1:nimpmarkers]
topmarkerslistLER[[iii]]<-topmarkers
}
save(topmarkerslistEMMA,topmarkerslistLER, file="simresultsasso.RData" )


emmaassociation<-c()
lerassociation<-c()
for (i in 1:100){
  emmaassociation<-rbind(emmaassociation,allmarkers%in%topmarkerslistEMMA[[i]])
  lerassociation<-rbind(lerassociation,paste("X0",allmarkers,sep="")%in%topmarkerslistLER[[i]])
}

colSums(emmaassociation)
colSums(lerassociation)