## Get necessary packages
install.packages(c("devtools", "ggplot2", "gtools", "plyr", "gdata"))
library(devtools)
install_github(user="behuang", repo="mpMap")
library(mpMap)
library(gtools)
library(plyr)
library(ggplot2)
library(gdata)

## Functions for computing map uncertainty
whichrow<-function(mat,x){
  # Helper function, returns which row of a matrix match a given vector
  which(apply(mat,1,function(y){all(y==x)}))  
}

get_geno <- function(obj){
  if (class(obj)[1]=="ri4self"){
    N <- nrow(obj$geno[[1]]$data)
    founders <- obj$founderGeno
    n.mar <- ncol(founders)
    geno <- obj$geno[[1]]$truegeno
    finals <- sapply(1:n.mar, function(m) founders[geno[,m],m])
  } else if (class(obj)[1]=="mpcross"){
    finals <- obj$finals
  }
  return(finals)
}

gen_N <- function(object, markers=1:3){
  if (class(object)[1]=="riself"){
    finals <- pull.geno(object)-1
  } else {
    finals <- get_geno(object)
  } 
  finals <- finals[,markers]
  finals.df <- as.data.frame(finals)
  finals.df <- na.omit(finals.df)
  padding  <- as.data.frame(permutations(2, 3, c(1,0), repeats=TRUE))
  names(padding) <- names(finals.df)
  finals.df <- rbind(finals.df, padding)
  count.df <- count(finals.df, names(finals.df))
  N <- count.df$freq-1
  return(N)
}

pt3_v2 <- function(g, theta){
  # Return the three point probability of a particular genotype
  # eg pt3(g=c("A", "B", "C"), theta=c(0.1,0.2)) returns the probability
  # of observing A-B-C given theta_12 = 0.1 and theta_23 = 0.2
  x <- as.numeric(mapvalues(g, c("A", "B", "C", "D"), c(0,0,1,1), warn_missing=FALSE))
  # whether the selfing process produces a recombinant genotype or not
  # Ie treating the data as a 2-way RIL, grouping {A and B} and {C and D}
  ri <- abs(diff(x))
  # whether or not recombination occured in the initial cross
  x2 <- as.numeric(mapvalues(g, c("A", "B", "C", "D"), 1:4, warn_missing=FALSE))
  rb <- as.numeric(diff(x2)!=0)
  R <- 2*theta/(1+2*theta)
  if (all(ri == c(0,0))){
    p <- 1/4*(2-R[1]-R[2]-R[3])*(1-theta[1])^{1-rb[1]}*
      (theta[1]^{rb[1]})*(theta[2]^{rb[2]})*(1-theta[2])^{1-rb[2]}*1/2
  } else if (all(ri==c(0,1))){
    p <- 1/4*(-R[1]+R[2]+R[3])*(1-theta[1])^{1-rb[1]}*theta[1]^{rb[1]}*1/4
  } else if (all(ri==c(1,0))){
    p <- 1/4*(R[1]-R[2]+R[3])*(1-theta[2])^{1-rb[2]}*theta[2]^{rb[2]}*1/4
  } else if (all(ri==c(1,1))){
    d <- as.numeric(g[1]!=g[3])
    p <- 1/4*(R[1]+R[2]-R[3])*(1-theta[3])^{1-d}*(theta[3])^{d}*1/4
  } else{
    stop('Genotype Not Recognised')
  }
  return(p)
} 


prob_array <- function(founders, theta){
  prob.out <- array(0, c(8, nrow(theta)))
  perms <- permutations(2, 3, c(0,1), repeats=TRUE)
  geno <- permutations(4, 3, LETTERS[1:4], repeats=T)
  theta <- cbind(theta, theta[,1]+theta[,2]-2*theta[,1]*theta[,2])
  for (i in 1:nrow(geno)){
    g <- geno[i,]
    prob <- apply(theta, 1, function(val){pt3_v2(g, val)})
    snp <- sapply(1:3, function(k){founders[g[k], k]})
    addto <- whichrow(perms, snp)
    prob.out[addto, ] <- prob.out[addto, ]+prob
  }
  return (prob.out)
}



loglk3 <- function(theta,N, founders){
  probs <- as.vector(prob_array(founders, t(theta)))
  loglike <- sum(N*log(probs), na.rm=T)
  return(loglike)
}

like3 <- function(obj, markers, rf.hat){
  # Maximises 3 point likelihood for a triple of markers
  # rf.hat is a matrix of pairwise recombination fractions needed for
  # the initial estimates in the optim function
  # Returns a vector of length 3
  # first two elements are the MLEs of theta_adjacent
  # last element is the maximised likelihood
  N <- gen_N(obj, markers)
  if (class(obj)[1]=="ri4self"){
    founders <- obj$founderGeno[,markers]
  } else if (class(obj)[1]=="mpcross"){
    founders <- obj$founders[,markers]
  }
  initial <-c(rf.hat[markers[1], markers[2]], rf.hat[markers[2], markers[3]])
  # Function limits have to be set to   0  < theta < 0.5 (note  0  <= theta <= 0.5 resuts in errors)
  opt.obj <- optim(initial, loglk3, N=N, founders=founders, control=list(fnscale=-1), method="L-BFGS-B", lower=c(0.00001,0.00001), upper=c(0.49999,0.49999))
  out <- c(opt.obj$par, opt.obj$value)
  return(out)
}



gen_coef <- function(sdp.1, sdp.2){
  phen <- permutations(2, 2, c(0,1), repeats=TRUE)
  A <- numeric(4)
  B <- numeric(4)
  for (i in 1:nrow(phen)){
    p <- phen[i,]
    G1 <- which(sdp.1 == p[1])
    G2 <- which(sdp.2 == p[2])
    is.recombinant <- outer(G1, G2, "==")
    A[i] <- sum(is.recombinant)
    B[i] <- length(is.recombinant)- A[i]
  }
  out <- cbind(A, B)
  colnames(out) <- c("A", "B")
  return(out)
}

prob3 <- function(obj, mrks, theta=NULL){
  # Returns a data frame giving the probability of observing each
  # three point SNP phenotype at a triple of markers
  # Input object can be a mpcross object an Rqtl ri4self object or a matrix of founder SDPS
  if (class(obj)[1]=="mpcross"){
    obj <- subset(obj, markers=mrks)
    map <- unlist(obj$map)
    founders <- obj$founders
  } else if(class(obj)[1]=="ri4self"){
    map <- unlist(obj$geno[[1]]$map)
    obj <- pull.markers(obj,names(map)[mrks])
    founders <- obj$founderGeno[,]
    rownames(founders) <- LETTERS[1:4]
  } else if (class(obj)[1]=="matrix"){
    founders <- obj
    rownames(founders) <- LETTERS[1:4]
  }
  if (is.null(theta) & class(obj)[1] %in% c("mpcross", "ri4self")){
    D <- dist(map)
    theta <- haldaneX2R(as.matrix(D)/100)
  }
  if (class(theta)!="matrix"){
    theta.mat <- matrix(0, 3,3)
    theta.mat[rbind(c(1,2), c(2,1))] <- theta[1]
    theta.mat[rbind(c(2,3), c(3,2))] <- theta[2]
    theta.mat[rbind(c(1,3), c(3,1))] <- theta[1]+theta[2]-2*theta[1]*theta[2]
    theta <- theta.mat
  }
  perms <- permutations(2, 3, c(0,1), repeats=TRUE)
  perms <- apply(perms, 1,paste, collapse="")
  prob.df <- data.frame(pattern=perms, prob=0)
  geno <- permutations(4, 3, LETTERS[1:4], repeats=T)
  for (i in 1:nrow(geno)){
    g <- geno[i,]
    prob <- pt3(g, theta)
    snp <- paste(sapply(1:3, function(k){founders[g[k], k]}), collapse="")
    addto <- match(snp, prob.df$pattern)
    prob.df[addto, "prob"] <- prob.df[addto, "prob"] + prob
  }
  return(prob.df)
}


pt3 <- function(g, theta){
  # Return the three point probability of a particular genotype
  # eg pt3(g=c("A", "B", "C"), theta=c(0.1,0.2)) returns the probability
  # of observing A-B-C given theta_12 = 0.1 and theta_23 = 0.2
  x <- as.numeric(mapvalues(g, c("A", "B", "C", "D"), c(0,0,1,1), warn_missing=FALSE))
  # whether or not "recombination" occured in the selfing process
  # Ie treating the data as a 2-way RIL, grouping {A and B} and {C and D}
  ri <- abs(diff(x))
  x2 <- as.numeric(mapvalues(g, c("A", "B", "C", "D"), 1:4, warn_missing=FALSE))
  # whether or not recombination occured in the initial cross
  rb <- as.numeric(diff(x2)!=0)
  R <- 2*theta/(1+2*theta)
  if (all(ri == c(0,0))){
    p <- 1/4*(2-R[1,2]-R[2,3]-R[1,3])*(1-theta[1,2])^{1-rb[1]}*
      (theta[1,2]^{rb[1]})*(theta[2,3]^{rb[2]})*(1-theta[2,3])^{1-rb[2]}*1/2
  } else if (all(ri==c(0,1))){
    p <- 1/4*(-R[1,2]+R[2,3]+R[1,3])*(1-theta[1,2])^{1-rb[1]}*theta[1,2]^{rb[1]}*1/4
  } else if (all(ri==c(1,0))){
    p <- 1/4*(R[1,2]-R[2,3]+R[1,3])*(1-theta[2,3])^{1-rb[2]}*theta[2,3]^{rb[2]}*1/4
  } else if (all(ri==c(1,1))){
    d <- as.numeric(g[1]!=g[3])
    p <- 1/4*(R[1,2]+R[2,3]-R[1,3])*(1-theta[1,3])^{1-d}*(theta[1,3])^{d}*1/4
  } else{
    stop('Genotype Not Recognised')
  }
  return(p)
} 

## Calculation of covariance for recombination fractions within a triplet
gen_cov_v2 <- function(obj, markers, theta.mat){
  if (class(obj)[1]=="mpcross"){
    obj <- subset(obj, markers=markers)
    founders <- obj$founders[, 1:3]
    map <- obj$map
  } else if(class(obj)[1]=="ri4self"){
    map <- pull.map(obj)
    obj <- pull.markers(obj, names(map[[1]])[markers])
    founders <- obj$founderGeno[,1:3]
  } else if (class(obj)[1]=="matrix"){
    founders <- obj
  }
  n.mar <- 3
  
  coef.12 <- gen_coef(founders[,1], founders[,2])
  A_12 <- coef.12[,1]
  B_12 <- coef.12[,2]
  
  coef.23 <- gen_coef(founders[,2], founders[,3])
  A_23 <- coef.23[,1]
  B_23 <- coef.23[,2]
  
  
  coef.13 <- gen_coef(founders[,1], founders[,3])
  A_13 <- coef.13[,1]
  B_13 <- coef.13[,2]
  
  theta <- theta.mat[rbind(1:2,2:3, c(1,3))]
  t_12 <- theta[1]
  t_23 <- theta[2]
  t_13 <- theta[3]
  
  K_12_11 <- (3*A_12[4]-B_12[4])/((1+2*t_12)*((-1+t_12)*A_12[4]-t_12*B_12[4]))
  K_12_10 <- (3*A_12[3]-B_12[3])/((1+2*t_12)*((-1+t_12)*A_12[3]-t_12*B_12[3]))
  K_12_01 <- (3*A_12[2]-B_12[2])/((1+2*t_12)*((-1+t_12)*A_12[2]-t_12*B_12[2])) 
  K_12_00 <- (3*A_12[1]-B_12[1])/((1+2*t_12)*((-1+t_12)*A_12[1]-t_12*B_12[1])) 
  K_23_11 <- (3*A_23[4]-B_23[4])/((1+2*t_23)*((-1+t_23)*A_23[4]-t_23*B_23[4]))  
  K_23_10 <- (3*A_23[3]-B_23[3])/((1+2*t_23)*((-1+t_23)*A_23[3]-t_23*B_23[3]))  
  K_23_01 <- (3*A_23[2]-B_23[2])/((1+2*t_23)*((-1+t_23)*A_23[2]-t_23*B_23[2]))  
  K_23_00 <- (3*A_23[1]-B_23[1])/((1+2*t_23)*((-1+t_23)*A_23[1]-t_23*B_23[1])) 
  K_13_11 <- (3*A_13[4]-B_13[4])/((1+2*t_13)*((-1+t_13)*A_13[4]-t_13*B_13[4])) 
  K_13_10 <- (3*A_13[3]-B_13[3])/((1+2*t_13)*((-1+t_13)*A_13[3]-t_13*B_13[3])) 
  K_13_01 <- (3*A_13[2]-B_13[2])/((1+2*t_13)*((-1+t_13)*A_13[2]-t_13*B_13[2])) 
  K_13_00 <- (3*A_13[1]-B_13[1])/((1+2*t_13)*((-1+t_13)*A_13[1]-t_13*B_13[1])) 
  
  
  H_12_11 <- -(3*A_12[4]-B_12[4])*((-1+4*t_12)*A_12[4]-(1+4*t_12)*B_12[4])/((1+2*t_12)^2*((-1+t_12)*A_12[4]-t_12*B_12[4])^2)
  H_12_10 <- -(3*A_12[3]-B_12[3])*((-1+4*t_12)*A_12[3]-(1+4*t_12)*B_12[3])/((1+2*t_12)^2*((-1+t_12)*A_12[3]-t_12*B_12[3])^2)
  H_12_01 <- -(3*A_12[2]-B_12[2])*((-1+4*t_12)*A_12[2]-(1+4*t_12)*B_12[2])/((1+2*t_12)^2*((-1+t_12)*A_12[2]-t_12*B_12[2])^2)
  H_12_00 <- -(3*A_12[1]-B_12[1])*((-1+4*t_12)*A_12[1]-(1+4*t_12)*B_12[1])/((1+2*t_12)^2*((-1+t_12)*A_12[1]-t_12*B_12[1])^2)
  H_23_11 <- -(3*A_23[4]-B_23[4])*((-1+4*t_23)*A_23[4]-(1+4*t_23)*B_23[4])/((1+2*t_23)^2*((-1+t_23)*A_23[4]-t_23*B_23[4])^2)
  H_23_10 <- -(3*A_23[3]-B_23[3])*((-1+4*t_23)*A_23[3]-(1+4*t_23)*B_23[3])/((1+2*t_23)^2*((-1+t_23)*A_23[3]-t_23*B_23[3])^2)
  H_23_01 <- -(3*A_23[2]-B_23[2])*((-1+4*t_23)*A_23[2]-(1+4*t_23)*B_23[2])/((1+2*t_23)^2*((-1+t_23)*A_23[2]-t_23*B_23[2])^2)
  H_23_00 <- -(3*A_23[1]-B_23[1])*((-1+4*t_23)*A_23[1]-(1+4*t_23)*B_23[1])/((1+2*t_23)^2*((-1+t_23)*A_23[1]-t_23*B_23[1])^2)
  H_13_11 <- -(3*A_13[4]-B_13[4])*((-1+4*t_13)*A_13[4]-(1+4*t_13)*B_13[4])/((1+2*t_13)^2*((-1+t_13)*A_13[4]-t_13*B_13[4])^2)
  H_13_10 <- -(3*A_13[3]-B_13[3])*((-1+4*t_13)*A_13[3]-(1+4*t_13)*B_13[3])/((1+2*t_13)^2*((-1+t_13)*A_13[3]-t_13*B_13[3])^2)
  H_13_01 <- -(3*A_13[2]-B_13[2])*((-1+4*t_13)*A_13[2]-(1+4*t_13)*B_13[2])/((1+2*t_13)^2*((-1+t_13)*A_13[2]-t_13*B_13[2])^2)
  H_13_00 <- -(3*A_13[1]-B_13[1])*((-1+4*t_13)*A_13[1]-(1+4*t_13)*B_13[1])/((1+2*t_13)^2*((-1+t_13)*A_13[1]-t_13*B_13[1])^2)
  
  r_12 <- t_12/(4*(1+2*t_12))
  r_23 <- t_23/(4*(1+2*t_23))
  r_13 <- t_13/(4*(1+2*t_13))
  
  nr_12 <- (1-t_12)/(4*(1+2*t_12))
  nr_23 <- (1-t_23)/(4*(1+2*t_23))
  nr_13 <- (1-t_13)/(4*(1+2*t_13))
  
  P_12_11 <- A_12[4]*nr_12+B_12[4]*r_12
  P_12_10 <- A_12[3]*nr_12+B_12[3]*r_12
  P_12_01 <- A_12[2]*nr_12+B_12[2]*r_12
  P_12_00 <- A_12[1]*nr_12+B_12[1]*r_12
  P_23_11 <- A_23[4]*nr_23+B_23[4]*r_23
  P_23_10 <- A_23[3]*nr_23+B_23[3]*r_23
  P_23_01 <- A_23[2]*nr_23+B_23[2]*r_23
  P_23_00 <- A_23[1]*nr_23+B_23[1]*r_23
  P_13_11 <- A_13[4]*nr_13+B_13[4]*r_13
  P_13_10 <- A_13[3]*nr_13+B_13[3]*r_13
  P_13_01 <- A_13[2]*nr_13+B_13[2]*r_13
  P_13_00 <- A_13[1]*nr_13+B_13[1]*r_13
  
  
  R_12 <- 8*r_12
  R_13 <- 8*r_13
  R_23 <- 8*r_23
  
  prob.df <- prob3(obj, 1:3, theta.mat)
  
  P_000 <- subset(prob.df, pattern=="000", select=prob, drop=TRUE)
  P_001 <- subset(prob.df, pattern=="001", select=prob, drop=TRUE)
  P_010 <- subset(prob.df, pattern=="010", select=prob, drop=TRUE)
  P_011 <- subset(prob.df, pattern=="011", select=prob, drop=TRUE)
  P_100 <- subset(prob.df, pattern=="100", select=prob, drop=TRUE)
  P_101 <- subset(prob.df, pattern=="101", select=prob, drop=TRUE)
  P_110 <- subset(prob.df, pattern=="110", select=prob, drop=TRUE)
  P_111 <- subset(prob.df, pattern=="111", select=prob, drop=TRUE)
  
  c_12_23 <- K_12_11*K_23_11*(P_111-P_12_11*P_23_11) + K_12_11*K_23_10*(P_110-P_12_11*P_23_10) +
    K_12_11*K_23_01*(-P_12_11*P_23_01) + K_12_11*K_23_00*(-P_12_11*P_23_00) +
    K_12_10*K_23_11*(-P_12_10*P_23_11)+ K_12_10*K_23_10*(-P_12_10*P_23_10) +
    K_12_10*K_23_01*(P_101-P_12_10*P_23_01) + K_12_10*K_23_00*(P_100-P_12_10*P_23_00) +  
    K_12_01*K_23_11*(P_011-P_12_01*P_23_11) + K_12_01*K_23_10*(P_010-P_12_01*P_23_10) +
    K_12_01*K_23_01*(-P_12_01*P_23_01) + K_12_01*K_23_00*(-P_12_01*P_23_00) +
    K_12_00*K_23_11*(-P_12_00*P_23_11) + K_12_00*K_23_10*(-P_12_00*P_23_10) +
    K_12_00*K_23_01*(P_001-P_12_00*P_23_01) + K_12_00*K_23_00*(P_000-P_12_00*P_23_00)
  
  c_12_13 <- K_12_11*K_13_11*(P_111-P_12_11*P_13_11)+K_12_11*K_13_10*(P_110-P_12_11*P_13_10) +
    K_12_11*K_13_01*(-P_12_11*P_13_01)+ K_12_11*K_13_00*(-P_12_11*P_13_00)+
    K_12_10*K_13_11*(P_101-P_12_10*P_13_11) + K_12_10*K_13_10*(P_100-P_12_10*P_13_10)+
    K_12_10*K_13_01*(-P_12_10*P_13_01)+K_12_10*K_13_00*(-P_12_10*P_13_00) +
    K_12_01*K_13_11*(-P_12_01*P_13_11) + K_12_01*K_13_10*(-P_12_01*P_13_10) +
    K_12_01*K_13_01*(P_011-P_12_01*P_13_01) + K_12_01*K_13_00*(P_010-P_12_01*P_13_00) +
    K_12_00*K_13_11*(-P_12_00*P_13_11) +K_12_00*K_13_10*(-P_12_00*P_13_10) +
    K_12_00*K_13_01*(P_001-P_12_00*P_13_01) + K_12_00*K_13_00*(P_000-P_12_00*P_13_00)
  
  c_13_23 <- K_13_11*K_23_11*(P_111-P_13_11*P_23_11)+K_13_11*K_23_10*(-P_13_11*P_23_10) +
    K_13_11*K_23_01*(P_101 - P_13_11*P_23_01) + K_13_11*K_23_00*(-P_13_11*P_23_00) +
    K_13_10*K_23_11*(-P_13_10*P_23_11) + K_13_10*K_23_10*(P_110-P_13_10*P_23_10) +
    K_13_10*K_23_01*(-P_13_10*P_23_01) + K_13_10*K_23_00*(P_100 - P_13_10*P_23_00) +
    K_13_01*K_23_11*(P_011-P_13_01*P_23_11) + K_13_01*K_23_10*(-P_13_01*P_23_10) +
    K_13_01*K_23_01*(P_001-P_13_01*P_23_01) + K_13_01*K_23_00*(-P_13_01*P_23_00) +
    K_13_00*K_23_11*(-P_13_00*P_23_11) + K_13_00*K_23_10*(P_010-P_13_00*P_23_10) +
    K_13_00*K_23_01*(-P_13_00*P_23_01) + K_13_00*K_23_00*(P_000 - P_13_00*P_23_00)
  
  
  V_12 <- K_12_11^2*(P_12_11-P_12_11^2) + K_12_10^2*(P_12_10-P_12_10^2) +
    K_12_01^2*(P_12_01-P_12_01^2) + K_12_00^2*(P_12_00-P_12_00^2) +
    2*(K_12_11*K_12_10*(-P_12_11*P_12_10)+K_12_11*K_12_01*(-P_12_11*P_12_01)+
         K_12_11*K_12_00*(-P_12_11*P_12_00)+K_12_10*K_12_01*(-P_12_10*P_12_01)+
         K_12_10*K_12_00*(-P_12_10*P_12_00)+K_12_01*K_12_00*(-P_12_01*P_12_00))
  
  V_23 <- K_23_11^2*(P_23_11-P_23_11^2) + K_23_10^2*(P_23_10-P_23_10^2) +
    K_23_01^2*(P_23_01-P_23_01^2) + K_23_00^2*(P_23_00-P_23_00^2) +
    2*(K_23_11*K_23_10*(-P_23_11*P_23_10)+K_23_11*K_23_01*(-P_23_11*P_23_01)+
         K_23_11*K_23_00*(-P_23_11*P_23_00)+K_23_10*K_23_01*(-P_23_10*P_23_01)+
         K_23_10*K_23_00*(-P_23_10*P_23_00)+K_23_01*K_23_00*(-P_23_01*P_23_00))
  
  V_13 <- K_13_11^2*(P_13_11-P_13_11^2) + K_13_10^2*(P_13_10-P_13_10^2) +
    K_13_01^2*(P_13_01-P_13_01^2) + K_13_00^2*(P_13_00-P_13_00^2) +
    2*(K_13_11*K_13_10*(-P_13_11*P_13_10)+K_13_11*K_13_01*(-P_13_11*P_13_01)+
         K_13_11*K_13_00*(-P_13_11*P_13_00)+K_13_10*K_13_01*(-P_13_10*P_13_01)+
         K_13_10*K_13_00*(-P_13_10*P_13_00)+K_13_01*K_13_00*(-P_13_01*P_13_00))
  
  V <- matrix(0, ncol=3, nrow=3)
  V[1,1] <- V_12
  V[2,2] <- V_23
  V[3,3] <- V_13
  
  V[rbind(c(1,2), c(2,1))] <- c_12_23
  V[rbind(c(1,3), c(3,1))] <- c_12_13
  V[rbind(c(2,3), c(3,2))] <- c_13_23
  
  J <- matrix(0, ncol=3, nrow=3)
  
  H_12 <- H_12_11*P_12_11 + H_12_10*P_12_10 + H_12_01*P_12_01 + H_12_00*P_12_00
  H_23 <- H_23_11*P_23_11 + H_23_10*P_23_10 + H_23_01*P_23_01 + H_23_00*P_23_00
  H_13 <- H_13_11*P_13_11 + H_13_10*P_13_10 + H_13_01*P_13_01 + H_13_00*P_13_00	
  
  J[1,1] <- H_12
  J[2,2] <- H_23
  J[3,3] <- H_13 			   
  
  J <- (-1)*J
  Sigma <- (solve(J) %*% V %*% solve(J))
  return(Sigma)
}

VuongTest <- function(object, triple, chr=1){
  # Perform Vuong's Closeness Test at a triple of markers
  # Function returns a vector with OSS, test statistic, p-value and second best ordering
  # p-value is for a two-sided test, ie models are different
  # It saves time if the input object has a rf$theta component with estimated   recombination fractions
  # Argument triple is taken to be estimated order
  # Best model (from 3pt likelihood) from alternate orderings is chosen as the  comparison model
  # In the output, if alternate==1 it means that the best alternate order was   triple[c(1,3,2)]
  # In the output, if alternate==2 it means that the best alternate order was   triple[c(3,1,2)]
  object <- subset(object, chr=chr)
  if (class(object)[1]=="mpcross"){
     founders <- object$founders
   } else if(class(object)[1]=="ri4self"){
     founders <- object$founderGeno
     rownames(founders) <- LETTERS[1:4]
   }
   if (is.null(object$rf$theta))
    rf.hat <- estim.rf(object)  else rf.hat <- object$rf$theta
   # Calculating OSS
   L.alt <- numeric(2)
   mod1 <- like3(object, triple[1:3], rf.hat)
   mod2 <- like3(object, triple[c(1,3,2)], rf.hat)
   mod3 <- like3(object, triple[c(3,1,2)], rf.hat)
   L.est <- mod1[3]
   L.alt <- c(mod2[3], mod3[3])
   OSS <- (L.est - max(L.alt)) * log(exp(1))/log(10)
   theta1 <- mod1[1:2]
   # Which is the second best model
   modelb <- which.max(L.alt)
   if (modelb==1){
     theta2 <- mod2[1:2]
     altord <-c(1,3,2)
   } else {
     theta2 <- mod3[1:2]
     altord <- c(3,1,2)
   }
  # Likelihood Ratio for numerator of test statistic
  LR <- (L.est - max(L.alt))
  # Calculating denominator of test statistic
  f1 <- prob3(founders[, triple], mrks=triple, theta=theta1)
  f2 <- prob3(founders[, triple[altord]], mrks=triple[altord], theta=theta2)
  perms <- permutations(2, 3, c(0,1), repeats=TRUE)
  geno <- get_geno(object)
  geno <- geno[, triple]
  geno <- as.data.frame(geno)
  geno <- na.omit(geno)
  N <- nrow(geno)
  prob1 <- numeric(N)
  prob2 <- numeric(N)
  for (i in 1:N){
    yi1 <- paste0(geno[i,], collapse="")
    yi2 <- paste0(geno[i,altord], collapse="")
    ycat1 <- match(yi1, f1$pattern)
    ycat2 <- match(yi2, f2$pattern)
    prob1[i] <- f1[ycat1, "prob"]
    prob2[i] <- f2[ycat2, "prob"]
  }
  # Estimated standard deviation
  omega <- sd(log(prob1/prob2))
  test.stat <- LR/(sqrt(N)*omega)
  p.value <- 2*pnorm(abs(test.stat), lower.tail=FALSE)
  output <- c(OSS, LR, test.stat, p.value, modelb)
  names(output) <- c("OSS", "LR", "test.stat", "p.value", "alternate")
  return(output)
}


## Read in genotype and map data
dat4 <- read.csv("FileS2.csv", skip=1, h=T, row.names=1)

## Create object for analysis
ped = sim.mpped(4, 1, nrow(dat4)-5)
finals=dat4[6:nrow(dat4),]
founders=dat4[2:5,]
mp = mpcross(finals=finals, founders=founders, pedigree=ped, id=which(ped[,4]==1), fid=1:4)
mp$map=list()
mp$map[["3B"]]=as.numeric(dat4[1,])
names(mp$map[["3B"]])=colnames(dat4)

  ## Now run the analysis for all triples on the chromosome.
  subobj <- mp
  # Taking one marker from each bin
  subobj <- subset(subobj, markers=which(!duplicated(subobj$map[[1]])))
  subobj <- mpestrf(subobj, r=c(seq(0, 0.1, by=0.0001), seq(0.1,0.5, by=0.01)))
  subobj <- mpimputerf(subobj)
  
  rf.hat <- subobj$rf$theta
  rf.hat[rf.hat==0] <- .0001  

  n.mar <- ncol(subobj$finals)
  first.mar <- 1:(n.mar-2)
  triplets <- cbind(first.mar, first.mar+1, first.mar+2)
  
  supscore <- rep(NA, nrow(triplets))
  post.probs <- array(0, c(nrow(triplets), 3))
  p.values <- rep(NA, nrow(triplets))
  L.alt <- numeric(2)
  n.trip <- nrow(triplets)

  vuong <- list()
  for (j in 1:3){
    print(j)
    triple <- triplets[j,]
    # Calculating order support score
    # like3 function returns rf estimates as well as the likelihood
    L.cor <- like3(subobj, triple[1:3], rf.hat)[3]
    L.alt[1]<- like3(subobj, triple[c(1,3,2)], rf.hat)[3]
    L.alt[2] <- like3(subobj, triple[c(3,1,2)], rf.hat)[3]
    supscore[j] <- (L.cor - max(L.alt)) * log(exp(1))/log(10)
    # Calculating p-value
    # This might require estimating the recombination fractions on a very fine grid to work properly
    theta.hat <- c(rf.hat[triple[1],triple[2]], rf.hat[triple[2],triple[3]], rf.hat[triple[1],triple[3]])
    G <- cbind(c(-1+2*theta.hat[2], -1+2*theta.hat[1], 1))
    theta.rest <-  rf.hat[triple, triple]
    theta.rest[rbind(c(1,3), c(3,1))] <- rf.hat[triple[1],triple[2]]+ rf.hat[triple[2],triple[3]]-
      2*rf.hat[triple[1],triple[2]]*rf.hat[triple[2],triple[3]]
    n.ind <- sum(gen_N(subobj, triple))
    V <- try(gen_cov_v2(subobj, triple,theta.rest)/n.ind, silent=TRUE)
    # If triplet of founders includes a combination where the rf is inestimable
    if (class(V)=="try-error") next
    g <- rf.hat[triple[1],triple[3]]-rf.hat[triple[1],triple[2]]-rf.hat[triple[2],triple[3]]+2*rf.hat[triple[1],triple[2]]*rf.hat[triple[2],triple[3]]
    test.stat <- g * (t(G) %*% V %*% G)^{-1} * g
    p.value <- pchisq(test.stat, 1, lower.tail=FALSE)
    p.values[j] <- p.value
    vuong[[j]] <- VuongTest(subobj, triple)
  }
  
  central.marker <- unlist(subobj$map)[triplets[,2]]
  mat <- data.frame(MarkerPos=central.marker, OSS=supscore, vuongp=unlist(lapply(vuong, function(x) x[4])), pval=p.values)


############ Generate plots from results
library(ggplot2)

## Figure 3A
dat <- data.frame(Position=central.marker, MMU=-log10(p.values))
col <- rep("Certain", nrow(dat))
col[p.values < .05/111] <- "Uncertain"
col[p.values < 1e-20] <- "Very Uncertain"
dat$cat <- factor(col, levels=c("Certain", "Uncertain", "Very Uncertain"))

dat2 <- data.frame(Position=central.marker, MMU=loess(-log10(p.values)~central.marker, span=.15)$fitted, cat=factor(rep("Very Uncertain", 111)))
jpeg("Fig3A.jpg", height=5, width=5, units="in", res=600)
p <- ggplot(dat, aes(x=Position, y=MMU, col=cat))+geom_point(size=3)+theme_bw()+geom_line(data=dat2)
p <- p + labs(colour="Uncertainty")+xlab("Genetic Map (cM)")+theme(legend.position=c(.8, .8))
p <- p+scale_colour_manual(values=c("#a6d96a", "#fdae61", "#d7191c"))
p
dev.off()

## Also want to plot the p-value against the distance to closest marker or average distance
mindist <- vector(length=length(poss))
for (i in 1:n.trip) mindist[i] <- min(diff(obj$map[[1]][i+0:2]))
avgdist <- vector(length=length(poss))
for (i in 1:n.trip) avgdist[i] <- mean(diff(obj$map[[1]][i+0:2]))

dat <- data.frame(MinDist=avgdist, MMU=-log10(p.values))
dat2 <- data.frame(MinDist=avgdist, MMU=loess(-log10(p.values)~avgdist, span=.55)$fitted, cat=factor(rep("Very Uncertain", 111)))
col <- rep("Certain", nrow(dat))
col[poss<.05/111 & p.values< .05/111] <- "Uncertain MMU & OSS"
col[poss>.05/111 & p.values < .05/111] <- "Uncertain MMU"
col[poss<.05/111 & p.values > .05/111] <- "Uncertain OSS"
dat$cat <- factor(col, levels=c("Certain", "Uncertain MMU", "Uncertain OSS", "Uncertain MMU & OSS"))
dat$avgdist <- avgdist
jpeg("Fig3B.jpg", height=5, width=5, units="in", res=600)
p <- ggplot(dat, aes(x=MinDist, y=MMU, col=cat))+geom_point(size=3)+theme_bw()+scale_colour_manual(values=c("#a6d96a", "#fdae61", "#6baed6", "#d7191c"))
p <- p + labs(colour="Uncertainty")+xlab("Average distance between markers (cM)")+theme(legend.position=c(.7, .8))
p+scale_x_log10()
dev.off()

### Read in data from TableS2.xls
seq3b <- list()
for (i in 1:4) seq3b[[i]] <- read.xls("TableS2.xls", sheet=i)

pos <- do.call("rbind", lapply(seq3b, function(x) x[,c(1,3)]))
pos <- pos[!duplicated(pos[,1]),]
mrk <- intersect(pos[,1], colnames(mp$finals))
index <- sapply(mp$map[[1]][match(mrk, names(mp$map[[1]]))], function(x) which.min(abs(central.marker-x)))

dat <- data.frame(PhysMap=pos[match(mrk, pos[,1]),2], GenMap=mp$map[[1]][match(mrk, names(mp$map[[1]]))], Signif=-log10(p.values[index]))
SigCat <- vector(length=nrow(dat))
SigCat[dat$Signif < -log10(.05/111)] <- 1
SigCat[dat$Signif > -log10(.05/111) & dat$Signif < 20] <- 2
SigCat[dat$Signif > 20] <- 3
## Need to mark markers where genetic and physical positions do not match up. 
index <- sapply(c(64656, 19180, 40595, 497469, 19982, 17082, 491667, 108114, 16378, 19778, 1379957, 444579), function(x) grep(x, rownames(dat)))
incon <- rep(0, nrow(dat))
incon[index] <- 1
dat$Inconsistency <- factor(incon)
dat$Uncertainty <- factor(SigCat)
dat2 <- dat[!is.na(dat[,3]),]
jpeg("Fig4.jpg", height=5, width=5, units="in", res=600)
p <- ggplot(dat2, aes(y=PhysMap, x=GenMap, col=Uncertainty, shape=Inconsistency))+theme_bw()+geom_point(size=3)+scale_colour_manual(values=c("#a6d96a", "#fdae61", "#d7191c"), labels=c("MMU<5", "5<MMU<20", "MMU>20"))+scale_shape_manual(values=c(19, 8))
p <- p+theme(legend.position=c(.8, .6))+scale_x_continuous("Genetic Map (cM)") + scale_y_continuous("Physical Map (bp)")
p <- p+guides(shape=F)
p
dev.off()