# Calculate test statistics and p-value using two-part statistical test
# Used for comparing two zero-inflated samples, each of which can have non-zero mean 
# Composite null hypothesis: equal proportion of non-zero values, and equal mean of non-zero values
# Not applicable for paired samples (yet)
# Citation: https://doi.org/10.1371/journal.pone.0020296 and https://doi.org/10.2202/1544-6115.1425

# Input: 2 vectors of the 2 samples
# Output: test statistics X and corresponding p-value
#         output$statistic and output$p.value
#         output$statistic1 : statistics of the proportion test
#         output$statistic2 : statistics of the mean test

twopart_statistics <- function(x,y) {
  df <- 0 # degrees of freedom for chi-square distribution of the two-part statistics
  
  #----- Calculate the test statistics for equal (non-zero) proportion (same as chi-squared test with continuity correction)
  # Total number of values in each sample
  nx <- length(x)
  ny <- length(y)
  # Number of non-zero values in each sample
  mx <- sum(x>0)
  my <- sum(y>0)
  
  # Non-zero proportion of each sample
  px <- mx/nx
  py <- my/ny
  p <- (mx + my)/(nx + ny)
  
  # First test statistics
  # If at least one group has no zero values or no non-zero values, set the first statistics to 0
  if (p*(1-p) == 0) {
    Z <- 0
  } else {
    df <- df + 1
    Z <- (abs(px-py) - (1/nx+1/ny)/2) / sqrt(p * (1-p) * (1/nx+1/ny))
  }
  
  #----- Calculate the test statistics for the meana of non-zero values (same as Mann-Whitney U test)
  # Non-zero components
  x_non0 <- x[x > 0]
  y_non0 <- y[y > 0]
  xy_non0 <- c(x_non0, y_non0)
  
  # Sum of ranks of sample 1 (ties are averaged)
  R <- rank(xy_non0, ties.method='average')
  R1 <- sum(R[1:length(x_non0)])
  
  # Wilcoxon test statistics
  U <- mx*my + mx*(mx+1)/2 - R1
  
  # Tie correction (more info: http://s3-euw1-ap-pe-ws4-cws-documents.ri-prod.s3.amazonaws.com/9780415819947/explanations_wilcoxon_rank.pdf)
  xy_unique <- unique(xy_non0) # unique non-zero values
  tie_correct <- 0
  for (xy in xy_unique) {
    ti <- sum(xy_non0 == xy)
    tie_correct <- tie_correct + ti*( ti^2 - 1 )
  }
  
  # Normalization and continuity correction
  mu <- mx * my/2
  sig <- sqrt( mx*my/12 * (mx+my+1 - tie_correct/(mx+my)/(mx+my-1)) )
  
  # Second test statistics
  # If at least one group has no non-zero values, set the second statistics to 0
  if (mx*my == 0){
    W <- 0
  } else {
    df <- df + 1
    W <- ( abs(U-mu) - 0.5 ) / sig
  }
  
  #----- Two-part statistics
  X2 <- Z^2 + W^2
  
  return(list('statistic' = X2, 'p.value' = pchisq(X2, df=df, lower.tail=FALSE), 'statistic1' = Z, 'statistic2' = W))
}