# Calculate test statistics and p-value using two-part statistical test # Used for comparing two zero-inflated samples, each of which can have non-zero mean # Composite null hypothesis: equal proportion of non-zero values, and equal mean of non-zero values # Not applicable for paired samples (yet) # Citation: https://doi.org/10.1371/journal.pone.0020296 and https://doi.org/10.2202/1544-6115.1425 # Input: 2 vectors of the 2 samples # Output: test statistics X and corresponding p-value # output$statistic and output$p.value # output$statistic1 : statistics of the proportion test # output$statistic2 : statistics of the mean test twopart_statistics <- function(x,y) { df <- 0 # degrees of freedom for chi-square distribution of the two-part statistics #----- Calculate the test statistics for equal (non-zero) proportion (same as chi-squared test with continuity correction) # Total number of values in each sample nx <- length(x) ny <- length(y) # Number of non-zero values in each sample mx <- sum(x>0) my <- sum(y>0) # Non-zero proportion of each sample px <- mx/nx py <- my/ny p <- (mx + my)/(nx + ny) # First test statistics # If at least one group has no zero values or no non-zero values, set the first statistics to 0 if (p*(1-p) == 0) { Z <- 0 } else { df <- df + 1 Z <- (abs(px-py) - (1/nx+1/ny)/2) / sqrt(p * (1-p) * (1/nx+1/ny)) } #----- Calculate the test statistics for the meana of non-zero values (same as Mann-Whitney U test) # Non-zero components x_non0 <- x[x > 0] y_non0 <- y[y > 0] xy_non0 <- c(x_non0, y_non0) # Sum of ranks of sample 1 (ties are averaged) R <- rank(xy_non0, ties.method='average') R1 <- sum(R[1:length(x_non0)]) # Wilcoxon test statistics U <- mx*my + mx*(mx+1)/2 - R1 # Tie correction (more info: http://s3-euw1-ap-pe-ws4-cws-documents.ri-prod.s3.amazonaws.com/9780415819947/explanations_wilcoxon_rank.pdf) xy_unique <- unique(xy_non0) # unique non-zero values tie_correct <- 0 for (xy in xy_unique) { ti <- sum(xy_non0 == xy) tie_correct <- tie_correct + ti*( ti^2 - 1 ) } # Normalization and continuity correction mu <- mx * my/2 sig <- sqrt( mx*my/12 * (mx+my+1 - tie_correct/(mx+my)/(mx+my-1)) ) # Second test statistics # If at least one group has no non-zero values, set the second statistics to 0 if (mx*my == 0){ W <- 0 } else { df <- df + 1 W <- ( abs(U-mu) - 0.5 ) / sig } #----- Two-part statistics X2 <- Z^2 + W^2 return(list('statistic' = X2, 'p.value' = pchisq(X2, df=df, lower.tail=FALSE), 'statistic1' = Z, 'statistic2' = W)) }