#We suggest removing all objects in your enviroment fist 
rm(list = ls())

#read supplementary data
prov.data <- read.csv("Supplementary materials_Data.csv")


#install metap (for combining probabilities) if you haven't before
if(!"metap" %in% installed.packages()[,"Package"])  install.packages("metap")


#### TEST 1 ###############
##### Probability that only one bird is at the colony at any time by shuffling all 10 min segments #####
#set number of iterations
reps = 10000

#t1 == test1
t1.pair.period <- parallel::mclapply(unique(prov.data$pair.obs.period), mc.cores = 3, function(i)
{
  #subset for each nest.observation (nest+observation period)
  a <- prov.data[prov.data$pair.obs.period == i,]
  
  #put male and female data as independent columns (sex is a single column in p) 
  d <- cbind(as.character(a$trip.type[a$sex == "m"]), as.character(a$trip.type[a$sex == "f"]))
  
  #convert NAs to at the colony (CO)
  # d <- apply(b, 2, function(x) {levels(x)<-c("ST","LT","CO") 
  # x[is.na(x)]<-"CO"
  # return(x)})
  
  #count how many periods have 1 member of the pair at ST and the other at LT
  obs <- length(which(apply(d, 1, function(x) any(all(x == c("ST", "LT")), all(x == c("LT", "ST"))))))
  
  
  #create matrix with labels that will be use to split "ST", "LT" and "CO"
  mat <- matrix(nrow = nrow(d), ncol = 2)
  mat[1, ] <- 1

  for(e in 2:nrow(d))
  {
  #males  
  if(d[e - 1, 1] == d[e, 1])  mat[e, 1] <- mat[e - 1, 1] else  mat[e, 1] <- mat[e - 1, 1] + 1
  
  #females  
  if(d[e - 1, 2] == d[e, 2])  mat[e, 2] <- mat[e - 1, 2] else  mat[e, 2] <- mat[e - 1, 2] + 1
    }
  
  #now split sequences as elements of a list independently for males and females
  seqlmales <- split(d[,1], mat[,1])
  seqlfemales <- split(d[,2], mat[,2])
  
  #name then with the event type
  names(seqlmales) <- sapply(seqlmales, function(x) x[1])
  names(seqlfemales) <- sapply(seqlfemales, function(x) x[1])
  
    #randomization
  exp <- sapply(1:reps, function(j)
  {

    #randomized position at colony sequences for both male and female
    seqlmales[which(names(seqlmales) == "CO")] <- seqlmales[sample(which(names(seqlmales) == "CO"))]
    seqlfemales[which(names(seqlfemales) == "CO")] <- seqlfemales[sample(which(names(seqlfemales) == "CO"))]
    
    #randomized position of male away from colony sequences (LT, ST) for both male and female
    seqlmales[which(names(seqlmales) != "CO")] <- seqlmales[sample(which(names(seqlmales) != "CO"))]
    seqlfemales[which(names(seqlfemales) != "CO")] <- seqlfemales[sample(which(names(seqlfemales) != "CO"))]
    
    #put ramdomized sequences back into a matrix
    f <- cbind(unlist(seqlmales), unlist(seqlfemales)) 
    
    #count how many periods have 1 member of the pair at ST and the other at LT
    exp <- length(which(apply(f, 1, function(x) any(all(x == c("ST", "LT")), all(x == c("LT", "ST"))))))
      })
  
  #calculate p value
  p <- length(exp[exp > obs])/reps
  
  #change 0 and 1 p values so probability combining function do not crash
  #convert zeros to the smallest probability expected according to the number of iterations (reps)
  if(p == 0) p <- 1/reps
  
  #convert ones (1) to the highest probability expected according to the number of iterations (reps)
  if(p == 1) p <- (reps - 1)/reps
  
  #return a data frame including p value nes
  return(data.frame(pair = a$pair[1], pair.obs.period = i, obs, mean.exp = mean(exp), pvalue = p))
  }    
  )  

# put togheter in a single file
t1.pair.period <- do.call(rbind, t1.pair.period)

### combine p value within each pair ###
# any of this methods for combining probabilities can be used, we reported only the sumlog (Fisher method)
# but all were tested. They all provide qualitatively equivalent results

#Fisher's method
# combp <- function(x) metap::sumlog(x)

# logit method
# combp <- function(x) metap::logitp(x)

# sum z method
combp <- function(x) metap::sumz(x)


#running loop on each pair to get a single p value per pair
t1.pair <- lapply(unique(t1.pair.period$pair), function(x)
  {
  #select the rows for that particular pair
  Y <- t1.pair.period[t1.pair.period$pair == x,]
  
  #if more than one row then combine p values the test, else keep the single pvalue
  if(nrow(Y) > 1) p <- combp(Y$pvalue)$p else p <- Y$pvalue
  
  #put results back into a data frame
  return(data.frame(pair = x, pvalue = p))
  })

# put it together in a single data frame
t1.pair <- do.call(rbind, t1.pair)

#calculate a single p-value for the complete data set 
## this is the combined probability that only one bird is at the colony at any time
combp(t1.pair$pvalue)



#### TEST 2 ###############
##### Probability of an even distribution of the time of chick feeding #####
t2.pair.period <- parallel::mclapply(unique(prov.data$pair.obs.period), mc.cores = 3, function(i)
{
  #subset for each nest.observation (nest+observation period)
  a <- prov.data[prov.data$pair.obs.period == i,]
  
  #put male and female data as independent columns (sex is a single column in p) 
  d <- cbind(as.character(a$trip.type[a$sex == "m"]), as.character(a$trip.type[a$sex == "f"]))

  # calculate when male and females came back from foraging trips (e.g. feeding times) 
  feed.t <- unlist(lapply(2:nrow(d), function(x)
  {
    if(d[x, 1] != d[x - 1, 1] & (d[x - 1, 1]) != "CO") a <- x else a <- NA
    if(d[x, 2] != d[x - 1, 2] & (d[x - 1, 2]) != "CO") b <- x else b <- NA
    return(c(a,b))
  }))
  
  #remove NAs
  feed.t <- feed.t[!is.na(feed.t)]
  
  #calculate de time differences between feedings
  feed.t <- feed.t[2:length(feed.t)] - feed.t[1:(length(feed.t)-1)]
  
  #multiple by 10 to getting in minuts
  feed.t <- feed.t * 10
  
  #calculate coefficient of variation
  obs <- sd(feed.t)/(mean(feed.t))
  
  #create matrix with labels that will be use to split "ST", "LT" and "CO"
  mat <- matrix(nrow = nrow(d), ncol = 2)
  mat[1, ] <- 1
  
  for(e in 2:nrow(d))
  {
    #males  
    if(d[e - 1, 1] == d[e, 1])  mat[e, 1] <- mat[e - 1, 1] else  mat[e, 1] <- mat[e - 1, 1] + 1
    
    #females  
    if(d[e - 1, 2] == d[e, 2])  mat[e, 2] <- mat[e - 1, 2] else  mat[e, 2] <- mat[e - 1, 2] + 1
  }
  
  #now split sequences as elements of a list independently for males and females
  seqlmales <- split(d[,1], mat[,1])
  seqlfemales <- split(d[,2], mat[,2])
  
  #name then with the event type
  names(seqlmales) <- sapply(seqlmales, function(x) x[1])
  names(seqlfemales) <- sapply(seqlfemales, function(x) x[1])
  
  #randomization
  exp <- sapply(1:reps, function(j)
  {
    #randomized position at colony sequences for both male and female
    seqlmales[which(names(seqlmales) == "CO")] <- seqlmales[sample(which(names(seqlmales) == "CO"))]
    seqlfemales[which(names(seqlfemales) == "CO")] <- seqlfemales[sample(which(names(seqlfemales) == "CO"))]
    
    #randomized position of male away from colony sequences (LT, ST) for both male and female
    seqlmales[which(names(seqlmales) != "CO")] <- seqlmales[sample(which(names(seqlmales) != "CO"))]
    seqlfemales[which(names(seqlfemales) != "CO")] <- seqlfemales[sample(which(names(seqlfemales) != "CO"))]
    
    #put ramdomized sequences back into a matrix
    f <- cbind(unlist(seqlmales), unlist(seqlfemales)) 
    
    feed.t <- unlist(lapply(2:nrow(d), function(x)
    {
      if(f[x, 1] != f[x - 1, 1] & (f[x - 1, 1]) != "CO") a <- x else a <- NA
      if(f[x, 2] != f[x - 1, 2] & (f[x - 1, 2]) != "CO") b <- x else b <- NA
      return(c(a,b))
    }))
    
    #remove NAs
    feed.t <- feed.t[!is.na(feed.t)]
    
    #calculate de time differences between feedings
    feed.t <- feed.t[2:length(feed.t)] - feed.t[1:(length(feed.t)-1)]
    
    #multiple by 10 to getting in minuts
    feed.t <- feed.t * 10
    
    #calculate coefficient of variation for randomize data
    exp <- sd(feed.t)/(mean(feed.t))
  })
  
  #calculate p value
  p <- length(exp[exp < obs])/reps
  
  #change 0 and 1 p values so probability combining function do not crash
  #convert zeros to the smallest probability expected according to the number of iterations (reps)
  if(p == 0) p <- 1/reps
  
  #convert ones (1) to the highest probability expected according to the number of iterations (reps)
  if(p == 1) p <- (reps - 1)/reps
  
  #return a data frame including p value nes
  return(data.frame(pair = a$pair[1], pair.obs.period = i,obs.cv = obs, mean.exp.cv = mean(exp), pvalue = p))
}    
)  

# put togheter in a single file
t2.pair.period <- do.call(rbind, t2.pair.period)

### combine p value within each pair ###
# again, any of this methods for combining probabilities can be used, we reported only the sumlog (Fisher method)
# but all were tested. They all provide qualitatively equivalent results

#Fisher's method
# combp <- function(x) metap::sumlog(x)

# logit method
# combp <- function(x) metap::logitp(x)

# sum z method
combp <- function(x) metap::sumz(x)


#running loop on each pair to get a single p value per pair
t2.pair <- lapply(unique(t2.pair.period$pair), function(x)
{
  #select the rows for that particular pair
  Y <- t2.pair.period[t2.pair.period$pair == x,]
  
  #if more than one row then combine p values the test, else keep the single pvalue
  if(nrow(Y) > 1) p <- combp(Y$pvalue)$p else p <- Y$pvalue
  
  #put results back into a data frame
  return(data.frame(pair = x, pvalue = p))
})

# put it together in a single data frame
t2.pair <- do.call(rbind, t2.pair)

#calculate a single p-value for the complete data set 
## this is the combined probability that variation in feeding time
combp(t2.pair$pvalue)