#File S2
##############   Wild-ID matching- this is for the confirmed-matches output ###########################
######################## Nathan F. Bendik, City of Austin, 10-8-2012 ##################################
############################### Special thanks to Brian Diggs #########################################

# NOTES:
# The purpose of this script is to allow the user to convert the 'confirmed-matches.txt' file output by 
# Wild-ID into a capture-history file suitable for input into program MARK. It requires photographs named 
# by date or order according to how the user wants to group captures (the below example is with an 8-digit 
# date stamp prefix). All photographs for a given date or order # must be of unique individuals, as this 
# code will not work appropriately with duplicate photos. This code relies on the transitive property, i.e. 
# if photo A=B and B=C, then C=A, and on the fact that the photos are scored in chronological order by Wild-ID. 

#Step 1: Rename all (processed) photos so that first 8 characters of photo name is the date in YYYYMMDD 
# format- This is so Wild-ID scores them chronologically 
#Step 2: Run Wild-ID to generate confirmed-matches.txt- make sure no duplicate photos exist (i.e. same capture 
# occasion, same individual)
#Step 3: Delete the '#' from the header in the 'confirmed-matches.txt' file, and save, before running
#Step 4: Using 'confirmed-matches.txt' (the file name read below), this R script will generate an .inp file 
# with a capture history for each individual appropriate for input into program MARK

# ---- Load the Wild-ID confirmed-matches table ----
# The workspace reset and setwd() are kept from the original workflow: the
# input file is located through the working directory, so edit the path below.
rm(list = ls(all = TRUE))
library("reshape2")     # must install package reshape2
setwd('C:/whatevers') #your working directory here

matches <- read.table("confirmed-matches.txt", blank.lines.skip = TRUE,
                      fill = TRUE, header = TRUE)

# Fail early with a readable message when the expected columns are absent.
# The usual cause is skipping Step 3: with the leading '#' still present the
# header line is read as a comment and the first data row becomes the header.
missing_cols <- setdiff(c("Serial", "Match.serial", "Relpath"), names(matches))
if (length(missing_cols) > 0) {
  stop("confirmed-matches.txt is missing expected column(s): ",
       paste(missing_cols, collapse = ", "),
       ". Was the '#' removed from the header line (Step 3)?",
       call. = FALSE)
}

# Drop the columns that are not needed for building capture histories
matches$Choice.rank <- NULL
matches$Score <- NULL

# Keep only the photo serial and the serial of the photo it was matched to.
# Columns are taken directly from 'matches' instead of attach()/detach(), so
# objects already present on the search path cannot mask them.
DF <- data.frame(Serial = matches$Serial, Match.serial = matches$Match.serial)

# ---- Assign a capture-occasion index ("day") to every photo ----
# The first 8 characters of each photo's Relpath are its occasion label (this
# depends on the original photo name - in this case an 8-digit date code was
# used as a prefix for all file names).
dates <- substr(matches$Relpath, 1, 8)
uniquedates <- unique(dates)       # unique dates across all samples, in order of appearance

# Occasions are numbered in order of first appearance, so the numbering is
# only chronological when the rows are already ordered by prefix. Warn rather
# than silently building capture histories with shuffled occasions.
if (!identical(uniquedates, sort(uniquedates, method = "radix"))) {
  warning("Photo name prefixes are not in ascending order; capture occasions ",
          "are numbered by order of appearance and may not be chronological.",
          call. = FALSE)
}

day <- seq_along(uniquedates)      # 1..k index for the unique dates
temp <- data.frame(uniquedates, day) # temporary lookup: date prefix -> day index

DF$dates <- dates
DF <- merge(DF, temp, by.x = "dates", by.y = "uniquedates", all = TRUE) # marry the days to DF
DF$dates <- NULL  # drop the date variable from DF; only the day index is needed
# ---- Resolve every photo to the individual it belongs to ----
# A Match.serial of -1 marks a new capture; any other value is the serial of
# the photo this one was matched to.
bottom.df <- DF[DF$Match.serial == -1, ]   # new captures
mapdown.df <- DF[DF$Match.serial != -1, ]  # recaptures

# Lookup vector named by photo serial: recaptures point at their matched
# photo, new captures point at themselves (the fixed points of the mapping).
mapdown <- c(mapdown.df$Match.serial, bottom.df$Serial)
names(mapdown) <- c(mapdown.df$Serial, bottom.df$Serial)

oldSerial <- DF$Serial
newSerial <- mapdown[as.character(oldSerial)]
if (anyNA(newSerial)) {
  stop("Some Serial/Match.serial values are missing or could not be looked up; ",
       "check confirmed-matches.txt for incomplete rows.", call. = FALSE)
}

# Follow the match chain (A -> B -> C ...) until no photo changes any more.
# Without a cycle, a chain cannot be longer than the number of photos, so
# exceeding that bound means the matches loop back on themselves.
max_steps <- length(mapdown)
steps <- 0L
while (any(oldSerial != newSerial)) {
  steps <- steps + 1L
  if (steps > max_steps) {
    stop("Match chains never settle; confirmed-matches.txt appears to ",
         "contain a cycle of matches.", call. = FALSE)
  }
  oldSerial <- newSerial
  newSerial <- mapdown[as.character(oldSerial)]
  if (anyNA(newSerial)) {
    stop("A Match.serial refers to a photo serial that is not listed in ",
         "confirmed-matches.txt.", call. = FALSE)
  }
}

# The serial at the end of each chain is used as the individual's id
DF$id <- unname(newSerial)

# ---- Build the capture-history table ----
# Using reshape2 to convert DF into wide format: one row per individual (id)
# and one column per capture occasion (day). A cell is "1" if the individual
# has at least one photo on that occasion and "0" if not.
wide <- dcast(
  DF, id ~ day,
  value.var = "id",
  fun.aggregate = function(ids) {
    if (length(ids) > 0) "1" else "0"
  }
)

# Paste the "0"/"1" memberships together to get the recaphistory variable.
# drop = FALSE keeps a data frame even when there is a single occasion column;
# otherwise the selection collapses to a vector and apply() fails.
wide$recaphistory <- apply(wide[, -1, drop = FALSE], 1, paste, collapse = "")

# Trace each photo through the match lookup again, this time keeping every
# step: column 1 of iterx is the photo's own serial and each further column
# is the previous column passed through mapdown. Columns are appended until
# the newest column equals the one before it.
iterx <- matrix(DF$Serial, ncol = 1)
repeat {
    current <- iterx[, ncol(iterx)]
    iterx <- cbind(iterx, mapdown[as.character(current)])
    last <- ncol(iterx)
    if (!any(iterx[, last] != iterx[, last - 1])) {
        break
    }
}
DF$id <- iterx[, ncol(iterx)]

# To generate the records data: for every individual id, the comma-separated
# serials of all photos that resolved to it
photo.serials <- iterx[, 1]
individual.ids <- iterx[, ncol(iterx)]
records <- tapply(photo.serials, individual.ids, paste, collapse = ",")
wide$records <- records[as.character(wide$id)]

final <- wide
# Print a summary of capture histories. The explicit print() makes the table
# appear when the script is run through source() as well as interactively.
print(table(final["recaphistory"]))

########## GENERATES MARK .inp FILE  ############
final$semi <- as.character(";") # add a semicolon (MARK requires this)
final$count <- (1) # add the number one (MARK requires this) representing one history record per individual
# One line per individual: "<history> <count> ;". Columns are referenced
# through 'final' directly instead of attach()/detach().
output <- paste(final$recaphistory, final$count, final$semi)
write(output, file = "confirmed_matches_capture_history.inp")
#################################################