# File S2
############## Wild-ID matching - this is for the confirmed-matches output ##############
######################## Nathan F. Bendik, City of Austin, 10-8-2012 ####################
############################### Special thanks to Brian Diggs ###########################
# NOTES:
# The purpose of this script is to allow the user to convert the 'confirmed_matches.txt' file output by
# Wild-ID into a capture-history file suitable for input into program MARK. It requires photographs named
# by date or order according to how the user wants to group captures (the example below uses an 8-digit
# date stamp prefix). All photographs for a given date or order must be of unique individuals, as this
# code will not work appropriately with duplicate photos. This code relies on the transitive property, i.e.
# if photo A=B and B=C, then C=A, and on the fact that the photos are scored in chronological order by
# Wild-ID.
#
# Step 1: Rename all (processed) photos so that the first 8 characters of the photo name are the date in
#         YYYYMMDD format - this is so Wild-ID scores them chronologically
# Step 2: Run Wild-ID to generate confirmed-matches.txt - make sure no duplicate photos exist (i.e.
#         same capture occasion, same individual)
# Step 3: Delete the '#' from the header in the 'confirmed_matches.txt' file, and save, before running
# Step 4: Using 'confirmed_matches.txt', this R script will generate an .inp file with a capture history
#         for each individual appropriate for input into program MARK

rm(list = ls(all = TRUE))  # start from a clean workspace (note: removes ALL objects in the session)
library("reshape2")        # must install package reshape2
setwd("C:/whatevers")      # your working directory here

# ---- Read the Wild-ID output ------------------------------------------------------------------

matches <- read.table("confirmed-matches.txt", blank.lines.skip = TRUE, fill = TRUE, header = TRUE)

# Fail early with a readable message if the header was not prepared as described in Step 3.
required_cols <- c("Serial", "Match.serial", "Relpath")
missing_cols <- setdiff(required_cols, names(matches))
if (length(missing_cols) > 0) {
  stop("confirmed-matches.txt is missing column(s): ", paste(missing_cols, collapse = ", "),
       " - was the '#' removed from the header line?", call. = FALSE)
}
if (nrow(matches) == 0) {
  stop("confirmed-matches.txt contains no records", call. = FALSE)
}

# now clean up data file
matches$Choice.rank <- NULL
matches$Score <- NULL

DF <- data.frame(Serial = matches$Serial, Match.serial = matches$Match.serial)

# fill = TRUE pads short rows with NA; such rows cannot be resolved to an individual.
if (any(is.na(DF$Serial)) || any(is.na(DF$Match.serial))) {
  stop("confirmed-matches.txt has rows with a missing Serial or Match.serial", call. = FALSE)
}

# ---- Assign each photo to a capture occasion ("day") ------------------------------------------

# substring first 8 characters of filename for date (this depends on original photo name - in this
# case, an 8 digit date code was used as a prefix for all file names)
dates <- substr(as.character(matches$Relpath), 1, 8)

# Unique dates across all samples. They are sorted so that the occasion index follows the date
# prefix even when the rows of the input file are not already in alphabetical order.
uniquedates <- sort(unique(dates))
DF$day <- match(dates, uniquedates)

# Keep the rows grouped by occasion (order() is stable, so file order is kept within an occasion).
DF <- DF[order(DF$day), ]

# ---- Resolve every photo to the serial of the individual's first capture -----------------------

bottom.df <- DF[DF$Match.serial == -1, ]   # new captures
mapdown.df <- DF[DF$Match.serial != -1, ]  # recaptures

# Lookup table: name = photo serial, value = serial it was matched to (new captures map to
# themselves, which makes them the fixed points of the lookup).
mapdown <- c(mapdown.df$Match.serial, bottom.df$Serial)
names(mapdown) <- c(mapdown.df$Serial, bottom.df$Serial)

unknown_targets <- setdiff(as.character(mapdown), names(mapdown))
if (length(unknown_targets) > 0) {
  stop("Match.serial refers to serial(s) not present in the file: ",
       paste(unknown_targets, collapse = ", "), call. = FALSE)
}

# Follow the chain photo -> match -> match ... until nothing changes (transitive property).
# A chain can be at most length(mapdown) links long, so the loop is bounded; if it has not
# settled by then the matches contain a cycle.
id <- DF$Serial
resolved <- FALSE
for (pass in seq_len(length(mapdown) + 1L)) {
  next_id <- unname(mapdown[as.character(id)])
  if (all(next_id == id)) {
    resolved <- TRUE
    break
  }
  id <- next_id
}
if (!resolved) {
  stop("Could not resolve matches: the Match.serial links contain a cycle", call. = FALSE)
}
DF$id <- id

# ---- Build the capture histories ---------------------------------------------------------------

# Using reshape2 to convert this into wide format (one column per group (day)) where the column is
# "1" if the individual was photographed on that occasion and "0" if not. dcast also calls the
# aggregate on a zero-length vector to obtain the fill value, hence the length test.
wide <- dcast(DF, id ~ day, value.var = "id",
              fun.aggregate = function(x) if (length(x) > 0) "1" else "0")

# Paste these "0"/"1" memberships together to get the recaphistory variable.
# drop = FALSE keeps a data frame even when there is only a single capture occasion.
wide$recaphistory <- apply(wide[, -1, drop = FALSE], 1, paste, collapse = "")

# To generate the records data: all photo serials belonging to each individual
records <- tapply(DF$Serial, DF$id, paste, collapse = ",")
wide$records <- as.vector(records[as.character(wide$id)])

final <- wide
print(table(final["recaphistory"]))  # prints a summary of capture histories

########## GENERATES MARK .inp FILE ############

final$semi <- ";"  # add a semicolon (MARK requires this)
final$count <- 1   # add the number one (MARK requires this) representing one history record per individual

output <- paste(final$recaphistory, final$count, final$semi)
write(output, file = "confirmed_matches_capture_history.inp")
#################################################