##### File S1. Script to generate example simulated datasets and impute
##### missing data.
#
# Workflow:
#   1. Simulate an equally spaced genetic map and an 8-founder pedigree.
#   2. Simulate a complete mpcross dataset on that map/pedigree.
#   3. Mask progeny and founder genotypes independently at random.
#   4. Impute the masked genotypes with mpimpute().
#   5. Report how many genotypes remain missing and what proportion of the
#      masked genotypes were imputed incorrectly.

# Setup ----

library(devtools)

# Install mpMap from GitHub only when it is not already available, using the
# "owner/repo" specification expected by install_github().
if (!requireNamespace("mpMap", quietly = TRUE)) {
  install_github("behuang/mpMap")
}
library(mpMap)

# Simulation parameters ----

NumberMarkers <- 201
NumberProgeny <- 200
MissingProgenyProb <- 0.6   # per-genotype masking probability, progeny
MissingParentProb <- 0.36   # per-genotype masking probability, founders
ChromosomeLength <- 100

seed <- 1
set.seed(seed)

# Simulate the complete dataset ----

map <- sim.map(
  len = ChromosomeLength,
  n.mar = NumberMarkers,
  eq.spacing = TRUE,
  include.x = FALSE
)

## Generate 8-way pedigree
ped <- sim.mpped(
  nfounders = 8,
  nfunnels = 1,
  nperfam = NumberProgeny,
  nssdgen = 6,
  nseeds = 1
)

## Simulate full dataset
mpobj <- sim.mpcross(map = map, pedigree = ped, seed = seed)
FullSimData <- rbind(mpobj$founders, mpobj$finals)

# Keep an unmasked copy to score the imputation against.
fullmp <- mpobj

# Mask genotypes ----

## Simulate which progeny genotypes will be missing
# One Bernoulli draw per cell; a value of 1 marks the cell as missing.
missmat <- matrix(
  rbinom(prod(dim(mpobj$finals)), size = 1, prob = MissingProgenyProb),
  nrow = nrow(mpobj$finals),
  ncol = ncol(mpobj$finals)
)
mpobj$finals[missmat == 1] <- NA

## Simulate which parent genotypes will be missing
missfou <- matrix(
  rbinom(prod(dim(mpobj$founders)), size = 1, prob = MissingParentProb),
  nrow = nrow(mpobj$founders),
  ncol = ncol(mpobj$founders)
)
mpobj$founders[missfou == 1] <- NA

MissSimData <- rbind(mpobj$founders, mpobj$finals)

# Impute ----

imputedmp <- mpimpute(mpobj, what = "both")

# Evaluate ----

# Cells that were masked before imputation (computed once, reused below).
founders_masked <- is.na(mpobj$founders)
finals_masked <- is.na(mpobj$finals)

## How many genotypes are still missing in the parents after imputation?
## NOTE: check this - should have better performance.
sum(is.na(imputedmp$founders))

## How many genotypes are still missing in the progeny after imputation?
sum(is.na(imputedmp$finals))

## Proportion of masked genotypes imputed incorrectly (parents, then progeny).
# Cells that are still NA after imputation are dropped from the numerator via
# na.rm = TRUE but remain counted in the denominator, so they are not scored
# as errors. na.rm is applied to both statistics so that a single unimputed
# founder genotype does not turn the founder error rate into NA.
sum(
  imputedmp$founders[founders_masked] != fullmp$founders[founders_masked],
  na.rm = TRUE
) / sum(founders_masked)

sum(
  imputedmp$finals[finals_masked] != fullmp$finals[finals_masked],
  na.rm = TRUE
) / sum(finals_masked)