# Compare the SNP annotation shipped by Illumina (columns CHROM.ILMN and
# POS.ILMN) with our own mapping (columns CHROM.MAP and POS.MAP) and print a
# summary of the disagreements.
#
# Encoding used by the checks below:
#   * Illumina columns: a value of 0 is treated as "not mapped".
#   * Mapping columns:  NA is treated as "not mapped".
#
# The script reports, in this order:
#   1. SNPs assigned to a chromosome by only one of the two sources (n1, n2).
#   2. SNPs assigned to different chromosomes (n.different.chromosome, Diff).
#   3. Among SNPs on the same chromosome: SNPs with a position from only one
#      source (n3, n4) and SNPs whose positions differ by more than 2
#      (n.great.shift).
#   4. The sum of all of the above (n).

# Input ----

D <- read.table(
  file = "../Material/Mapping-Illumina-Genome-25-June-2009.txt",
  header = TRUE
)

# Absolute distance between both positions; NA when POS.MAP is NA.
D$DELTA <- abs(D$POS.ILMN - D$POS.MAP)

# Logical flags: does each source provide a chromosome / a position?
D$CHROM.ILMN.BIN <- D$CHROM.ILMN != 0
D$CHROM.MAP.BIN <- !is.na(D$CHROM.MAP)
D$POS.ILMN.BIN <- D$POS.ILMN != 0
D$POS.MAP.BIN <- !is.na(D$POS.MAP)

# Chromosome assignment ----

cat("Now considering all SNPs on the chip\n")
cat("\n")
cat("###### CONTINGENCY TABLE MAPPING TO CHROMOSOMES #####\n")
cat("\n")
print(addmargins(table(subset(D, select = c(CHROM.ILMN.BIN, CHROM.MAP.BIN)))))
cat("\n")

n1 <- nrow(subset(D, CHROM.ILMN == 0 & !is.na(CHROM.MAP)))
cat("SNPs not mapped to chromosome by Illumina, but mapped by us ", n1, "\n")

n2 <- nrow(subset(D, CHROM.ILMN != 0 & is.na(CHROM.MAP)))
cat("SNPs mapped to chromosome by Illumina, but not mapped by us ", n2, "\n")

# Keep entries which have a chromosome assignment both according to Illumina
# and according to the mapping.
M <- subset(D, CHROM.MAP.BIN & CHROM.ILMN.BIN)
cat(
  "Number of SNPs mapped to chromosomes (Illumina + our mapping)",
  nrow(M), "\n"
)

# Compare chromosome labels as plain strings. Both vectors are free of NA
# here because of the filter that produced M.
chrom.ilmn <- as.character(M$CHROM.ILMN)
chrom.map <- as.character(M$CHROM.MAP)
same.chromosome <- chrom.ilmn == chrom.map

# Cross-tabulate over ONE shared set of levels so that the table is square
# and its diagonal really holds the agreeing SNPs. Tabulating the two columns
# independently misaligns the diagonal as soon as a label occurs in only one
# column or the labels are formatted differently.
chrom.levels <- sort(unique(c(chrom.ilmn, chrom.map)))
ContTable <- table(
  CHROM.ILMN = factor(chrom.ilmn, levels = chrom.levels),
  CHROM.MAP = factor(chrom.map, levels = chrom.levels)
)

# Count mismatches directly; this always equals nrow(Diff) printed below.
n.different.chromosome <- sum(!same.chromosome)
cat(
  "Number of SNPs mapped to different chromosomes",
  n.different.chromosome, "\n"
)

Diff <- M[!same.chromosome, , drop = FALSE]
print(Diff)

# From here on M only holds SNPs on which both sources agree on the chromosome.
M <- M[same.chromosome, , drop = FALSE]
n.same.chromosome <- nrow(M)
cat(
  "Number of SNPs mapped to the same chromosome (Illumina + our mapping)",
  n.same.chromosome, "\n"
)

# Position within the chromosome ----

cat("\nNow considering SNPs mapped to the same chromosome by Illumina and us\n")
cat("\n")
cat("##### CONTINGENCY TABLE MAPPING TO UNIQUE LOCI ######\n")
cat("\n")
print(addmargins(table(subset(M, select = c(POS.ILMN.BIN, POS.MAP.BIN)))))
cat("\n")

n3 <- nrow(subset(M, POS.ILMN.BIN & !POS.MAP.BIN))
cat("SNPs mapped to unique locus by Illumina, but not by us ", n3, "\n")

n4 <- nrow(subset(M, !POS.ILMN.BIN & POS.MAP.BIN))
cat("SNPs not mapped to unique locus by Illumina, but by us ", n4, "\n")

# Keep entries which are on the same chromosome and which have a position
# according to both Illumina and our mapping.
Z <- subset(M, POS.ILMN.BIN & POS.MAP.BIN)
cat(
  "Number of SNPs with chromosomal position (Illumina + our mapping)",
  nrow(Z), "\n"
)

n.great.shift <- nrow(subset(Z, DELTA > 2))
cat(
  "Number of cases where deviation is greater than 2:\t",
  n.great.shift, "\n"
)

# Debugging aids, intentionally disabled:
# stop()
# print(table(Z$DELTA))

# Summary ----

# Disabled alternative that counts the deviating rows of D in a single pass:
# n <- nrow(subset(D, (CHROM.ILMN != 0 & is.na(CHROM.MAP)) |
#   (CHROM.ILMN == 0 & !is.na(CHROM.MAP)) |
#   (as.character(CHROM.ILMN) != as.character(CHROM.MAP)) | DELTA > 2))
n <- n1 + n2 + n3 + n4 + n.great.shift + n.different.chromosome
cat("Total number of SNPs with substantial deviations:\t", n, "\n")