# Find SNPs on the chip that share the same probe (oligo) sequence, report
# them together with their positions, and add the reverse complement of every
# probe sequence to the sequence table.
#
# Inputs (paths relative to the working directory):
#   ../Material/BovineSNP50_B.csv
#       comma-separated, with at least the columns Name and AlleleA_ProbeSeq
#   ../Material/Mapping-Illumina-Genome-25-June-2009.txt
#       whitespace-separated, with an ID column plus position columns
#       (CHROM.ILMN, POS.ILMN and CHROM.MAP are referenced below)
# Output:
#   DuplicateOligos.txt -- tab-separated, one row per pair of SNPs that
#       share a probe sequence
# Afterwards D holds the columns Name, AlleleA_ProbeSeq and REVCOMP.

library(seqinr)
library(ape)

# ---- Read input data ----

#D <- read.dna("../Material/OligoSequences.fasta", format = "fasta", as.character = TRUE)
D <- read.table(file = "../Material/BovineSNP50_B.csv", header = TRUE, sep = ",")
#stop()
cat("Sequence data set read in.\n")

P <- read.table(file = "../Material/Mapping-Illumina-Genome-25-June-2009.txt", header = TRUE)
cat("Position data read in.\n")

# Only the SNP name and its probe sequence are needed from here on.
D <- subset(D, select = c(Name, AlleleA_ProbeSeq))
n <- nrow(D)
cat("Number of lines:\t", n, "\n")

# ---- Probe sequences shared by more than one SNP ----

MultipleSeqs <- names(which(table(D$AlleleA_ProbeSeq) > 1))
n.ms <- length(MultipleSeqs)
cat("Number of oligo seqences occurring more than once:", n.ms, "\n")

# One three-column character matrix (ID1, ID2, SEQUENCE) per duplicated
# sequence, collected in a preallocated list and bound once after the loop.
pair.rows <- vector("list", n.ms)
for (i in seq_len(n.ms)) {
  D.aux <- subset(D, AlleleA_ProbeSeq == MultipleSeqs[i])
  print(D.aux)
  P.aux <- subset(P, ID %in% D.aux$Name)
  print(P.aux)

  ids <- as.character(P.aux$ID)
  if (length(ids) < 2) {
    # Fewer than two of the SNPs are present in the position table, so no
    # pair can be reported for this sequence.
    warning("Fewer than two SNPs with position data for sequence ",
            MultipleSeqs[i], "; skipped.", call. = FALSE)
    next
  }
  # Exactly two IDs give the single pair (ids[1], ids[2]); more than two
  # IDs give every pairwise combination.
  pair.rows[[i]] <- cbind(t(combn(ids, 2)), MultipleSeqs[i])
}

M <- do.call(rbind, pair.rows)
if (is.null(M)) {
  # No pairs at all: keep a zero-row table so a header-only file is written.
  M <- matrix(character(0), nrow = 0, ncol = 3)
}
M <- as.data.frame(M, stringsAsFactors = FALSE)
names(M) <- c("ID1", "ID2", "SEQUENCE")

# ---- Attach positions of both SNPs ----

# First merge: chromosome and Illumina position of the first SNP.
M <- merge(M, P, by.x = "ID1", by.y = "ID")
M <- subset(M, select = c(ID1, ID2, SEQUENCE, CHROM.ILMN, POS.ILMN))

# Second merge: columns of the second SNP. Columns present on both sides get
# the suffixes .x (first SNP) and .y (second SNP).
M <- merge(M, P, by.x = "ID2", by.y = "ID")
M <- subset(M, select = -c(CHROM.ILMN.y, CHROM.MAP))

# merge() moves its key column (ID2) to the front. Put ID1 first again so the
# positional names below label each SNP with its own position
# (SNP1 <-> POS.ILMN1, SNP2 <-> POS.ILMN.2).
M <- M[, c("ID1", "ID2", setdiff(names(M), c("ID1", "ID2"))), drop = FALSE]
# NOTE(review): this assumes P has exactly one further column besides ID,
# CHROM.ILMN, POS.ILMN and CHROM.MAP (presumably the mapped position), so
# that seven columns remain -- confirm against the mapping file.
names(M) <- c("SNP1", "SNP2", "SEQUENCE", "CHROM", "POS.ILMN1", "POS.ILMN.2", "POS.MAP")

write.table(M, file = "DuplicateOligos.txt", sep = "\t", row.names = FALSE, quote = FALSE)
#stop()

# ---- Reverse complement of every probe sequence ----

cat("Producing reverse complementary sequences\n")
# Convert once outside the loop; the column may be a factor.
probe.seqs <- as.character(D$AlleleA_ProbeSeq)
REVCOMP <- vector(length = n, mode = "character")
for (i in seq_len(n)) {
  # split into characters -> reverse -> complement -> join -> upper case
  REVCOMP[i] <- toupper(c2s(comp(rev(s2c(probe.seqs[i])))))
  if (i %% 1000 == 0) {
    # progress report every 1000 sequences
    cat(i, "\n")
  }
}
D <- cbind(D, REVCOMP)