########################################################################
### Read fastq file and barcode file
# read.fastq(), demultiplex() and extr.mi.sat() are helpers defined outside
# this chunk; they must be loaded before running this script.
all_seqs <- read.fastq("m:/pc/Desktop/Mikkel/pacBio/Mikkel_PCR_pool/Mikkel_PCR_pool/MikkelPool_reads_of_insert_min3passes.fastq")
length(all_seqs)
hist(unlist(lapply(all_seqs, length)), breaks = 100)
barcodes <- read.table(
  "m:/pc/Desktop/Mikkel/pacBio/PCR_control_study/BarcodesC2.txt",
  header = FALSE
)

# Filter on length: keep only elements whose length() is in [300, 400].
# A keep-predicate is used instead of `x[-which(cond)]`, because the latter
# drops EVERY element when no element satisfies `cond` (-integer(0) selects
# nothing).
all_seqs <- Filter(
  function(read) length(read) >= 300 && length(read) <= 400,
  all_seqs
)
length(all_seqs)

### Demultiplex reads
gogo <- demultiplex(barcodes, all_seqs, mm = 0)
length(gogo)
names(gogo)
par(las = 2)
barplot(unlist(lapply(gogo, length)))
mean(unlist(lapply(gogo, length)))
sd(unlist(lapply(gogo, length)))

# Number of unassigned sequences
length(all_seqs) - sum(unlist(lapply(gogo, length)))

### Extract microsat sequences
# pstI digest control.
# x20 tetra
pstI_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[1]])
length(pstI_4x20)
pstI_4x20_l <- unlist(lapply(pstI_4x20, length))

# x10 tetra
pstI_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[1]])
pstI_4x10_l <- unlist(lapply(pstI_4x10, length))
length(pstI_4x10)
summary(pstI_4x10_l)
barplot(table(pstI_4x10_l))

# ecoRI digest control.
# x30 dinuc
# NOTE(review): the original carried a "10cyc241" label here, but the input
# is gogo[[2]], the same element used for ecoRI_4x10 below -- confirm.
ecoRI_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[2]])
ecoRI_2x30_l <- unlist(lapply(ecoRI_2x30, length))
length(ecoRI_2x30)
summary(ecoRI_2x30_l)
barplot(table(ecoRI_2x30_l))

# x10 tetra
ecoRI_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[2]])
ecoRI_4x10_l <- unlist(lapply(ecoRI_4x10, length))
length(ecoRI_4x10)
summary(ecoRI_4x10_l)
barplot(table(ecoRI_4x10_l))

# 10cyc131
# x20 tetra
c10_131_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[3]])
c10_131_4x20_l <- unlist(lapply(c10_131_4x20, length))
length(c10_131_4x20)
summary(c10_131_4x20_l)
barplot(table(c10_131_4x20_l))

# x10 tetra
c10_131_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[3]])
c10_131_4x10_l <- unlist(lapply(c10_131_4x10, length))
length(c10_131_4x10)
summary(c10_131_4x10_l)
barplot(table(c10_131_4x10_l))

# 10cyc132
# x20 tetra
c10_132_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[5]])
c10_132_4x20_l <- unlist(lapply(c10_132_4x20, length))
length(c10_132_4x20)
summary(c10_132_4x20_l)
barplot(table(c10_132_4x20_l))

# x10 tetra
c10_132_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[5]])
c10_132_4x10_l <- unlist(lapply(c10_132_4x10, length))
length(c10_132_4x10)
summary(c10_132_4x10_l)
barplot(table(c10_132_4x10_l))

###################
# 10cyc241
c10_241_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[4]])
c10_241_2x30_l <- unlist(lapply(c10_241_2x30, length))
length(c10_241_2x30)
summary(c10_241_2x30_l)
barplot(table(c10_241_2x30_l))

c10_241_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[4]])
c10_241_4x10_l <- unlist(lapply(c10_241_4x10, length))
length(c10_241_4x10)
summary(c10_241_4x10_l)
barplot(table(c10_241_4x10_l))
# 10cyc242
c10_242_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[6]])
c10_242_2x30_l <- unlist(lapply(c10_242_2x30, length))
length(c10_242_2x30)
summary(c10_242_2x30_l)
barplot(table(c10_242_2x30_l))

c10_242_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[6]])
c10_242_4x10_l <- unlist(lapply(c10_242_4x10, length))
length(c10_242_4x10)
summary(c10_242_4x10_l)
barplot(table(c10_242_4x10_l))

##########################
# 20cyc131
# x20 tetra
c20_131_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[7]])
c20_131_4x20_l <- unlist(lapply(c20_131_4x20, length))
length(c20_131_4x20)
summary(c20_131_4x20_l)
barplot(table(c20_131_4x20_l))

# x10 tetra
c20_131_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[7]])
c20_131_4x10_l <- unlist(lapply(c20_131_4x10, length))
length(c20_131_4x10)
summary(c20_131_4x10_l)
barplot(table(c20_131_4x10_l))

# 20cyc132
# x20 tetra
c20_132_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[9]])
c20_132_4x20_l <- unlist(lapply(c20_132_4x20, length))
length(c20_132_4x20)
summary(c20_132_4x20_l)
barplot(table(c20_132_4x20_l))

# x10 tetra
c20_132_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[9]])
c20_132_4x10_l <- unlist(lapply(c20_132_4x10, length))
length(c20_132_4x10)
summary(c20_132_4x10_l)
barplot(table(c20_132_4x10_l))

############################
# 20cyc241
# 2x30
c20_241_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[8]])
c20_241_2x30_l <- unlist(lapply(c20_241_2x30, length))
length(c20_241_2x30)
summary(c20_241_2x30_l)
barplot(table(c20_241_2x30_l))

# 4x10
# The matching c20_241_4x10_l assignment is the next statement in the file,
# so the lengths are computed inline here.
c20_241_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[8]])
length(c20_241_4x10)
summary(unlist(lapply(c20_241_4x10, length)))
barplot(table(unlist(lapply(c20_241_4x10, length))))
# Lengths for the 20cyc241 4x10 extraction (c20_241_4x10 is created by the
# preceding statements in the file).
c20_241_4x10_l <- unlist(lapply(c20_241_4x10, length))

# 20cyc242
# 2x30
c20_242_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[10]])
c20_242_2x30_l <- unlist(lapply(c20_242_2x30, length))
length(c20_242_2x30)
summary(c20_242_2x30_l)
barplot(table(c20_242_2x30_l))

c20_242_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[10]])
c20_242_4x10_l <- unlist(lapply(c20_242_4x10, length))
length(c20_242_4x10)
summary(c20_242_4x10_l)
barplot(table(c20_242_4x10_l))

####################################
# 30cyc131
# x20 tetra
c30_131_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[11]])
c30_131_4x20_l <- unlist(lapply(c30_131_4x20, length))
length(c30_131_4x20)
summary(c30_131_4x20_l)
barplot(table(c30_131_4x20_l))

# x10 tetra
c30_131_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[11]])
c30_131_4x10_l <- unlist(lapply(c30_131_4x10, length))
length(c30_131_4x10)
summary(c30_131_4x10_l)
barplot(table(c30_131_4x10_l))

# 30cyc132
# x20 tetra
c30_132_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[13]])
c30_132_4x20_l <- unlist(lapply(c30_132_4x20, length))
length(c30_132_4x20)
summary(c30_132_4x20_l)
barplot(table(c30_132_4x20_l))

# x10 tetra
c30_132_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[13]])
c30_132_4x10_l <- unlist(lapply(c30_132_4x10, length))
length(c30_132_4x10)
summary(c30_132_4x10_l)
barplot(table(c30_132_4x10_l))

###############################
# 30cyc241
c30_241_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[12]])
c30_241_2x30_l <- unlist(lapply(c30_241_2x30, length))
length(c30_241_2x30)
summary(c30_241_2x30_l)
barplot(table(c30_241_2x30_l))

# The summary/barplot/length-vector statements for c30_241_4x10 follow
# immediately after this point in the file.
c30_241_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[12]])
length(c30_241_4x10)
# Length statistics for the 30cyc241 4x10 extraction (c30_241_4x10 is
# created by the preceding statements in the file).
c30_241_4x10_l <- unlist(lapply(c30_241_4x10, length))
summary(c30_241_4x10_l)
barplot(table(c30_241_4x10_l))

# 30cyc242
c30_242_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[14]])
c30_242_2x30_l <- unlist(lapply(c30_242_2x30, length))
length(c30_242_2x30)
summary(c30_242_2x30_l)
barplot(table(c30_242_2x30_l))

c30_242_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[14]])
c30_242_4x10_l <- unlist(lapply(c30_242_4x10, length))
length(c30_242_4x10)
summary(c30_242_4x10_l)
barplot(table(c30_242_4x10_l))

#####################################
# 40cyc131
# x20 tetra
c40_131_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[15]])
c40_131_4x20_l <- unlist(lapply(c40_131_4x20, length))
length(c40_131_4x20)
summary(c40_131_4x20_l)
barplot(table(c40_131_4x20_l))

# x10 tetra
c40_131_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[15]])
c40_131_4x10_l <- unlist(lapply(c40_131_4x10, length))
length(c40_131_4x10)
summary(c40_131_4x10_l)
barplot(table(c40_131_4x10_l))

# 40cyc132
# x20 tetra
c40_132_4x20 <- extr.mi.sat("tccaagcgagag", "tcatgtgataaa", gogo[[17]])
c40_132_4x20_l <- unlist(lapply(c40_132_4x20, length))
length(c40_132_4x20)
summary(c40_132_4x20_l)
barplot(table(c40_132_4x20_l))

# x10 tetra
c40_132_4x10 <- extr.mi.sat("aactaaagtaca", "gggttgaccttc", gogo[[17]])
c40_132_4x10_l <- unlist(lapply(c40_132_4x10, length))
length(c40_132_4x10)
summary(c40_132_4x10_l)
barplot(table(c40_132_4x10_l))

######################################
# 40cyc241
# x30 dinuc
c40_241_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[16]])
c40_241_2x30_l <- unlist(lapply(c40_241_2x30, length))
length(c40_241_2x30)
summary(c40_241_2x30_l)
barplot(table(c40_241_2x30_l))

# x10 tetra
# 40cyc241, x10 tetra
c40_241_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[16]])
c40_241_4x10_l <- unlist(lapply(c40_241_4x10, length))
length(c40_241_4x10)
summary(c40_241_4x10_l)
barplot(table(c40_241_4x10_l))

# 40cyc242
# x30 dinuc
c40_242_2x30 <- extr.mi.sat("taaagttctatc", "acccttaatgtt", gogo[[18]])
c40_242_2x30_l <- unlist(lapply(c40_242_2x30, length))
length(c40_242_2x30)
summary(c40_242_2x30_l)
barplot(table(c40_242_2x30_l))

# x10 tetra
c40_242_4x10 <- extr.mi.sat("atggactctaca", "aagctataaaga", gogo[[18]])
c40_242_4x10_l <- unlist(lapply(c40_242_4x10, length))
length(c40_242_4x10)
summary(c40_242_4x10_l)
barplot(table(c40_242_4x10_l))

#####################################################################
### Extract internal spacer sequences
# extr.spacer() and indel.rate() are helpers defined outside this chunk.
# The second argument to indel.rate() (140 or 133) is presumably the expected
# spacer length -- TODO confirm against the helper's definition.

### No PCR controls
pstI_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[1]])
pstI_spacer_l <- unlist(lapply(pstI_spacer, length))
length(pstI_spacer)
summary(pstI_spacer_l)
barplot(table(pstI_spacer_l))
# Proportion of extracted spacers at each observed length
table(pstI_spacer_l) / length(pstI_spacer)
pstI_s_id <- indel.rate(pstI_spacer_l, 140)

ecoRI_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[2]])
ecoRI_spacer_l <- unlist(lapply(ecoRI_spacer, length))
length(ecoRI_spacer)
summary(ecoRI_spacer_l)
barplot(table(ecoRI_spacer_l))
ecoRI_s_id <- indel.rate(ecoRI_spacer_l, 133)

###############
# 10cyc131
c10_131_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[3]])
c10_131_spacer_l <- unlist(lapply(c10_131_spacer, length))
length(c10_131_spacer)
summary(c10_131_spacer_l)
barplot(table(c10_131_spacer_l))
c10_131_s_id <- indel.rate(c10_131_spacer_l, 140)

# 10cyc132
# The matching indel.rate() call is the next statement in the file.
c10_132_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[5]])
c10_132_spacer_l <- unlist(lapply(c10_132_spacer, length))
length(c10_132_spacer)
summary(c10_132_spacer_l)
barplot(table(c10_132_spacer_l))
# Indel rate for the 10cyc132 spacer (c10_132_spacer_l is created by the
# preceding statements in the file).
c10_132_s_id <- indel.rate(c10_132_spacer_l, 140)

###################
# 10cyc241
c10_241_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[4]])
c10_241_spacer_l <- unlist(lapply(c10_241_spacer, length))
length(c10_241_spacer)
summary(c10_241_spacer_l)
barplot(table(c10_241_spacer_l))
c10_241_s_id <- indel.rate(c10_241_spacer_l, 133)

# 10cyc242
c10_242_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[6]])
c10_242_spacer_l <- unlist(lapply(c10_242_spacer, length))
length(c10_242_spacer)
summary(c10_242_spacer_l)
barplot(table(c10_242_spacer_l))
c10_242_s_id <- indel.rate(c10_242_spacer_l, 133)

##########################
# 20cyc131
c20_131_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[7]])
c20_131_spacer_l <- unlist(lapply(c20_131_spacer, length))
length(c20_131_spacer)
summary(c20_131_spacer_l)
barplot(table(c20_131_spacer_l))
c20_131_s_id <- indel.rate(c20_131_spacer_l, 140)

# 20cyc132
c20_132_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[9]])
c20_132_spacer_l <- unlist(lapply(c20_132_spacer, length))
length(c20_132_spacer)
summary(c20_132_spacer_l)
barplot(table(c20_132_spacer_l))
c20_132_s_id <- indel.rate(c20_132_spacer_l, 140)

############################
# 20cyc241
c20_241_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[8]])
c20_241_spacer_l <- unlist(lapply(c20_241_spacer, length))
length(c20_241_spacer)
summary(c20_241_spacer_l)
barplot(table(c20_241_spacer_l))
c20_241_s_id <- indel.rate(c20_241_spacer_l, 133)

# 20cyc242
c20_242_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[10]])
c20_242_spacer_l <- unlist(lapply(c20_242_spacer, length))
length(c20_242_spacer)
summary(c20_242_spacer_l)
barplot(table(c20_242_spacer_l))
c20_242_s_id <- indel.rate(c20_242_spacer_l, 133)

####################################
# 30cyc131
# The summary, length vector, barplot and indel.rate() statements for this
# spacer follow immediately after this point in the file.
c30_131_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[11]])
length(c30_131_spacer)
# Length statistics and indel rate for the 30cyc131 spacer (c30_131_spacer is
# created by the preceding statements in the file).
c30_131_spacer_l <- unlist(lapply(c30_131_spacer, length))
summary(c30_131_spacer_l)
barplot(table(c30_131_spacer_l))
c30_131_s_id <- indel.rate(c30_131_spacer_l, 140)

# 30cyc132
c30_132_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[13]])
c30_132_spacer_l <- unlist(lapply(c30_132_spacer, length))
length(c30_132_spacer)
summary(c30_132_spacer_l)
barplot(table(c30_132_spacer_l))
c30_132_s_id <- indel.rate(c30_132_spacer_l, 140)

###############################
# 30cyc241
c30_241_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[12]])
c30_241_spacer_l <- unlist(lapply(c30_241_spacer, length))
length(c30_241_spacer)
summary(c30_241_spacer_l)
barplot(table(c30_241_spacer_l))
c30_241_s_id <- indel.rate(c30_241_spacer_l, 133)

# 30cyc242
c30_242_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[14]])
c30_242_spacer_l <- unlist(lapply(c30_242_spacer, length))
length(c30_242_spacer)
summary(c30_242_spacer_l)
barplot(table(c30_242_spacer_l))
c30_242_s_id <- indel.rate(c30_242_spacer_l, 133)

#####################################
# 40cyc131
c40_131_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[15]])
c40_131_spacer_l <- unlist(lapply(c40_131_spacer, length))
length(c40_131_spacer)
summary(c40_131_spacer_l)
barplot(table(c40_131_spacer_l))
c40_131_s_id <- indel.rate(c40_131_spacer_l, 140)

# 40cyc132
c40_132_spacer <- extr.spacer("tcatgtgataaa", "aactaaagtaca", gogo[[17]])
c40_132_spacer_l <- unlist(lapply(c40_132_spacer, length))
length(c40_132_spacer)
summary(c40_132_spacer_l)
barplot(table(c40_132_spacer_l))
c40_132_s_id <- indel.rate(c40_132_spacer_l, 140)

######################################
# 40cyc241
# The matching indel.rate() call is the next statement in the file.
c40_241_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[16]])
c40_241_spacer_l <- unlist(lapply(c40_241_spacer, length))
length(c40_241_spacer)
summary(c40_241_spacer_l)
barplot(table(c40_241_spacer_l))
# Indel rate for the 40cyc241 spacer (c40_241_spacer_l is created by the
# preceding statements in the file).
c40_241_s_id <- indel.rate(c40_241_spacer_l, 133)

# 40cyc242
c40_242_spacer <- extr.spacer("acccttaatgtt", "atggactctaca", gogo[[18]])
c40_242_spacer_l <- unlist(lapply(c40_242_spacer, length))
length(c40_242_spacer)
summary(c40_242_spacer_l)
barplot(table(c40_242_spacer_l))
c40_242_s_id <- indel.rate(c40_242_spacer_l, 133)