# mothur commands to perform amplicon QC and generate a distance matrix.
# One command per line, in execution order. Each step's input file names are
# the output file names of the preceding steps, so the order must be kept.

# Build contigs from the files listed in DV1314mock.files, then screen,
# dereplicate and count them.
make.contigs(file=DV1314mock.files, processors=1)
summary.seqs(fasta=DV1314mock.trim.contigs.fasta)
screen.seqs(fasta=current, group=DV1314mock.contigs.groups, summary=DV1314mock.trim.contigs.summary, maxambig=8, maxhomop=8, maxlength=520)
unique.seqs(fasta=current)
count.seqs(name=DV1314mock.trim.contigs.good.names, group=DV1314mock.contigs.good.groups)

# Trim the SILVA reference alignment (columns 1-11894) and save it as the
# V1-V3 reference used for alignment.
pcr.seqs(fasta=silva.bacteria.fasta, start=1, end=11894, keepdots=F)
system(mv silva.bacteria.pcr.fasta silva.V1V3.fasta)

# Align against the trimmed reference, then screen and filter the alignment.
align.seqs(fasta=DV1314mock.trim.contigs.good.unique.fasta, reference=silva.V1V3.fasta, flip=true)
summary.seqs(fasta=DV1314mock.trim.contigs.good.unique.align, count=DV1314mock.trim.contigs.good.count_table)
screen.seqs(fasta=DV1314mock.trim.contigs.good.unique.align, count=DV1314mock.trim.contigs.good.count_table, summary=DV1314mock.trim.contigs.good.unique.summary, start=1, optimize=end)
filter.seqs(fasta=DV1314mock.trim.contigs.good.unique.good.align, vertical=T, trump=.)

# Dereplicate again, pre-cluster, and remove the sequences flagged by UCHIME.
unique.seqs(fasta=DV1314mock.trim.contigs.good.unique.good.filter.fasta, count=DV1314mock.trim.contigs.good.good.count_table)
pre.cluster(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.fasta, count=DV1314mock.trim.contigs.good.unique.good.filter.count_table, diffs=2)
chimera.uchime(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.fasta, count=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.count_table, processors=1, dereplicate=t)
remove.seqs(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.fasta, accnos=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.uchime.accnos)

# Classify the sequences and drop the unwanted lineages.
classify.seqs(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, count=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.count_table, reference=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax, cutoff=80)
remove.lineage(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, count=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.count_table, taxonomy=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.pick.pds.wang.taxonomy, taxon=Chloroplast-Mitochondria-unknown-Archaea-Eukaryota)
summary.seqs(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta)

# Error analysis against the mock community reference.
seq.error(fasta=DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, reference=mock16S3.fna, aligned=F)

# Give the final QC'd files short names.
# Copy rather than move so that the long-named outputs stay on disk: the R
# step below reads the original count_table by its long name, which would no
# longer exist after a mv.
system(cp DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta DV1314mock.all.fasta)
system(cp DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.pick.count_table DV1314mock.all.count_table)

# Distance matrix, OTU clustering (label 0.03) and diversity analyses.
dist.seqs(fasta=DV1314mock.all.fasta, cutoff=0.2)
cluster(column=DV1314mock.all.dist, count=DV1314mock.all.count_table)
make.shared(list=DV1314mock.all.an.unique_list.list, count=DV1314mock.all.count_table, label=0.03)
catchall(shared=DV1314mock.all.an.unique_list.shared)
count.groups(shared=DV1314mock.all.an.unique_list.shared)
sub.sample(shared=DV1314mock.all.an.unique_list.shared, size=6654)
summary.single(shared=DV1314mock.all.an.unique_list.0.03.subsample.shared, calc=nseqs-coverage-sobs-shannon-chao-invsimpson)

# R commands run outside of mothur to produce a list of singletons for removal.
# (Enter these in an R session started in the same working directory; they are
# not mothur commands.)

# Read only the first two columns of the count table: the sequence name
# (character) and the total count (integer). The 12 "NULL" entries skip the
# remaining columns, i.e. the per-sample counts; 12 is the number of columns
# to skip and must match the number of sample columns in the file.
torig <- read.table(
  "DV1314mock.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.pick.count_table",
  header = TRUE,
  comment.char = "",
  colClasses = c(rep("character", 1), rep("integer", 1), rep("NULL", 12))
)
# Keep the sequences whose total abundance is exactly 1 (singletons).
tsing <- subset(torig, total == 1)
# Retain only the sequence-name column, as a one-column data frame.
keeps <- "Representative_Sequence"
singlist <- tsing[, keeps, drop = FALSE]
# Write one sequence name per line, with no header, row names or quotes.
write.table(singlist, file = "DV1314mocksingles.accnos", quote = FALSE, row.names = FALSE, col.names = FALSE)

# mothur commands to remove the singletons; repeat error and diversity analyses.
# Repeat the procedures for singleton + doubleton removal.

# Work on copies named DV1314mock.outsingletons.*.
# The copies are taken from DV1314mock.all.fasta / DV1314mock.all.count_table,
# the short-named final QC'd files created before the first dist.seqs run
# (same content as the long-named *.pick.pick.* outputs), so this step does
# not depend on the long-named intermediates still being present.
system(cp DV1314mock.all.fasta DV1314mock.outsingletons.fasta)
system(cp DV1314mock.all.count_table DV1314mock.outsingletons.count_table)

# Remove the sequences listed in DV1314mocksingles.accnos (written by the R
# step) from both the fasta file and the count table.
remove.seqs(fasta=DV1314mock.outsingletons.fasta, accnos=DV1314mocksingles.accnos)
remove.seqs(count=DV1314mock.outsingletons.count_table, accnos=DV1314mocksingles.accnos)

# Distance matrix and OTU clustering (label 0.03) on the reduced data set.
dist.seqs(fasta=DV1314mock.outsingletons.pick.fasta, cutoff=0.2)
cluster(column=DV1314mock.outsingletons.pick.dist, count=DV1314mock.outsingletons.pick.count_table)
make.shared(list=DV1314mock.outsingletons.pick.an.unique_list.list, count=DV1314mock.outsingletons.pick.count_table, label=0.03)
rarefaction.single(shared=current)

# Classify the remaining sequences and the OTUs.
classify.seqs(fasta=DV1314mock.outsingletons.pick.fasta, count=DV1314mock.outsingletons.pick.count_table, reference=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax, cutoff=80)
classify.otu(list=DV1314mock.outsingletons.pick.an.unique_list.list, count=DV1314mock.outsingletons.pick.count_table, taxonomy=DV1314mock.outsingletons.pick.pds.wang.taxonomy, label=0.03)

# Repeat the error analysis against the mock community reference.
seq.error(fasta=DV1314mock.outsingletons.pick.fasta, reference=mock16S3.fna, aligned=F)

# Repeat the diversity analyses, subsampling to the same size (6654) as the
# analysis of the full data set.
catchall(shared=DV1314mock.outsingletons.pick.an.unique_list.shared)
count.groups(shared=DV1314mock.outsingletons.pick.an.unique_list.shared)
sub.sample(shared=DV1314mock.outsingletons.pick.an.unique_list.shared, size=6654)
summary.single(shared=DV1314mock.outsingletons.pick.an.unique_list.0.03.subsample.shared, calc=nseqs-coverage-sobs-shannon-chao-invsimpson)