REM MelCapPa = Melcher's CAP3 parsing
REM
REM Global storage:
REM   hold$()      every line of the input file, in file order
REM   matrix(,,)   per-pair counters; the first two indices are sequence numbers,
REM                the third selects the counter (0..5 substitution pairs, 6 bookkeeping)
REM   RefTable1$() the twelve two-letter substitution codes
REM   RefTable2()  +1 / -1 flag for each code; the core loop uses it to choose
REM                between matrix(seq1, seq2, ...) and matrix(seq2, seq1, ...)
DIM hold$(15000)
DIM count%(3)
REM matrix dimension needs to be increased from 500 if there are more OTUs
DIM matrix(500,500,7)
DIM RefTable1$(14)
DIM RefTable2(14)
DIM filename$, volume%, entryno, linepos, entrytot, linetot, lineno, level, index
DIM X, Y, Z, Q, S, FILENO, savetit$, gMyWindow&, position, test$, counter, rowsize
REM in core A$ and B$ are the two sequences being compared
DIM A$, B$, C$, T$, seq1, seq2, triang
DIM pairCodes$

REM NOTE(review): kept exactly as written; presumably ends the program on a user break - confirm
ON BREAK END

REM Set up tables
REM Even slots (0,2,..,12) get +1 and odd slots (1,3,..,13) get -1.
FOR X = 0 TO 12 STEP 2
  RefTable2(X) = 1
  RefTable2(X + 1) = -1
NEXT X

REM Slots 0..11 hold TC, CT, AG, GA, TA, AT, CG, GC, TG, GT, CA, AC, in that order.
pairCodes$ = "TCCTAGGATAATCGGCTGGTCAAC"
FOR X = 0 TO 11
  RefTable1$(X) = MID$(pairCodes$, 2*X + 1, 2)
NEXT X
REM no. 12 is for other matches. no. 13 is for total

REM MAIN EVENT LOOP
REM Creates a window from resource 128 and makes it the current port.
REM This function is not called anywhere in the visible part of the program.
local fn BuildWindow
  gMyWindow& = FN GetNewWindow(128,0,-1)
  call SetPort(gMyWindow&)
end fn

"intro"
REM Tell the user what the program does and what the output columns are.
PRINT "Program calculates for each sequence pair in the input file"
PRINT "the numbers of each mutation type over the whole sequence."
PRINT
PRINT "The fasta files should have equal length lines; "
PRINT "every OTU entry should have the same number of residues."
PRINT
PRINT "Output is a comma-delimited text file with one sequence pair per row."
PRINT "The different columns represent, in order, substitutions"
PRINT "TC, CT, AG, GA, TA, AT, CG, GC, TG, GT, CA, AC, total positions compared, other"
PRINT "Order of nucleotides depends only on position in sequence list and has no biological meaning in this application."
"Open"
REM Ask for the input text file; quit quietly if the dialog is cancelled.
REM fileName$ = FILES$(_fOpen,"Choose file",,volume%)
fileName$ = FILES$(1,"TEXT",,volume%)
IF filename$="" THEN END
OPEN "I",1,filename$,,volume%

REM initialize
linepos = 0
entryno = 0
counter = 0

REM loop for acquisition of sequences
REM LINE INPUT reads one whole line per record. Plain INPUT# splits a record at
REM every comma, so a header line containing a comma would occupy several hold$
REM slots and throw off the lines-per-entry arithmetic below.
REM Reading stops early at 15000 lines so hold$ cannot be overrun.
DO
  LINE INPUT #1, A$
  hold$(counter) = A$
  counter = counter + 1
UNTIL EOF(1) OR (counter >= 15000)

LONG IF counter >= 15000
  PRINT "Input file is too long; it must have fewer than 15000 lines."
  CLOSE #1
  END
END IF
entrytot = counter

REM figure out how many rows per sequence and how many sequences
REM hold$(0) is taken to be the first header; linetot becomes the index of the
REM next line starting with ">", i.e. the number of lines per entry. The scan is
REM bounded by the number of lines read so a one-sequence file cannot run past
REM the end of hold$.
linetot = 0
DO
  linetot = linetot + 1
UNTIL (LEFT$(hold$(linetot), 1) = ">") OR (linetot >= counter)
entrytot = entrytot/linetot
PRINT entrytot "number of sequences"
rowsize = LEN(hold$(1))
PRINT rowsize "length of row"

REM Refuse inputs the loops below cannot handle.
REM NOTE(review): FutureBASIC FOR loops are understood to run their body once even
REM when the start value already exceeds the limit - confirm; these guards make
REM sure every loop below has start <= limit.
LONG IF (entrytot < 2) OR (linetot < 2) OR (rowsize < 1)
  PRINT "Need at least two sequences, each with at least one non-empty sequence line."
  CLOSE #1
  END
END IF
LONG IF entrytot > 500
  PRINT "Too many sequences; the matrix dimension (500) must be increased."
  CLOSE #1
  END
END IF
PRINT "Sequences acquired."

"Core of pairwise comparison"
REM Sequences are numbered 0 .. entrytot-1, so the unordered pairs are
REM seq1 = 0 .. entrytot-2 with seq2 = seq1+1 .. entrytot-1.
REM For a pair with seq1 < seq2:
REM   matrix(seq1, seq2, 0..5) counts substitutions whose code sits in an even table slot
REM   matrix(seq2, seq1, 0..5) counts substitutions whose code sits in an odd table slot
REM   matrix(seq1, seq2, 6)    counts positions where either sequence has "-"
REM   matrix(seq2, seq1, 6)    counts every position visited
REM Positions are scanned 1..rowsize on every line, so a line shorter than the
REM first sequence line still adds rowsize to the visited-position count.
FOR seq1 = 0 TO entrytot - 2
  FOR seq2 = seq1 + 1 TO entrytot - 1
    FOR lineno = 1 TO linetot - 1
      REM Line 0 of each entry is its header; lines 1..linetot-1 are residues.
      A$ = hold$(seq1*linetot + lineno)
      B$ = hold$(seq2*linetot + lineno)
      FOR position = 1 TO rowsize
        LONG IF (MID$(A$,position,1) = "-") OR (MID$(B$,position,1) = "-")
          matrix(seq1, seq2, 6) = matrix(seq1, seq2, 6) +1
          GOTO "Skip analysis"
        END IF
        IF MID$(A$,position,1) = MID$(B$,position,1) THEN GOTO "Skip analysis"
        REM The code is the residue from B$ followed by the residue from A$.
        T$ = MID$(B$,position,1) + MID$(A$,position,1)
        REM index stays at 16 until a table entry matches; only the first match counts.
        index = 16
        triang = 0
        FOR X = 0 TO 11
          LONG IF index > 14
            LONG IF T$ = RefTable1$(X)
              index = INT(X/2)
              triang = RefTable2(X)
              IF triang = 1 THEN matrix(seq1, seq2, index) = matrix(seq1, seq2, index) +1
              IF triang = -1 THEN matrix(seq2, seq1, index) = matrix(seq2, seq1, index) +1
            END IF
          END IF
        NEXT X
        "Skip analysis"
        matrix(seq2, seq1,6) = matrix(seq2, seq1,6) +1
      NEXT position
    NEXT lineno
    REM One progress mark per finished pair.
    PRINT "*";
  NEXT seq2
  PRINT
NEXT seq1

"Save"
REM write to file, one line per pair
PRINT "Choose output filename"
CLOSE #1
fileno = 3
savetit$ = filename$ + ".mutf"
fileName$ = FILES$(_fSave,"Save values as:",savetit$,volume%)
REM An empty name means the save dialog was cancelled; there is nothing to open.
IF fileName$ = "" THEN END
OPEN "O",fileno,filename$,,volume%
REM Here seq2 is the lower-numbered sequence of the pair and seq1 the higher.
REM Each row holds, for level 0..6, matrix(seq1, seq2, level) and then
REM matrix(seq2, seq1, level), comma separated.
FOR seq2 = 0 TO entrytot - 2
  FOR seq1 = (seq2 + 1) TO entrytot - 1
    FOR level = 0 TO 6
      PRINT#fileno, matrix(seq1, seq2, level) "," matrix (seq2, seq1, level)",";
    NEXT level
    PRINT#fileno, ""
  NEXT seq1
NEXT seq2
CLOSE
PRINT "File completed."
END