#!/usr/bin/perl
#########################################################################
# Fetch the sequence surrounding the miRNA loci from a one-line FASTA
# genome file
# Chen et al.
#
# Usage: <script> <hit_table> <genome_fasta> <size>
#
#   hit_table     Tab-separated table whose first line is a header and is
#                 skipped. Columns, in order: query_seq, database_seq,
#                 perc_id, align_len, mismatch, gap, q_start, q_stop,
#                 db_start, db_stop, e_val, bit_score.
#                 NOTE(review): this looks like BLAST tabular output --
#                 confirm against the producer of this file.
#   genome_fasta  FASTA file in which every header line is immediately
#                 followed by the full sequence on a single line.
#   size          Maximum number of bases to extract per hit.
#
# For every hit, and for every genome header line that ends with the hit's
# database_seq name, two lines are printed to STDOUT:
#   query_seq <TAB> database_seq <TAB> extracted sequence
#   length actually extracted
#########################################################################
use strict;
use warnings;

die "Usage: $0 <hit_table> <genome_fasta> <size>\n" if @ARGV < 3;

my ($inFile, $genomeFile, $size) = @ARGV;

# The extraction window starts this many bases before the hit midpoint.
# NOTE(review): fixed at 100 regardless of <size>; the window is only
# centred on the midpoint when <size> is 200 -- confirm this is intended.
my $upstreamOffset = 100;

open(my $inFh, '<', $inFile) or die "cannot open $inFile:$!";

my $header = 1;
HIT:
while (my $line = <$inFh>) {
    # The first line of the hit table is a column header.
    if ($header == 1) {
        $header++;
        next HIT;
    }
    chomp $line;
    next HIT if $line =~ /\A\s*\z/;    # ignore blank lines

    # Only four of the twelve columns are needed:
    # 0 = query_seq, 1 = database_seq, 8 = db_start, 9 = db_stop.
    my ($query_seq, $database_seq, $db_start, $db_stop)
        = (split /\t/, $line)[0, 1, 8, 9];
    if (!defined $db_stop) {
        warn "skipping malformed line $. of $inFile\n";
        next HIT;
    }

    # find midpoint of database start and stop points
    my $midPoint = int(($db_stop + $db_start) / 2);

    # get startpoint for substring; make sure it is not negative
    my $startPoint = $midPoint - $upstreamOffset;
    $startPoint = 0 if $startPoint < 0;

    # \Q...\E: match the sequence name literally, since FASTA identifiers
    # often contain regex metacharacters such as '.' or '|'.
    my $headerPattern = qr/\Q$database_seq\E\n/;

    # The genome file is re-scanned for every hit so that it never has to
    # be held in memory.
    open(my $genomeFh, '<', $genomeFile)
        or die "cannot open $genomeFile:$!";
    my $store = 0;    # true when the previous line was a matching header
    while (my $genomeLine = <$genomeFh>) {
        if ($genomeLine =~ $headerPattern) {
            $store = 1;
            next;
        }
        next unless $store;

        # This line is the sequence belonging to the matched header.
        chomp $genomeLine;
        my $seqLength    = length $genomeLine;
        my $flankingDist = $size;
        my $flankingSeq  = '';

        if ($startPoint >= $seqLength) {
            # The window starts at or beyond the end of the sequence:
            # nothing can be extracted.
            $flankingDist = 0;
        }
        else {
            # make sure substring does not go past length of sequence
            if (($startPoint + $flankingDist) > $seqLength) {
                $flankingDist = $seqLength - $startPoint;
            }
            $flankingSeq = substr $genomeLine, $startPoint, $flankingDist;
        }

        print "$query_seq\t$database_seq\t$flankingSeq\n";
        print "$flankingDist\n";
        $store = 0;
    }
    close $genomeFh;
}
close $inFh;