#!/usr/bin/env bash
# Interage - bash script to run on the Unix command line.
# Finds intergenic sequences, to be used in the ORFage script.
# Contact: Zachary Ardern: zachary.ardern@tum.de
# Requires the Entrez Direct (EDirect) tools from NCBI, available here:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/
#
# Usage: <this script> [accession [bed_basename]]
#
# Input:
#   <bed_basename>.bed  whitespace-separated columns: name, first, last, strand
#                       (strand "+" or "-" in column 4).
# Output (all written to the current directory):
#   <accession>.ft              feature table downloaded with efetch
#   genes-table.txt             CDS rows as: low, high, "CDS", plus|minus
#   up1-/up2-<bed_basename>.bed     upstream regions of plus / minus hits
#   upstream<bed_basename>.bed      concatenation of up1 and up2
#   down1-/down2-<bed_basename>.bed downstream regions of minus / plus hits
#   downstream<bed_basename>.bed    concatenation of down1 and down2
# Region files are tab-separated: accession, region start, region end, strand.

# Set Chromosome (first argument overrides the default)
chr=${1:-NC_002695.1}

# Set Input - bed file; (set "$bed" in "$bed".bed; second argument overrides)
bed=${2:-blastp_hits}

#####################################

# Print a diagnostic on stderr.
err() {
  printf 'interage: %s\n' "$*" >&2
}

# Print the rows of "$bed.bed" whose fourth column equals $1 ("+" or "-").
hits_on_strand() {
  awk -v s="$1" '$4 == s' "$bed.bed"
}

# For every hit on strand $1, in ascending order of its first coordinate,
# print the region from the end of the preceding annotated CDS (the last row
# of genes-table.txt whose column 2 is below the hit's first coordinate) up to
# the hit's first coordinate.
regions_before() {
  local first strand edge prev=0
  hits_on_strand "$1" | sort -nk 2 |
    while read -r _ first _ strand; do
      [[ $first =~ ^[0-9]+$ ]] || continue
      edge=$(awk -v pos="$first" '
        $2 ~ /^[0-9]+$/ && $2 + 0 < pos + 0 { e = $2 }
        END { if (e != "") print e }' genes-table.txt)
      # No CDS before this hit: skip it WITHOUT touching prev, otherwise the
      # next hit would be compared against an empty value and be lost too.
      [[ $edge =~ ^[0-9]+$ ]] || continue
      # Only print if the annotated gene end differs from the previous one,
      # so hits sharing the same neighbouring gene yield a single region.
      if ((10#$edge > 10#$prev)); then
        printf '%s\t%s\t%s\t%s\n' "$chr" "$edge" "$first" "$strand"
      fi
      prev=$edge
    done
}

# For every hit on strand $1, in descending order of column 2, print the
# region from the hit's last coordinate up to the start of the following
# annotated CDS (the first row of genes-table.txt whose column 1 is above the
# hit's last coordinate).
regions_after() {
  local last strand edge prev=99999999999
  hits_on_strand "$1" | sort -nrk 2 |
    while read -r _ _ last strand; do
      [[ $last =~ ^[0-9]+$ ]] || continue
      edge=$(awk -v pos="$last" '
        $1 ~ /^[0-9]+$/ && $1 + 0 > pos + 0 { print $1; exit }' genes-table.txt)
      # No CDS after this hit: skip it and keep prev intact (see above).
      [[ $edge =~ ^[0-9]+$ ]] || continue
      # Only print if the annotated gene start differs from the previous one.
      if ((10#$edge < 10#$prev)); then
        printf '%s\t%s\t%s\t%s\n' "$chr" "$last" "$edge" "$strand"
      fi
      prev=$edge
    done
}

# Download the feature table, build genes-table.txt and write all region files.
main() {
  local tool
  for tool in esearch efetch; do
    if ! command -v "$tool" > /dev/null; then
      err "required EDirect tool not found: $tool"
      return 1
    fi
  done
  if [[ ! -r "$bed.bed" ]]; then
    err "cannot read input file: $bed.bed"
    return 1
  fi

  # stdin is detached from esearch, as in the original invocation.
  esearch -db nucleotide -query "$chr" < /dev/null |
    efetch -format ft > "$chr.ft"
  if [[ ! -s "$chr.ft" ]]; then
    err "no feature table retrieved for $chr"
    return 1
  fi

  # Keep CDS rows whose first field is an integer. Rows with first > second
  # are written with the two coordinates swapped and labelled "minus"; the
  # others are kept as they are and labelled "plus".
  awk '
    $1 ~ /^[0-9]+$/ && $3 == "CDS" {
      if ($1 < $2) print $0 "\t" "plus"
      else if ($1 > $2) print $2 "\t" $1 "\t" $3 "\t" "minus"
    }' "$chr.ft" > genes-table.txt

  # Upstream Sequence Regions (for plus and minus-strand genes of interest)
  regions_before "+" > "up1-$bed.bed"
  regions_after "-" > "up2-$bed.bed"
  cat "up1-$bed.bed" "up2-$bed.bed" > "upstream$bed.bed"

  # Downstream Sequence Regions (for plus and minus-strand genes of interest)
  regions_before "-" > "down1-$bed.bed"
  regions_after "+" > "down2-$bed.bed"
  cat "down1-$bed.bed" "down2-$bed.bed" > "downstream$bed.bed"
}

main "$@"