#!/usr/bin/env bash
#
# Interactive coverage / genotype-quality pipeline driven by zenity dialogs.
#
# Everything runs inside a directory chosen by the user:
#   1. Ask for the data directory, a minimum-coverage threshold and a
#      genotyping-quality (GQ) threshold.
#   2. For every *.bam: run `samtools depth` and `bamToBed`, merge the first
#      three columns of all *.TOT.bed into TOTALI.bed, list the positions whose
#      depth is below the coverage threshold and combine them with the
#      per-sample uncovered regions into <sample>.RECOVER.bed.
#   3. Build CoverageMatrix.csv (one row per position, one column per sample).
#   4. For every *.vcf: keep the records whose GQ (4th ':'-separated subfield
#      of column 10) reaches the threshold as <sample>.GQ.vcf, collect the
#      de-duplicated union of kept records in ALLVARIANT and add the positions
#      that failed the GQ filter to <sample>.RECOVER_ALL.bed.
#   5. Build MAP.map and PED.ped from the per-sample genotypes, after
#      subtracting the RECOVER_ALL regions.
#   6. Delete the intermediates and move the original VCFs back from
#      VCF_Original/.
#
# Requires: zenity, samtools, bedtools (bamToBed, sortBed, mergeBed,
# subtractBed, intersectBed), GNU sed (for \t in s///) and coreutils.
#
# WARNING: the cleanup steps use broad globs (*.txt, *.bed, *.vcf, *temp*,
# *.deep*, ...) inside the selected directory, so unrelated files matching
# them are deleted as well.
#
# Strict mode (set -e / pipefail) is deliberately not enabled: several steps
# are best-effort (cleanup of files that may not exist, cancelled progress
# dialogs) and must not abort the run.

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeeds when "$1" exists. Call it with an UNQUOTED glob to test whether
# the pattern matches at least one file (an unmatched glob stays literal and
# therefore does not exist).
has_match() {
  [ -e "$1" ]
}

# Succeeds when "$1" is a non-negative decimal number (digits with at most
# one dot).
is_number() {
  case $1 in
    '' | . | *[!0-9.]* | *.*.*) return 1 ;;
    *) return 0 ;;
  esac
}

# Fail early, before anything is created or deleted, if a tool is missing.
for tool in zenity samtools bamToBed sortBed mergeBed subtractBed intersectBed; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done

# ---------------------------------------------------------------------------
# User input
# ---------------------------------------------------------------------------
DIR=$(zenity --file-selection --directory --title="Select a DIRECTORY CONTAINING DATA") \
  || die "no directory selected"
# An empty DIR would make `cd` go to $HOME, where the cleanup globs below
# would then run; refuse to continue instead.
[ -n "$DIR" ] || die "no directory selected"
cd "$DIR" || die "cannot change directory to: $DIR"

THRESHOLD=$(zenity --forms --title="INDICATE THRESHOLD min COVERAGE" --text="INDICATE THRESHOLD" \
  --add-entry="THRESHOLD") || die "coverage threshold dialog cancelled"
is_number "$THRESHOLD" || die "coverage threshold must be a number, got: '$THRESHOLD'"

GQ=$(zenity --forms --title="INDICATE THRESHOLD Genotyping Quality" --text="INDICATE THRESHOLD" \
  --add-entry="THRESHOLD") || die "genotyping quality dialog cancelled"
is_number "$GQ" || die "genotyping quality threshold must be a number, got: '$GQ'"

echo "COVERAGE Analysis step starting"

# ---------------------------------------------------------------------------
# zenity progress bar dialog: coverage and depth
# ---------------------------------------------------------------------------
# Inside each ( ... ) | zenity --progress group, stdout is zenity's input:
# a bare number sets the percentage and a line starting with '#' sets the
# dialog text, so nothing else may be written to stdout there.
# NOTE(review): the quoted "press ... and go on" string after --auto-close is
# a bare positional argument, not the value of any option; it is kept exactly
# as found. Presumably it was meant as a title -- confirm.
(
  echo "20"; sleep 1
  echo "# COVERAGE and depth calculation"; sleep 1
  if has_match *.deep.txt; then
    # Existing depth files are reused rather than recomputed.
    echo "# existing .deep.txt files found, skipping samtools depth"
  else
    for file in *.bam; do
      [ -e "$file" ] || continue
      samtools depth "$file" > "${file%.bam}.deep.txt"
    done
  fi

  echo "40"; sleep 1
  echo "#under threshold--calculation "; sleep 1
  if has_match *.TOT.bed; then
    # Existing BED conversions are reused rather than recomputed.
    echo "# existing .TOT.bed files found, skipping bamToBed"
  else
    for file in *.bam; do
      [ -e "$file" ] || continue
      bamToBed -i "$file" > "${file%.bam}.TOT.bed"
    done
  fi

  # Merged intervals over the first three columns of every *.TOT.bed.
  cat -- *.TOT.bed | awk -F'\t' '{print $1"\t"$2"\t"$3}' > TOTALI.bed
  sortBed -i TOTALI.bed > PTOTALI.bed
  mergeBed -i PTOTALI.bed > TOTALI.bed

  # Positions whose depth (column 3) is below the coverage threshold.
  for file in *.deep.txt; do
    [ -e "$file" ] || continue
    awk -F"\t" -v thr="$THRESHOLD" '$3 < thr + 0 { print $1"\t"$2"\t"$2 }' "$file" \
      > "${file%.deep.txt}.underT.bed"
  done

  echo "50"; sleep 1
  echo "#not covered regions "; sleep 1
  echo "80"; sleep 1
  echo "# region in target calculation"; sleep 1

  # Parts of TOTALI.bed that the sample's own *.TOT.bed does not cover.
  for file in *.TOT.bed; do
    [ -e "$file" ] || continue
    subtractBed -a TOTALI.bed -b "$file" > "${file%.TOT.bed}.zero.bed"
  done
  # Parts of TOTALI.bed that overlap the below-threshold positions.
  for file in *.underT.bed; do
    [ -e "$file" ] || continue
    intersectBed -a TOTALI.bed -b "$file" > "${file%.underT.bed}.underTHRESHOLD.bed"
  done
  # RECOVER.bed = below-threshold regions followed by uncovered regions.
  for file in *.underTHRESHOLD.bed; do
    [ -e "$file" ] || continue
    base=${file%.underTHRESHOLD.bed}
    cat "$file" "$base.zero.bed" > "$base.RECOVER.bed"
  done
  echo "100"; sleep 1
) | zenity --progress --auto-close "press OK and go on" --text "upload" --percentage=0

# ---------------------------------------------------------------------------
# Coverage matrix
# ---------------------------------------------------------------------------
# Prefix every depth row with a "chrom-pos" key used for the joins below.
for file in *.deep.txt; do
  [ -e "$file" ] || continue
  awk -F'\t' '{print $1"-"$2"\t"$1"\t"$2"\t"$3}' "$file" > "${file%.deep.txt}.tempD"
done
cat -- *tempD > COVERAGE
# Keep the first row seen for each (key, chromosome) pair.
awk -F"\t" '!_[$1,$2]++' COVERAGE > BASES

# join(1) needs both inputs ordered on the join field with the same collation
# it uses itself, so sort on field 1 only and pin the C locale for sort and
# join alike. (A locale-collated whole-line `sort -k1` can disagree with
# join's field-1 order and misalign the matrix rows.)
LC_ALL=C sort -k1,1 BASES > tempBASES

for file in *.tempD; do
  [ -e "$file" ] || continue
  base=${file%.tempD}
  LC_ALL=C sort -k1,1 "$file" > "$base.tempD2"
  # One depth per tempBASES row; positions absent from this sample become 0.
  LC_ALL=C join -a 1 -a 2 -e "0" -o 2.4 tempBASES "$base.tempD2" > "$base.tempDID"
  # Column file: sample name as header line, then the depths.
  { printf '%s\n' "$base"; cat "$base.tempDID"; } > "$base.tempDID2"
done
rm -f -- *.tempDID
paste *.tempDID2 > MATRIX

echo "COORDINATE" > tempCO
awk -F'\t' '{print $1}' tempBASES >> tempCO
paste tempCO MATRIX | sed "s/\t/,/g" > CoverageMatrix.csv

rm -f -- *.temp
rm -f -- *.deep*
rm -f -- *.underTHRESHOLD.bed
rm -f -- *.TOT.bed
rm -f -- *.zero.bed
rm -f -- *.underT.bed

# ---------------------------------------------------------------------------
# zenity progress bar dialog: VCF filtering on genotyping quality
# ---------------------------------------------------------------------------
# Back up the original VCFs before anything removes them. This happens
# outside the progress subshell so that a failed copy can abort the whole
# script: the originals are deleted below and only restored from this backup.
mkdir -p VCF_Original || die "cannot create VCF_Original"
if has_match *.vcf; then
  cp -- *.vcf VCF_Original/ || die "cannot back up VCF files; nothing was modified"
fi

(
  echo "20"; sleep 1
  echo "# vcf files processing"; sleep 1

  for file in *.vcf; do
    [ -e "$file" ] || continue
    base=${file%.vcf}
    # Data lines only, with the VCF file name appended as an extra column.
    # The name is passed through the environment so that no character in it
    # is interpreted as sed/awk syntax.
    SAMPLE_TAG=$file awk '!/^#/ { print $0 "\t" ENVIRON["SAMPLE_TAG"] }' \
      < "$file" > "$base.tempQ"
    # 4th ':'-separated subfield of column 10.
    # NOTE(review): this presumes GQ is always the 4th FORMAT subfield --
    # confirm against the VCFs in use.
    awk -F'\t' '{print $10}' "$base.tempQ" | awk -F':' '{print $4}' > "$base.tempQ1"
    # Append it as the last column; the filters below read it as column 12.
    paste "$base.tempQ" "$base.tempQ1" > "$base.tempQ2"
    # Keep records reaching the GQ threshold, blank the helper column and
    # turn the spaces produced by awk's record rebuild into tabs.
    awk -F"\t" -v gq="$GQ" '$12 >= gq + 0 { print $0 }' "$base.tempQ2" \
      | awk '{$12=""; print}' \
      | sed "s/ /\t/g" > "$base.tempQ3"
  done

  # From here on *.vcf only matches the filtered <sample>.GQ.vcf files.
  rm -f -- *.vcf
  for file in *.tempQ3; do
    [ -e "$file" ] || continue
    cp -f -- "$file" "${file%.tempQ3}.GQ.vcf"
  done

  # Union of all kept records, first occurrence per (chrom, pos, ref, alt).
  cat -- *tempQ3 > MULTIPLE
  awk -F"\t" '!_[$1,$2,$4,$5]++' MULTIPLE > ALLVARIANT

  # Positions that failed the GQ filter.
  for file in *.tempQ2; do
    [ -e "$file" ] || continue
    awk -F"\t" -v gq="$GQ" '$12 < gq + 0 { print $1"\t"$2"\t"$2 }' "$file" \
      > "${file%.tempQ2}.GQ.bed"
  done
  # RECOVER_ALL.bed = failed-GQ positions followed by the coverage regions.
  for file in *.GQ.bed; do
    [ -e "$file" ] || continue
    base=${file%.GQ.bed}
    cat "$file" "$base.RECOVER.bed" > "$base.RECOVER_ALL.bed"
  done

  # NOTE(review): removes every *.txt in the data directory, not only files
  # created by this script -- confirm that this is intended.
  rm -f -- *.txt
) | zenity --progress --auto-close "press Ok and go on" --text "upload data" --percentage=0

# ---------------------------------------------------------------------------
# zenity progress bar dialog: per-sample genotype tables
# ---------------------------------------------------------------------------
(
  echo "40"; sleep 1
  echo "# PED creation"; sleep 1

  # Reference table: every variant of ALLVARIANT keyed "chrom-pos-ref-alt".
  FILE2=$DIR/ALLVARIANT
  awk -F'\t' '{print $1"-"$2"-"$4"-"$5}' "$FILE2" > temp1
  paste temp1 "$FILE2" > temp2
  LC_ALL=C sort -k1,1 temp2 > temp5

  for file in *.vcf; do
    [ -e "$file" ] || continue
    base=${file%.vcf}
    awk -F'\t' '{print $1"-"$2"-"$4"-"$5}' "$file" > "$base.temp0"
    paste "$base.temp0" "$file" > "$base.temp"
    # First record per key only.
    awk '!seen[$1]++' "$base.temp" > "$base.TEMPO1"
    LC_ALL=C sort -k1,1 "$base.TEMPO1" > "$base.temp6"
    # Key plus column 11 (column 10 of the VCF record, shifted by the key),
    # then only the text before the first ':'.
    awk -F'\t' '{print $1"\t"$11}' "$base.temp6" > "$base.temp6bis"
    awk -F':' '{print $1}' "$base.temp6bis" > "$base.temp6tris"
    LC_ALL=C sort -k1,1 "$base.temp6tris" > "$base.temp6tris2"
    # One row per variant of temp5; variants missing in this sample get NA.
    LC_ALL=C join -a 1 -a 2 -e "NA" -o 1.1,2.2 temp5 "$base.temp6tris2" > "$base.ID"
    # Split the key on '-' and lay the row out BED-like (chrom, pos, pos, ...).
    sed 's/-/\t/g' "$base.ID" > "$base.ID.bed"
    awk -F'\t' '{print $1"\t"$2"\t"$2"\t"$3"\t"$4"\t"$5}' "$base.ID.bed" > "$base.ID2.bed"
  done
) | zenity --progress --auto-close "press OK and go on" --text "upload data" --percentage=0

# ---------------------------------------------------------------------------
# zenity progress bar dialog: drop genotypes inside the RECOVER_ALL regions
# ---------------------------------------------------------------------------
(
  echo "60"; sleep 1
  echo "# genotypes removal"; sleep 1
  for file in *.GQ.vcf; do
    [ -e "$file" ] || continue
    # Remaining NA values are rewritten to AA.
    subtractBed -a "${file%.vcf}.ID2.bed" -b "${file%.GQ.vcf}.RECOVER_ALL.bed" \
      | sed 's/NA/AA/g' > "${file%.vcf}.ID3.bed"
  done
) | zenity --progress --auto-close "press ok and go on" --text "upload data" --percentage=0

# ---------------------------------------------------------------------------
# zenity progress bar dialog: genotype matrix, one row per sample
# ---------------------------------------------------------------------------
(
  echo "80"; sleep 1
  echo "# matrix transposition"; sleep 1
  for file in *.vcf; do
    [ -e "$file" ] || continue
    base=${file%.vcf}
    awk -F'\t' '{print $1"-"$2"-"$4"-"$5"\t"$6}' "$base.ID3.bed" > "$base.ID4.bed"
    # One genotype per temp5 row; variants removed above become NA.
    LC_ALL=C join -a 1 -a 2 -e "NA" -o 2.2 temp5 "$base.ID4.bed" > "$base.ID5"
    # Column file: the ID5 file name as header line, then the genotypes.
    { printf '%s\n' "$base.ID5"; cat "$base.ID5"; } > "$base.tempOUT"
  done
  paste *.tempOUT > PEDIGREE
  # Transpose: output row i is the space-joined i-th field of every input row.
  awk '
    {
      for (i = 1; i <= NF; i++) {
        if (NR == 1) {
          s[i] = $i;
        } else {
          s[i] = s[i] " " $i;
        }
      }
    }
    END {
      for (i = 1; s[i] != ""; i++) {
        print s[i];
      }
    }' PEDIGREE > tempPED
) | zenity --progress --auto-close "press ok and go on" --text "upload data" --percentage=0

# ---------------------------------------------------------------------------
# MAP.map / PED.ped
# ---------------------------------------------------------------------------
# MAP.map columns: chromosome (without "chr"), variant key, 0, position.
awk -F'\t' '{print $1}' temp5 > tempMAP
awk -F'-' '{print $1}' tempMAP | sed "s/chr//g" > tempMAP2
awk -F'-' '{print $2}' tempMAP > tempMAP3
paste tempMAP2 tempMAP tempMAP3 | sed "s/ /\t/g" > tempMAP4
awk -F'\t' '{print $1"\t"$2"\t""0""\t"$3}' tempMAP4 > MAP.map

# Recode genotypes into A/B allele pairs. The expressions run in this order
# on every line, exactly like a chain of separate sed processes.
# NOTE(review): the substitutions apply to the whole line, including the
# leading sample-name field, so a sample name containing NA, AA or BA is
# altered too -- confirm whether that can occur.
sed -e 's/\//@/g' \
    -e 's/0@1/A B/g' \
    -e 's/1@1/B B/g' \
    -e 's/NA/0 0/g' \
    -e 's/1@2/A B/g' \
    -e 's/AA/A A/g' \
    -e 's/ 1 / A B /g' \
    -e 's/BA/A B/g' \
    -e 's/BA/A B/g' \
    -e 's/ /\t/g' tempPED > temPED2

# PED.ped: sample name twice, four 0 columns, then the genotypes (field 7,
# the original name column, is dropped by cut).
# NOTE(review): the final sed replaces a space with a space; presumably one
# side was a tab before the file's whitespace was mangled -- confirm.
awk -F'\t' '{print $1"\t"$1"\t""0""\t""0""\t""0""\t""0""\t"$0}' temPED2 \
  | cut -f1-6,8- \
  | sed 's/\.GQ\.ID5//g' \
  | sed "s/ / /g" > PED.ped

# "prova" is never read or deleted by this script; it is still written so the
# set of files left in the directory is unchanged.
# NOTE(review): looks like a debugging leftover -- confirm it can be dropped.
awk -F'\t' '{print $1"\t"$1"\t""0""\t""0""\t""0""\t""0""\t"$0}' temPED2 > prova

# ---------------------------------------------------------------------------
# zenity progress bar dialog: done
# ---------------------------------------------------------------------------
(
  echo "100"; sleep 1
  echo "# analisi terminata"; sleep 3
) | zenity --progress --auto-close "press OK and go on" --text "upload data" --percentage=0

# Cleanup runs outside the progress subshell so it happens no matter how the
# dialog was closed.
# NOTE(review): these globs also match files this script did not create.
rm -f -- *temp*
rm -f -- *temPED2*
rm -f -- *.ID*
rm -f -- *.TEMPO*
rm -f -- *.bed
rm -f -- *.vcf
rm -f -- PEDIGREE BASES MULTIPLE ALLVARIANT MATRIX COVERAGE

# Put the untouched original VCFs back next to the results.
mv -f -- "$DIR"/VCF_Original/*.vcf "$DIR"/

zenity --info --title="analysis consensus" --window-icon=/dati/server/Software/appGEN.png --text=" are you happy? have a nice day gentilini.davide@gmail.com" --ok-label="close me"