# Collection of SLURM batch scripts, one per transposable-element (TE)
# insertion caller. Every section below carries its own shebang and #SBATCH
# header; sbatch only honours #SBATCH lines that precede the first executable
# command of the submitted file, so each section has to be copied into its own
# file before submission. Relative paths are resolved against the directory the
# job is started from.

#********************Mc_clintock********************
#!/bin/bash -x
#SBATCH --job-name=Mcclintock
#SBATCH --ntasks=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o mcclintock%a_%A.out
#SBATCH -e mcclintock%a_%A.err
#SBATCH --partition=general
#SBATCH --get-user-env=PWD

#source /opt/Modules/3.2.9/init/Modules4bash.sh
module load conda
source activate /scratch/075-melo-TEmovement/ENVIRONMENTS/conda_mc_clintock

# The inputs and the timing log live in the start directory, while McClintock
# itself is launched from its parent. Anchor the log to an absolute path so the
# "endtime" stamp written after `cd ..` lands in the same file as the start one.
run_dir=$(pwd)
runtime_log="$run_dir/logs/temp_time.runtimes"
mkdir -p "$run_dir/logs"

echo 'start_retroseq' >> "$runtime_log"
/bin/date >> "$runtime_log"

#TE-locate TEMP retroseq PoPoolationTE NGStemapper
cd .. || exit 1
bash /scratch/075-melo-TEmovement/ENVIRONMENTS/conda_mc_clintock/mcclintock-master/mcclintock.sh \
  -m "temp telocate retroseq popoolationte ngstemapper" \
  -r "$run_dir/sacCer2.fasta" \
  -c "$run_dir/sac_cer_TE_seqs.fasta" \
  -g "$run_dir/reference_TE_locations.gff" \
  -t "$run_dir/sac_cer_te_families.tsv" \
  -1 /scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/20x/20x_1.fastq \
  -2 /scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/20x/20x_2.fastq \
  -p 8 -M 64

echo 'endtime' >> "$runtime_log"
/bin/date >> "$runtime_log"

#********************PopoolationTE2********************
#!/bin/bash -x
#SBATCH --job-name=popoolationTE2
#SBATCH --ntasks=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o popte2%a_%A.out
#SBATCH -e popte2%a_%A.err
#SBATCH --get-user-env=PWD

module load wublast
module load hmmer
module load trf
module load repeatmasker
module load bwa
module load samtools

popte2_reads=../../../RAW_READS/MH63/illumina-reads
popte2_ref=/scratch/075-melo-TEmovement/ENVIRONMENTS/conda_popte2/LTR/LTR.temergedref.fasta
popte2_hier=/scratch/075-melo-TEmovement/ENVIRONMENTS/conda_popte2/LTR/ltr.tehier

bwa index LTR.temergedref.fasta

# Map the two mates independently and in parallel. The second alignment must
# use the reads_2 file: se2pe below pairs 5x_2.sam with reads_2_5x.fq.
bwa bwasw -t 3 LTR.temergedref.fasta "$popte2_reads/reads_1_5x.fq" > 5x_1.sam &
bwa bwasw -t 3 LTR.temergedref.fasta "$popte2_reads/reads_2_5x.fq" > 5x_2.sam
# Do not start se2pe until the backgrounded mate-1 alignment has finished.
wait

java -jar ../popte2.jar se2pe \
  --fastq1 "$popte2_reads/reads_1_5x.fq" \
  --fastq2 "$popte2_reads/reads_2_5x.fq" \
  --bam1 5x_1.sam --bam2 5x_2.sam \
  --sort --output 5x_LTR.sort.bam

mkdir -p 5x

# NOTE(review): nothing in this section creates
# 5x_LTR_different_coverage.ppileup.gz from 5x_LTR.sort.bam; presumably a
# `popte2.jar ppileup` run is done separately -- confirm before submitting.
java -jar ../popte2.jar identifySignatures \
  --ppileup 5x_LTR_different_coverage.ppileup.gz \
  --mode separate \
  --output 5x/5x_different_coverage.signatures \
  --min-count 3

# Read back the signature file written by identifySignatures just above.
java -jar ../popte2.jar frequency \
  --ppileup 5x_LTR_different_coverage.ppileup.gz \
  --signature 5x/5x_different_coverage.signatures \
  --output 5x/5x_different_coverage.freqsig

java -jar ../popte2.jar pairupSignatures \
  --signature 5x/5x_different_coverage.freqsig \
  --ref-genome "$popte2_ref" \
  --hier "$popte2_hier" \
  --min-distance -200 --max-distance 300 \
  --output 5x/5x_different_coverage.teinsertions

# Same pairing step again, this time on the filtered signatures; all filtered
# results are kept together under 5x/filter.
mkdir -p 5x/filter
java -jar ../popte2.jar filterSignatures \
  --input 5x/5x_different_coverage.freqsig \
  --output 5x/filter/5x_different_coverage.freqsig \
  --max-otherte-count 2 --max-structvar-count 2

java -jar ../popte2.jar pairupSignatures \
  --signature 5x/filter/5x_different_coverage.freqsig \
  --ref-genome "$popte2_ref" \
  --hier "$popte2_hier" \
  --min-distance -200 --max-distance 300 \
  --output 5x/filter/5x_different_coverage.teinsertions

#********************Relocate2********************
#!/bin/bash -x
#SBATCH --ntasks=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH --output=relocate2.output
#SBATCH --job-name="relocate2"
#SBATCH --partition=general

#source /opt/Modules/3.2.9/init/Modules4bash.sh
#source /opt/Modules/3.2.9/init/Modules4bash.sh
module load conda
source activate /scratch/075-melo-TEmovement/ENVIRONMENTS/conda_relocate2

#TE-locate TEMP retroseq PoPoolationTE
# NOTE(review): -c 16 looks like a CPU count, which would exceed the 8 CPUs
# requested in the header -- confirm against the relocaTE2 usage text.
python ../scripts/relocaTE2.py -c 16 -s 273 \
  --te_fasta sac_cer_TE_seqs.fasta \
  --genome_fasta sacCer2.fasta \
  --fq_dir reads/40x/ \
  --outdir LTR_output/40x/ \
  --reference_ins sacCer2.fasta.ori.out \
  --run

#********************Teflon********************
#!/bin/bash
#SBATCH --job-name=teflon_10x
#SBATCH --ntasks=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o run_time_teflon.%j.%N.out
#SBATCH -e run_time_teflon.%j.%N.err
#SBATCH --get-user-env=PWD

module load samtools
module load bwa
module load conda
module load trf
module load hmmer
module load wublast
module load repeatmasker

mkdir -p logs
source activate /scratch/075-melo-TEmovement/ENVIRONMENTS/conda_jitterbug_old

# The helper scripts and the prova/ inputs are addressed relatively below, so
# this section is meant to run from inside the TEFLoN checkout.
teflon_dir=/scratch/075-melo-TEmovement/ENVIRONMENTS/conda_jitterbug_old/TEFLON/TEFLoN-master
teflon_work="$teflon_dir/10xmites"
teflon_prep="$teflon_dir/mites.prep_TF/"
teflon_samples="$teflon_dir/prova/sample_file_mites_10x.txt"
teflon_mapref="$teflon_dir/mites.prep_MP/mites.mappingRef.fa"
teflon_reads=/scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/samples/10x
teflon_bwa=/opt/bwa/bwa
teflon_samtools=/opt/samtools/samtools-1.3/bin/samtools
teflon_log=logs/teflon_time.runtimes

python teflon_prep_annotation.py \
  -a prova/sample_reference_TE_locations.bed \
  -t prova/sample_TE_hierarchy.txt \
  -f prova/sample_reference_TE_annotation.fasta \
  -g prova/sample_reference_genome.fasta \
  -p mites

# NOTE(review): this second prep uses the prefix "mites_prova", while every
# later step reads the "mites.prep_*" directories -- its output does not appear
# to be consumed in this section; confirm it is still wanted.
python teflon_prep_custom.py \
  -e /opt/RepeatMasker/RepeatMasker-4.0.5/RepeatMasker \
  -g prova/sample_reference_genome.fasta \
  -l "$teflon_dir/prova/sample_reference_TE_annotation.fasta" \
  -p mites_prova

echo 'bwa_mem' >> "$teflon_log"
/bin/date >> "$teflon_log"

bwa index "$teflon_mapref"
bwa mem -t 4 -Y "$teflon_mapref" \
  "$teflon_reads/10x_1.fastq" \
  "$teflon_reads/10x_2.fastq" \
  > prova/10x_sample.sam
samtools view -Sb prova/10x_sample.sam \
  | samtools sort -@ 4 -o prova/10x_sample.sorted.bam
rm prova/10x_sample.sam
samtools index prova/10x_sample.sorted.bam

echo 'teflon_start' >> "$teflon_log"
/bin/date >> "$teflon_log"

python "$teflon_dir/teflon.v0.4.py" \
  -w "$teflon_work" -d "$teflon_prep" -s "$teflon_samples" \
  -i 10x_sample \
  -eb "$teflon_bwa" -es "$teflon_samtools" \
  -l1 family -l2 family -q 10 -sd 2488 -t 8

python teflon_collapse.py \
  -w "$teflon_work" -d "$teflon_prep" -s "$teflon_samples" \
  -es "$teflon_samtools" \
  -n1 2 -n2 2 -q 10 -t 8

python teflon_count.py \
  -w "$teflon_work" -d "$teflon_prep" -s "$teflon_samples" \
  -es "$teflon_samtools" \
  -i 10x_sample \
  -eb "$teflon_bwa" \
  -l2 family -q 10 -t 8

python teflon_genotype.py \
  -w "$teflon_work" -d "$teflon_prep" -s "$teflon_samples" \
  -dt pooled

echo 'teflon_end' >> "$teflon_log"
/bin/date >> "$teflon_log"

#********************Jitterbug********************
#!/bin/bash
#SBATCH --job-name=jitterbug
#SBATCH --ntasks=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o jitterbug.%j.%N.out
#SBATCH -e jitterbug.%j.%N.err
#SBATCH --get-user-env=PWD

module load bwa
module load samtools
module load conda

jb_reads=/scratch/075-melo-TEmovement/RAW_READS/MH63/illumina-reads
jb_ref=/scratch/075-melo-TEmovement/RAW_READS/index_files/Nipponbare_reference.fasta
jb_home=/scratch/075-melo-TEmovement/ENVIRONMENTS/conda_jitterbug_old/jitterbug
jb_log=logs/jitterbug_time.runtimes

mkdir -p logs
echo 'bwa_aln' >> "$jb_log"
/bin/date >> "$jb_log"

# Align both mates concurrently.
# NOTE(review): 2 x 12 threads are requested here against 8 allocated CPUs.
bwa aln -t 12 -n 4 -o 1 -e 3 \
  -f "$jb_reads/final_renamed1_10x.fq.sai" \
  "$jb_ref" "$jb_reads/final_renamed1_10x.fq" &
bwa aln -t 12 -n 4 -o 1 -e 3 \
  -f "$jb_reads/final_renamed2_10x.fq.sai" \
  "$jb_ref" "$jb_reads/final_renamed2_10x.fq"
# bwa sampe needs both .sai files complete; wait for the backgrounded mate 1.
wait

bwa sampe "$jb_ref" \
  "$jb_reads/final_renamed1_10x.fq.sai" \
  "$jb_reads/final_renamed2_10x.fq.sai" \
  "$jb_reads/final_renamed1_10x.fq" \
  "$jb_reads/final_renamed2_10x.fq" \
  > 10x_MH63.sam
samtools view -bS 10x_MH63.sam > 10x_MH63.bam
samtools sort 10x_MH63.bam > 10x_MH63_sorted.bam
samtools index 10x_MH63_sorted.bam
rm 10x_MH63.sam
rm 10x_MH63.bam

source activate /scratch/075-melo-TEmovement/ENVIRONMENTS/conda_jitterbug_old

echo 'jitterbug' >> "$jb_log"
/bin/date >> "$jb_log"

python "$jb_home/jitterbug.py" \
  --psorted 10x_MH63_sorted.bam \
  -t /scratch/075-melo-TEmovement/BENCHMARK/rice_annotations/Nip_MITE_filtered_orthology.gff3 \
  -l LTRJB_10x -n 6 -o LTRJB_10x

python "$jb_home/jitterbug_filter_results_func.py" \
  -g LTRJB_10x.TE_insertions_paired_clusters.gff3 \
  -c LTRJB_10x.filter_config.txt \
  -o LTRJB_10x.TE_insertions_paired_clusters-QFILTERED.gff3 \
  > logs/LTRJB_10x.jbfilterstats.txt

echo 'end_time' >> "$jb_log"
/bin/date >> "$jb_log"

#********************ITIS********************
#!/bin/bash -x
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o itis.%j.%N.out
#SBATCH -e itis.%j.%N.err
#SBATCH --job-name="5xmitesItis"
#SBATCH --partition=general

module load conda
source activate /scratch/075-melo-TEmovement/ENVIRONMENTS/tools_itis/
module load R

perl itis.pl \
  -g MITES/IRGSP-1.0_genome.fasta \
  -t MITES/mite_TE.fasta \
  -l 100 -N 5xmites \
  -1 /scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/5x/final_renamed1_5x.fq \
  -2 /scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/5x/final_renamed2_5x.fq \
  -e Y

#********************MELT********************
#! /bin/bash +x
#SBATCH --job-name=MELT
#SBATCH --ntasks=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o melt.PB.slurm.out
#SBATCH -e melt.PB.slurm.err
#SBATCH --get-user-env=PWD

module load bowtie/2.3.3
module load conda
source activate /scratch/075-melo-TEmovement/BENCHMARK/melt

GENOME_PATH='/scratch/075-melo-TEmovement/BENCHMARK/genomes/Nip.fna'
# Not passed to MELT below; kept as a record of where the index lives.
FASTA_INDEX='/scratch/075-melo-TEmovement/BENCHMARK/genomes/Nip.fna.fai'
BAM_FILE=/scratch/075-melo-TEmovement/RAW_READS/alignments/MH63_vs_Nip_5X_sorted.bam
MELT_EXECUTABLE='/scratch/075-melo-TEmovement/BENCHMARK/melt_executable/MELT.jar'
#MEI_LIST=/scratch/075-melo-TEmovement/BENCHMARK/CONSENSUS/NipMEI/Nip.mei.list5
MEI_LIST=/scratch/075-melo-TEmovement/BENCHMARK/CONSENSUS/NipMEI/Nip.mei.list1
GENE_ANNOTATION=/scratch/075-melo-TEmovement/BENCHMARK/00_RUNS/TEST_RUN_FABIO/melt_firstrun/files/the_gene_thing/Nip.genes.bed

# -f keeps a re-run in the same directory from failing on the existing link.
ln -sf "$MELT_EXECUTABLE"

# NOTE(review): confirm the meaning of "-c 8" against the MELT Single usage
# text before reusing this for other coverages.
java -jar MELT.jar Single \
  -a \
  -c 8 \
  -h "$GENOME_PATH" \
  -bamfile "$BAM_FILE" \
  -n "$GENE_ANNOTATION" \
  -t "$MEI_LIST" \
  -w results_5x_new

#********************TRACKPOSON********************
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --mem=64G
#SBATCH --cpus-per-task=8
#SBATCH -o trackposon.out
#SBATCH -e trackposon.err
#SBATCH --job-name="Trackposon"

#module load conda;
#source activate bioperl;
module load bowtie
module load samtools
module load perl
module load blast
# NOTE(review): bedtools is called below but no module is loaded for it here;
# presumably it is already on PATH -- confirm.

# NOTE(review): the output label says 40X while the reads below are the 5x set.
file='MH63_40X'
DB='Oryza1fam66_259'
DIR='./'
fq1=/scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/5x/final_renamed1_5x.fq
fq2=/scratch/075-melo-TEmovement/ENVIRONMENTS/t-lex/5x/final_renamed2_5x.fq

out=$file
# TE family name = part of the index name before the first underscore.
te=${DB%%_*}
# Common stem of every intermediate file produced below.
prefix="${out}-vs-${te}"

cd "$DIR" || exit 1

#Build a index with the TE reference fasta
bowtie2-build "$DB.fa" "$DB"

#Mapping reads against TE index
bowtie2 --time --end-to-end -k 1 --very-fast -p 6 -x "$DB" \
  -1 "$fq1" -2 "$fq2" \
  | samtools view -bS -@ 2 - > "$prefix.bam"

#Keep unmap reads - flag unmap/map
samtools view "$prefix.bam" \
  | awk -F "\t" '{if ( ($1!~/^@/) && (($2==69) || ($2==133) || ($2==165) || ($2==181) || ($2==101) || ($2==117)) ) {print ">"$1"\n"$10}}' \
  > "$prefix.fa"

#blast fa against IRGSP1.0 for identification of TE insertion point
blastn -db IRGSP-1.0_genome.fasta -query "$prefix.fa" -out "$prefix.fa.bl" \
  -num_threads 8 -evalue 1e-20

#parse blast to find insertion point in unique region
perl find_insertion_point.pl "$prefix.fa.bl" "$prefix"

#sort bed
sort -k1,1 -k2,2n "$prefix.bed" > "$prefix.sort.bed"

#coveragebed
bedtools coverage -counts -nonamecheck \
  -a IRGSP_500bp.bed -b "$prefix.sort.bed" \
  | awk -F "\t" '{if ($4>=2){print $0}}' > "coveragebed_${prefix}_per500pb.bed"
# Alternative 10 kb window variant (disabled):
#   -a IRGSP-1.0_10kbpwindows.bed -b $out-vs-$te.sort.bed
#   | awk -F "\t" '{if ($4>=2){print $0}}' > coveragebed_$out-vs-$te\_per10kb.bed

#cleaning temporary files
#rm $out-vs-$te.bam
#rm $out-vs-$te.fa*
#rm $out-vs-$te.bed