/*
 * splitTechnRep_confiBand -- command-line tool. The usage string printed by main lists 11
 * arguments: samOrBamFile hashFile endingHash outFile prefixBam ucsc_ref ofname
 * prefixExonCount repeating start end.
 *
 * NOTE(review): this file appears to have lost its line breaks and some of its text.
 *   - Each physical line holds many statements together with "//" line comments. As written,
 *     every "//" swallows the rest of its physical line, so the original line breaks have to
 *     be restored before this can compile.
 *   - Generic type arguments seem to be missing. gene_tab is declared as a raw Hashtable, yet
 *     it is iterated with "for (String gname : gene_tab.keySet())" and read with
 *     "UCSCGene gene = gene_tab.get(gname)", which a raw type does not permit.
 *     Presumably Hashtable<String, UCSCGene> -- confirm against the upstream file.
 *   - Inside main the text breaks off at "for(int j=0; j" and resumes with
 *     "> - 1 for index start at 0 return h; }". The body of the inner loop, the end of main
 *     (including the handler for the open "try {") and the header of the helper that returns
 *     "h" are not present. That helper is presumably toIntArray, which readHash calls; the
 *     leftover comment suggests it subtracts 1 to obtain 0-based indices -- TODO confirm.
 *     Recover the missing span from version control instead of reconstructing it.
 *   - The fields outFile1, prefixBam, obam1, obam2, readLimit and shuffle, and the locals
 *     ofname1, prefixExonCount and repeatings, are never read in the visible text;
 *     presumably they are used in the missing span.
 *
 * What the visible code does:
 *   main
 *     Prints the argument array, prints the usage string and exits with -1 unless exactly 11
 *     arguments were given, and exits with -1 (after printing the stack trace) if the SAM/BAM
 *     file does not exist. Arguments 8-10 are parsed with Integer.parseInt. The gene table is
 *     loaded with UCSCGene.makeGeneList(ucsc_ref) into the gene_tab field. The read amounts
 *     3000000, 4000000, 6000000, 7000000, 8000000, 9000000 are printed (5000000 is not in the
 *     list -- confirm that this is intentional). For every k from start to end inclusive the
 *     file hashFile + k + "." + ending must exist, otherwise FileNotFoundException
 *     ("Hash file missing.") is thrown; then readHash(k, ending) is called.
 *     The message "Readin hash matrix" is printed before any hash file is actually read.
 *   readHash(subfile, ending)
 *     Reads hashFile + subfile + "." + ending twice. The first pass counts the lines and takes
 *     the column count from the tab-separated fields of the first line; the second pass fills
 *     the hashMatrix field row by row through toIntArray. The "ending" parameter hides the
 *     static field of the same name. Both readers are closed only when no exception occurs.
 *     An empty file makes the first readLine() return null, so the following line.split(...)
 *     throws NullPointerException.
 *   exonCount(ucsc_ref, sam_file, ofname)
 *     For every gene in the gene_tab field and every interval of gene.getExonList(), counts
 *     the records returned by ibam.query(chr, start, end, true) and writes one line
 *     chr|name2|NNN<TAB>count to ofname. NNN is the exon id formatted with the "000" pattern;
 *     ids start at 0 because the id is post-incremented while formatting. The ucsc_ref
 *     parameter is not used. Reader and writer are closed only when no exception occurs.
 *     NOTE(review): the final "true" passed to query is presumably the "contained" flag of
 *     the samtools API -- confirm.
 *   exonCountDmel(ucsc_ref, sam_file, ofname)
 *     Same counting loop, with three differences: it loads its own gene table from ucsc_ref
 *     into a local that shadows the gene_tab field; it rewrites the chromosome name before
 *     querying and writing (names containing "chrom" become "Uextra", names containing "chrM"
 *     become "dmel_mitochondrion_genome", otherwise every "chr" is removed); and exon ids
 *     start at 1 because the id is incremented before it is formatted.
 *   delete(split_sam_file)
 *     Deletes the given file, then the file whose path results from replacing every
 *     occurrence of "bam" in the path with "bai" (not only the extension). The boolean
 *     results of File.delete() are ignored, so a failed deletion goes unnoticed.
 */
package TechnicalReplicates; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.Arrays; import java.util.Hashtable; import GTF_annotation.UCSCGene; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader.SortOrder; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecordIterator; public class splitTechnRep_confiBand { private static String outFile1; private static String prefixBam; private static String hashFile; private static String samOrBamFile; private static SAMFileWriter obam1; private static SAMFileWriter obam2; private static long readLimit; private static int[][] hashMatrix; private static String ending; private static SAMRecord[] shuffle; private static Hashtable gene_tab; private static NumberFormat f = new DecimalFormat("000"); /** * @param args * @throws Exception */ public static void main(String[] args) { System.out.println(Arrays.toString(args)); //parameter input if(args.length != 11){ System.out.println("java -jar program.jar samOrBamFile hashFile endingHash outFile prefixBam ucsc_ref ofname prefixExonCount repeating start end"); System.exit(-1); } samOrBamFile = args[0]; hashFile = args[1]; ending = args[2]; //check files existence try{ File f = new File(samOrBamFile); if(!f.exists()) throw new FileNotFoundException("SAM or BAM file missing."); }catch(Exception e){e.printStackTrace(); System.exit(-1);} outFile1 = args[3]; prefixBam = args[4]; // initial parameters String ucsc_ref = args[5]; String ofname1 = args[6]; String prefixExonCount = args[7]; int repeatings = Integer.parseInt(args[8]); int start = Integer.parseInt(args[9]); int end = Integer.parseInt(args[10]); try { 
System.out.println("Readin hash matrix"); //readin organism information System.out.println("Readin genome annotation"); gene_tab = UCSCGene.makeGeneList(ucsc_ref); System.out.println("Run simulation"); System.out.print("Simulating the following read amounts: "); long[] readAmount = {3000000, 4000000, 6000000, 7000000, 8000000, 9000000}; for(long l : readAmount) System.out.print(l +"\t"); System.out.println(); // ~ start repeated simulation for(int k=start; k<=end;k++){ System.out.println("hash: "+ k); File f = new File(hashFile+k+"."+ending); if(!f.exists()) throw new FileNotFoundException("Hash file missing."); readHash(k, ending); for(int j=0; j> - 1 for index start at 0 return h; } private static void readHash(int subfile, String ending) throws Exception { //count lines and columns BufferedReader readerHash = new BufferedReader(new FileReader(new File(hashFile+subfile+"."+ending))); int countLines = 1; String line = readerHash.readLine(); int countColumns = line.split("\t").length; while( (line = readerHash.readLine()) != null) countLines++; readerHash.close(); System.out.println("Lines = " + countLines + "\tcolumns = " + countColumns); hashMatrix = new int[countLines][countColumns]; //readin hash file as matrix readerHash = new BufferedReader(new FileReader(new File(hashFile+subfile+"."+ending))); line = ""; int i = 0; while( (line = readerHash.readLine()) != null) hashMatrix[i++] = toIntArray(countColumns, line.split("\t")); readerHash.close(); } private static void exonCount(String ucsc_ref, String sam_file, String ofname) throws Exception { // Find counts per exon SAMFileReader ibam = new SAMFileReader(new File(sam_file)); BufferedWriter obuf = new BufferedWriter(new FileWriter(new File(ofname))); for (String gname : gene_tab.keySet()) { UCSCGene gene = gene_tab.get(gname); int exonId =0; for (GTF_annotation.GenomeInterval uexon : gene.getExonList()) { long read_cnt = 0; String chr = gene.getChr(); SAMRecordIterator itr = ibam.query(chr, uexon.getStart(), 
uexon.getEnd(), true); while (itr.hasNext() ){ read_cnt++; itr.next(); } itr.close(); obuf.write(chr +"|"+ gene.getName2() + "|" + f.format(exonId++) + "\t" + read_cnt + "\n"); } } obuf.close(); ibam.close(); } private static void exonCountDmel(String ucsc_ref, String sam_file, String ofname) throws Exception { //readin organism information Hashtable gene_tab = UCSCGene.makeGeneList(ucsc_ref); // Find counts per exon SAMFileReader ibam = new SAMFileReader(new File(sam_file)); BufferedWriter obuf = new BufferedWriter(new FileWriter(ofname)); for (String gname : gene_tab.keySet()) { UCSCGene gene = gene_tab.get(gname); int exonId =0; for (GTF_annotation.GenomeInterval uexon : gene.getExonList()) { long read_cnt = 0; String chr = gene.getChr(); if(chr.contains("chrom")) chr = "Uextra"; else if(chr.contains("chrM")) chr = "dmel_mitochondrion_genome"; else chr = chr.replace("chr", ""); SAMRecordIterator itr = ibam.query(chr, uexon.getStart(), uexon.getEnd(), true); while ((itr.hasNext())) { read_cnt++; itr.next(); } itr.close(); exonId++; obuf.write(chr +"|"+ gene.getName2() + "|" + f.format(exonId) + "\t" + read_cnt + "\n"); } } obuf.close(); ibam.close(); } private static void delete(String split_sam_file) { File f = new File(split_sam_file); f.delete(); f = new File(split_sam_file.replace("bam", "bai")); f.delete(); f = null; } }