#Fotiou_Process.pl
#
# Joins every tab-separated *.txt table in the raw-data directory against the
# annotation table Fotiou_Data_1.csv, keyed on the first column of the .txt
# rows and the second column of the CSV rows.
#
# For each input <name>.txt three files are written to the output directory:
#   <name>.csv          one row per first-column value that has an annotation
#                       entry: the value, its columns (tabs turned into
#                       commas), then the annotation columns
#   Hap-Par_<name>.csv  any further distinct column sets seen for such a
#                       value, beyond the first in sorted order
#   MISSING_<name>.csv  first-column values starting with "ENSG" that have no
#                       annotation entry
use strict;
use warnings;
use Cwd qw(getcwd);

my $raw_dir = "/Fotiou_complete/Fotiou_Raw_Data";
my $out_dir = "/Fotiou_complete/Fotiou_Output";

# Every path used below is absolute, so a failed chdir is reported but is not
# treated as fatal.
chdir "/PATH-TO-DIRECTORY"
    or warn "Couldn't chdir to /PATH-TO-DIRECTORY: $!";

# ---------------------------------------------------------------------------
# Load the annotation table: %data{<field 2>}{"<fields 2..9 joined by ','>"}.
# Rows that do not match the pattern are skipped silently.
# NOTE(review): the last field is matched with [^.]* (anything but a dot), not
# [^,]*, so it may itself contain commas and any row with a dot after the
# eighth comma is skipped. This may be a typo for [^,]; left unchanged --
# confirm against the data before altering it.
# ---------------------------------------------------------------------------
my %data;
my $file = "$raw_dir/Fotiou_Data_1.csv";    #HGNC Gene names

open my $in, "<", $file or die "Couldn't open $file: $!";
while (my $row = <$in>) {
    chomp $row;
    if ($row =~ /^([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^.]*)$/) {
        $data{$2}{"$2,$3,$4,$5,$6,$7,$8,$9"} = "";
    }
}
close $in or die "Couldn't close $file: $!";

# ---------------------------------------------------------------------------
# Process each raw .txt table.
# ---------------------------------------------------------------------------
foreach my $file (glob("$raw_dir/*.txt")) {

    # Output base name: everything after the raw-data directory up to the
    # first dot.
    my $name;
    if ($file =~ m{/Fotiou_complete/Fotiou_Raw_Data/([^.]*)}i) {
        $name = $1;
        print "$name\n";
    }
    else {
        warn "Skipping unexpected path $file\n";
        next;
    }

    # %eliza{<first column>}{<remaining columns as CSV>}; used as a set so
    # duplicate rows collapse.
    my %eliza;

    # Header text for the output file. The last line that does not start with
    # "ENSG" wins. Starts empty so a file without such a line gives an empty
    # header rather than "uninitialized value" warnings.
    my $title = "";

    open my $fh, "<", $file or die "Couldn't open $file: $!";
    while (my $row = <$fh>) {
        chomp $row;

        if ($row !~ /^ENSG/) {
            $title = $row;
            $title .= "\t" if $title !~ /\t$/;
            $title =~ s/\t/,/g;
        }

        # Split at the first tab: $gene is the text before it, $info is the
        # rest including that leading tab. This pattern matches any string.
        my ($gene, $info) = $row =~ /^([^\t]*)(.*)$/s;

        $info .= "\t" if $info !~ /\t$/;
        $info =~ s/,/|/g;     # protect literal commas before tabs become commas
        $info =~ s/\t/,/g;

        $eliza{$gene}{$info} = "";
    }
    close $fh or die "Couldn't close $file: $!";

    my $out_path     = "$out_dir/$name.csv";
    my $missing_path = "$out_dir/MISSING_$name.csv";
    my $extra_path   = "$out_dir/Hap-Par_$name.csv";

    open my $out,  ">", $out_path     or die "Couldn't open $out_path: $!";
    open my $out2, ">", $missing_path or die "Couldn't open $missing_path: $!";
    open my $out3, ">", $extra_path   or die "Couldn't open $extra_path: $!";

    print $out $title;
    print $out "Haplosufficiency.rank,FFS,Branch.length,Paralog.status,SSD.Partners,Strict.Partners,Intermediate.Partners,Relaxed.Partners\n";

    foreach my $gene (sort keys %eliza) {
        if (!exists $data{$gene}) {
            if ($gene =~ /^ENSG/) {
                print $out2 "$gene\n";    #No associated gene found
            }
            next;
        }

        # Every gene in %eliza has at least one column set, and every column
        # set ends in a comma, so $first is always defined and non-empty.
        my ($first, @extra) = sort keys %{ $eliza{$gene} };

        # The first column set goes to the main file, padded with one extra
        # comma when it holds fewer than six commas.
        print $out $gene, $first;
        print $out "," if ($first =~ tr/,//) < 6;

        # Remaining column sets for the same gene go to the Hap-Par file.
        foreach my $k (@extra) {
            print $out3 $gene, $k;
            print $out3 "," if ($k =~ tr/,//) < 6;
            print $out3 "\n";
        }

        # Annotation columns complete the main-file row; each annotation
        # entry ends a line.
        foreach my $entry (sort keys %{ $data{$gene} }) {
            print $out "$entry\n";
        }
    }

    # Buffered write errors only surface at close, so check each handle.
    close $out  or die "Couldn't close $out_path: $!";
    close $out2 or die "Couldn't close $missing_path: $!";
    close $out3 or die "Couldn't close $extra_path: $!";
}

exit;