#!/usr/bin/perl

=head1 NAME

heatmap.pl - Creates a heatmap of characters for each subset of gene/species

=head1 SYNOPSIS

heatmap.pl -t taxid1 taxid2 -g gene1 gene2 -d database -u username -p password -o output

=head1 DESCRIPTION

heatmap.pl takes a number of taxids and gene names, connects to iphy database
and creates a heatmap which shows the number of characters for each gene/species

Requires DBI and R with ggplot2 installed

=head1 AUTHORS

Georgios D. Koutsovoulos

G.D.Koutsovoulos@sms.ed.ac.uk

=cut

use strict;
use warnings;

use DBI;
use File::Spec;
use Getopt::Long qw(:config pass_through no_ignore_case);

# Intermediate files, created in the current working directory.
my $csv_file = 'for_R.csv';
my $r_script = 'rscript.R';

my @taxids;
my @genes;
my $database = "iphy";
my $username = "";
my $password = "";
my $output   = "heatmap.pdf";

GetOptions(
    "taxids=i{,}" => \@taxids,
    "genes=s{,}"  => \@genes,
    "database=s"  => \$database,
    "username=s"  => \$username,
    "password=s"  => \$password,
    "output=s"    => \$output,
);

# At least one taxid and one gene are required; otherwise print the usage.
# The heredoc is single-quoted so the backslash in the -g hint is printed
# literally.
unless (@taxids and @genes) {
    die <<'END_USAGE';
Usage: heatmap.pl -t taxid1 taxid2 -g gene1 gene2 -d database -u username -p password -o output
-t taxids for the species
-g gene names (if name contains spaces put \ before the space)
-d database name [default:iphy]
-u username for the database [default:none]
-p password for the database [default:none]
-o name for the heatmap file [default:heatmap.pdf]
END_USAGE
}

# Return $value formatted as one CSV field: values containing a comma, a
# double quote or a line break are wrapped in double quotes (with embedded
# quotes doubled); anything else is returned unchanged.
sub csv_field {
    my ($value) = @_;
    return $value unless $value =~ /[",\r\n]/;
    (my $quoted = $value) =~ s/"/""/g;
    return qq{"$quoted"};
}

# Return $value with backslashes and double quotes escaped so it can be
# embedded inside a double-quoted R string literal.
sub r_string {
    my ($value) = @_;
    $value =~ s/([\\"])/\\$1/g;
    return $value;
}

#Connect to the database and prepare the queries
# RaiseError makes a failing query fatal instead of letting it look like
# "no such species/gene".
my $dsn = "DBI:Pg:dbname=$database";
my $dbh = DBI->connect(
    $dsn, $username, $password,
    { RaiseError => 1, PrintError => 0, AutoCommit => 1 },
) or die "$DBI::errstr";

my $find_species_name = $dbh->prepare(
    "SELECT DISTINCT name FROM tree_node WHERE taxid=?");
my $find_gene_id = $dbh->prepare(
    "SELECT DISTINCT id FROM gene WHERE name LIKE ?");
my $find_gene_id_synonym = $dbh->prepare(
    "SELECT DISTINCT gene_id FROM gene_synonyms WHERE synonyms_string LIKE ?");
my $find_gene_name = $dbh->prepare(
    "SELECT name FROM gene WHERE id=?");
my $find_length = $dbh->prepare(
    "SELECT LENGTH(sequence) FROM consensus_sequence WHERE locus_id=? AND taxid=?");

open my $csv_fh, '>', $csv_file or die "Cannot write $csv_file: $!\n";
print {$csv_fh} "species,genes,characters\n";

# Gene name as given on the command line => [ gene id, gene name to report ].
# Each gene is resolved once, on first use, and reused for every taxid.
my %gene_for;

foreach my $taxid (@taxids) {

    #get the species name for the given taxid
    $find_species_name->execute($taxid);
    my ($species_name) = $find_species_name->fetchrow_array;
    die "Species with taxid $taxid does not exist in the database"
        unless defined $species_name;

    foreach my $gene (@genes) {
        unless ($gene_for{$gene}) {

            #first search in the gene table for the gene id
            $find_gene_id->execute($gene);
            my ($gene_id) = $find_gene_id->fetchrow_array;
            my $gene_name = $gene;

            unless (defined $gene_id) {

                #else search in the synonyms table
                $find_gene_id_synonym->execute($gene);
                ($gene_id) = $find_gene_id_synonym->fetchrow_array;
                die "$gene gene does not exist in the database"
                    unless defined $gene_id;

                #get the original gene name; keep the name given by the
                #user if the gene table has no row for this id
                $find_gene_name->execute($gene_id);
                my ($original_name) = $find_gene_name->fetchrow_array;
                $gene_name = $original_name if defined $original_name;
            }

            $gene_for{$gene} = [ $gene_id, $gene_name ];
        }
        my ($gene_id, $gene_name) = @{ $gene_for{$gene} };

        #get the length of the gene; no sequence gives an empty field
        $find_length->execute($gene_id, $taxid);
        my ($length) = $find_length->fetchrow_array;
        $length = "" unless defined $length;

        #create the output file to be used in R
        print {$csv_fh} join(',',
            csv_field($species_name), csv_field($gene_name), $length), "\n";
    }
}
close $csv_fh or die "Cannot close $csv_file: $!\n";

# Only the first row of each result set is read, so release the statement
# handles explicitly before disconnecting.
$_->finish
    for $find_species_name, $find_gene_id, $find_gene_id_synonym,
        $find_gene_name, $find_length;
$dbh->disconnect;

# Relative names are resolved against the current directory; an absolute
# -o path is used as given.
my $pdf_path      = r_string(File::Spec->rel2abs($output));
my $r_script_path = File::Spec->rel2abs($r_script);

#The script that runs in R and creates the heatmap
# theme()/element_text() are used in place of ggplot2's old
# opts()/theme_text().
open my $r_fh, '>', $r_script or die "Cannot write $r_script: $!\n";
print {$r_fh} <<"END_R";
library(ggplot2)
data<-read.csv("$csv_file",sep=",",header=T)
f=ggplot(data,aes(x=species,y=genes))
pdf("$pdf_path")
f+geom_tile(aes(fill=characters))+theme(axis.text.x=element_text(angle=90,hjust=1))+scale_fill_gradient("# of characters",low="light yellow",high="dark red")+scale_y_discrete("Genes")+scale_x_discrete("Species")
dev.off()
END_R
close $r_fh or die "Cannot close $r_script: $!\n";

# List-form system() bypasses the shell, so a working directory containing
# spaces or shell metacharacters is passed to R intact.
my $r_status     = system('R', 'CMD', 'BATCH', $r_script_path);
my $launch_error = $!;

#Comment the lines below for keeping these files
unlink($r_script);
unlink($csv_file);

if ($r_status == -1) {
    die "Could not run R: $launch_error\n";
}
elsif ($r_status != 0) {
    die "R exited with status " . ($r_status >> 8)
        . "; see rscript.Rout for details\n";
}