# #COGRIM R program #@2006 #version 1.0 ## Software can be freely used for non-commercial purposes and freely distributed # # the sample data input files (all tab delimited) are as listed. # Please mail all comments/questions to ggchen@pcbi.upenn.edu # ########### #pmat.txt (specify TFs and genes, all values are equal to 1) ########## #prior.txt (specify the prior1) ############ #prior2.txt (specify the prior2, same format as prior1) ########### #exp_regulators.txt (expression profile of TFs) ############ #exp_genes.txt (expression profile of genes) ##################################################################### #The convergence is monitored by itsim.R #(Written 1 July 1991 by Andrew Gelman, Dept. of Statistics, Columbia Univ. ###################################################################### ######## # read & process input data as formatted, please specify your own input data files # ######## b<-read.table(file="pmat.txt", , sep="\t", as.is=T) hm<-read.table(file="prior.txt", , sep="\t", as.is=T) hm2<-read.table(file="prior2.txt", , sep="\t", as.is=T) f<-read.table(file="exp_regulators.txt",sep="\t", as.is=T) g<-read.table(file="exp_genes.txt", sep="\t", as.is=T) num_gene <-dim(b)[1]-2 num_tf <-dim(f)[1]-1 num_cond <- dim(f)[2]-4 f_tmp<-c(0) b_tmp<-c(0) bx<-array(0, c(num_gene,num_tf,num_cond)) x<-array(0, c(num_gene,num_tf,num_cond)) for(i in 1:num_gene){ for (j in 1:num_tf){ b_tmp<-as.numeric(b[i+2,j+2]) for (t in 1:num_cond){ f_tmp<-as.numeric(f[j+1,t+4]) bx[i,j,t]=f_tmp*b_tmp } } } gene<-array(0, c(num_gene, num_cond)) for(i in 1:num_gene){ for(t in 1:num_cond){ tmp<-as.numeric(g[i+1,t+4]) gene[i,t]<-tmp } } rm(tmp) phm<-matrix(0,num_gene,num_tf) bval<-matrix(0,num_gene,num_tf) mval<-matrix(0,num_gene,num_tf) for(i in 1:num_gene){ for(j in 1:num_tf){ bval[i,j]<-as.numeric(hm[i+2,j+2]) mval[i,j]<-as.numeric(hm2[i+2,j+2]) phm[i,j]<-sqrt(bval[i,j]*mval[i,j]) } } ######### # specify start chain # hm_cut<-0.7 var_start<-c(0) A_start<-rbind(rep(c(0.2),num_gene)) B_start<-rbind(rep(c(0.2),num_tf)) C_start<-matrix(0,num_gene,num_tf) w_start<-rbind(rep(c(0.5),num_tf)) for( i in 1:num_gene){ for (j in 1:num_tf){ if(phm[i,j]>=hm_cut){C_start[i,j]<-1} else {C_start[i,j]<-0} }} num_start<-length(var_start) num_iteration=5 x_var<-matrix(0,num_iteration,num_start) x_A<-array(0,c(num_iteration,num_start,num_gene)) x_B<-array(0,c(num_iteration,num_start,num_tf)) x_w<-array(0,c(num_iteration,num_start,num_tf)) for (st in 1:num_start){ A<-A_start[st,] B<-B_start[st,] var<-var_start[st] C<-C_start w<-w_start[st,] ######## # start the gibbs sampling # for(i in 1:num_gene){ for(j in 1:num_tf){ for (t in 1:num_cond){ x[i,j,t]<-C[i,j]*bx[i,j,t] } } } for (it in 1:num_iteration){ cat(paste("start", st, "of", num_start, "...iteration", it, "of", num_iteration, "please wait...\n")) var<-var_sampling(A,B) A<-alpha_sampling(var,B) for (j in 1:num_tf){ B[j]<-beta_sampling(j,var,A,B) } for (j in 1:num_tf){ # w[j]<-0.5 w[j]<-w_sampling(j,C,bval,mval) C[,j]<-pc_sampling(j,var,A,B,bx) ### update xijt for(i in 1:num_gene){ for(j in 1:num_tf){ for (t in 1:num_cond){ x[i,j,t]<-C[i,j]*bx[i,j,t] } } } } x_var[it,st]<-var x_A[it,st,]<-A x_B[it,st,]<-B x_w[it,st,]<-w cat(paste("end iteration", it, "\n")) } } # call itsim.R #con_var<-monitor(x_var) #con_A<-monitor(x_A) #con_B<-monitor(x_B) #con_w<-monitor(x_w) ### ## testObject <- function(object) { exists(as.character(substitute(object))) } #### sampling weight w # w_sampling<-function(j,C,bval,mval){ tfj<-j bdx<-c(0) seqx<-c(0) for(i in 1:num_gene){ bdx<-bdx+C[i,tfj]*log(bval[i,tfj])+(1-C[i,tfj])*log(1-bval[i,tfj]) seqx<-seqx+C[i,tfj]*log(mval[i,tfj])+(1-C[i,tfj])*log(1-mval[i,tfj]) } bin<-c(100) pz_log<-rep(c(0),bin) pz_norm<-rep(c(0),bin) pz_culm<-rep(c(0),bin) for(i in 1:bin){ z<-i/bin pz_log[i]<-z*bdx+(1-z)*seqx } sum_pz<-sum(exp(pz_log[])) pz_norm[1]<-exp(pz_log[2])/sum_pz pz_culm[1]<-pz_norm[1] for(i in 2:bin){ pz_norm[i]<-exp(pz_log[i])/sum_pz pz_culm[i]<-pz_culm[i-1]+pz_norm[i] } u<-runif(1,0,1) for(i in 2:bin){ if(u>pz_culm[i-1] && u<=pz_culm[i]){ w[tfj]<-i/bin }} return(w[tfj]) } #### sampling C # pc_sampling<-function(j,var,A,B,bx){ tfj<-j if(testObject(tmp0)){rm(tmp0)} if(testObject(tmp1)){rm(tmp1)} tmp <-matrix(0, num_gene, num_cond) tmp0 <-matrix(0, num_gene, num_cond) F0<-rep(c(0),num_gene) FF0<-rep(c(0),num_gene) tmp1 <-matrix(0, num_gene, num_cond) F1<-rep(c(0),num_gene) FF1<-rep(c(0),num_gene) pc0<-rep(c(0),num_gene) pc1<-rep(c(0),num_gene) for( i in 1:num_gene){ for(t in 1:num_cond){ for(j in 1:num_tf){ tmp[i,t]<-tmp[i,t]+B[j]*x[i,j,t] } tmp0[i,t]<-tmp[i,t]-B[tfj]*x[i,tfj,t] tmp1[i,t]<-tmp[i,t]-B[tfj]*x[i,tfj,t]+B[tfj]*bx[i,tfj,t] } } for( i in 1:num_gene){ F0[i]<-0 F1[i]<-0 for(t in 1:num_cond){ F0[i]=F0[i]+(gene[i,t]-A[i]-tmp0[i,t])^2 F1[i]=F1[i]+(gene[i,t]-A[i]-tmp1[i,t])^2 } FF10<-(F1[i]-F0[i])/(2*var) tmp_FF0<-((bval[i,tfj]/(1-bval[i,tfj]))^w[tfj])*((mval[i,tfj]/(1-mval[i,tfj]))^(1-w[tfj])) tmp_FF1<-(((1-bval[i,tfj])/bval[i,tfj])^w[tfj])*(((1-mval[i,tfj])/mval[i,tfj])^(1-w[tfj])) FF0[i]<-exp(-FF10)*tmp_FF0 FF1[i]<-exp(FF10)*tmp_FF1 pc0[i]<-1/(1+FF0[i]) pc1[i]<-1/(1+FF1[i]) u<-runif(1,0,1) if(pc1[i]>=u ){C[i,tfj]=1} else if(pc1[i]< u ){C[i,tfj]=0} } return(C[,tfj]) } ### ############### # sampling beta - B # beta_sampling<-function(j,var,A,B){ tau_beta<-10000 tfj<-j if(testObject(tmp)){rm(tmp)} tmp<-matrix(0, num_gene, num_cond) Vv <-matrix(0,num_gene, num_cond) Txx <- c(0) Tvx <- c(0) for(i in 1:num_gene){ for (t in 1:num_cond){ for (j in 1:num_tf){ tmp[i,t]=tmp[i,t]+B[j]*x[i,j,t] } Vv[i,t]=gene[i,t]-A[i]-tmp[i,t]+B[tfj]*x[i,tfj,t] } } for(i in 1:num_gene){ for (t in 1:num_cond){ Txx<-Txx+x[i,tfj,t]^2 Tvx<-Tvx+Vv[i,t]*x[i,tfj,t] } } theta <-rnorm(1,0,1) tmp2<-(1/tau_beta)^2+Txx/var B[tfj]<-(Tvx/var)/tmp2+theta*sqrt(1/tmp2) return(B[tfj]) } ## testObject <- function(object) { exists(as.character(substitute(object))) } ######### ### sampling var # var_sampling<-function(A,B){ if(testObject(tmp)){rm(tmp)} tmp <-matrix(0, num_gene, num_cond) V<-c(0) for(i in 1:num_gene){ for (t in 1:num_cond){ for (j in 1:num_tf){ tmp[i,t]=tmp[i,t]+B[j]*x[i,j,t] } V=V+(gene[i,t]-A[i]-tmp[i,t])^2 } } #theta<-c(15000) if(testObject(theta)){rm(theta)} theta<-rchisq(1,num_gene*num_cond+2) var<-(V+1)/theta return(var) } ############# # sampling alpha-A # alpha_sampling<-function(var,B){ tau_alpha<-10000 if(testObject(tmp)){rm(tmp)} tmp <-matrix(0, num_gene, num_cond) Y<-rep(c(0), num_gene) for(i in 1:num_gene){ for (t in 1:num_cond){ for (j in 1:num_tf){ tmp[i,t]=tmp[i,t]+B[j]*x[i,j,t] } Y[i]=Y[i]+(gene[i,t]-tmp[i,t]) } } if(testObject(tmp2)){rm(tmp2)} tmp2<-(num_cond/var+1/(tau_alpha^2)) for(i in 1:num_gene){ Z<-rnorm(1,0,1) A[i]<-((1/var)/tmp2)*Y[i]+Z*sqrt(1/tmp2) } return(A) }