##### Scripts used in Project "Differential expression of miRNAs in
##### pancreatobiliary type of periampullary adenocarcinoma and its
##### associated stroma"
##### Written by Vandana Sandhu

# Shared helper ----

# Convert every column of a data frame to numeric and return the result as a
# numeric matrix with the same dimensions and column names. Values pass
# through as.character() first so that factor columns yield their labels
# rather than their integer codes. Entries that cannot be parsed become NA.
as_numeric_matrix <- function(df) {
  values <- vapply(
    df,
    function(column) as.numeric(as.character(column)),
    numeric(nrow(df))
  )
  matrix(
    values,
    nrow = nrow(df),
    ncol = ncol(df),
    dimnames = list(NULL, names(df))
  )
}

#### Script 1 ----
#### Moderated t-test (limma), tumor versus stroma

library(limma)

tumor_raw <- read.table("Tumor.txt", sep = "\t")    # tumor expression data
stroma_raw <- read.table("Stroma.txt", sep = "\t")  # stroma expression data
genes <- read.table("miRNA-names.txt", sep = "\t")

# The files are read without a header, so the first row and first column are
# dropped here and the remaining cells are converted to numbers.
tumor_expr <- as_numeric_matrix(tumor_raw[-1, -1, drop = FALSE])
stroma_expr <- as_numeric_matrix(stroma_raw[-1, -1, drop = FALSE])

dat <- cbind(tumor_expr, stroma_expr)
dim(dat)

n_tumor <- ncol(tumor_expr)
n_stroma <- ncol(stroma_expr)

# Design matrix for lmFit(): one indicator column per group, no intercept.
tissue <- factor(
  rep(c("Tumor", "Stroma"), times = c(n_tumor, n_stroma)),
  levels = c("Tumor", "Stroma")
)
design <- model.matrix(~ 0 + tissue)
colnames(design) <- c("Tumor", "Stroma")

fit <- lmFit(dat, design)
contrast_matrix <- makeContrasts(Tumor - Stroma, levels = design)
fit2 <- contrasts.fit(fit, contrast_matrix)
fit2 <- eBayes(fit2)

# topTable() reports the moderated t-test with "fdr" (Benjamini-Hochberg)
# adjusted p-values.
# NOTE(review): `number` is left at its default, so only the top-ranked rows
# are written; pass `number = Inf` if the full table is wanted - confirm.
top_mirnas <- topTable(fit2, coef = 1, genelist = genes, adjust.method = "fdr")
write.table(top_mirnas, "tumor-stroma.txt")

#### Script 2 ----
#### Hierarchical clustering of samples

library("gplots")
# NOTE(review): nothing below appears to use RSvgDevice (the plot is written
# with pdf()); the call is kept so the set of loaded packages is unchanged.
library("RSvgDevice")

heat_raw <- read.table("miRNA_expression_data", sep = "\t")
heat_expr <- as_numeric_matrix(heat_raw[-1, -1, drop = FALSE])

# Row 1 holds the column labels and column 1 the row labels; both are turned
# into plain character vectors so the heatmap axes show the label text.
sample_labels <- as.character(unlist(heat_raw[1, -1]))
mirna_labels <- as.character(heat_raw[-1, 1])

# Sample groups in column order, and the colour of each group. The lookup is
# by group name, so it does not depend on the alphabetical order of levels.
sample_group <- c(rep("Normal", 8), rep("Carcinoma", 18), rep("Stroma", 18))
group_colors <- c(Normal = "grey", Carcinoma = "red", Stroma = "blue")
stopifnot(length(sample_group) == ncol(heat_expr))
sample_colors <- unname(group_colors[sample_group])

# Scale each row, then cluster on 1 - Pearson correlation.
heat_scaled <- t(scale(t(heat_expr)))
row_clust <- hclust(
  as.dist(1 - cor(t(heat_scaled), method = "pearson")),
  method = "complete"
)  # row-wise
col_clust <- hclust(
  as.dist(1 - cor(heat_scaled, method = "pearson")),
  method = "complete"
)  # column-wise

pdf("Plot.pdf")
heatmap.2(
  heat_expr,
  Rowv = as.dendrogram(row_clust),
  Colv = as.dendrogram(col_clust),
  dendrogram = "col",
  scale = "row",
  col = greenred,
  trace = "none",
  density.info = "none",
  ColSideColors = sample_colors,
  key = TRUE,
  keysize = 1.0,
  margins = c(5, 8),
  sepcolor = "",
  labCol = sample_labels,
  labRow = mirna_labels
)
dev.off()

#### Script 3 ----
#### Sparse PCA of samples

# PMA provides SPC.cv() and SPC(), so it is loaded before either is called.
library("PMA")

spc_raw <- read.table("mirna_expression_data_input.txt", sep = "\t")
spc_expr <- as_numeric_matrix(spc_raw[-1, -1, drop = FALSE])
dd <- t(spc_expr)

# 5-fold cross-validation over a range of sumabsv values; sumabsv must be
# between 1 and the square root of the number of columns of the data.
cv.out <- SPC.cv(
  dd,
  sumabsvs = seq(1, 13, length.out = 13),
  nfolds = 5,
  niter = 5,
  v = NULL,
  trace = TRUE,
  orth = FALSE,
  center = TRUE,
  vpos = FALSE,
  vneg = FALSE
)
print(cv.out)
plot(cv.out)
# Recorded result:
#   Best sumabsv value (lowest CV error): 5
#   Smallest sumabsv value that has CV error within 1 SE of best CV error: 4

# sumabsv = 4 is used because it is the smallest value whose CV error is
# within 1 SE of the best CV error. The object keeps the name `out` because
# plot() derives its default axis label from the expression passed to it.
out <- SPC(dd, sumabsv = 4, K = 1)

point_shapes <- c(rep(15, 8), rep(16, 18), rep(17, 18))  # shape per group
point_colors <- c(rep("grey", 8), rep("red", 18), rep("blue", 18))
plot(out$u, col = point_colors, pch = point_shapes, cex = 1)

#### Script 4 ----
#### Boxplots and one-way ANOVA

tumor_tab <- read.table("anova-tumor.txt", sep = "\t", header = TRUE)
stroma_tab <- read.table("anova-stroma.txt", sep = "\t", header = TRUE)
normal_tab <- read.table("anova-normal.txt", sep = "\t", header = TRUE)

# miRNA names, one per analysed column.
# NOTE(review): these come from the first column of the tumor file, while the
# loop below treats rows as samples (18 + 18 + 8 values per column) - confirm
# that this column really holds one name per analysed column.
mirna_names <- as.character(tumor_tab[, 1])

mirna_cols <- 2:44
# The numeric prefixes fix the left-to-right order of the boxes.
tissue_type <- factor(
  c(rep("2.Carcinoma", 18), rep("1.Stroma", 18), rep("3.Normal", 8))
)
p_values <- numeric(length(mirna_cols))

# pdf() cannot create the output directory itself.
dir.create("boxplot", showWarnings = FALSE)

for (j in seq_along(mirna_cols)) {
  col_idx <- mirna_cols[j]
  expr_by_type <- data.frame(
    wt = c(tumor_tab[, col_idx], stroma_tab[, col_idx], normal_tab[, col_idx]),
    type = tissue_type
  )

  # One-way ANOVA. The model is fitted before the device is opened so that a
  # failure here does not leave a PDF device open.
  anova_table <- anova(lm(wt ~ type, data = expr_by_type))
  # The p-value column has one entry for `type` and one (NA) for the
  # residuals; only the first is the test result.
  p_values[j] <- anova_table[["Pr(>F)"]][1]

  pdf(paste0("boxplot/", mirna_names[j], ".pdf"))
  boxplot(
    split(expr_by_type$wt, expr_by_type$type),
    xlab = "",
    ylab = "miRNA expression",
    col = c("blue", "red", "grey")
  )
  # Overlay each sample on the box of its own group.
  points(as.integer(expr_by_type$type), expr_by_type$wt)
  title(main = paste(mirna_names[j]))
  legend("topright", paste(p_values[j]))
  dev.off()
}

# The column names `cc`, `p` and `adjust_p` are the headers of the output
# tables.
write.table(cbind(cc = mirna_names, p = p_values), "p_val.txt")

# p.adjust() is called with its default method ("holm").
adjust_p <- p.adjust(p_values)
write.table(cbind(cc = mirna_names, adjust_p = adjust_p), "adjust_p_val.txt")