#########################################################################################################
#USERS REQUIRE A COMMA SEPARATED VALUE (CSV) DATA-FILE NAMED 'Input.csv' CONTAINING THE DATA FOR A SINGLE
# MOLECULAR MARKER WHICH IS LOCATED IN A COMPUTER DIRECTORY OF THEIR CHOICE.
#
#THE USER SHOULD MODIFY THE FIRST EXECUTABLE LINE OF THIS SCRIPT TO INDICATE THE LOCATION OF 'Input.csv'
# THIS FILE SHOULD CONTAIN INFORMATION FOR ONLY A SINGLE MOLECULAR MARKER
#
#ALL PLOTS ARE EXPORTED AS PUBLICATION QUALITY OUTPUT NAMED "Figure 1.tif" AND "Figure 2.tif" TO THE SAME
# DIRECTORY
#
#THE USER MAY CHANGE THE MAXIMUM NUMBER OF PCR CYCLES (USED TO DETECT CENSORING) FROM THE CURRENT DEFAULT
# OF 40 ON THE SECOND EXECUTABLE LINE OF THE SCRIPT
#
#THE USER SHOULD ENSURE THAT THE R-SUPPLEMENTARY PACKAGE VGAM IS INSTALLED IN THEIR R-LIBRARY
#
#THE DATA-FILE Input.csv USUALLY COMPRISES SEVEN COLUMNS (FOR EXCEPTION SEE BELOW):
# COLUMN 1 - SAMPLING TIME (NUMERIC) INCLUDING ONE PRE-CHALLENGE TIME (0) AND SUBSEQUENT TIMES
# COLUMN 2 - AN IDENTIFIER (NUMERIC) FOR EACH INDIVIDUAL
# COLUMN 3 - AN IDENTIFIER ('C' OR 'E') INDICATING WHETHER THE INDIVIDUAL IS CONTROL OR CHALLENGED
# COLUMN 4 - THE CP VALUE OF THE MOLECULAR MARKER (NUMERIC)
# COLUMN 5 - THE CALIBRATION COEFFICIENT FOR THE MOLECULAR MARKER (NUMERIC)
# COLUMN 6 - THE CP VALUE OF A STANDARDISING HOUSEKEEPING GENE (NUMERIC)
# COLUMN 7 - THE CALIBRATION COEFFICIENT FOR THE STANDARDISING HOUSEKEEPING GENE (NUMERIC)
#
#COLUMN HEADINGS MUST BE PRESENT AND NAMES CAN BE CHOSEN BY THE USER
#INFORMATION ON THE HOUSEKEEPING GENE IS OPTIONAL AND IF OMITTED THE FILE WILL COMPRISE FIVE COLUMNS ONLY.
#
#EXAMPLE OF LAYOUT OF Input.csv
#Time Indiv Group MarkCP MarkCal ElfCP ElfCal
# 0    1     C     20.15  3.41    24.54 3.36
# 0    2     C     23.92  3.41    24.23 3.36
# 0    3     E     22.28  3.41    25.96 3.36
# 0    4     E     21.42  3.41    24.77 3.36
# 4    1     C     20.15  3.41    24.54 3.36
# 4    2     C            3.41    24.23 3.36
# 4    3     E     18.42  3.41    25.96 3.36
# 4    4     E     16.97  3.41    24.77 3.36
# 8    1     C     20.15  3.41    24.54 3.36
# 8    2     C     23.92  3.41          3.36
# 8    3     E     18.42  3.41    25.96 3.36
# 8    4     E     16.97  3.41    24.77 3.36
#
#VALUES OF 'E' & 'C' IN COLUMN 3 REFER TO EXPERIMENTAL (OR CHALLENGED) AND CONTROL (OR UNCHALLENGED) GROUPS
#
#MISSING VALUES (INCLUDING VALUES WHICH MAY BE OUTWITH THE LIMITS OF DETECTION OF THE QPCR) ARE ALLOWED IN
# COLUMNS 4 & 6 ONLY AND SHOULD BE INDICATED BY A BLANK CELL (AS SHOWN ABOVE)
#
#MORE THAN TWO NON-MISSING CONTROL VALUES ARE REQUIRED FOR THIS PROGRAM (ALTHOUGH IT IS UNLIKELY TO GIVE
# SATISFACTORY RESULTS WITH SO FEW VALUES)
#
#FOR EXPERIMENTS WHICH SAMPLE THE SAME INDIVIDUALS AT EACH TIME POINT - ASSIGN THE SAME NUMBER TO EACH
# INDIVIDUAL AT EACH SAMPLING POINT.
#
#FOR EXPERIMENTS WHICH SAMPLE DIFFERENT INDIVIDUALS AT EACH TIME POINT - ASSIGN A DIFFERENT NUMBER TO EACH
# INDIVIDUAL
#
#SCRIPT BY MALCOLM HALL DURING 2015 TO RUN ON R VERSION 3.1.2 AND THE SUPPLEMENTARY PACKAGE VGAM_0.9-6
#
#THE USER SHOULD BEWARE - THIS IS NOT A DOCUMENTED AND REFINED 'FUNCTION' AND THE POSSIBILITY OF BUGS CANNOT
# BE EXCLUDED
#CODING ENQUIRIES CAN BE ADDRESSED TO malcolm.hall@scotland.gsi.gov.uk
#
###############################################################################################################

######################################
#USER MUST SET DIRECTORY IN LINE BELOW
######################################
#Every later file name ("Input.csv", "Figure 1.tif", "Figure 2.tif") is relative, so it resolves against
# the working directory set here
setwd("H://Miscellanious/BertrandISA") #THE USER SHOULD SPECIFY THE LOCATION OF THEIR DATA FILE OF Input.csv

#############################################################################################################
#THE USER SHOULD SET A VALUE AT WHICH CENSORING COULD TAKE PLACE - DEFAULT ASSUMES A MAXIMUM OF 40 PCR-CYCLES
#############################################################################################################
ParamCenVal <- 40 #THE USER SHOULD SPECIFY THE MAXIMUM QPCR CP THRESHOLD

###########################################################
#LOAD SUPPLEMENTARY R PACKAGE VGAM AND PROVIDE CITATIONS
###########################################################
library(VGAM) #THE USER SHOULD ENSURE THAT THEY HAVE PREVIOUSLY INSTALLED VGAM IN THEIR R LIBRARY
citation()
citation("VGAM")

#########################################
#READ IN DATA AND CARRY OUT MINOR EDITING
#########################################
#Every column is read as text and converted explicitly below; blank cells become NA on conversion
wrkdat <- read.table("Input.csv", header = TRUE, sep = ",", colClasses = "character", fill = TRUE)

#The housekeeping gene needs both of its columns (6 and 7); fail early with a readable message rather
# than on the column renaming below
if (ncol(wrkdat) < 4 || ncol(wrkdat) == 6) {
  stop("Input.csv has an unsupported number of columns (", ncol(wrkdat),
       "); five or seven columns are expected", call. = FALSE)
}
ParamHasCal <- ncol(wrkdat) > 4 #Calibration coefficient of the marker supplied
ParamHasElf <- ncol(wrkdat) > 5 #Housekeeping gene Cp and calibration coefficient supplied

#Column headings chosen by the user are replaced with the fixed names used throughout the script
names(wrkdat)[1:4] <- c("Time", "Indiv", "Type", "Depvar")
if (ParamHasCal) names(wrkdat)[5] <- "Depvarcal"
if (ParamHasElf) names(wrkdat)[6:7] <- c("elf", "elfcal")

wrkdat$Time <- as.numeric(wrkdat$Time)
wrkdat$Indiv <- as.numeric(wrkdat$Indiv)
wrkdat$Type <- as.factor(wrkdat$Type)
wrkdat$Depvar <- as.numeric(wrkdat$Depvar)
if (ParamHasCal) wrkdat$Depvarcal <- as.numeric(wrkdat$Depvarcal)
if (ParamHasElf) {
  wrkdat$elf <- as.numeric(wrkdat$elf)
  wrkdat$elfcal <- as.numeric(wrkdat$elfcal)
  #Records without a housekeeping gene Cp cannot be standardised and are dropped
  wrkdat <- wrkdat[!is.na(wrkdat$elf), ]
}

##########################################################
#EVALUATE WHETHER CONTROL VALUES ARE LIKELY TO BE CENSORED
##########################################################
tempdat <- wrkdat$Depvar[which(wrkdat$Type == "C")]
ParamNmMs <- sum(is.na(tempdat))  #Temporary - Number of Cp values missing
ParamNmPt <- sum(!is.na(tempdat)) #Temporary - Number Cp values present

#Without more than two control values ParamCen cannot be set and every later test on it would fail,
# so stop here with the message the script used to print
if (ParamNmPt <= 2) {
  stop("There are insufficient control values available for analysis", call. = FALSE)
}

ParamCen <- 0 #Flag for occurrence of censoring (1 = treated as censored, 0 = not)
if (ParamNmMs > 0) {
  #Missing Cp values are placed just above the largest observed Cp and that value is given to the
  # tobit family as the upper censoring limit
  ParamMax <- max(tempdat, na.rm = TRUE) + (1e-03)
  tempdat[is.na(tempdat)] <- ParamMax
  #type.f and crit are abbreviated argument names/values exactly as in the original script
  tempres <- vglm(tempdat ~ 1, tobit(Upper = ParamMax, type.f = "cens"), crit = "c")
  ParamMn <- coef(tempres)[1]      #Temporary - Mean of distribution
  ParamSD <- exp(coef(tempres)[2]) #Temporary - Standard deviation of distribution
  #pnorm(..., lower.tail=FALSE) is the fitted probability of a Cp above ParamCenVal; pbinom(0, ...,
  # lower.tail=FALSE) is then the probability that at least one of ParamNmMs values exceeds it
  ParamPr <- pbinom(0, ParamNmMs,
                    pnorm(ParamCenVal, ParamMn, ParamSD, lower.tail = FALSE),
                    lower.tail = FALSE)
  if (is.na(ParamPr)) {
    stop("The censoring check for the control group could not be evaluated", call. = FALSE)
  }
  ParamCen <- if (ParamPr > 0.05) 1 else 0
  rm(ParamMax, tempres, ParamMn, ParamSD, ParamPr)
}
rm(tempdat, ParamNmMs, ParamNmPt)

#########################################
#CONVERT CP VALUES TO RELATIVE EXPRESSION
#########################################
if (ParamHasCal) wrkdat$Depvar <- with(wrkdat, (10^(-1 * (Depvar / Depvarcal))))
if (ParamHasElf) wrkdat$Depvar <- with(wrkdat, Depvar / (10^(-1 * (elf / elfcal))))
wrkdat <- wrkdat[1:4] #Only Time, Indiv, Type and Depvar are needed from here on
rm(ParamHasCal, ParamHasElf)

############################################
#ESTABLISHING UPPER AND LOWER 95 PERCENTILES
############################################
#The envelope is calculated on the log10 scale from the control group only
tempdat <- log10(wrkdat$Depvar[which(wrkdat$Type == "C")])
if (ParamCen == 0) {
  ParamMn <- mean(tempdat, na.rm = TRUE) #Estimated mean of expression values of control group
  ParamSD <- sd(tempdat, na.rm = TRUE)   #Estimated standard deviation of expression values of control group
} else {
  #Missing values are placed just below the smallest observed expression and that value is given to
  # the tobit family as the lower censoring limit
  ParamMin <- min(tempdat, na.rm = TRUE) - (1e-06)
  tempdat[is.na(tempdat)] <- ParamMin
  tempres <- vglm(tempdat ~ 1, tobit(Lower = ParamMin, type.f = "cens"), crit = "c")
  ParamMn <- coef(tempres)[1]
  ParamSD <- exp(coef(tempres)[2])
  rm(ParamMin, tempres)
}
rm(tempdat)
ResdatUP <- ParamMn + (1.96 * ParamSD)
ResdatLP <- ParamMn - (1.96 * ParamSD)
rm(ParamMn, ParamSD)

###########################################################################
#IDENTIFY EXPERIMENTAL INDIVDUALS WHICH LIE OUTWITH THE PERCENTILE ENVELOPE
###########################################################################
#NOTE(review): this statement was garbled in the copy of the script under review (the comparison
# operators and the first assignment were missing). It has been rebuilt as a two-sided test against
# ResdatLP and ResdatUP - confirm against the original script.
#Missing expression values give NA here
wrkdat$outwith <- ifelse(log10(wrkdat$Depvar) < ResdatLP, 1, 0)
wrkdat$outwith <- ifelse(log10(wrkdat$Depvar) > ResdatUP, 1, wrkdat$outwith)
##########################################################################################
#EVALUATE IF THERE ARE DIFFERENCES BETWEEN THE RESPONSE OF EXPERIMENTAL AND CONTROL GROUPS
##########################################################################################
ParamDiff<-NA #P-value for different response of expression values between control and experimental group
#Without censoring, records with any missing value are left out of the comparison
if(ParamCen==0){ tempdat<-na.omit(wrkdat)}
#With censoring, records whose outwith flag is missing are kept and counted as lying within the envelope
if(ParamCen==1){ tempdat<-wrkdat
 tempdat$outwith<-with(tempdat,ifelse(is.na(outwith),0,outwith))}
#2 x 2 table of counts: first row experimental, second row control; first column outwith the envelope,
# second column within it. The leading '+' on the second row is a unary plus and leaves the counts
# unchanged (possibly a pasted console continuation prompt - TODO confirm)
tempdat<-rbind(c(length(subset(tempdat,Type=="E"&outwith==1)$Depvar),length(subset(tempdat,Type=="E"&outwith==0)$Depvar)),
+c(length(subset(tempdat,Type=="C"&outwith==1)$Depvar),length(subset(tempdat,Type=="C"&outwith==0)$Depvar)))
#One-sided Fisher exact test on the table; the p-value is rounded to three decimal places
ParamDiff<-round(fisher.test(tempdat,alternative="greater")$p.value,3)
rm(tempdat)

################################
#REMOVE MISSING VALUES FROM DATA
################################
wrkdat<-na.omit(wrkdat)

#########################################################################################################
#IDENTIFY EXPERIMENTAL ANIMALS WITH COMPLETE DATA ASSOCIATED WITH THE MINIMUM AND MAXIMUM RESPONSE VALUES
#########################################################################################################
tempdat<-subset(wrkdat,Type=="E")
#tempdat1 pairs each experimental identifier (X1) with its number of remaining records (X2)
tempdat1<-data.frame(cbind(sort(as.vector(unique(as.numeric(tempdat$Indiv)))),as.vector(table(tempdat$Indiv))))
tempdat<-merge(tempdat,tempdat1,by.x="Indiv",by.y="X1")
rm(tempdat1)
#Keep individuals whose record count equals the number of distinct sampling times in the experimental
# data; max() is applied to a single number here and has no effect
tempdat<-subset(tempdat,X2==max(length(unique(tempdat$Time))))
#First individual holding the smallest / largest expression value among those kept; the result is NA
# when no individual was kept
Indivmin<-subset(tempdat,Depvar==min(Depvar,na.rm=TRUE))$Indiv[1]
Indivmax<-subset(tempdat,Depvar==max(Depvar,na.rm=TRUE))$Indiv[1]
rm(tempdat)

#######################################################
#ESTIMATE SUITABLE MINIMUM AND MAXIMUM VALUES FOR GRAPH
#######################################################
#NOTE(review): the test compares max(Depvar) on the untransformed scale with ResdatUP, which is
# calculated from log10 values earlier in the script, while the value returned is log10(max(Depvar)).
# Comparing log10(max(Depvar)) with ResdatUP looks like the intent - confirm
ylimmax<-with(wrkdat,ifelse(max(Depvar)>ResdatUP,log10(max(Depvar)),ResdatUP))
#NOTE(review): the next statement is incomplete in this copy of the script. The text between the '<'
# of the ylimmin comparison and the '>' of a later 'if(ParamDiff>0.05)' is missing, so the rest of
# ylimmin and the opening of Figure 1 are not present. xlimmin, xlimmax and jtr are used below but are
# not assigned anywhere in the visible script; presumably they were set in the missing text. Restore
# this section from the original script before running.
if(ParamCen==0)
ylimmin<-with(wrkdat,ifelse(min(log10(Depvar))0.05) with(subset(wrkdat,Type=="E"&outwith==1&Indiv!=Indivmin&Indiv!=Indivmax),points(jitter(Time,jtr),log10(Depvar),col="black",pch=1))
#Experimental values outwith the envelope are drawn as filled symbols (pch=16) when ParamDiff<=0.05 and
# as open symbols (pch=1) otherwise
if(ParamDiff<=0.05) with(subset(wrkdat,Type=="E"&outwith==1&Indiv!=Indivmin&Indiv!=Indivmax),points(jitter(Time,jtr),log10(Depvar),col="black",pch=16))
#Join the records of the individual with the largest response
with(subset(wrkdat,Indiv==Indivmax),lines(Time,log10(Depvar),col="black"))
#ParamDiff was already rounded to three decimal places when it was calculated
ParamDiff<-round(ParamDiff,3)
#The individual with the smallest response is joined only when the data are not treated as censored
if(ParamCen==0) with(subset(wrkdat,Indiv==Indivmin),lines(Time,log10(Depvar),col="black"))
#The x position ((xlimmax-xlimmin)+xlimmin)/2 simplifies to xlimmax/2
if(ParamDiff<=0.05) text(((xlimmax-xlimmin)+xlimmin)/2,floor(ylimmin),paste("Difference between groups is statistically significant"))
dev.off()

###########################
#PLOT DATA FOR NORMAL SCALE
###########################
tiff(filename = "Figure 2.tif", width=5,height=5,type ="cairo",units="in",res=800,pointsize=10, compression="lzw")
#Control values are drawn with pch=46 on axes whose y limits are the log10 limits raised to the power of 10
with(subset(wrkdat,Type=="C"),plot(jitter(Time,jtr),Depvar,xlim=c(xlimmin,xlimmax),ylim=c(10^ylimmin,10^ylimmax),ylab="expression",xlab="time post challenge (day)",cex.lab=1.2,pch=46,main="",cex.main=1.4,bty="n"))
#NOTE(review): '&' is evaluated before '|', so this selects records with Indiv==Indivmax whatever their
# Type; the next statement brackets the two Indiv tests - confirm which is intended
with(subset(wrkdat,Type=="E"&Indiv==Indivmin|Indiv==Indivmax),points(Time,Depvar,col="black",pch=1))
if(ParamDiff<=0.05) with(subset(wrkdat,Type=="E"&outwith==1&(Indiv==Indivmin|Indiv==Indivmax)),points(Time,Depvar,col="black",pch=16))
#Remaining experimental values: within the envelope as open symbols; outwith the envelope as open or
# filled symbols depending on ParamDiff. These are drawn with jitter on Time
with(subset(wrkdat,Type=="E"&outwith==0&(Indiv!=Indivmin&Indiv!=Indivmax)),points(jitter(Time,jtr),Depvar,col="black",pch=1))
if(ParamDiff>0.05) with(subset(wrkdat,Type=="E"&outwith==1&Indiv!=Indivmin&Indiv!=Indivmax),points(jitter(Time,jtr),Depvar,col="black",pch=1))
if(ParamDiff<=0.05) with(subset(wrkdat,Type=="E"&outwith==1&Indiv!=Indivmin&Indiv!=Indivmax),points(jitter(Time,jtr),Depvar,col="black",pch=16))
with(subset(wrkdat,Indiv==Indivmax),lines(Time,Depvar,col="black"))
if(ParamCen==0) with(subset(wrkdat,Indiv==Indivmin),lines(Time,Depvar,col="black"))
dev.off()

########################################################################
#ENSURE THERE ARE NO OLD VECTORS OR DATAFRAMES FLOATING ABOUT THE SYSTEM
########################################################################
#NOTE(review): this removes every object in the current environment, not only those created by this
# script
rm(list=ls())