########################################
# DATA SEARCH FROM ALL THREE DATABASES:
# DRUG RESPONSE DATA FOR EXON 19 DEL LUNG CANCER CELL LINES VS. WT CELL LINES
########################################

# Set working directory.
# NOTE(review): a hard-coded absolute path makes the script machine-specific;
# all input/output files below are resolved relative to this directory.
setwd("C:/")

#####################
# LIBRARIES
#####################
library(reshape2)
library(dplyr)
library(ggplot2)
library(tidyr)
library(magrittr)
library(rcompanion)
library(grid)
library(ggthemes)

#####################
# CCLE DATABASE SEARCH
#####################

######################
# DATA INPUT
######################

# Central files collected from website:
CCLEdrugdata <- read.table(
  "CCLE_NP24.2009_Drug_Data_2015.02.24_fixed_16022018.csv",
  sep = ";", header = TRUE, stringsAsFactors = FALSE
)

# I have collected all mutation data from the CCLE Mutations website as .csv
# files, plus the GDSC mutation data, and gathered them into files containing
# exon 19 del mutations and all other ERBB mutations. The files are kept
# separate because the databases use different naming systems.
mutationdata <- read.table(
  "CCLE mutation list.csv",
  sep = ";", header = TRUE, stringsAsFactors = FALSE
)

# List of selected drugs (erlotinib + lapatinib).
# NOTE(review): this table is read but not used anywhere in the visible script.
ERBB_drugs_CCLE <- read.csv("ERBB drugs CCLE.csv", sep = ";")

##########################
# COLLECTION OF DRUG DATA
##########################

# Collect drug data
# Erlotinib
SelectedDrug1 <- CCLEdrugdata[which(CCLEdrugdata$Compound == "Erlotinib"), ]
# Collect the cell line data (inner join: only cell lines present in the
# mutation list are kept)
SelectedMutations1 <- merge(mutationdata, SelectedDrug1,
                            by = "CCLE.Cell.Line.Name")

# Lapatinib
SelectedDrug2 <- CCLEdrugdata[which(CCLEdrugdata$Compound == "Lapatinib"), ]
# Collect the cell line data that appears in both files
SelectedMutations2 <- merge(mutationdata, SelectedDrug2,
                            by = "CCLE.Cell.Line.Name")

CCLE_ERBB <- rbind(SelectedMutations1, SelectedMutations2)
CCLE_ERBB <- CCLE_ERBB[which(CCLE_ERBB$CancerType == "Lung"), ]
write.csv(CCLE_ERBB, file = "CCLE_MUT_Cell_Lines.csv")

# Collect WT cell line data: every cell line treated with the drug that is
# NOT in the mutation list is treated as wild type.
ccle_whole1 <- SelectedDrug1
CellLines1 <- SelectedMutations1$CCLE.Cell.Line.Name
OtherLines1 <- ccle_whole1[!ccle_whole1$CCLE.Cell.Line.Name %in% CellLines1, ]

# For other drug
ccle_whole2 <- SelectedDrug2
CellLines2 <- SelectedMutations2$CCLE.Cell.Line.Name
OtherLines2 <- ccle_whole2[!ccle_whole2$CCLE.Cell.Line.Name %in% CellLines2, ]

merged_CCLE <- rbind(OtherLines1, OtherLines2)
merged_CCLE <- merged_CCLE[which(merged_CCLE$CancerType == "Lung"), ]
write.csv(merged_CCLE, file = "CCLE_WT_Cell_Lines.csv")

# Calculate AUC (mean ActArea of the WT lung cell lines per drug)
erlo <- merged_CCLE[which(merged_CCLE$Compound == "Erlotinib"), ]
mean(erlo$ActArea)
# Mean of negative ctrl cell lines = 0.6118893
lapa <- merged_CCLE[which(merged_CCLE$Compound == "Lapatinib"), ]
mean(lapa$ActArea)
# Mean of negative ctrl cell lines = 0.7242165

# Bring files to Excel, add 0 µM values as 100 % and calculate response values
# for the rest (value = 100 + x).

#####################
# CTRP DATABASE SEARCH
#####################

######################
# DATA INPUT
######################

# Central files collected from website:

# Data for experiments:
# information about experimental growth conditions, media, and SNP
# fingerprinting
CTRP_ExperimentData <- read.csv(
  "v20.meta.per_experiment.csv",
  header = TRUE, sep = ";", stringsAsFactors = FALSE
)

# Data about cell lines:
# contextual cancer cell line information and annotation
CTRP_CellLineData <- read.csv(
  "v20.meta.per_cell_line.csv",
  header = TRUE, sep = ";", stringsAsFactors = FALSE
)

# Average measured and predicted viability values following quality control
# and curve-fitting for each cancer cell line treated with compound, for each
# concentration point tested.
# Columns used: experiment_id, cpd_avg_pv, cpd_conc_umol, master_cpd_id
CTRP_AvrgViab <- read.table(
  "v20.data.per_cpd_post_qc.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)

# AUC and EC50 data.
# Contains AUC values, experiment_id, master_cpd_id
CTRP_AUC_data <- read.table(
  "v20.data.curves_post_qc.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)

# Mutation data file done by checking CTRP database numbering for cell lines
CTRP_mutationdata <- read.table(
  "CTRP mutation list.csv",
  sep = ";", header = TRUE, stringsAsFactors = FALSE
)

# List of
# selected drugs
CTRP_ERBB_drugs <- read.csv("ERBB drugs CTRP.csv", sep = ";")

########################
# COLLECTING DRUG DATA
########################

# Lookup tables with only the relevant columns. They do not depend on the
# drug, so they are built once instead of on every iteration.
# experiment_id -> master_ccl_id
ExpID <- CTRP_ExperimentData[, c("experiment_id", "master_ccl_id")]
# master_ccl_id -> cell line name and primary site
CellLineID <- CTRP_CellLineData[
  , c("ccl_name", "master_ccl_id", "ccle_primary_site")
]

# Go through the drug list and collect the viability data of every selected
# compound, annotated with the cell line it was measured in.
# One data frame per compound is returned and the pieces are row-bound
# afterwards. (The previous version seeded the list with NA, indexed it by the
# compound ID itself and then had to drop a dummy first row; iterating over
# the sorted unique IDs yields the same rows in the same order without the
# placeholder.)
FilteredDrugDatas <- lapply(
  sort(unique(CTRP_ERBB_drugs$master_cpd_id)),
  function(DrugOfIteration) {
    # keep only the rows of the drug of this iteration
    drugdata.Filtered1 <- CTRP_AvrgViab[
      CTRP_AvrgViab$master_cpd_id == DrugOfIteration,
    ]
    # add the cell line ID via the experiment number
    drugdata.Filtered2 <- merge(drugdata.Filtered1, ExpID,
                                by = "experiment_id")
    # add cell line name and primary site via the cell line ID
    merge(drugdata.Filtered2, CellLineID, by = "master_ccl_id")
  }
)
FilteredDrugDatas <- do.call(rbind, FilteredDrugDatas)

# Take only lung cell lines
FilteredDrugDatas <- FilteredDrugDatas[
  which(FilteredDrugDatas$ccle_primary_site == "lung"),
]

# Collect only cell lines that have data (i.e. appear in the mutation list)
CTRP_cells <- CTRP_mutationdata$Cell_Line_Name
sub.table <- FilteredDrugDatas[
  which(FilteredDrugDatas$ccl_name %in% CTRP_cells),
]

#################
# Re-organize data
#################
dataset <- sub.table
# Long -> wide: one row per cell line / experiment / compound, one column per
# concentration. Combinations without a measurement are filled with Inf ...
dataset2 <- dcast(
  dataset,
  master_ccl_id + experiment_id + ccl_name + master_cpd_id ~ cpd_conc_umol,
  value.var = "cpd_avg_pv", fun.aggregate = sum, fill = Inf
)
# ... and the Inf markers are then blanked so the cells are empty in the CSV.
# NOTE(review): assigning "" presumably turns the affected concentration
# columns into character columns - confirm before using them numerically.
dataset2[dataset2 == Inf] <- ""

# NOTE(review): sub.table2 is not used again in the visible script.
sub.table2 <- CTRP_mutationdata[
  which(CTRP_mutationdata$master_ccl_id %in% dataset2$master_ccl_id),
]
CTRPSelectedMutations <- merge(dataset2, CTRP_mutationdata,
                               by = "master_ccl_id", all.x = TRUE)
write.csv(CTRPSelectedMutations, file = "CTRP organized MUT cells.csv")

##########################
# COLLECT AUC AND EC50
# INFO
##########################

# Collecting AUC values for MUT
# Select columns
CTRP_drug_data <- CTRPSelectedMutations
# Select those that share same master_cpd_id
filtered.data1 <- CTRP_AUC_data[
  CTRP_AUC_data$master_cpd_id %in% unique(CTRP_drug_data$master_cpd_id),
]
# Select those that have same ID
filtered.data2 <- filtered.data1[
  filtered.data1$experiment_id %in% CTRP_drug_data$experiment_id,
]
# Combine by ID
Combined.data1 <- merge(CTRP_drug_data, filtered.data2,
                        by = c("experiment_id", "master_cpd_id"))
# Take only these columns
Combined.data2 <- Combined.data1[
  , c("ccl_name", "master_cpd_id", "apparent_ec50_umol", "area_under_curve",
      "experiment_id")
]
# col.names = NA writes an empty header cell above the row-name column.
# With col.names = TRUE and the default row.names = TRUE the header has one
# field fewer than the data rows, which shifts the columns in a spreadsheet.
write.table(Combined.data2, file = "CTRP MUT EC50 and AUC.csv",
            sep = ",", col.names = NA)

########################################
# COLLECT NEGATIVE CTRL AUC AND EC50 INFO
########################################

# Collect only cell lines that have data: everything that is NOT in the
# mutation list is used as negative control (WT).
neg_CTRP_cells <- unique(CTRP_mutationdata$Cell_Line_Name)
neg.sub.table <- FilteredDrugDatas[
  !(FilteredDrugDatas$ccl_name %in% neg_CTRP_cells),
]

# Re-shape (same long -> wide layout as for the MUT cells; missing
# combinations are filled with Inf and then blanked for the CSV)
neg.dataset <- neg.sub.table
neg.dataset2 <- dcast(
  neg.dataset,
  master_ccl_id + experiment_id + ccl_name + master_cpd_id ~ cpd_conc_umol,
  value.var = "cpd_avg_pv", fun.aggregate = sum, fill = Inf
)
neg.dataset2[neg.dataset2 == Inf] <- ""
write.csv(neg.dataset2, file = "CTRP organized WT cells.csv")

# Collecting AUC values for WT
CTRP_neg_drug_data <- neg.dataset2
# Select columns
CTRP_neg_drug_data <- CTRP_neg_drug_data[
  , c("ccl_name", "master_cpd_id", "experiment_id")
]
# Select unique column combinations
#CTRP_neg_drug_data = unique(CTRP_neg_drug_data)
# Select those that share same master_cpd_id
Neg.filtered.data1 <- CTRP_AUC_data[
  CTRP_AUC_data$master_cpd_id %in% unique(CTRP_neg_drug_data$master_cpd_id),
]
# Select those that have same ID
Neg.filtered.data2 <- Neg.filtered.data1[
  Neg.filtered.data1$experiment_id %in% CTRP_neg_drug_data$experiment_id,
]
# Combine by ID
Neg.Combined.data1 <- merge(CTRP_neg_drug_data, Neg.filtered.data2,
                            by = c("experiment_id", "master_cpd_id"))
# Take only these columns
Neg.Combined.data2 <- Neg.Combined.data1[
  , c("ccl_name", "master_cpd_id", "apparent_ec50_umol", "area_under_curve",
      "experiment_id")
]
# col.names = NA keeps the header aligned with the row-name column (see above)
write.table(Neg.Combined.data2, file = "CTRP WT EC50 and AUC.csv",
            sep = ",", col.names = NA)

# Negative ctrl values for different drugs:
# 52926 = gefitinib, 52928 = erlotinib, 606135 = afatinib, 634309 = lapatinib
gefitinib_ctrl <- Neg.Combined.data2[Neg.Combined.data2$master_cpd_id == 52926, ]
length(gefitinib_ctrl$area_under_curve)
# mean from 86 cell lines
mean(gefitinib_ctrl$area_under_curve)
# AUC neg ctrl 12.4321

erlotinib_ctrl <- Neg.Combined.data2[Neg.Combined.data2$master_cpd_id == 52928, ]
length(erlotinib_ctrl$area_under_curve)
# mean from 90 cell lines
mean(erlotinib_ctrl$area_under_curve)
# AUC neg ctrl 13.44199

afatinib_ctrl <- Neg.Combined.data2[Neg.Combined.data2$master_cpd_id == 606135, ]
length(afatinib_ctrl$area_under_curve)
# mean from 88 cell lines
mean(afatinib_ctrl$area_under_curve)
# AUC neg ctrl 10.36891

lapatinib_ctrl <- Neg.Combined.data2[Neg.Combined.data2$master_cpd_id == 634309, ]
length(lapatinib_ctrl$area_under_curve)
# mean from 86 cell lines
mean(lapatinib_ctrl$area_under_curve)
# AUC neg ctrl 12.86479

######################
# Fit response values by adding a 0 µM concentration point at 100, calculate
# the other values by x*100
######################

#####################
# GDSC DATABASE SEARCH
#####################

######################
# DATA INPUT
######################

# Central files collected from website:
# I have gone through "Screened_Compounds.xlsx" to search RTK drugs.
# Drug data is available in the files below:
GDSC_data <- read.table("v17.3_public_raw_data.csv",
                        sep = ",", header = TRUE, stringsAsFactors = FALSE)
GDSC_AUC <- read.table("v17.3_fitted_dose_response.csv",
                       sep = ";", header = TRUE, stringsAsFactors = FALSE)
GDSC_drugs <- read.table("GDSC_search_drugs.csv",
                         sep = ";", header = TRUE, stringsAsFactors = FALSE)
GDSC_cells <- read.table("GDSC_Cell_Line_Info.csv",
                         sep = ";", header = TRUE, stringsAsFactors = FALSE)
GDSC_mutations <- read.table("GDSC mutations.csv",
                             sep = ";", header = TRUE, stringsAsFactors = FALSE)

# Select all the drug data
# 1 = Erlotinib
SelectedDrug1 <- GDSC_data[which(GDSC_data$DRUG_ID == 1), ]
# 119 = Lapatinib
SelectedDrug2 <- GDSC_data[which(GDSC_data$DRUG_ID == 119), ]
# 1032 = Afatinib
SelectedDrug3 <- GDSC_data[which(GDSC_data$DRUG_ID == 1032), ]
# 1377 = Afatinib (a second afatinib series under its own DRUG_ID)
SelectedDrug4 <- GDSC_data[which(GDSC_data$DRUG_ID == 1377), ]
# 1010 = Gefitinib
SelectedDrug5 <- GDSC_data[which(GDSC_data$DRUG_ID == 1010), ]

# Merge these together
GDSC_data2 <- rbind(SelectedDrug1, SelectedDrug2, SelectedDrug3,
                    SelectedDrug4, SelectedDrug5)

# Add cell line info
GDSC_data_info <- merge(GDSC_data2, GDSC_cells, by = "COSMIC_ID", all.x = TRUE)

# Select cell lines: lung lines in the mutation list (MUT) ...
filtered.data1 <- GDSC_data_info[
  GDSC_data_info$CELL_LINE_NAME %in% unique(GDSC_mutations$CELL_LINE_NAME),
]
filtered.data1 <- filtered.data1[which(filtered.data1$Site == "lung"), ]

# ... and lung lines that are not in the mutation list (WT)
filtered.data.wt <- GDSC_data_info[
  !GDSC_data_info$CELL_LINE_NAME %in% unique(GDSC_mutations$CELL_LINE_NAME),
]
filtered.data.wt <- filtered.data.wt[which(filtered.data.wt$Site == "lung"), ]

# All negative control (NC) wells; the same table serves MUT and WT below
NC <- GDSC_data[which(GDSC_data$TAG == "NC-0"), ]

# Collect negative controls (NC) for MUT: restrict to the cell lines and
# scans that occur in the MUT drug data
GDSC_ID <- unique(filtered.data1$COSMIC_ID)
NC_GDSC <- NC[NC$COSMIC_ID %in% GDSC_ID, ]
SCAN_ID <- unique(filtered.data1$SCAN_ID)
NC_GDSC_TOT_MUT <- NC_GDSC[NC_GDSC$SCAN_ID %in% SCAN_ID, ]

# Collect negative controls for WT cells (GDSC_ID, NC_GDSC and SCAN_ID are
# deliberately overwritten with the WT selection)
GDSC_ID <- unique(filtered.data.wt$COSMIC_ID)
NC_GDSC <- NC[NC$COSMIC_ID %in% GDSC_ID, ]
SCAN_ID <- unique(filtered.data.wt$SCAN_ID)
NC_GDSC_TOT_WT <- NC_GDSC[NC_GDSC$SCAN_ID %in% SCAN_ID, ]

# Calculate the negative control means per scan and cell line.
# ungroup() so the result does not stay grouped by SCAN_ID.
meantable <- NC_GDSC_TOT_MUT %>%
  group_by(SCAN_ID, CELL_LINE_NAME) %>%
  summarise(mean = mean(INTENSITY)) %>%
  ungroup()
meantable2 <- NC_GDSC_TOT_WT %>%
  group_by(SCAN_ID, CELL_LINE_NAME) %>%
  summarise(mean = mean(INTENSITY)) %>%
  ungroup()
#write.csv(meantable, file="GDSC NC means for MUT.csv")
#write.csv(meantable2, file="GDSC NC means for WT.csv")

# Re-shape data for MUT: one row per cell line / scan / drug, one column per
# concentration; missing combinations are filled with Inf and then blanked
GDSC_search <- dcast(filtered.data1, CELL_LINE_NAME + SCAN_ID + DRUG_ID ~ CONC,
                     value.var = "INTENSITY", fun.aggregate = sum, fill = Inf)
GDSC_search[GDSC_search == Inf] <- ""
#write.csv(GDSC_search,file="All GDSC drugs MUT.csv")

# Re-shape data for WT
GDSC_search2 <- dcast(filtered.data.wt,
                      CELL_LINE_NAME + SCAN_ID + DRUG_ID ~ CONC,
                      value.var = "INTENSITY", fun.aggregate = sum, fill = Inf)
GDSC_search2[GDSC_search2 == Inf] <- ""
#write.csv(GDSC_search2,file="All GDSC drugs WT.csv")

# Add negative control means to points
GDSC_data3 <- merge(GDSC_search, meantable,
                    by = c("SCAN_ID", "CELL_LINE_NAME"), all.x = TRUE)
GDSC_data4 <- merge(GDSC_search2, meantable2,
                    by = c("SCAN_ID", "CELL_LINE_NAME"), all.x = TRUE)
#write.csv(GDSC_data3, file="Merged neg control and points MUT.csv")
#write.csv(GDSC_data4, file="Merged neg control and points WT.csv")

# Collect AUC for MUT: fitted values of the MUT cell lines for the selected
# drugs
AUC1 <- unique(filtered.data1$COSMIC_ID)
AUC2 <- GDSC_AUC[GDSC_AUC$COSMIC_ID %in% AUC1, ]
AUC3 <- unique(filtered.data1$DRUG_ID)
AUC4 <- AUC2[AUC2$DRUG_ID %in% AUC3, ]

# Add AUC data
GDSC_data3_AUC4 <- merge(GDSC_data3, AUC4,
                         by = c("CELL_LINE_NAME", "DRUG_ID"), all.x = TRUE)
#write.csv(GDSC_data3_AUC4, file = "GDSC_MUT_drug_AUC.csv")

# Add mutation data
GDSC_data3_AUC4_GDSC_mutations <- merge(GDSC_data3_AUC4, GDSC_mutations,
                                        by = c("CELL_LINE_NAME"), all.x = TRUE)
write.csv(GDSC_data3_AUC4_GDSC_mutations,
          file = "GDSC_MUT_drug_AUC_mutations.csv")

# Collect AUC info for WT (AUC1-AUC4 are overwritten with the WT selection)
AUC1 <- unique(filtered.data.wt$COSMIC_ID)
AUC2 <- GDSC_AUC[GDSC_AUC$COSMIC_ID %in% AUC1, ]
AUC3 <- unique(filtered.data.wt$DRUG_ID)
AUC4 <- AUC2[AUC2$DRUG_ID %in% AUC3, ]

# Add AUC
# data
# NOTE(review): despite the "data3" in its name, this overwrites
# GDSC_data3_AUC4 with the WT table (GDSC_data4 joined with the WT AUC4).
GDSC_data3_AUC4 <- merge(GDSC_data4, AUC4,
                         by = c("CELL_LINE_NAME", "DRUG_ID"), all.x = TRUE)
write.csv(GDSC_data3_AUC4, file = "GDSC_WT_drug_AUC.csv")

# Calculate WT AUC value means.
# For each drug: mean AUC (NA values dropped) and the number of rows.
# NOTE(review): length() also counts rows whose AUC is NA, whereas the mean
# ignores them, so the recorded n can exceed the number of values averaged.
Gefitinib_ctrl <- AUC4[AUC4$DRUG_NAME == "Gefitinib", ]
mean(as.numeric(Gefitinib_ctrl$AUC), na.rm = TRUE)
length(Gefitinib_ctrl$AUC)
#hist(as.numeric(Gefitinib_ctrl$AUC))
# AUC neg ctrl 0.9572212; 94

Afatinib_ctrl1 <- AUC4[AUC4$DRUG_ID == 1032, ]
mean(as.numeric(Afatinib_ctrl1$AUC), na.rm = TRUE)
length(Afatinib_ctrl1$AUC)
# AUC neg ctrl 0.9385724, max conc 0.5; 94

Afatinib_ctrl2 <- AUC4[AUC4$DRUG_ID == 1377, ]
mean(as.numeric(Afatinib_ctrl2$AUC), na.rm = TRUE)
length(Afatinib_ctrl2$AUC)
# AUC neg ctrl 0.9025666, max conc 10; 105

## The two afatinib series together
Afatinib_ctrl <- AUC4[AUC4$DRUG_NAME == "Afatinib", ]
mean(as.numeric(Afatinib_ctrl$AUC), na.rm = TRUE)
length(Afatinib_ctrl$AUC)
# AUC neg ctrl 0.9195744; 199

Lapatinib_ctrl <- AUC4[AUC4$DRUG_NAME == "Lapatinib", ]
mean(as.numeric(Lapatinib_ctrl$AUC), na.rm = TRUE)
length(Lapatinib_ctrl$AUC)
# AUC neg ctrl 0.9509353; 45

Erlotinib_ctrl <- AUC4[AUC4$DRUG_NAME == "Erlotinib", ]
mean(as.numeric(Erlotinib_ctrl$AUC), na.rm = TRUE)
length(Erlotinib_ctrl$AUC)
# AUC neg ctrl 0.9505726; 43

# Calculate the normalized and fitted drug response values.

#######################
# FINALIZING THE DATASET
#######################
# Combine all three dataset files into one file that has all the fitted drug
# values in one concentration series with cell line names unified.
# Check CNV status from Harmonizome (http://amp.pharm.mssm.edu/Harmonizome/)
# and RAS status from mutation files.

#####################
# p value calculation
#####################
# Two-sample t-test (independent):
# compare the differences between the averages.
# Note that the unpaired two-sample t-test can be used only under certain
# conditions:
# - when the two groups of samples (A and B) being compared are normally
#   distributed. This can be checked using the Shapiro-Wilk test.
# - and when the variances of the two groups are equal.
# This can be checked using the F-test.

#############################################
# In our situation the F-test results in favor of the Welch t-test (the two
# groups being compared are different (heteroscedasticity)) because of the
# very different group sizes. If we use this correct method, the p values are
# not significant, but if we assumed variance equality we could use the
# two-sample t-test, where the difference is significant in all occasions but
# lapatinib. The difference becomes significant if we pool erlotinib,
# gefitinib and afatinib together and compare it to lapatinib
# (p-value = 0.002963).
#############################################

# Curated AUC table used for the tests (columns used below: Compound,
# Protein.change, AUC).
# NOTE(review): this reuses the name CTRP_AUC_data, replacing the CTRP curve
# table that was loaded under the same name earlier in the script.
CTRP_AUC_data <- read.csv("CTRP AUC values.csv",
                          sep = ";", header = TRUE, stringsAsFactors = FALSE)

# In every test below: a = AUC values of the WT cell lines,
#                      b = AUC values of the p.ELREA746del cell lines.
# The commented numbers are the results recorded when the script was run.

####################
# TEST FOR ERLOTINIB
CTRP_AUC_data1 <- CTRP_AUC_data[CTRP_AUC_data$Compound == "Erlotinib", ]
a <- CTRP_AUC_data1[CTRP_AUC_data1$Protein.change == "WT", ]
a <- a$AUC
plotNormalHistogram(a)
b <- CTRP_AUC_data1[CTRP_AUC_data1$Protein.change == "p.ELREA746del", ]
b <- b$AUC
plotNormalHistogram(b)

var.test(a, b)
# F test to compare two variances
# data: a and b
# F = 0.10057, num df = 89, denom df = 3, p-value = 2.035e-05
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
# 0.007202728 0.328526133
# sample estimates:
# ratio of variances
# 0.1005713

t.test(a, b, var.equal = FALSE, paired = FALSE)
# Welch Two Sample t-test
# data: a and b
# t = 1.852, df = 3.0269, p-value = 0.1603
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -3.71757 14.19329
# sample estimates:
# mean of x mean of y
# 13.441987 8.204125

t.test(a, b, var.equal = TRUE, paired = FALSE)
# data: a and b
# t = 5.0393, df = 92, p-value = 2.325e-06
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 3.173518 7.302205
# sample estimates:
# mean of x mean of y
# 13.441987 8.204125

####################
# TEST FOR GEFITINIB
CTRP_AUC_data2 <- CTRP_AUC_data[CTRP_AUC_data$Compound == "Gefitinib", ]
a <- CTRP_AUC_data2[CTRP_AUC_data2$Protein.change == "WT", ]
a <- a$AUC
plotNormalHistogram(a)
b <- CTRP_AUC_data2[CTRP_AUC_data2$Protein.change == "p.ELREA746del", ]
b <- b$AUC
plotNormalHistogram(b)

var.test(a, b)
# data: a and b
# F = 0.093771, num df = 85, denom df = 3, p-value = 1.014e-05
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
# 0.006714337 0.306996758
# sample estimates:
# ratio of variances
# 0.09377099

t.test(a, b, var.equal = FALSE, paired = FALSE)
# Welch Two Sample t-test
# data: a and b
# t = 2.0305, df = 3.0262, p-value = 0.1345
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -3.054038 13.967743
# sample estimates:
# mean of x mean of y
# 12.43210 6.97525

t.test(a, b, var.equal = TRUE, paired = FALSE)
# Two Sample t-test
# data: a and b
# t = 5.6338, df = 88, p-value = 2.085e-07
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 3.531983 7.381721
# sample estimates:
# mean of x mean of y
# 12.43210 6.97525

####################
# TEST FOR AFATINIB
CTRP_AUC_data3 <- CTRP_AUC_data[CTRP_AUC_data$Compound == "Afatinib", ]
a <- CTRP_AUC_data3[CTRP_AUC_data3$Protein.change == "WT", ]
a <- a$AUC
plotNormalHistogram(a)
b <- CTRP_AUC_data3[CTRP_AUC_data3$Protein.change == "p.ELREA746del", ]
b <- b$AUC
plotNormalHistogram(b)

var.test(a, b)
# F test to compare two variances
# data: a and b
# F = 0.17995, num df = 87, denom df = 3, p-value = 0.003098
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
# 0.0128866 0.5884749
# sample estimates:
# ratio of variances
# 0.1799528

t.test(a, b, var.equal = FALSE, paired = FALSE)
# Welch Two Sample t-test
# data: a and b
# t = 2.36, df = 3.0493, p-value = 0.098
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -1.708909 11.872982
# sample estimates:
# mean of x mean of y
# 10.368911 5.286875

# Afatinib, assuming equal variances (a = WT AUC, b = p.ELREA746del AUC)
t.test(a, b, var.equal = TRUE, paired = FALSE)
# Two Sample t-test
# data: a and b
# t = 5.0902, df = 90, p-value = 1.946e-06
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 3.098558 7.065514
# sample estimates:
# mean of x mean of y
# 10.368911 5.286875

####################
# TEST FOR LAPATINIB
CTRP_AUC_data4 <- CTRP_AUC_data[CTRP_AUC_data$Compound == "Lapatinib", ]
a <- CTRP_AUC_data4[CTRP_AUC_data4$Protein.change == "WT", ]
a <- a$AUC
plotNormalHistogram(a)
b <- CTRP_AUC_data4[CTRP_AUC_data4$Protein.change == "p.ELREA746del", ]
b <- b$AUC
plotNormalHistogram(b)

var.test(a, b)
# F test to compare two variances
# data: a and b
# F = 0.35828, num df = 85, denom df = 2, p-value = 0.134
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
# 0.009073606 1.380711738
# sample estimates:
# ratio of variances
# 0.3582816

t.test(a, b, var.equal = FALSE, paired = FALSE)
# Welch Two Sample t-test
# data: a and b
# t = 1.0569, df = 2.0503, p-value = 0.399
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -4.721640 7.893812
# sample estimates:
# mean of x mean of y
# 12.86479 11.27870

t.test(a, b, var.equal = TRUE, paired = FALSE)
# Two Sample t-test
# data: a and b
# t = 1.7116, df = 87, p-value = 0.09054
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.2557874 3.4279594
# sample estimates:
# mean of x mean of y
# 12.86479 11.27870

################################
# TEST FOR POOLED ERLO+GEFI+AFA
# Stack the erlotinib, gefitinib and afatinib subsets and test WT vs.
# p.ELREA746del across all three drugs at once.
CTRP_AUC_data5 <- rbind(CTRP_AUC_data1, CTRP_AUC_data2, CTRP_AUC_data3)
a <- CTRP_AUC_data5[CTRP_AUC_data5$Protein.change == "WT", ]
a <- a$AUC
plotNormalHistogram(a)
b <- CTRP_AUC_data5[CTRP_AUC_data5$Protein.change == "p.ELREA746del", ]
b <- b$AUC
plotNormalHistogram(b)

var.test(a, b)
# F test to compare two variances
# data: a and b
# F = 0.20363, num df = 263, denom df = 11, p-value =
# 1.276e-06
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
# 0.06995375 0.41584486
# sample estimates:
# ratio of variances
# 0.2036349

# Pooled data, Welch test (a = WT AUC, b = p.ELREA746del AUC)
t.test(a, b, var.equal = FALSE, paired = FALSE)
# Welch Two Sample t-test
# data: a and b
# t = 3.7777, df = 11.205, p-value = 0.002963
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 2.204915 8.328219
# sample estimates:
# mean of x mean of y
# 12.088651 6.822083
### -> BECOMES SIGNIFICANT WHEN SAMPLE SIZE IS INCREASED

t.test(a, b, var.equal = TRUE, paired = FALSE)
# Two Sample t-test
# data: a and b
# t = 7.6468, df = 274, p-value = 3.501e-13
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 3.910689 6.622445
# sample estimates:
# mean of x mean of y
# 12.088651 6.822083

#####################
# Figure
#####################
# Publication theme modified from:
# https://rpubs.com/Koundy/71792

# windowsFonts() only exists on Windows; list the font mapping there and
# skip the call elsewhere so the script does not stop with an error.
if (.Platform$OS.type == "windows") {
  windowsFonts()
}
FontName <- "sans"

# Publication-style ggplot2 theme.
#
# base_size   : base font size handed to ggthemes::theme_foundation().
# base_family : font family for all text elements. Defaults to the global
#               FontName. (Previously this argument was accepted but ignored:
#               FontName was hard-coded in every element.)
# Returns a ggplot2 theme object that can be added to a plot with `+`.
theme_Publication <- function(base_size = 14, base_family = FontName) {
  library(grid)
  library(ggthemes)
  theme_foundation(base_size = base_size, base_family = base_family) +
    theme(
      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5,
                                family = base_family),
      text = element_text(),
      panel.background = element_rect(colour = NA),
      plot.background = element_rect(colour = NA),
      panel.border = element_rect(colour = NA),
      axis.title = element_text(face = "bold", size = rel(1),
                                family = base_family),
      axis.title.y = element_text(angle = 90, vjust = 2),
      axis.title.x = element_text(vjust = -0.2),
      axis.text = element_text(family = base_family),
      axis.line = element_line(colour = "black"),
      axis.ticks = element_line(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.key = element_rect(colour = NA),
      legend.position = "bottom",
      legend.direction = "horizontal",
      legend.key.size = unit(0.2, "cm"),
      # `legend.margin = unit(0, "cm")` is the pre-2.2.0 spelling; current
      # ggplot2 warns and rewrites it to exactly these two settings.
      legend.spacing = unit(0, "cm"),
      legend.margin = margin(0, 0, 0, 0, "cm"),
      legend.text = element_text(family = base_family),
      legend.title = element_text(face = "italic", family = base_family),
      plot.margin = unit(c(10, 5, 5, 5), "mm"),
      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
      strip.text = element_text(face = "bold", family = base_family)
    )
}

#####################################
cells <- read.table(
  "Search results for exon 19 del mutation data Figure.csv",
  sep = ";", header = TRUE
)
# One ID per input row so that each dose-response series is drawn as its own
# line. seq_len() is safe for an empty table, unlike 1:nrow().
cells$uniqID <- paste0("ID", seq_len(nrow(cells)))

# Wide -> long: one row per series and concentration column.
# The concentration columns are named like "X0.µM" ... "X66.µM"; the "µM" had
# been corrupted into a mis-encoded character in this file.
# NOTE(review): save this script in an encoding that matches the CSV header
# so the column names compare equal - confirm on the target machine.
Cells_Long <- gather(cells, concentration, SurvCellsPer,
                     `X0.µM`:`X66.µM`, factor_key = TRUE)

# Strip the "X" prefix and the ".µM" suffix to get the numeric concentration
Cells_Long$concentrationNum <- Cells_Long$concentration
Cells_Long$concentrationNum <- gsub("X", "", Cells_Long$concentrationNum)
Cells_Long$concentrationNum <- gsub(".µM", "", Cells_Long$concentrationNum)
Cells_Long$concentrationNum <- as.numeric(Cells_Long$concentrationNum)

# Drop points without a parsable concentration or without a response value
Cells_Long2 <- Cells_Long[!is.na(Cells_Long$concentrationNum), ]
Cells_Long2 <- Cells_Long2[!is.na(Cells_Long2$SurvCellsPer), ]

# Figure shows one database / one compound at a time
Cells_Long3 <- Cells_Long2[Cells_Long2$Database == "GDSC", ]
Cells_Long4 <- Cells_Long3[Cells_Long3$Compound == "Gefitinib", ]

# black for WT, red for exon19
# p.E746_A750del, WT
# (colours are assigned in the order of the Protein.change levels)
cbPalette <- c("red", "black")

plottoshow <- ggplot(
  data = Cells_Long4,
  aes(x = factor(concentrationNum), y = SurvCellsPer,
      color = Protein.change, group = uniqID)
) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = cbPalette)
plottoshow <- plottoshow + theme_Publication()
print(plottoshow)