---
title: "FRAP_Analysis_Pipeline_August2016_NuclearFoci"
author: "Analise Hofmann"
date: "June 28, 2017"
output: html_document
---

```{r, Set-up}
library(readr)
library(utils)

# Set the starting directory to the top directory containing the four folders
# below, which store the data throughout the analysis.
# Example: StartingDir <- "./FRAP_AnalysisFolder"
# It defaults to the current working directory so the notebook parses and runs
# when it is opened from inside the analysis folder.
StartingDir <- getwd()

## The user should make these four folders inside StartingDir with the same
## names as below. The paths are relative to StartingDir.
## Put the raw files from the Olympus FV1000 into the "starting_Rawdata" folder.
Folder_containing_RawData <- "./starting_Rawdata"
Folder_toPrint_csvFiles <- "./starting_CSVdata"
Folder_toPrint_ColumnNorm <- "./ColumnNormalizedTo100"
Folder_ForFinalTables <- "./Final_tables"

# Raw data file names to cut and convert to .csv files in the analysis.
csv_RawfileList <- list.files(
  path = file.path(StartingDir, Folder_containing_RawData)
)

# Name and path of the first raw file; not used by the functions below, kept
# only for interactive inspection. (No loop index exists at this point, so the
# first file is used.)
cycling_RawFileName <- csv_RawfileList[1]
cyclingRawFile_path <- file.path(
  StartingDir, Folder_containing_RawData, cycling_RawFileName
)
```

```{r, Function 1 to get column identies for each file from user}
##############
### Function 1, calculate the number of rows to cut from raw oif xls files
########

# Ask the user for the number of ROIs in the file whose name was just printed.
# Re-prompts in an interactive session until a positive whole number is given;
# in a non-interactive session (where readline() cannot wait for input) it
# stops with an error instead of looping forever.
ask_roi_count <- function() {
  repeat {
    answer <- suppressWarnings(as.integer(
      readline("what is the number of ROIs for the above file name?")
    ))
    if (!is.na(answer) && answer > 0L) {
      return(answer)
    }
    if (!interactive()) {
      stop("The number of ROIs must be a positive whole number.",
           call. = FALSE)
    }
    message("Please enter a positive whole number.")
  }
}

# Build the per-file bookkeeping table.
#
# csv_RawfileList: character vector of raw file names; each name is printed
#   and the user is asked for that file's ROI count.
#
# Returns a data.frame with one row per file and three columns:
#   colNames - NA here; filled in later by xlsToCSV()
#   NumToCut - NumROIs + 8; xlsToCSV() passes it as `skip` to read_delim()
#              (presumably the raw export has 8 fixed header lines plus one
#              per ROI - TODO confirm against a raw file)
#   NumROIs  - the ROI count entered by the user
get_NumberToCutRows <- function(csv_RawfileList) {
  n_files <- length(csv_RawfileList)
  input_column_identities3 <- data.frame(
    colNames = rep(NA_character_, n_files),
    NumToCut = rep(NA_integer_, n_files),
    NumROIs = rep(NA_integer_, n_files),
    stringsAsFactors = FALSE
  )

  # seq_along() yields no iterations for an empty file list, unlike 1:length().
  for (i in seq_along(csv_RawfileList)) {
    print(csv_RawfileList[i])
    n_rois <- ask_roi_count()
    input_column_identities3[i, "NumROIs"] <- n_rois
    input_column_identities3[i, "NumToCut"] <- n_rois + 8L
  }

  input_column_identities3
}
############# END FUNCTION 1
```

```{r, Run Function 1 and get comlumn identities from the user}
input_column_identities <- get_NumberToCutRows(csv_RawfileList)
# Working copy whose colNames column is filled in by xlsToCSV() below.
num_skip <- data.frame(input_column_identities, stringsAsFactors = FALSE)
```

```{r, Second Function to get info from user on the files}
#######################
### Function 2, add column names, and save cut tables to csv
######################
# Note: most raw files cut down cleanly; a few have inconsistencies and need
# manual editing.
# Possible future change: separate the cutting/conversion to csv from the entry
# of the column letters, so the letters could be typed into a table and read in
# instead of being entered by hand for every file.

# Ask the user for the ROI letter code of the file that was just printed
# (one letter per ROI, e.g. "fmffdc"). Re-prompts in an interactive session
# until the number of letters equals expected_length; in a non-interactive
# session a wrong length is an error.
ask_column_codes <- function(expected_length) {
  if (is.na(expected_length)) {
    stop("The number of ROIs is missing for this file.", call. = FALSE)
  }
  codes <- as.character(readline(
    "please give column names for above printed file in this format: fmffdc etc..."
  ))
  while (nchar(codes) != expected_length) {
    if (!interactive()) {
      stop("Expected ", expected_length, " column letters but got ",
           nchar(codes), ".", call. = FALSE)
    }
    print("You must try again, wrong number of letters")
    print(expected_length)
    codes <- as.character(readline(
      "please give column names for above printed file in this format: fmffdc etc. with the correct number of columns letters as printed above:"
    ))
  }
  print("Good Job!")
  codes
}

# Cut the header off every raw file, save the result as "<raw name>Cut.csv",
# and record the ROI letter code the user enters for each file.
#
# Folder_containing_RawData: folder holding the raw tab-delimited files.
# Folder_toPrint_csvFiles:   folder the cut .csv files are written to
#                            (created if it does not exist).
# csv_RawfileList:           raw file names inside Folder_containing_RawData.
# num_skip:                  table from get_NumberToCutRows(); column 2 is the
#                            number of lines to skip, column 3 the ROI count.
#
# Returns num_skip with column 1 filled with the entered letter codes.
# The working directory is never changed; all paths are built with file.path().
xlsToCSV <- function(Folder_containing_RawData, Folder_toPrint_csvFiles,
                     csv_RawfileList, num_skip) {
  dir.create(Folder_toPrint_csvFiles, showWarnings = FALSE, recursive = TRUE)

  for (i in seq_along(csv_RawfileList)) {
    raw_name <- csv_RawfileList[i]
    raw_path <- file.path(Folder_containing_RawData, raw_name)
    print(raw_path)

    test_table <- read_delim(
      file = raw_path,
      skip = num_skip[i, 2],
      delim = "\t",
      col_names = TRUE
    )
    # Keep only the columns without any NA (removes the extra columns added).
    test_table2 <- test_table[, colSums(is.na(test_table)) == 0]

    # Show the first row and the expected ROI count so the user can type the
    # letter code for this file.
    print(test_table[1, ])
    print(num_skip[i, 3])
    num_skip[i, 1] <- ask_column_codes(num_skip[i, 3])

    csv_fileNameFinal <- paste0(raw_name, "Cut.csv")
    write.csv(
      x = test_table2,
      file = file.path(Folder_toPrint_csvFiles, csv_fileNameFinal),
      row.names = FALSE
    )
  }

  num_skip
}
############# END FUNCTION 2
```

```{r, Run Function to Cut the Raw Input Files into Tables}
input_column_identities4 <- xlsToCSV(
  file.path(StartingDir, Folder_containing_RawData),
  file.path(StartingDir, Folder_toPrint_csvFiles),
  csv_RawfileList,
  num_skip
)

# File names for use in the final for loop that completes the FRAP analysis.
filename <- list.files(path = file.path(StartingDir, Folder_toPrint_csvFiles))

# The final loop pairs filename[t] with row t of input_column_identities4, so
# stale files in the csv folder would shift that pairing.
if (length(filename) != nrow(input_column_identities4)) {
  warning("The csv folder holds ", length(filename), " files but ",
          nrow(input_column_identities4), " column codes were entered; ",
          "remove stale files before running the final step.",
          call. = FALSE)
}
```

```{r, column normalizer function}
####################################
### Column normalizer function ####
###### Function 3, used in the for loop below. It normalizes one column at a
###### time, so it is called once per column to go through the whole table;
###### the loop then adds back the fluorescence lost from imaging each frame.
######################
# Divide every value of `column` by its first value, so the first frame
# becomes 1.
#
# column: numeric vector (one ROI's values over the frames).
# Returns an unnamed numeric vector of the same length; an empty input gives
# an empty result. A first value of 0 produces Inf/NaN, as with any division
# by zero.
column_normalizer <- function(column) {
  unname(column / column[1])
}
####
```

```{r, Final Step in FRAP Analysis}
###########################
## Final step: normalize all files and add back the control fluorescence loss
##########################################################

# Average the given columns of the normalized matrix row by row.
# With exactly one column the column itself is returned; with none, a
# zero-column matrix is returned, which data.frame() silently skips.
average_rois <- function(norm_data, roi_cols) {
  if (length(roi_cols) > 1) {
    rowMeans(norm_data[, roi_cols, drop = FALSE])
  } else {
    norm_data[, roi_cols]
  }
}

# Run the FRAP normalization for one cut csv file.
#
# csv_name:     file name inside csv_dir.
# column_codes: one letter per ROI column, e.g. "cdddmmf":
#                 c control, e control to ignore, n nucleus, y cytoplasm,
#                 f FRAP region, u nuclear foci, o cytoplasmic foci
# csv_dir:      folder holding the cut csv files.
# norm_dir:     folder for "<csv_name>Norm100.csv" (first-frame normalized).
# final_dir:    folder for "<csv_name>Final.csv" (control-corrected table).
#
# Writes both files and invisibly returns the final table. Stops with an error
# when the number of letters differs from the number of ROI columns or when no
# control ROI is given. The working directory is never changed.
process_frap_file <- function(csv_name, column_codes, csv_dir, norm_dir,
                              final_dir) {
  raw_data <- read.csv(file.path(csv_dir, csv_name), header = TRUE)

  # Drop columns 1 and 2 (a note in xlsToCSV calls them "num and time" -
  # TODO confirm), then drop columns that are entirely NA. drop = FALSE keeps
  # a data frame even when a single ROI column is left.
  raw_fluor_data <- raw_data[, -(1:2), drop = FALSE]
  is_empty_col <- vapply(
    raw_fluor_data, function(x) all(is.na(x)), logical(1)
  )
  raw_fluor_data <- raw_fluor_data[, !is_empty_col, drop = FALSE]
  col_len <- ncol(raw_fluor_data)
  print(col_len)

  colID <- unlist(strsplit(column_codes, split = ""))
  print(length(colID))
  if (length(colID) != col_len) {
    stop("File ", csv_name, ": ", length(colID), " column letters given for ",
         col_len, " ROI columns.", call. = FALSE)
  }

  control_col <- which(colID == "c")       # control ROIs
  extraControl_col <- which(colID == "e")  # control ROIs to ignore
  nucleus_col <- which(colID == "n")       # nucleus ROIs
  cytoplasm_col <- which(colID == "y")     # cytoplasm ROIs
  frap_col <- which(colID == "f")          # FRAP region ROIs
  nu_foci_col <- which(colID == "u")       # nuclear foci ROIs
  cyto_foci_col <- which(colID == "o")     # cytoplasmic foci ROIs

  cat("Control is region:", control_col, ", ")
  cat("Nucleus are region:", nucleus_col, ", ")
  cat("Cytoplasm are region:", cytoplasm_col, ", ")
  cat("FRAP region:", frap_col, ", ")
  cat("Nuclear foci regions are:", nu_foci_col, ", ")
  cat("Cytoplasmic Foci regions are:", cyto_foci_col, "----END GREP--- ")

  # The correction below needs a control trace; fail early and clearly.
  if (length(control_col) == 0L) {
    stop("File ", csv_name, ": no control ROI ('c') in \"", column_codes,
         "\".", call. = FALSE)
  }

  # Normalize every ROI column so that the first frame collected is 1.
  # Column names are carried over from the csv header, so the saved table
  # stays labelled per ROI.
  norm_data <- do.call(cbind, lapply(raw_fluor_data, column_normalizer))

  # Save the normalized data for later use.
  dir.create(norm_dir, showWarnings = FALSE, recursive = TRUE)
  filename_Norm <- paste0(csv_name, "Norm100.csv")
  write.csv(
    x = norm_data,
    file = file.path(norm_dir, filename_Norm),
    row.names = FALSE
  )

  # Average the control ROIs if there is more than one. To use a single
  # control region, mark the others 'e' instead of 'c'.
  control_avg <- average_rois(norm_data, control_col)

  # Add back the fluorescence loss at each time point to every ROI: row i of
  # every column gains control_avg[1] - control_avg[i]. The vector has one
  # entry per row, so it recycles down each column of the matrix.
  norm_data <- norm_data + (control_avg[1] - control_avg)

  # Average the different ROI groups; change this to reflect the ROIs used.
  nucleus_avg <- average_rois(norm_data, nucleus_col)
  cytoplasm_avg <- average_rois(norm_data, cytoplasm_col)
  cytofoci_avg <- average_rois(norm_data, cyto_foci_col)
  nufoci_avg <- average_rois(norm_data, nu_foci_col)
  frap_final <- norm_data[, frap_col]

  # Final table for this sample. control_avg is the control trace before the
  # correction was added back.
  final_table <- data.frame(
    frap_final, nucleus_avg, cytoplasm_avg, nufoci_avg, cytofoci_avg,
    control_avg
  )

  dir.create(final_dir, showWarnings = FALSE, recursive = TRUE)
  filename_Final <- paste0(csv_name, "Final.csv")
  write.csv(
    x = final_table,
    file = file.path(final_dir, filename_Final),
    row.names = FALSE
  )

  invisible(final_table)
}

# seq_along() skips the loop entirely when there are no csv files.
for (file_index in seq_along(filename)) {
  process_frap_file(
    csv_name = filename[file_index],
    column_codes = as.character(input_column_identities4[file_index, 1]),
    csv_dir = file.path(StartingDir, Folder_toPrint_csvFiles),
    norm_dir = file.path(StartingDir, Folder_toPrint_ColumnNorm),
    final_dir = file.path(StartingDir, Folder_ForFinalTables)
  )
}
########### END for loop to normalize data to 100% and then add back the
########### control fluorescence loss
```

## MISC

```{r}
# Save the input table of column identities for each file (i.e. the order the
# ROIs are saved in the raw file).
write.csv(
  x = input_column_identities4,
  file = file.path(StartingDir, "Day8_table"),
  row.names = TRUE
)
```