%% Reads the tax files
% Name: ReadTaxFiles.m
% Author: Robert Heyer
% Date : 29.04.2015
% Version: 02
% Input: [] (files are chosen interactively through a file dialog)
% Output 1: matrix, containing the values, the variables (rows) and
%           samples (columns)
% Output 2: MatrixPlusDescription, the same matrix including the five
%           description columns
% Both outputs are left in the workspace and written next to the input
% files as 'TaxSummary.csv' and 'TaxDescriptionSummary.csv'.
%
% Each input file is read as tab-delimited text with seven columns:
%   column 1   : entry name, used as the key that identifies a row
%   column 2-6 : description columns (labelled superkingdom, kingdom,
%                phylum, class and order by the original author)
%   column 7   : numeric value of the entry for this sample
%**************************************************************************

%% Get files
[FILES.fileName, FILES.pathName] = uigetfile('../*.csv', 'Read taxonomy.-files', 'MultiSelect', 'on');

% uigetfile returns 0 when the dialog is cancelled; stop cleanly instead of
% failing later while trying to open a file named "0".
if isequal(FILES.fileName, 0)
    fprintf('No taxonomy files selected.\n');
    return;
end

% If only one .csv-file is selected, uigetfile returns a char vector;
% wrap it so the rest of the script can always index a cell array.
if ~iscell(FILES.fileName)
    FILES.fileName = {FILES.fileName};
end

%% Import data
nFiles     = numel(FILES.fileName);
typeList   = cell(0, 6);        % Non-redundant entries: key + 5 description columns
fileData   = cell(1, nFiles);   % Parsed content of every file (each file is read once)

delimiter  = '\t';
startRow   = 1;
% Format string for each line of text:
%   column 1-6: text (%s)
%   column 7  : double (%f)
%   remainder : rest of the line (discarded below)
formatSpec = '%s%s%s%s%s%s%f%[^\n\r]';

for i = 1 : nFiles
    currentName = FILES.fileName{i};
    currentFile = fullfile(FILES.pathName, currentName);

    % Open the text file and fail with a clear message if that is not possible.
    fileID = fopen(currentFile, 'r');
    if fileID == -1
        error('ReadTaxFiles:fileOpenFailed', 'Could not open file: %s', currentFile);
    end

    % Read columns of data according to format string. The file is closed
    % even when textscan throws ('ReturnOnError' is false).
    try
        dataArray = textscan(fileID, formatSpec, 'Delimiter', delimiter, ...
            'HeaderLines', startRow-1, 'ReturnOnError', false);
    catch readError
        fclose(fileID);
        rethrow(readError);
    end
    fclose(fileID);

    % Create output variable: N x 7 cell array (6 text columns + 1 numeric
    % column); the trailing "rest of line" column is dropped.
    dataArray{7} = num2cell(dataArray{7});
    data = [dataArray{1:end-1}];
    fileData{i} = data;

    % Append entries whose key (column 1) is not yet known. The first
    % occurrence of a key defines its description columns.
    if ~isempty(data)
        [candidateKeys, firstRow] = unique(data(:,1), 'stable');
        isNew = ~ismember(candidateKeys, typeList(:,1));
        typeList = [typeList; data(firstRow(isNew), 1:6)]; %#ok<AGROW>
    end

    % SYSO
    fprintf('IMPORTED: %s\n', currentName);
end
clearvars candidateKeys firstRow isNew currentName currentFile;

%% Get data matrix
nTypes = size(typeList, 1);

% Define empty matrix: one header row, six leading key/description columns,
% one column per sample. Cells that are never filled stay 0.
matrix = num2cell(zeros(nTypes + 1, nFiles + 6));

% Set row names (variables)
matrix(2:end, 1:6) = typeList;

% Set column names (samples)
matrix(1, 7:end) = FILES.fileName(:).';

% Fill the matrix from the data cached during import
for i = 1 : nFiles
    data = fileData{i};
    if ~isempty(data)
        % Row of every entry inside typeList (always found, see import loop)
        [~, typeRow] = ismember(data(:,1), typeList(:,1));
        % Assign in file order so that, for a key occurring several times
        % in one file, the last value wins.
        for j = 1 : size(data, 1)
            matrix(typeRow(j) + 1, i + 6) = data(j, 7);
        end
    end

    % SYSO
    fprintf('READ: %s\n', FILES.fileName{i});
end
clearvars typeRow fileData;

%% Sort the matrix
% Rows are ordered by the description columns 2..6 (ascending). The order is
% computed on the text columns only, because the full matrix mixes text and
% numbers. char(1) separates the columns inside the combined key so that a
% shorter text sorts before a longer one that starts with it.
if nTypes > 1
    sortKey = typeList(:, 2);
    for k = 3 : 6
        sortKey = strcat(sortKey, char(1), typeList(:, k));
    end
    [~, rowOrder] = sort(sortKey);
    matrix(2:end, :) = matrix(rowOrder + 1, :);
end
clearvars sortKey rowOrder k;

MatrixPlusDescription = matrix;

% Remove description columns (keep the key column and the sample columns)
matrix(:, 2:6) = [];

%% Save matrices
TaxSummary = [FILES.pathName 'TaxSummary.csv'];
TaxDescriptionSummary = [FILES.pathName 'TaxDescriptionSummary.csv'];
xlswrite(TaxSummary, matrix);
xlswrite(TaxDescriptionSummary, MatrixPlusDescription);
fprintf('Taxonomy files successfully read and saved: \n');