%% Reads the tax files
% Name:         ReadTaxFiles.m
% Author:       Robert Heyer
% Date :        29.04.2015
% Version:      02
% Input:        []
% Output 1:     matrix, containing the values, the variables(rows) and
%               samples (columns)
%**************************************************************************



%% Get files
% Let the user pick one or more .csv files, starting in the parent folder.
[FILES.fileName, FILES.pathName] = uigetfile('../*.csv', 'Read taxonomy.-files', 'MultiSelect', 'on');
% uigetfile returns the numeric value 0 when the dialog is cancelled; stop
% here with a clear message instead of failing later while opening a file.
if isequal(FILES.fileName, 0)
    error('ReadTaxFiles:noFileSelected', 'No taxonomy file was selected.');
end
% A single selection comes back as a char vector; wrap it in a cell so the
% rest of the script can always index FILES.fileName as a cell array.
if ~iscell(FILES.fileName)
    FILES.fileName = {FILES.fileName};
end

%% Import data
% First pass over all selected files: collect every distinct value of data
% column 1 together with the five further text columns (superkingdom,
% kingdom, phylum, class, order) of the first row in which it occurs.
typeList = cell(0,6);
delimiter = '\t';
startRow = 1;
% Six text columns (%s), one numeric column (%f), then the rest of the line
formatSpec = '%s%s%s%s%s%s%f%[^\n\r]';
for i = 1 : length(FILES.fileName)
    currentFile = [FILES.pathName, FILES.fileName{i}];
    % Open the text file and fail with a readable message if that is not possible
    [fileID, openMsg] = fopen(currentFile, 'r');
    if fileID == -1
        error('ReadTaxFiles:cannotOpenFile', 'Cannot open %s: %s', currentFile, openMsg);
    end
    % Read columns of data according to the format string; make sure the
    % file handle is released even when textscan throws.
    try
        dataArray = textscan(fileID, formatSpec, 'Delimiter', delimiter, 'HeaderLines', startRow-1, 'ReturnOnError', false);
    catch readErr
        fclose(fileID);
        rethrow(readErr);
    end
    fclose(fileID);
    % Turn the numeric column into a cell column so all seven columns can be
    % concatenated into one cell matrix (the trailing remainder column is dropped).
    dataArray{7} = num2cell(dataArray{7});
    data = [dataArray{1:end-1}];

    % Append rows whose column-1 value has not been seen before
    for j = 1 : size(data,1)
        if ~any(strcmp(data{j,1}, typeList(:,1)))
            typeList(end+1, :) = data(j, 1:6); %#ok<SAGROW>
        end
    end
    % Progress message; the file name is passed as data, not as part of the
    % format string, so '%' or '\' in a name cannot corrupt the output.
    fprintf('IMPORTED: %s\n', FILES.fileName{i});
end

% Get data matrix
% One header row plus one row per typeList entry; six description columns
% followed by one value column per selected file. Every cell starts as 0.
matrix = num2cell(zeros(size(typeList,1) + 1, length(FILES.fileName) + 6));
% Set row names (variables): copy the six typeList columns below the header row
% typeList = sort(typeList);                                                 % Sort alphabetically
matrix(2:end, 1:6) = typeList(:, 1:6);
% Set column names (samples): file names go into the header row, after the
% description columns
matrix(1, 7:end) = FILES.fileName(1, :);
% The script has always removed r and c from the workspace at this point
clearvars r c;


% Fill the matrix
% Second pass over all selected files: for every data row, write its numeric
% value (data column 7) into the column belonging to file i, in the row
% whose label (matrix column 1) equals data column 1.
delimiter = '\t';
startRow = 1;
% Six text columns (%s), one numeric column (%f), then the rest of the line
formatSpec = '%s%s%s%s%s%s%f%[^\n\r]';
% Row labels do not change while filling, so extract them once
rowKeys = matrix(2:end, 1);
for i = 1 : length(FILES.fileName)
    currentFile = [FILES.pathName, FILES.fileName{i}];
    % Open the text file and fail with a readable message if that is not possible
    [fileID, openMsg] = fopen(currentFile, 'r');
    if fileID == -1
        error('ReadTaxFiles:cannotOpenFile', 'Cannot open %s: %s', currentFile, openMsg);
    end
    % Read columns of data according to the format string; make sure the
    % file handle is released even when textscan throws.
    try
        dataArray = textscan(fileID, formatSpec, 'Delimiter', delimiter, 'HeaderLines', startRow-1, 'ReturnOnError', false);
    catch readErr
        fclose(fileID);
        rethrow(readErr);
    end
    fclose(fileID);
    % Turn the numeric column into a cell column so all seven columns can be
    % concatenated into one cell matrix (the trailing remainder column is dropped).
    dataArray{7} = num2cell(dataArray{7});
    data = [dataArray{1:end-1}];

    % Copy each value into the matching row(s); rows are processed in file
    % order, so a later row with the same label overwrites an earlier one.
    for j = 1 : size(data,1)
        targetRows = find(strcmp(data{j,1}, rowKeys)) + 1;   % +1 skips the header row
        if ~isempty(targetRows)
            matrix(targetRows, i+6) = data(j,7);
        end
    end
    % Progress message; the file name is passed as data, not as part of the
    % format string, so '%' or '\' in a name cannot corrupt the output.
    fprintf('READ: %s\n', FILES.fileName{i});
end
% NOTE(review): tax and entries are not assigned anywhere in the visible
% script; the call is kept unchanged.
clearvars tax entries;
% Sort the matrix
% Data rows (everything below the header row) are ordered by the description
% columns 2-6. The order is computed from those text columns only and then
% applied as a row index, so it does not rely on sortrows accepting a cell
% array that mixes text and numeric cells.
matrixB = cell(size(matrix,1),size(matrix,2));
matrixB(1,:) = matrix(1,:);
if size(matrix,1) > 1
    [~, sortOrder] = sortrows(matrix(2:end, 2:6));
    matrixB(2:end, :) = matrix(sortOrder + 1, :);            % +1 skips the header row
end
matrix = matrixB;
% NOTE(review): tax_matrix_filtered_descB is not assigned anywhere in the
% visible script; the call is kept unchanged (matrixB stays in the workspace).
clearvars tax_matrix_filtered_descB;

% Keep a copy that still contains the description columns
MatrixPlusDescription = matrix;
% Remove description columns (2-6), leaving the row label and the value columns
matrix(:, 2:6) = [];

%% Save matrices
% Both output files are written into the folder the input files were picked from.
TaxSummary = horzcat(FILES.pathName, 'TaxSummary.csv');
TaxDescriptionSummary = horzcat(FILES.pathName, 'TaxDescriptionSummary.csv');
% Table without the description columns
xlswrite(TaxSummary, matrix);
% Table including the description columns
xlswrite(TaxDescriptionSummary, MatrixPlusDescription);


fprintf('Taxonomy files successfully read and saved: \n');