Skip to main content
Front Genet. 2019 May 16;10:452. doi: 10.3389/fgene.2019.00452

Algorithm 2.

Identification of features subsets and feature-optimized models

Input: Dataset with ranked best features S, set of ML classifiers with various hyperparameters, set of criteria, datasets D
Output: Feature-optimized models list L with their identified features subset
function EVALUATE(model, selectedFeatures, dataset D, list of models L)
    trainSetEvaluation = Evaluate model using 10CV, LOOCV, Bootstrap, Repeated Holdout, 0.632+ estimator on train set
    testSetEvaluation = Extract selectedFeatures from test instances of dataset D and perform holdout evaluation with model
    performances = trainSetEvaluation, testSetEvaluation
    add model with performances and selectedFeatures to L
    return L
end function
for each classifier in classifiers do
    for each criterion in criteria do
        for each featureSearchMethod in featureSearchMethods {FSS, BSS, FSSBSE, BSSFSE} do
            if criterion must be maximized (see Supplementary Table S2) then
                criterionScore = 0
                rule = “lesser than”
            else
                criterionScore = 1000
                rule = “greater than”
            end if
            if featureSearchMethod = FSS or BSS then
                if featureSearchMethod = BSS then
                    S = invert feature rank order of S
                end if
                for each feature A in S do
                    Add A to selectedFeatures
                    model = Train using classifier with selectedFeatures
                    newCriterionScore = perform 10CV evaluation
                    if newCriterionScore rule criterionScore then
                        discard A from selectedFeatures
                    else
                        keep A in selectedFeatures
                        criterionScore = newCriterionScore
                    end if
                end for
            else
                if featureSearchMethod = BSSFSE then
                    S = invert feature rank order of S
                end if
                for each feature A in S do
                    Add A to selectedFeatures
                    model = Train using classifier with selectedFeatures
                    newCriterionScore = perform 10CV evaluation
                    if newCriterionScore rule criterionScore then
                        discard A from selectedFeatures
                    else
                        keep A in selectedFeatures
                        criterionScore = newCriterionScore
                        for each selectedFeature from before last kept feature to the first selected feature in selectedFeatures do
                            remove selectedFeature from selectedFeatures
                            subModel = Train using classifier with selectedFeatures
                            subNewCriterionScore = perform 10CV evaluation
                            if subNewCriterionScore rule newCriterionScore then
                                discard selectedFeature from selectedFeatures
                                newCriterionScore = subNewCriterionScore
                            else
                                keep selectedFeature in selectedFeatures
                            end if
                        end for
                    end if
                end for
            end if
            L = EVALUATE(model, selectedFeatures, D, L)
        end for
    end for
    # create models without stepwise feature subset selection approaches
    selectedFeatures = k first features
    model = Train using classifier with selectedFeatures from dataset S
    L = EVALUATE(model, selectedFeatures, D, L)
end for
return L