Input: Dataset with ranked best features S, set of ML classifiers with various hyperparameters, set of criteria, datasets D
|
Output: Feature-optimized models list L with their identified features subset |
|
function EVALUATE(model, selectedFeatures, dataset D, list of models L) |
trainSetEvaluation = Evaluate model using 10CV, LOOCV, Bootstrap, Repeated Holdout, 0.632+ estimator on train set |
testSetEvaluation = Extract selectedFeatures from test instances of dataset D and perform holdout evaluation with model
|
performances = trainSetEvaluation, testSetEvaluation
|
add model with performances and selectedFeatures to L
|
return
L
|
end function |
for each
classifier in classifiers
do
|
for each
criterion in criteria
do
|
for each
featureSearchMethod in featureSearchMethods {FSS, BSS, FSSBSE, BSSFSE} |
do
|
if
criterion must be maximized |
(see Supplementary Table S2) then
|
criterionScore = 0
|
rule = “less than” |
else
|
criterionScore = 1000
|
rule = “greater than” |
end if
|
if
featureSearchMethod = FSS or BSS
then
|
if
featureSearchMethod = BSS
then
|
S = invert feature rank order of S
|
end if
|
for each feature A in S
do
|
Add A to selectedFeatures
|
model = Train using classifier with selectedFeatures
|
newCriterionScore = perform 10CV evaluation |
if
newCriterionScore rule criterionScore
|
then discard A from selectedFeatures
|
else keep A in selectedFeatures
|
criterionScore = newCriterionScore
|
end if
|
end for
|
else
|
if
featureSearchMethod = BSSFSE |
then
S = invert feature rank order of S
|
end if
|
for each feature A in S
do
|
Add A to selectedFeatures
|
model = Train using classifier with selectedFeatures
|
newCriterionScore = perform 10CV evaluation |
if
newCriterionScore rule criterionScore
|
then discard A from selectedFeatures
|
else
|
keep A in selectedFeatures
|
criterionScore = newCriterionScore
|
for each
selectedFeature from the second-to-last kept |
feature back to the first selected feature in |
selectedFeatures
do
|
remove selectedFeature from selectedFeatures
|
subModel = Train using classifier with selectedFeatures
|
subNewCriterionScore = perform 10CV evaluation |
if
subNewCriterionScore rule
|
newCriterionScore
then
|
keep selectedFeature in selectedFeatures
|
else
|
discard selectedFeature from selectedFeatures
|
newCriterionScore = subNewCriterionScore
|
end if
|
end for
|
end if
|
end for
|
end if
|
L = EVALUATE(model, selectedFeatures, D, L)
|
end for
|
end for
|
# create models without stepwise feature subset selection approaches |
selectedFeatures = first k features of S |
model = Train using classifier with selectedFeatures from dataset S
|
L = EVALUATE(model, selectedFeatures, D, L)
|
end for |
return
L
|