visx[i] : no. of iterations row i was selected |
iterations: no. of iterations user has gone through |
impact[i][j]: no. of times column i is filled but column j is missing |
entropyx[i]: entropy of row i |
entropyy[i]: entropy of column i for {rows} |
I[x][y]: Indicator function = 1 if value is present in row x and column y, 0 otherwise {rows} |
1: |
procedure GENERATE_SUBSET(C) |
2: |
X = C.rows, Y = C.columns |
3: |
|
4: |
for each row x ∈ X do
|
5: |
|
6: |
end for
|
7: |
|
8: |
select column to optimize for, with probability proportional to missing values |
9: |
for each column y ∈ Y do
|
10: |
|
11: |
end for
|
12: |
columns = Top q indices from scoresy
|
13: |
return
|
14: |
end procedure |