
Listing 2. GPU kernel overview: threads are mapped to elements of a path sub-problem, then groups of threads are formed. These small thread groups cooperatively solve dynamic programming problems, accumulating the final SHAP values using global atomics.
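The listing manipulates PathElement records, each describing one element of a decomposed root-to-leaf path. As a reading aid, here is a plausible sketch of that record, reconstructed from the fields the listing touches; the exact types, field order, and any members beyond those used below are assumptions, not the library's definition.

struct PathElement {
  size_t path_idx;            // index of the root-to-leaf path
  int64_t feature_idx;        // feature split on; -1 marks the root/bias element
  int group;                  // output group (e.g. class) the path contributes to
  float feature_lower_bound;  // instance follows this branch iff
  float feature_upper_bound;  //   lower_bound <= x[feature_idx] < upper_bound
  float zero_fraction;        // fraction of training weight taking this branch
  float v;                    // leaf value at the end of the path
  __host__ __device__ bool IsRoot() const { return feature_idx == -1; }
};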

template <typename DatasetT>
__device__ float GetOneFraction(
    const PathElement& e, DatasetT X, size_t row_idx) {
  // First element in path (bias term) is always zero
  if (e.feature_idx == -1) return 0.0;
  // Test the split
  // Does the training instance continue down this
  // path if the feature is present?
  float val = X.GetElement(row_idx, e.feature_idx);
  return val >= e.feature_lower_bound &&
         val < e.feature_upper_bound;
}
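Note that the split test returns a bool that converts implicitly to 1.0f or 0.0f. For intuition, a hypothetical split x[3] < 0.5 taken on the left branch would be encoded as the half-open interval [-inf, 0.5), assuming the bound semantics suggested by the comparison above:

// Hypothetical values, for illustration only (requires <limits>)
PathElement e;
e.feature_idx = 3;
e.feature_lower_bound = -std::numeric_limits<float>::infinity();
e.feature_upper_bound = 0.5f;
// A row with x[3] == 0.2f yields one_fraction == 1.0f (the instance
// follows this branch); x[3] == 0.7f yields 0.0f.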
template <typename DatasetT>
__device__ float ComputePhi(
    const PathElement& e, size_t row_idx,
    const DatasetT& X,
    const ContiguousGroup& group,
    float zero_fraction) {
  float one_fraction = GetOneFraction(e, X, row_idx);
  GroupPath path(group, zero_fraction, one_fraction);
  size_t unique_path_length = group.size();
  // Extend the path
  for (auto unique_depth = 1ull;
       unique_depth < unique_path_length;
       unique_depth++) {
    path.Extend();
  }
  float sum = path.UnwoundPathSum();
  return sum * (one_fraction - zero_fraction) * e.v;
}
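The Extend() and UnwoundPathSum() calls above correspond to the EXTEND and unwound-path-sum recurrences of Lundberg et al.'s tree SHAP algorithm, which GroupPath evaluates cooperatively with one path element held in each thread's registers. For reference, here is a sequential sketch of those two recurrences over plain arrays, adapted from the published algorithm; variable names are illustrative, and this is not the GroupPath implementation.

// Sequential EXTEND: append an element with branch fractions (pz, po)
// at position `unique_depth`, updating the permutation weights pweight[].
inline void ExtendPath(float* pweight, float* zero_fraction,
                       float* one_fraction, int unique_depth,
                       float pz, float po) {
  zero_fraction[unique_depth] = pz;
  one_fraction[unique_depth] = po;
  pweight[unique_depth] = unique_depth == 0 ? 1.0f : 0.0f;
  for (int i = unique_depth - 1; i >= 0; i--) {
    pweight[i + 1] += po * pweight[i] * (i + 1) /
                      static_cast<float>(unique_depth + 1);
    pweight[i] = pz * pweight[i] * (unique_depth - i) /
                 static_cast<float>(unique_depth + 1);
  }
}

// Sequential unwound-path sum: the total weight with element `path_index`
// removed from the path. Multiplied by (one_fraction - zero_fraction) * v,
// it gives that feature's contribution, as in ComputePhi above.
inline float UnwoundPathSum(const float* pweight, const float* zero_fraction,
                            const float* one_fraction, int unique_depth,
                            int path_index) {
  float po = one_fraction[path_index];
  float pz = zero_fraction[path_index];
  float next_one_portion = pweight[unique_depth];
  float total = 0.0f;
  for (int i = unique_depth - 1; i >= 0; i--) {
    if (po != 0.0f) {
      float tmp = next_one_portion * (unique_depth + 1) /
                  static_cast<float>((i + 1) * po);
      total += tmp;
      next_one_portion = pweight[i] - tmp * pz * (unique_depth - i) /
                         static_cast<float>(unique_depth + 1);
    } else {
      total += pweight[i] / pz * (unique_depth + 1) /
               static_cast<float>(unique_depth - i);
    }
  }
  return total;
}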
template <typename DatasetT, size_t kBlockSize,
          size_t kRowsPerWarp>
__global__ void ShapKernel(
    DatasetT X, size_t bins_per_row,
    const PathElement* path_elements,
    const size_t* bin_segments, size_t num_groups,
    float* phis) {
  __shared__ PathElement s_elements[kBlockSize];
  PathElement& e = s_elements[threadIdx.x];
  // Allocate some portion of rows to this warp
  // Fetch the path element assigned to this thread
  size_t start_row, end_row;
  bool thread_active;
  ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
      X, bins_per_row, path_elements,
      bin_segments, &start_row, &end_row, &e,
      &thread_active);
  if (!thread_active) return;
  float zero_fraction = e.zero_fraction;
  auto labelled_group =
      active_labeled_partition(e.path_idx);
  for (int64_t row_idx = start_row;
       row_idx < end_row; row_idx++) {
    float phi =
        ComputePhi(e, row_idx, X, labelled_group,
                   zero_fraction);
    // Write results
    if (!e.IsRoot()) {
      atomicAdd(&phis[IndexPhi(
                    row_idx, num_groups, e.group,
                    X.NumCols(), e.feature_idx)],
                phi);
    }
  }
}
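A kernel like this needs a host-side launcher that maps one warp to each (bin, chunk-of-rows) pair and zeroes the output buffer before the atomic accumulation. The following is a minimal, hypothetical sketch: the block size, rows-per-warp constant, and the (row, feature incl. bias, group) output layout are assumptions for illustration, not the library's actual launch code.

#include <cuda_runtime.h>

template <typename DatasetT>
void LaunchShapKernel(DatasetT X, size_t bins_per_row,
                      const PathElement* path_elements,
                      const size_t* bin_segments, size_t num_groups,
                      float* phis) {
  constexpr size_t kBlockSize = 512;     // threads per block (assumed)
  constexpr size_t kRowsPerWarp = 1024;  // rows handled per warp (assumed)
  constexpr size_t kWarpSize = 32;
  // One warp per (bin, chunk-of-rows) pair; each of the warp's threads
  // holds one path element of that bin's sub-problem.
  size_t row_chunks = (X.NumRows() + kRowsPerWarp - 1) / kRowsPerWarp;
  size_t warps_needed = bins_per_row * row_chunks;
  size_t warps_per_block = kBlockSize / kWarpSize;
  size_t blocks = (warps_needed + warps_per_block - 1) / warps_per_block;
  // phis is accumulated with atomicAdd, so it must start zeroed; one slot
  // per (row, feature including the bias column, group) is assumed here.
  cudaMemset(phis, 0,
             X.NumRows() * (X.NumCols() + 1) * num_groups * sizeof(float));
  ShapKernel<DatasetT, kBlockSize, kRowsPerWarp>
      <<<blocks, kBlockSize>>>(X, bins_per_row, path_elements,
                               bin_segments, num_groups, phis);
}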