
Listing 2. GPU kernel overview: threads are mapped to elements of a path sub-problem, then groups of threads are formed. These small thread groups cooperatively solve dynamic programming problems, accumulating the final SHAP values using global atomics.
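The listing manipulates PathElement records, each describing one element of a decomposed root-to-leaf path. As a reading aid, here is a plausible sketch of that record, reconstructed from the fields the listing touches; the exact types, field order, and any members beyond those used below are assumptions, not the library's definition.

struct PathElement {
  size_t path_idx;            // index of the root-to-leaf path
  int64_t feature_idx;        // feature split on; -1 marks the root/bias element
  int group;                  // output group (e.g. class) the path contributes to
  float feature_lower_bound;  // instance follows this branch iff
  float feature_upper_bound;  //   lower_bound <= x[feature_idx] < upper_bound
  float zero_fraction;        // fraction of training weight taking this branch
  float v;                    // leaf value at the end of the path
  __host__ __device__ bool IsRoot() const { return feature_idx == -1; }
};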

template <typename DatasetT>
__device__ float GetOneFraction(
    const PathElement& e, DatasetT X, size_t row_idx) {
  // First element in path (bias term) is always zero
  if (e.feature_idx == -1) return 0.0;
  // Test the split
  // Does the training instance continue down this
  // path if the feature is present?
  float val = X.GetElement(row_idx, e.feature_idx);
  return val >= e.feature_lower_bound &&
         val < e.feature_upper_bound;
}
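Note that the split test returns a bool that converts implicitly to 1.0f or 0.0f. For intuition, a hypothetical split x[3] < 0.5 taken on the left branch would be encoded as the half-open interval [-inf, 0.5), assuming the bound semantics suggested by the comparison above:

// Hypothetical values, for illustration only (requires <limits>)
PathElement e;
e.feature_idx = 3;
e.feature_lower_bound = -std::numeric_limits<float>::infinity();
e.feature_upper_bound = 0.5f;
// A row with x[3] == 0.2f yields one_fraction == 1.0f (the instance
// follows this branch); x[3] == 0.7f yields 0.0f.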
template <typename DatasetT>
__device__ float ComputePhi(
    const PathElement& e, size_t row_idx,
    const DatasetT& X,
    const ContiguousGroup& group,
    float zero_fraction) {
  float one_fraction = GetOneFraction(e, X, row_idx);
  GroupPath path(group, zero_fraction, one_fraction);
  size_t unique_path_length = group.size();
  // Extend the path
  for (auto unique_depth = 1ull;
       unique_depth < unique_path_length;
       unique_depth++) {
    path.Extend();
  }
  float sum = path.UnwoundPathSum();
  return sum * (one_fraction - zero_fraction) * e.v;
}
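The Extend() and UnwoundPathSum() calls above correspond to the EXTEND and unwound-path-sum recurrences of Lundberg et al.'s tree SHAP algorithm, which GroupPath evaluates cooperatively with one path element held in each thread's registers. For reference, here is a sequential sketch of those two recurrences over plain arrays, adapted from the published algorithm; variable names are illustrative, and this is not the GroupPath implementation.

// Sequential EXTEND: append an element with branch fractions (pz, po)
// at position `unique_depth`, updating the permutation weights pweight[].
inline void ExtendPath(float* pweight, float* zero_fraction,
                       float* one_fraction, int unique_depth,
                       float pz, float po) {
  zero_fraction[unique_depth] = pz;
  one_fraction[unique_depth] = po;
  pweight[unique_depth] = unique_depth == 0 ? 1.0f : 0.0f;
  for (int i = unique_depth - 1; i >= 0; i--) {
    pweight[i + 1] += po * pweight[i] * (i + 1) /
                      static_cast<float>(unique_depth + 1);
    pweight[i] = pz * pweight[i] * (unique_depth - i) /
                 static_cast<float>(unique_depth + 1);
  }
}

// Sequential unwound-path sum: the total weight with element `path_index`
// removed from the path. Multiplied by (one_fraction - zero_fraction) * v,
// it gives that feature's contribution, as in ComputePhi above.
inline float UnwoundPathSum(const float* pweight, const float* zero_fraction,
                            const float* one_fraction, int unique_depth,
                            int path_index) {
  float po = one_fraction[path_index];
  float pz = zero_fraction[path_index];
  float next_one_portion = pweight[unique_depth];
  float total = 0.0f;
  for (int i = unique_depth - 1; i >= 0; i--) {
    if (po != 0.0f) {
      float tmp = next_one_portion * (unique_depth + 1) /
                  static_cast<float>((i + 1) * po);
      total += tmp;
      next_one_portion = pweight[i] - tmp * pz * (unique_depth - i) /
                         static_cast<float>(unique_depth + 1);
    } else {
      total += pweight[i] / pz * (unique_depth + 1) /
               static_cast<float>(unique_depth - i);
    }
  }
  return total;
}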
template <typename DatasetT, size_t kBlockSize,
          size_t kRowsPerWarp>
__global__ void ShapKernel(
    DatasetT X, size_t bins_per_row,
    const PathElement* path_elements,
    const size_t* bin_segments, size_t num_groups,
    float* phis) {
  __shared__ PathElement s_elements[kBlockSize];
  PathElement& e = s_elements[threadIdx.x];
  // Allocate some portion of rows to this warp
  // Fetch the path element assigned to this thread
  size_t start_row, end_row;
  bool thread_active;
  ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
      X, bins_per_row, path_elements,
      bin_segments, &start_row, &end_row, &e,
      &thread_active);
  if (!thread_active) return;
  float zero_fraction = e.zero_fraction;
  auto labelled_group =
      active_labeled_partition(e.path_idx);
  for (int64_t row_idx = start_row;
       row_idx < end_row; row_idx++) {
    float phi =
        ComputePhi(e, row_idx, X, labelled_group,
                   zero_fraction);
    // Write results
    if (!e.IsRoot()) {
      atomicAdd(&phis[IndexPhi(
                    row_idx, num_groups, e.group,
                    X.NumCols(), e.feature_idx)],
                phi);
    }
  }
}
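A kernel like this needs a host-side launcher that maps one warp to each (bin, chunk-of-rows) pair and zeroes the output buffer before the atomic accumulation. The following is a minimal, hypothetical sketch: the block size, rows-per-warp constant, and the (row, feature incl. bias, group) output layout are assumptions for illustration, not the library's actual launch code.

#include <cuda_runtime.h>

template <typename DatasetT>
void LaunchShapKernel(DatasetT X, size_t bins_per_row,
                      const PathElement* path_elements,
                      const size_t* bin_segments, size_t num_groups,
                      float* phis) {
  constexpr size_t kBlockSize = 512;     // threads per block (assumed)
  constexpr size_t kRowsPerWarp = 1024;  // rows handled per warp (assumed)
  constexpr size_t kWarpSize = 32;
  // One warp per (bin, chunk-of-rows) pair; each of the warp's threads
  // holds one path element of that bin's sub-problem.
  size_t row_chunks = (X.NumRows() + kRowsPerWarp - 1) / kRowsPerWarp;
  size_t warps_needed = bins_per_row * row_chunks;
  size_t warps_per_block = kBlockSize / kWarpSize;
  size_t blocks = (warps_needed + warps_per_block - 1) / warps_per_block;
  // phis is accumulated with atomicAdd, so it must start zeroed; one slot
  // per (row, feature including the bias column, group) is assumed here.
  cudaMemset(phis, 0,
             X.NumRows() * (X.NumCols() + 1) * num_groups * sizeof(float));
  ShapKernel<DatasetT, kBlockSize, kRowsPerWarp>
      <<<blocks, kBlockSize>>>(X, bins_per_row, path_elements,
                               bin_segments, num_groups, phis);
}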