__device__ float GetOneFraction(const PathElement& e, DatasetT X,
                                size_t row_idx) {
  // Returns the "one fraction" for path element `e` and the training
  // instance at `row_idx`: 1.0f if the instance would continue down this
  // branch when the feature is present, 0.0f otherwise.

  // First element in path (bias term) is always zero.
  // FIX: the comparison previously used a Unicode minus sign (U+2212),
  // which does not compile; it must be the ASCII '-' character.
  if (e.feature_idx == -1) return 0.0f;

  // Test the split: does the training instance continue down this path if
  // the feature is present, i.e. does its value fall in [lower, upper)?
  float val = X.GetElement(row_idx, e.feature_idx);
  // The bool result converts implicitly to 0.0f / 1.0f.
  return val >= e.feature_lower_bound && val < e.feature_upper_bound;
}
|
// Computes the SHAP contribution (phi) of path element `e` for one row of X.
//
// e             - the path element whose contribution is evaluated
// row_idx       - index of the row in the dataset
// X             - dataset providing feature values
// group         - the cooperating thread group for this path
// zero_fraction - e's fraction of instances flowing down this branch when
//                 the feature is considered absent
//
// Returns UnwoundPathSum() * (one_fraction - zero_fraction) * e.v.
template <typename DatasetT>
__device__ float ComputePhi(const PathElement& e, size_t row_idx,
                            const DatasetT& X, const ContiguousGroup& group,
                            float zero_fraction) {
  const float one_fraction = GetOneFraction(e, X, row_idx);
  GroupPath path(group, zero_fraction, one_fraction);

  // Extend the path once per element beyond the root; depth 0 is the root,
  // so extension starts from depth 1.
  const size_t path_length = group.size();
  for (size_t depth = 1; depth < path_length; ++depth) {
    path.Extend();
  }

  const float sum = path.UnwoundPathSum();
  return sum * (one_fraction - zero_fraction) * e.v;
}
|
// Kernel accumulating SHAP values into `phis` for every (row, path element)
// pair assigned to its threads.
//
// Layout: kBlockSize threads per block; each warp is allocated a contiguous
// range of kRowsPerWarp rows, and each thread owns one path element.
// Shared memory: kBlockSize PathElement slots (one per thread).
//
// X             - dataset (passed by value; must be device-accessible)
// bins_per_row  - number of bins per row
// path_elements - flattened array of path elements
// bin_segments  - segment offsets into path_elements
// num_groups    - number of output groups
// phis          - output buffer; accumulated with atomicAdd
template <typename DatasetT, size_t kBlockSize, size_t kRowsPerWarp>
__global__ void ShapKernel(DatasetT X, size_t bins_per_row,
                           const PathElement* path_elements,
                           const size_t* bin_segments, size_t num_groups,
                           float* phis) {
  // Each thread stages its own path element in shared memory. No barrier is
  // required: a thread only ever reads the slot it wrote itself.
  __shared__ PathElement s_elements[kBlockSize];
  PathElement& e = s_elements[threadIdx.x];

  // Allocate this warp's row range and fetch this thread's path element.
  size_t first_row, row_end;
  bool thread_active;
  ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
      X, bins_per_row, path_elements, bin_segments, &first_row, &row_end, &e,
      &thread_active);
  // Inactive threads exit here, before the labeled partition below, so the
  // partition is formed only over threads that carry real work.
  if (!thread_active) return;

  const float zero_fraction = e.zero_fraction;
  // Group the surviving threads of the warp by the path they belong to.
  auto labelled_group = active_labeled_partition(e.path_idx);

  for (int64_t row_idx = first_row; row_idx < row_end; row_idx++) {
    const float phi =
        ComputePhi(e, row_idx, X, labelled_group, zero_fraction);
    // Write results. The root element carries no feature, so it contributes
    // nothing to the per-feature output.
    if (!e.IsRoot()) {
      atomicAdd(&phis[IndexPhi(row_idx, num_groups, e.group, X.NumCols(),
                               e.feature_idx)],
                phi);
    }
  }
}
|