diff --git a/exercises/src/gpu/grayscott.comp b/exercises/src/gpu/grayscott.comp index 1f68490ab73a105e61e44f0ba14cf2a832d526db..f75c049a80fb81f5540b2ffdb1155328e2aba359 100644 --- a/exercises/src/gpu/grayscott.comp +++ b/exercises/src/gpu/grayscott.comp @@ -57,6 +57,7 @@ void init_shared() { s_expected_steal_count, s_expected_await_count ); + s_acquired_tile = false; init_wait_shared(); #if DEBUG || PROFILE s_num_loop_entry = 0; @@ -154,7 +155,9 @@ void main() { #endif // Notify other work-groups that we are processing the selected tile - if (is_leader()) s_stolen = !leader_try_start_step(); + if (is_leader() && !s_acquired_tile) { + s_stolen = !leader_try_acquire_tile(); + } barrier(); // Selected tile was stolen from us, pick another tile and start over diff --git a/exercises/src/gpu/interface.comp b/exercises/src/gpu/interface.comp index 9fdf5727e47b5350d592b73b8377569ea8180825..baf1874128854600e0ca7f46a4f9255f8b2afe0c 100644 --- a/exercises/src/gpu/interface.comp +++ b/exercises/src/gpu/interface.comp @@ -170,6 +170,9 @@ const uint ERROR_INVALID_TILE_IDX = 12; const uint ERROR_TOO_MANY_BUFFERS = 13; const uint ERROR_INVALID_AWAIT_COUNT = 14; const uint ERROR_BREAKPOINT = 15; // Free for your own use +const uint ERROR_ACQUIRED_TILE_TWICE = 16; +const uint ERROR_FINISHED_TILE_WITHOUT_ACQUIRE = 17; +const uint ERROR_RELEASED_TILE_WITHOUT_ACQUIRE = 18; // Set the metadata error // diff --git a/exercises/src/gpu/shared.comp b/exercises/src/gpu/shared.comp index c08dcdb7600e001b44c0762f0a453f990b94128c..18e72f975f8a1a8786369d52fe10a5fcca586770 100644 --- a/exercises/src/gpu/shared.comp +++ b/exercises/src/gpu/shared.comp @@ -63,6 +63,14 @@ shared uint s_expected_await_count; // kept around in packed form to improve compare-and-swap efficiency. 
shared uint s_expected_tile_status; +// Whether this work-group currently holds ownership of the current tile +// +// If true, this work-group has incremented the steal_count of the current +// tile to make it publicly known that it is processing it. When the +// work-group switches to another tile, it decrements the steal_count and +// resets this variable to false. +shared bool s_acquired_tile; + // Detailed work group activity trace for precise error reporting #if DEBUG // Work-group-private debugging state for g_metadata diff --git a/exercises/src/gpu/step.comp b/exercises/src/gpu/step.comp index 85cc654b9c642f702fe81ef5d85553dc47317b25..67fa3f213245822d2a2ab910ab7bec295115d3f5 100644 --- a/exercises/src/gpu/step.comp +++ b/exercises/src/gpu/step.comp @@ -1,16 +1,25 @@ //! Signaling of start/end of a compute step -// (Leader-only) Attempt to start processing the selected tile +// (Leader-only) Attempt to acquire ownership of the selected tile +// +// Must only be called while `s_acquired_tile` is false; DEBUG builds report +// ERROR_ACQUIRED_TILE_TWICE otherwise. Succeeds if the caller's knowledge of +// the tile's state is up to date, i.e. was not invalidated by a concurrent work-stealing event. // // Returns true on success, false on failure. Updates `s_expected_tile_status` -// and `s_expected_steal_count` in either case, will also update the -// `s_output_buffer_idx` on success and `s_input_buffer_idx` on failure. -bool leader_try_start_step() { +// and `s_expected_steal_count` in either case. Will also update +// `s_output_buffer_idx` and set `s_acquired_tile` on success. Will update +// `s_input_buffer_idx` and `s_expected_await_count` on failure. 
+bool leader_try_acquire_tile() { #if DEBUG if (!is_leader()) { set_error(ERROR_NOT_LEADER); return false; } + if (s_acquired_tile) { + set_error(ERROR_ACQUIRED_TILE_TWICE); + return false; + } #endif do { @@ -30,6 +39,7 @@ bool leader_try_start_step() { s_expected_tile_status = desired_tile_status; s_expected_steal_count = desired_steal_count; s_output_buffer_idx = output_buffer_idx(s_input_buffer_idx); + s_acquired_tile = true; return true; } @@ -50,7 +60,6 @@ bool leader_try_start_step() { s_expected_tile_status = status_before_cas; const uint old_input_idx = s_input_buffer_idx; const uint old_steal_count = s_expected_steal_count; - const uint old_await_count = s_expected_await_count; decode_tile_status(status_before_cas, s_input_buffer_idx, s_expected_steal_count, @@ -81,13 +90,17 @@ bool leader_try_finish_step() { set_error(ERROR_NOT_LEADER); return false; } + if (!s_acquired_tile) { + set_error(ERROR_FINISHED_TILE_WITHOUT_ACQUIRE); + return false; + } #endif + const uint prev_input_idx = s_input_buffer_idx; do { // Try to signal other work-groups that we finished this simulation step - const uint desired_steal_count = 0; const uint desired_tile_status = encode_tile_status(s_output_buffer_idx, - desired_steal_count, + s_expected_steal_count, s_expected_await_count); const uint status_before_cas = comp_swap_tile_status( s_tile_idx, @@ -99,7 +112,7 @@ bool leader_try_finish_step() { if (status_before_cas == s_expected_tile_status) { s_expected_tile_status = desired_tile_status; s_input_buffer_idx = s_output_buffer_idx; - s_expected_steal_count = desired_steal_count; + s_output_buffer_idx = output_buffer_idx(prev_input_idx); return true; } @@ -118,9 +131,6 @@ bool leader_try_finish_step() { // Update state variables, prepare to analyze the transition s_expected_tile_status = status_before_cas; - const uint old_input_idx = s_input_buffer_idx; - const uint old_steal_count = s_expected_steal_count; - const uint old_await_count = s_expected_await_count; 
decode_tile_status(status_before_cas, s_input_buffer_idx, s_expected_steal_count, @@ -128,9 +138,7 @@ bool leader_try_finish_step() { // Did another work-group steal our tile? - if ((s_input_buffer_idx != old_input_idx) - || (s_expected_steal_count != old_steal_count)) - { + if (s_input_buffer_idx != prev_input_idx) { #if PROFILE s_num_stolen_late += 1; #endif @@ -140,4 +148,31 @@ bool leader_try_finish_step() { continue; } } while(true); -} \ No newline at end of file +} + +// (Leader-only) Release ownership of the selected tile +// +// Decrements the tile's steal_count and clears `s_acquired_tile`; returns nothing. In DEBUG +// builds it first reports an error and returns early if the caller is not the leader or `s_acquired_tile` is false, and afterwards overwrites `s_expected_tile_status`, `s_expected_steal_count`, +// `s_expected_await_count`, `s_input_buffer_idx` and `s_output_buffer_idx` with their *_INVALID constants; other builds leave those variables unchanged. +void leader_release_tile() { + #if DEBUG + if (!is_leader()) { + set_error(ERROR_NOT_LEADER); + return; + } + if (!s_acquired_tile) { + set_error(ERROR_RELEASED_TILE_WITHOUT_ACQUIRE); + return; + } + #endif + dec_tile_status_steal_count(s_tile_idx); + s_acquired_tile = false; + #if DEBUG + s_expected_tile_status = TILE_STATUS_INVALID; + s_expected_steal_count = STEAL_COUNT_INVALID; + s_expected_await_count = AWAIT_COUNT_INVALID; + s_input_buffer_idx = BUFFER_IDX_INVALID; + s_output_buffer_idx = BUFFER_IDX_INVALID; + #endif +} diff --git a/exercises/src/gpu/switch.comp b/exercises/src/gpu/switch.comp index 573f92af61a94683fd5f2e96617dbca3ebe0fd8c..adbca78402d765ab075c6fb0e89ab11c133e51e3 100644 --- a/exercises/src/gpu/switch.comp +++ b/exercises/src/gpu/switch.comp @@ -401,6 +401,12 @@ void switch_tile() { } #endif + // Release ownership of the current tile. NOTE(review): the leader clears the shared flag s_acquired_tile inside this branch, possibly before other invocations have evaluated the condition - confirm that barrier() is still reached by every invocation. + if (s_acquired_tile) { + if (is_leader()) leader_release_tile(); + barrier(); + } + // Start with a tile block surrounding our current tile // // Center it as much as possible to maximize the odds of finding a close diff --git a/exercises/src/gpu/tile_status.comp b/exercises/src/gpu/tile_status.comp index b06d2d49833f1cbef038e1ea12a3d9a6a06fbbb3..b29857a31c324d540cb7d63d2ea1cb7990d8fa01 100644 --- 
a/exercises/src/gpu/tile_status.comp +++ b/exercises/src/gpu/tile_status.comp @@ -275,3 +275,18 @@ uint end_tile_status_wait(in ivec2 tile_idx) { const uint old_neg_await_count = (old_status & NEG_AWAIT_COUNT_MASK) >> NEG_AWAIT_COUNT_OFFSET; return MAX_AWAIT_COUNT - old_neg_await_count; } + +// Atomically decrement the steal_count field in the status word of tile `tile_idx` +// +// Must only be called after steal_count was previously incremented: a zero field would wrap around, borrowing from any bits above it. In DEBUG builds, returns early without touching the status when is_good_tile_idx() rejects `tile_idx`. +void dec_tile_status_steal_count(in ivec2 tile_idx) { + #if DEBUG + if (!is_good_tile_idx(tile_idx)) return; + #endif + const uint global_offset = global_tile_offset(tile_idx); + atomicAdd( + g_tile_status.data[global_offset], + // Adding an all-ones word shifted to the field subtracts (1 << STEAL_COUNT_OFFSET) modulo 2^32; the `u` suffix keeps the constant unsigned + 0xffffffffu << STEAL_COUNT_OFFSET + ); +}