diff --git a/exercises/src/gpu/grayscott.comp b/exercises/src/gpu/grayscott.comp
index 1f68490ab73a105e61e44f0ba14cf2a832d526db..f75c049a80fb81f5540b2ffdb1155328e2aba359 100644
--- a/exercises/src/gpu/grayscott.comp
+++ b/exercises/src/gpu/grayscott.comp
@@ -57,6 +57,7 @@ void init_shared() {
             s_expected_steal_count,
             s_expected_await_count
         );
+        s_acquired_tile = false;
         init_wait_shared();
         #if DEBUG || PROFILE
             s_num_loop_entry = 0;
@@ -154,7 +155,9 @@ void main() {
         #endif
     
         // Notify other work-groups that we are processing the selected tile
-        if (is_leader()) s_stolen = !leader_try_start_step();
+        if (is_leader() && !s_acquired_tile) {
+            s_stolen = !leader_try_acquire_tile();
+        }
         barrier();
 
         // Selected tile was stolen from us, pick another tile and start over
diff --git a/exercises/src/gpu/interface.comp b/exercises/src/gpu/interface.comp
index 9fdf5727e47b5350d592b73b8377569ea8180825..baf1874128854600e0ca7f46a4f9255f8b2afe0c 100644
--- a/exercises/src/gpu/interface.comp
+++ b/exercises/src/gpu/interface.comp
@@ -170,6 +170,9 @@ const uint ERROR_INVALID_TILE_IDX = 12;
 const uint ERROR_TOO_MANY_BUFFERS = 13;
 const uint ERROR_INVALID_AWAIT_COUNT = 14;
 const uint ERROR_BREAKPOINT = 15;  // Free for your own use
+const uint ERROR_ACQUIRED_TILE_TWICE = 16;  // Acquire while already acquired
+const uint ERROR_FINISHED_TILE_WITHOUT_ACQUIRE = 17;  // Finish without acquire
+const uint ERROR_RELEASED_TILE_WITHOUT_ACQUIRE = 18;  // Release without acquire
 
 // Set the metadata error
 //
diff --git a/exercises/src/gpu/shared.comp b/exercises/src/gpu/shared.comp
index c08dcdb7600e001b44c0762f0a453f990b94128c..18e72f975f8a1a8786369d52fe10a5fcca586770 100644
--- a/exercises/src/gpu/shared.comp
+++ b/exercises/src/gpu/shared.comp
@@ -63,6 +63,14 @@ shared uint s_expected_await_count;
 // kept around in packed form to improve compare-and-swap efficiency.
 shared uint s_expected_tile_status;
 
+// Whether this work-group has acquired ownership of the current tile
+//
+// If true, this work-group has incremented the steal_count of the current
+// tile to make it publicly known that it is processing it. When the
+// work-group switches to another tile, it decrements the steal_count and
+// resets this variable to false.
+shared bool s_acquired_tile;
+
 // Detailed work group activity trace for precise error reporting
 #if DEBUG
     // Work-group-private debugging state for g_metadata
diff --git a/exercises/src/gpu/step.comp b/exercises/src/gpu/step.comp
index 85cc654b9c642f702fe81ef5d85553dc47317b25..67fa3f213245822d2a2ab910ab7bec295115d3f5 100644
--- a/exercises/src/gpu/step.comp
+++ b/exercises/src/gpu/step.comp
@@ -1,16 +1,25 @@
 //! Signaling of start/end of a compute step
 
-// (Leader-only) Attempt to start processing the selected tile
+// (Leader-only) Attempt to acquire ownership of the selected tile
+//
+// Only needs to be called if `s_acquired_tile` is false. Succeeds if the caller
+// has up-to-date knowledge of the tile's state which was not invalidated by a
+// concurrent work-stealing event.
 //
 // Returns true on success, false on failure. Updates `s_expected_tile_status`
-// and `s_expected_steal_count` in either case, will also update the
-// `s_output_buffer_idx` on success and `s_input_buffer_idx` on failure.
-bool leader_try_start_step() {
+// and `s_expected_steal_count` in either case. Will also update
+// `s_output_buffer_idx` and set `s_acquired_tile` on success. Will update
+// `s_input_buffer_idx` and `s_expected_await_count` on failure.
+bool leader_try_acquire_tile() {
     #if DEBUG
         if (!is_leader()) {
             set_error(ERROR_NOT_LEADER);
             return false;
         }
+        if (s_acquired_tile) {
+            set_error(ERROR_ACQUIRED_TILE_TWICE);
+            return false;
+        }
     #endif
 
     do {
@@ -30,6 +39,7 @@ bool leader_try_start_step() {
             s_expected_tile_status = desired_tile_status;
             s_expected_steal_count = desired_steal_count;
             s_output_buffer_idx = output_buffer_idx(s_input_buffer_idx);
+            s_acquired_tile = true;
             return true;
         }
 
@@ -50,7 +60,6 @@ bool leader_try_start_step() {
         s_expected_tile_status = status_before_cas;
         const uint old_input_idx = s_input_buffer_idx;
         const uint old_steal_count = s_expected_steal_count;
-        const uint old_await_count = s_expected_await_count;
         decode_tile_status(status_before_cas,
                            s_input_buffer_idx,
                            s_expected_steal_count,
@@ -81,13 +90,17 @@ bool leader_try_finish_step() {
             set_error(ERROR_NOT_LEADER);
             return false;
         }
+        if (!s_acquired_tile) {
+            set_error(ERROR_FINISHED_TILE_WITHOUT_ACQUIRE);
+            return false;
+        }
     #endif
 
+    const uint prev_input_idx = s_input_buffer_idx;
     do {
         // Try to signal other work-groups that we finished this simulation step
-        const uint desired_steal_count = 0;
         const uint desired_tile_status = encode_tile_status(s_output_buffer_idx,
-                                                            desired_steal_count,
+                                                            s_expected_steal_count,
                                                             s_expected_await_count);
         const uint status_before_cas = comp_swap_tile_status(
             s_tile_idx,
@@ -99,7 +112,7 @@ bool leader_try_finish_step() {
         if (status_before_cas == s_expected_tile_status) {
             s_expected_tile_status = desired_tile_status;
             s_input_buffer_idx = s_output_buffer_idx;
-            s_expected_steal_count = desired_steal_count;
+            s_output_buffer_idx = output_buffer_idx(prev_input_idx);
             return true;
         }
 
@@ -118,9 +131,6 @@ bool leader_try_finish_step() {
 
         // Update state variables, prepare to analyze the transition
         s_expected_tile_status = status_before_cas;
-        const uint old_input_idx = s_input_buffer_idx;
-        const uint old_steal_count = s_expected_steal_count;
-        const uint old_await_count = s_expected_await_count;
         decode_tile_status(status_before_cas,
                            s_input_buffer_idx,
                            s_expected_steal_count,
@@ -128,9 +138,7 @@ bool leader_try_finish_step() {
 
 
         // Did another work-group steal our tile?
-        if ((s_input_buffer_idx != old_input_idx)
-            || (s_expected_steal_count != old_steal_count))
-        {
+        if (s_input_buffer_idx != prev_input_idx) {
             #if PROFILE
                 s_num_stolen_late += 1;
             #endif
@@ -140,4 +148,31 @@ bool leader_try_finish_step() {
             continue;
         }
     } while(true);
-}
\ No newline at end of file
+}
+
+// (Leader-only) Release ownership of the selected tile
+//
+// Always succeeds. Clears `s_acquired_tile`. Invalidates `s_input_buffer_idx`,
+// `s_output_buffer_idx`, `s_expected_tile_status`, `s_expected_steal_count`
+// and `s_expected_await_count` (set to `*_INVALID` sentinels if DEBUG).
+void leader_release_tile() {
+    #if DEBUG
+        if (!is_leader()) {
+            set_error(ERROR_NOT_LEADER);
+            return;
+        }
+        if (!s_acquired_tile) {
+            set_error(ERROR_RELEASED_TILE_WITHOUT_ACQUIRE);
+            return;
+        }
+    #endif
+    dec_tile_status_steal_count(s_tile_idx);  // Publish that we stopped working
+    s_acquired_tile = false;
+    #if DEBUG
+        s_expected_tile_status = TILE_STATUS_INVALID;
+        s_expected_steal_count = STEAL_COUNT_INVALID;
+        s_expected_await_count = AWAIT_COUNT_INVALID;
+        s_input_buffer_idx = BUFFER_IDX_INVALID;
+        s_output_buffer_idx = BUFFER_IDX_INVALID;
+    #endif
+}
diff --git a/exercises/src/gpu/switch.comp b/exercises/src/gpu/switch.comp
index 573f92af61a94683fd5f2e96617dbca3ebe0fd8c..adbca78402d765ab075c6fb0e89ab11c133e51e3 100644
--- a/exercises/src/gpu/switch.comp
+++ b/exercises/src/gpu/switch.comp
@@ -401,6 +401,12 @@ void switch_tile() {
         }
     #endif
 
+    // Release ownership of the current tile
+    if (s_acquired_tile) {
+        if (is_leader()) leader_release_tile();
+        barrier();
+    }
+
     // Start with a tile block surrounding our current tile
     //
     // Center it as much as possible to maximize the odds of finding a close
diff --git a/exercises/src/gpu/tile_status.comp b/exercises/src/gpu/tile_status.comp
index b06d2d49833f1cbef038e1ea12a3d9a6a06fbbb3..b29857a31c324d540cb7d63d2ea1cb7990d8fa01 100644
--- a/exercises/src/gpu/tile_status.comp
+++ b/exercises/src/gpu/tile_status.comp
@@ -275,3 +275,18 @@ uint end_tile_status_wait(in ivec2 tile_idx) {
     const uint old_neg_await_count = (old_status & NEG_AWAIT_COUNT_MASK) >> NEG_AWAIT_COUNT_OFFSET;
     return MAX_AWAIT_COUNT - old_neg_await_count;
 }
+
+// Decrement the steal_count of a tile
+//
+// This should only be called if a thread previously increased the steal_count:
+// decrementing a zero steal_count would borrow from any higher status bits.
+// In DEBUG builds, does nothing if `tile_idx` fails `is_good_tile_idx()`.
+void dec_tile_status_steal_count(in ivec2 tile_idx) {
+    #if DEBUG
+        if (!is_good_tile_idx(tile_idx)) return;
+    #endif
+    const uint global_offset = global_tile_offset(tile_idx);
+    // Unsigned addition wraps, so adding this subtracts 1 from steal_count
+    const uint minus_one_steal = 0xffffffffu << STEAL_COUNT_OFFSET;
+    atomicAdd(g_tile_status.data[global_offset], minus_one_steal);
+}