From 5e4069156aa6b8345d66afd8fd379faa6d98a19b Mon Sep 17 00:00:00 2001
From: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com>
Date: Wed, 13 Nov 2024 12:26:19 -0500
Subject: [PATCH] Fix synchronization bug in bool parquet mukernels (#17302)

This fixes a synchronization bug in the parquet microkernels for plain-decoding bools. This closes [several](https://github.com/NVIDIA/spark-rapids/issues/11715) timing [issues](https://github.com/NVIDIA/spark-rapids/issues/11716) found during testing of spark-rapids.

Authors:
  - Paul Mattione (https://github.com/pmattione-nvidia)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/17302
---
 cpp/src/io/parquet/decode_fixed.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu
index aaf5ebfbe7d..9acbe026bb2 100644
--- a/cpp/src/io/parquet/decode_fixed.cu
+++ b/cpp/src/io/parquet/decode_fixed.cu
@@ -848,6 +848,7 @@ inline __device__ void bool_plain_decode(page_state_s* s, state_buf* sb, int t,
 {
   int pos = s->dict_pos;
   int const target_pos = pos + to_decode;
+  __syncthreads(); // Make sure all threads have read dict_pos before it changes at the end.
 
   while (pos < target_pos) {
     int const batch_len = min(target_pos - pos, decode_block_size_t);