From b73cab0a7b82a8ab9f066936580b7750feee66af Mon Sep 17 00:00:00 2001 From: Cyprien Huet Date: Tue, 7 Jan 2025 13:32:21 +0400 Subject: [PATCH 1/3] fix --- datafusion/functions-nested/src/set_ops.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 70d69170fc1e..f26ff0c9eb33 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -513,9 +513,6 @@ fn general_array_distinct( array: &GenericListArray, field: &FieldRef, ) -> Result { - if array.len() == 0 { - return Ok(Arc::new(array.clone()) as ArrayRef); - } let dt = array.value_type(); let mut offsets = Vec::with_capacity(array.len()); offsets.push(OffsetSize::usize_as(0)); @@ -542,6 +539,9 @@ fn general_array_distinct( }; new_arrays.push(array); } + if new_arrays.is_empty() { + return Ok(Arc::new(array.clone()) as ArrayRef); + } let offsets = OffsetBuffer::new(offsets.into()); let new_arrays_ref = new_arrays.iter().map(|v| v.as_ref()).collect::>(); let values = compute::concat(&new_arrays_ref)?; From 268dd672619b7aa2164bbbd20fc162c2590fb6b0 Mon Sep 17 00:00:00 2001 From: Cyprien Huet Date: Wed, 8 Jan 2025 12:17:05 +0400 Subject: [PATCH 2/3] add test --- datafusion/functions-nested/src/set_ops.rs | 3 + datafusion/sqllogictest/test_files/array.slt | 64 +------------------- 2 files changed, 5 insertions(+), 62 deletions(-) diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index f26ff0c9eb33..079e0e3ed214 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -513,6 +513,9 @@ fn general_array_distinct( array: &GenericListArray, field: &FieldRef, ) -> Result { + if array.is_empty() { + return Ok(Arc::new(array.clone()) as ArrayRef); + } let dt = array.value_type(); let mut offsets = Vec::with_capacity(array.len()); offsets.push(OffsetSize::usize_as(0)); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 90003b28572a..fd3597baa290 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5682,70 +5682,10 @@ NULL [1, 3] query ? -select array_distinct([]); +select array_distinct(arrow_cast(null, 'LargeList(Int64)')); ---- -[] - -query ? -select array_distinct([[], []]); ----- -[[]] - -query ? -select array_distinct(column1) -from array_distinct_table_1D; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_UTF8; ----- -[a, bc, def] -[a, bc, def, defg] -[defg] - -query ? -select array_distinct(column1) -from array_distinct_table_2D; ----- -[[1, 2], [3, 4], [5, 6]] -[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[, [5, 6]] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_large; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_fixed; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_UTF8_fixed; ----- -[a, bc, def] -[a, bc, def, defg] -[defg] +NULL -query ? -select array_distinct(column1) -from array_distinct_table_2D_fixed; ----- -[[1, 2], [3, 4], [5, 6]] -[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[, [5, 6]] query ??? select array_intersect(column1, column2), From f26a911e8581fdb016f8dd029b1899a19373c826 Mon Sep 17 00:00:00 2001 From: Cyprien Huet Date: Wed, 8 Jan 2025 12:18:42 +0400 Subject: [PATCH 3/3] oops --- datafusion/sqllogictest/test_files/array.slt | 65 ++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index fd3597baa290..83f228b90115 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5686,6 +5686,71 @@ select array_distinct(arrow_cast(null, 'LargeList(Int64)')); ---- NULL +query ? +select array_distinct([]); +---- +[] + +query ? +select array_distinct([[], []]); +---- +[[]] + +query ? +select array_distinct(column1) +from array_distinct_table_1D; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[, [5, 6]] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_large; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_fixed; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8_fixed; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D_fixed; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[, [5, 6]] query ??? select array_intersect(column1, column2),