Skip to content

Commit

Permalink
fix: encode function should work with strings and binary
Browse files Browse the repository at this point in the history
closes #14055
  • Loading branch information
mesejo committed Jan 11, 2025
1 parent 17446ad commit aa86236
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 8 deletions.
23 changes: 18 additions & 5 deletions datafusion/functions/src/encoding/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,20 @@ impl ScalarUDFImpl for EncodeFunc {
}

/// Derives the output type of `encode` from the type of its first argument.
///
/// Mapping performed by the `match` below:
/// - `Utf8` -> `Utf8`
/// - `LargeUtf8` -> `LargeUtf8`
/// - `Binary` -> `Utf8`
/// - `LargeBinary` -> `LargeUtf8`
/// - `Null` -> `Null`
///
/// # Errors
///
/// Any other first-argument type returns early with the error built by
/// `plan_err!`.
///
/// # Panics
///
/// `arg_types[0]` is indexed directly, so an empty slice would panic;
/// presumably the caller guarantees at least one argument — TODO confirm.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
// NOTE(review): this listing is a diff rendered without +/- markers. The next
// line looks like the pre-change body (echo the first argument's type back
// unchanged) that the `match` below replaces — confirm against the commit
// before treating both as live code.
Ok(arg_types[0].to_owned())
// Bring the `DataType` variants into scope so the arms can use bare names.
use DataType::*;

// Only the first argument's type is inspected here.
Ok(match arg_types[0] {
// String inputs keep their own type.
Utf8 => Utf8,
LargeUtf8 => LargeUtf8,
// Binary inputs are reported as the corresponding string type.
Binary => Utf8,
LargeBinary => LargeUtf8,
Null => Null,
_ => {
// Unsupported input type: bail out of the function with a plan error
// instead of producing a value for the surrounding `Ok(...)`.
return plan_err!(
"The encode function can only accept Utf8 or Binary or Null."
);
}
})
}

fn invoke_batch(
Expand All @@ -112,12 +125,12 @@ impl ScalarUDFImpl for EncodeFunc {
}

match arg_types[0] {
DataType::Utf8 | DataType::Utf8View | DataType::Binary | DataType::Null => {
DataType::Utf8 | DataType::Utf8View | DataType::Null => {
Ok(vec![DataType::Utf8; 2])
}
DataType::LargeUtf8 | DataType::LargeBinary => {
Ok(vec![DataType::LargeUtf8, DataType::Utf8])
}
DataType::LargeUtf8 => Ok(vec![DataType::LargeUtf8, DataType::Utf8]),
DataType::Binary => Ok(vec![DataType::Binary, DataType::Utf8]),
DataType::LargeBinary => Ok(vec![DataType::LargeBinary, DataType::Utf8]),
_ => plan_err!(
"1st argument should be Utf8 or Binary or Null, got {:?}",
arg_types[0]
Expand Down
24 changes: 21 additions & 3 deletions datafusion/sqllogictest/test_files/encoding.slt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ CREATE TABLE test(
hex_field TEXT
) as VALUES
(0, 'abc', encode('abc', 'base64'), encode('abc', 'hex')),
(1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
(2, NULL, NULL, NULL)
(1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
(2, NULL, NULL, NULL),
(3, X'8f50d3f60eae370ddbf85c86219c55108a350165', encode('8f50d3f60eae370ddbf85c86219c55108a350165', 'base64'), encode('8f50d3f60eae370ddbf85c86219c55108a350165', 'hex'))
;

# errors
Expand All @@ -43,34 +44,51 @@ select decode(hex_field, 'non_encoding') from test;
query error
select to_hex(hex_field) from test;

query error
select arrow_cast(decode(X'8f50d3f60eae370ddbf85c86219c55108a350165', 'base64'), 'Utf8');

# Array tests
query T
SELECT encode(bin_field, 'hex') FROM test ORDER BY num;
----
616263
717765717765
NULL
8f50d3f60eae370ddbf85c86219c55108a350165

query T
SELECT arrow_cast(decode(base64_field, 'base64'), 'Utf8') FROM test ORDER BY num;
----
abc
qweqwe
NULL
8f50d3f60eae370ddbf85c86219c55108a350165

query T
SELECT arrow_cast(decode(hex_field, 'hex'), 'Utf8') FROM test ORDER BY num;
----
abc
qweqwe
NULL
8f50d3f60eae370ddbf85c86219c55108a350165

query T
select to_hex(num) from test ORDER BY num;
----
0
1
2
3

query T
select encode(bin_field, 'base64') FROM test WHERE num = 3;
----
j1DT9g6uNw3b+FyGIZxVEIo1AWU

query B
select decode(encode(bin_field, 'base64'), 'base64') = X'8f50d3f60eae370ddbf85c86219c55108a350165' FROM test WHERE num = 3;
----
true

# test for Utf8View support for encode
statement ok
Expand Down Expand Up @@ -101,4 +119,4 @@ FROM test_utf8view;
Andrew QW5kcmV3 416e64726577 X WA 58
Xiangpeng WGlhbmdwZW5n 5869616e6770656e67 Xiangpeng WGlhbmdwZW5n 5869616e6770656e67
Raphael UmFwaGFlbA 5261706861656c R Ug 52
NULL NULL NULL R Ug 52
NULL NULL NULL R Ug 52

0 comments on commit aa86236

Please sign in to comment.