diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt
index 9bee9b8184ea..228d7ca2f046 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -442,173 +442,6 @@ SELECT strpos('joséésoj', NULL)
 ----
 NULL
 
-query T
-SELECT substr('alphabet', -3)
-----
-alphabet
-
-query T
-SELECT substr('alphabet', 0)
-----
-alphabet
-
-query T
-SELECT substr('alphabet', 1)
-----
-alphabet
-
-query T
-SELECT substr('alphabet', 2)
-----
-lphabet
-
-query T
-SELECT substr('alphabet', 3)
-----
-phabet
-
-query T
-SELECT substr('alphabet', 30)
-----
-(empty)
-
-query T
-SELECT substr('alphabet', 3, 2)
-----
-ph
-
-query T
-SELECT substr('alphabet', 3, 20)
-----
-phabet
-
-# test range ouside of string length
-query TTTTTTTTTTTT
-SELECT
-    substr('hi🌏', 1, 3),
-    substr('hi🌏', 1, 4),
-    substr('hi🌏', 1, 100),
-    substr('hi🌏', 0, 1),
-    substr('hi🌏', 0, 2),
-    substr('hi🌏', 0, 4),
-    substr('hi🌏', 0, 5),
-    substr('hi🌏', -10, 100),
-    substr('hi🌏', -10, 12),
-    substr('hi🌏', -10, 5),
-    substr('hi🌏', 10, 0),
-    substr('hi🌏', 10, 10);
-----
-hi🌏 hi🌏 hi🌏 (empty) h hi🌏 hi🌏 hi🌏 h (empty) (empty) (empty)
-
-query TTTTTTTTTTTT
-SELECT
-    substr('', 1, 3),
-    substr('', 1, 4),
-    substr('', 1, 100),
-    substr('', 0, 1),
-    substr('', 0, 2),
-    substr('', 0, 4),
-    substr('', 0, 5),
-    substr('', -10, 100),
-    substr('', -10, 12),
-    substr('', -10, 5),
-    substr('', 10, 0),
-    substr('', 10, 10);
-----
-(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty)
-
-# Nulls
-query TTTTTTTTTT
-SELECT
-    substr('alphabet', NULL),
-    substr(NULL, 1),
-    substr(NULL, NULL),
-    substr('alphabet', CAST(NULL AS int), -20),
-    substr('alphabet', 3, CAST(NULL AS int)),
-    substr(NULL, 3, -4),
-    substr(NULL, NULL, 4),
-    substr(NULL, 1, NULL),
-    substr('', NULL, NULL),
-    substr(NULL, NULL, NULL);
-----
-NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
-
-query T
-SELECT substr('Hello🌏世界', 5)
-----
-o🌏世界
-
-query T
-SELECT substr('Hello🌏世界', 5, 3)
-----
-o🌏世
-
-statement ok
-create table test_substr (
-    c1 VARCHAR
-) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL);
-
-statement ok
-create table test_substr_stringview as
-select c1 as c1, arrow_cast(c1, 'Utf8View') as c1_view from test_substr;
-
-# `substr()` on `StringViewArray`'s implementation operates directly on view's
-# logical pointers, so check it's consistent with `StringArray`
-query BBBBBBBBBBBBBB
-select
-    substr(c1, 1) = substr(c1_view, 1),
-    substr(c1, 3) = substr(c1_view, 3),
-    substr(c1, 100) = substr(c1_view, 100),
-    substr(c1, -1) = substr(c1_view, -1),
-    substr(c1, 0, 0) = substr(c1_view, 0, 0),
-    substr(c1, -1, 2) = substr(c1_view, -1, 2),
-    substr(c1, -2, 10) = substr(c1_view, -2, 10),
-    substr(c1, -100, 200) = substr(c1_view, -100, 200),
-    substr(c1, -10, 10) = substr(c1_view, -10, 10),
-    substr(c1, -100, 10) = substr(c1_view, -100, 10),
-    substr(c1, 1, 100) = substr(c1_view, 1, 100),
-    substr(c1, 5, 3) = substr(c1_view, 5, 3),
-    substr(c1, 100, 200) = substr(c1_view, 100, 200),
-    substr(c1, 8, 0) = substr(c1_view, 8, 0)
-from test_substr_stringview;
-----
-true true true true true true true true true true true true true true
-true true true true true true true true true true true true true true
-true true true true true true true true true true true true true true
-true true true true true true true true true true true true true true
-true true true true true true true true true true true true true true
-NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
-
-# Check for non-ASCII strings
-query TT
-select substr(c1_view, 1), substr(c1_view, 5,3) from test_substr_stringview;
-----
-foo (empty)
-hello🌏世界 o🌏世
-💩 (empty)
-ThisIsAVeryLongASCIIString IsA
-(empty) (empty)
-NULL NULL
-
-statement ok
-drop table test_substr;
-
-statement ok
-drop table test_substr_stringview;
-
-
-statement error The SUBSTR function can only accept strings, but got Int64.
-SELECT substr(1, 3)
-
-statement error The SUBSTR function can only accept strings, but got Int64.
-SELECT substr(1, 3, 4)
-
-statement error Execution error: negative substring length not allowed
-select substr(arrow_cast('foo', 'Utf8View'), 1, -1);
-
-statement error Execution error: negative substring length not allowed
-select substr('', 1, -1);
-
 query T
 SELECT translate('12345', '143', 'ax')
 ----
diff --git a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part b/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
new file mode 100644
index 000000000000..dac59016627f
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
@@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+query T
+SELECT substr('alphabet', -3)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 0)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 1)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 2)
+----
+lphabet
+
+query T
+SELECT substr('alphabet', 3)
+----
+phabet
+
+query T
+SELECT substr('alphabet', 30)
+----
+(empty)
+
+query T
+SELECT substr('alphabet', 3, 2)
+----
+ph
+
+query T
+SELECT substr('alphabet', 3, 20)
+----
+phabet
+
+query TT
+select
+    substr(arrow_cast('alphabet', 'LargeUtf8'), 3, 20),
+    substr(arrow_cast('alphabet', 'Utf8View'), 3, 20);
+----
+phabet phabet
+
+# test range outside of string length
+query TTTTTTTTTTTT
+SELECT
+    substr('hi🌏', 1, 3),
+    substr('hi🌏', 1, 4),
+    substr('hi🌏', 1, 100),
+    substr('hi🌏', 0, 1),
+    substr('hi🌏', 0, 2),
+    substr('hi🌏', 0, 4),
+    substr('hi🌏', 0, 5),
+    substr('hi🌏', -10, 100),
+    substr('hi🌏', -10, 12),
+    substr('hi🌏', -10, 5),
+    substr('hi🌏', 10, 0),
+    substr('hi🌏', 10, 10);
+----
+hi🌏 hi🌏 hi🌏 (empty) h hi🌏 hi🌏 hi🌏 h (empty) (empty) (empty)
+
+query TTTTTTTTTTTT
+SELECT
+    substr('', 1, 3),
+    substr('', 1, 4),
+    substr('', 1, 100),
+    substr('', 0, 1),
+    substr('', 0, 2),
+    substr('', 0, 4),
+    substr('', 0, 5),
+    substr('', -10, 100),
+    substr('', -10, 12),
+    substr('', -10, 5),
+    substr('', 10, 0),
+    substr('', 10, 10);
+----
+(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty)
+
+# Nulls
+query TTTTTTTTTT
+SELECT
+    substr('alphabet', NULL),
+    substr(NULL, 1),
+    substr(NULL, NULL),
+    substr('alphabet', CAST(NULL AS int), -20),
+    substr('alphabet', 3, CAST(NULL AS int)),
+    substr(NULL, 3, -4),
+    substr(NULL, NULL, 4),
+    substr(NULL, 1, NULL),
+    substr('', NULL, NULL),
+    substr(NULL, NULL, NULL);
+----
+NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+
+query T
+SELECT substr('Hello🌏世界', 5)
+----
+o🌏世界
+
+query T
+SELECT substr('Hello🌏世界', 5, 3)
+----
+o🌏世
+
+statement error The SUBSTR function can only accept strings, but got Int64.
+SELECT substr(1, 3)
+
+statement error The SUBSTR function can only accept strings, but got Int64.
+SELECT substr(1, 3, 4)
+
+statement error Execution error: negative substring length not allowed
+select substr(arrow_cast('foo', 'Utf8View'), 1, -1);
+
+statement error Execution error: negative substring length not allowed
+select substr('', 1, -1);
diff --git a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt b/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
new file mode 100644
index 000000000000..d61cfa2b743a
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# --------------------------------------
+# Test `substr()` with literal arguments
+# --------------------------------------
+include ./substr_literal.slt.part
+
+# --------------------------------------
+# Setup test tables with different physical string types (Utf8/Utf8View/LargeUtf8)
+# and repeat tests in `substr_table.slt.part`
+# --------------------------------------
+statement ok
+create table test_substr_base (
+    col1 VARCHAR
+) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL);
+
+#
+# Run1: Utf8
+#
+statement ok
+create table test_substr as
+select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
+
+include ./substr_table.slt.part
+
+statement ok
+drop table test_substr;
+
+#
+# Run2: Utf8View
+#
+statement ok
+create table test_substr as
+select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
+
+include ./substr_table.slt.part
+
+statement ok
+drop table test_substr;
+
+#
+# Run3: LargeUtf8
+#
+statement ok
+create table test_substr as
+select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
+
+include ./substr_table.slt.part
+
+statement ok
+drop table test_substr;
+
+# --------------------------------------
+# Cleanup
+# --------------------------------------
+statement ok
+drop table test_substr_base;
diff --git a/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part b/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
new file mode 100644
index 000000000000..0d8e62af84e0
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file is intended to be run with table `test_substr` already defined
+# with standard values in string column `c1`, but with different physical
+# string types (Utf8, Utf8View, LargeUtf8); see `substr_runner.slt`.
+
+query TTTTTTTTTTTTTT
+select
+    substr(c1, 1),
+    substr(c1, 3),
+    substr(c1, 100),
+    substr(c1, -1),
+    substr(c1, 0, 0),
+    substr(c1, -1, 2),
+    substr(c1, -2, 10),
+    substr(c1, -100, 200),
+    substr(c1, -10, 10),
+    substr(c1, -100, 10),
+    substr(c1, 1, 100),
+    substr(c1, 5, 3),
+    substr(c1, 100, 200),
+    substr(c1, 8, 0)
+from test_substr;
+----
+foo o (empty) foo (empty) (empty) foo foo (empty) (empty) foo (empty) (empty) (empty)
+hello🌏世界 llo🌏世界 (empty) hello🌏世界 (empty) (empty) hello🌏世 hello🌏世界 (empty) (empty) hello🌏世界 o🌏世 (empty) (empty)
+💩 (empty) (empty) 💩 (empty) (empty) 💩 💩 (empty) (empty) 💩 (empty) (empty) (empty)
+ThisIsAVeryLongASCIIString isIsAVeryLongASCIIString (empty) ThisIsAVeryLongASCIIString (empty) (empty) ThisIsA ThisIsAVeryLongASCIIString (empty) (empty) ThisIsAVeryLongASCIIString IsA (empty) (empty)
+(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty)
+NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL