From 3aab5a29214f3dcaeecd1ae01b7aab373f3489a5 Mon Sep 17 00:00:00 2001 From: wyb Date: Thu, 16 Jan 2025 23:45:16 +0800 Subject: [PATCH] [BugFix] Fix ip string as double type in csv files() Signed-off-by: wyb --- be/src/util/string_parser.hpp | 8 ++++++++ be/test/util/string_parser_test.cpp | 2 ++ test/sql/test_files/R/test_csv_ip | 28 +++++++++++++++++++++++++++ test/sql/test_files/T/test_csv_ip | 12 ++++++++++++ test/sql/test_files/csv_format/ip.csv | 1 + 5 files changed, 51 insertions(+) create mode 100644 test/sql/test_files/R/test_csv_ip create mode 100644 test/sql/test_files/T/test_csv_ip create mode 100644 test/sql/test_files/csv_format/ip.csv diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index 951b690fa6eed..5a48edea7d12c 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -522,6 +522,14 @@ inline T StringParser::string_to_float_internal(const char* s, int len, ParseRes auto res = fast_float::from_chars(s + i, s + j + 1, val); if (LIKELY(res.ec == std::errc())) { + // 'res.ptr' is set to point right after the parsed number. + // If any characters are left over, treat it as a failure. + // For example, + // '10.11.12.13' is parsed as 10.11, res.ptr points at '.12.13', so the input is invalid. 
+ if (res.ptr != s + j + 1) { + *result = PARSE_FAILURE; + return 0; + } if (UNLIKELY(val == std::numeric_limits<T>::infinity())) { *result = PARSE_OVERFLOW; } else { diff --git a/be/test/util/string_parser_test.cpp b/be/test/util/string_parser_test.cpp index 77d0550edb5a1..e7c52dbe1dc1f 100644 --- a/be/test/util/string_parser_test.cpp +++ b/be/test/util/string_parser_test.cpp @@ -540,6 +540,8 @@ TEST(StringToFloat, Basic) { test_all_float_variants("in finity", StringParser::PARSE_FAILURE); test_all_float_variants("na", StringParser::PARSE_FAILURE); test_all_float_variants("ThisIsANaN", StringParser::PARSE_FAILURE); + test_all_float_variants("10.1.2.3", StringParser::PARSE_FAILURE); + test_all_float_variants("10.1 max", StringParser::PARSE_FAILURE); } TEST(StringToFloat, InvalidLeadingTrailing) { diff --git a/test/sql/test_files/R/test_csv_ip b/test/sql/test_files/R/test_csv_ip new file mode 100644 index 0000000000000..8d0b9ac0c73e3 --- /dev/null +++ b/test/sql/test_files/R/test_csv_ip @@ -0,0 +1,28 @@ +-- name: test_csv_ip + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/csv_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/csv_format/ip.csv oss://${oss_bucket}/test_files/csv_format/${uuid0}/ | grep -Pv "(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 14. OK num: 1(upload 1 files). 
+-- !result + + +select * from files("path" = "oss://${oss_bucket}/test_files/csv_format/${uuid0}/*", "format" = "csv", "csv.column_separator" = "|", "csv.row_delimiter" = "\n"); +-- result: +1 10.11.12.13 +-- !result + +desc files("path" = "oss://${oss_bucket}/test_files/csv_format/${uuid0}/*", "format" = "csv", "csv.column_separator" = "|", "csv.row_delimiter" = "\n"); +-- result: +$1 bigint YES +$2 varchar(1048576) YES +-- !result + + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/csv_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/test_csv_ip b/test/sql/test_files/T/test_csv_ip new file mode 100644 index 0000000000000..b62ffaa4e2d94 --- /dev/null +++ b/test/sql/test_files/T/test_csv_ip @@ -0,0 +1,12 @@ +-- name: test_csv_ip + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/csv_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null +shell: ossutil64 cp --force ./sql/test_files/csv_format/ip.csv oss://${oss_bucket}/test_files/csv_format/${uuid0}/ | grep -Pv "(average|elapsed)" + +select * from files("path" = "oss://${oss_bucket}/test_files/csv_format/${uuid0}/*", "format" = "csv", "csv.column_separator" = "|", "csv.row_delimiter" = "\n"); +desc files("path" = "oss://${oss_bucket}/test_files/csv_format/${uuid0}/*", "format" = "csv", "csv.column_separator" = "|", "csv.row_delimiter" = "\n"); + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/csv_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/csv_format/ip.csv b/test/sql/test_files/csv_format/ip.csv new file mode 100644 index 0000000000000..419eb79e83ba9 --- /dev/null +++ b/test/sql/test_files/csv_format/ip.csv @@ -0,0 +1 @@ +1|10.11.12.13