diff --git a/Cargo.toml b/Cargo.toml index 673625434a52..79f2cc9a9553 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,12 +32,12 @@ rust-version = "1.70" version = "28.0.0" [workspace.dependencies] -arrow = { version = "43.0.0", features = ["prettyprint", "dyn_cmp_dict"] } -arrow-array = { version = "43.0.0", default-features = false, features = ["chrono-tz"] } -arrow-buffer = { version = "43.0.0", default-features = false } -arrow-flight = { version = "43.0.0", features = ["flight-sql-experimental"] } -arrow-schema = { version = "43.0.0", default-features = false } -parquet = { version = "43.0.0", features = ["arrow", "async", "object_store"] } +arrow = { version = "45.0.0", features = ["prettyprint", "dyn_cmp_dict"] } +arrow-array = { version = "45.0.0", default-features = false, features = ["chrono-tz"] } +arrow-buffer = { version = "45.0.0", default-features = false } +arrow-flight = { version = "45.0.0", features = ["flight-sql-experimental"] } +arrow-schema = { version = "45.0.0", default-features = false } +parquet = { version = "45.0.0", features = ["arrow", "async", "object_store"] } sqlparser = { version = "0.36.1", features = ["visitor"] } [profile.release] diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 5e06b3a341dd..22c8e69bc902 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -95,9 +95,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a" +checksum = "b7104b9e9761613ae92fe770c741d6bbf1dbc791a0fe204400aebdd429875741" dependencies = [ "ahash", "arrow-arith", @@ -117,9 +117,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e" +checksum = "38e597a8e8efb8ff52c50eaf8f4d85124ce3c1bf20fab82f476d73739d9ab1c2" dependencies = [ "arrow-array", "arrow-buffer", @@ -132,9 +132,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0" +checksum = "2a86d9c1473db72896bd2345ebb6b8ad75b8553ba390875c76708e8dc5c5492d" dependencies = [ "ahash", "arrow-buffer", @@ -149,9 +149,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722" +checksum = "234b3b1c8ed00c874bf95972030ac4def6f58e02ea5a7884314388307fb3669b" dependencies = [ "half", "num", @@ -159,9 +159,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1" +checksum = "22f61168b853c7faea8cea23a2169fdff9c82fb10ae5e2c07ad1cab8f6884931" dependencies = [ "arrow-array", "arrow-buffer", @@ -177,9 +177,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a" +checksum = "10b545c114d9bf8569c84d2fbe2020ac4eea8db462c0a37d0b65f41a90d066fe" dependencies = [ "arrow-array", "arrow-buffer", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b" +checksum = "c6b6852635e7c43e5b242841c7470606ff0ee70eef323004cacc3ecedd33dd8f" dependencies = [ "arrow-buffer", "arrow-schema", @@ -208,9 +208,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18" +checksum = "a66da9e16aecd9250af0ae9717ae8dd7ea0d8ca5a3e788fe3de9f4ee508da751" dependencies = [ "arrow-array", "arrow-buffer", @@ -222,9 +222,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d" +checksum = "60ee0f9d8997f4be44a60ee5807443e396e025c23cf14d2b74ce56135cb04474" dependencies = [ "arrow-array", "arrow-buffer", @@ -242,9 +242,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26" +checksum = "7fcab05410e6b241442abdab6e1035177dc082bdb6f17049a4db49faed986d63" dependencies = [ "arrow-array", "arrow-buffer", @@ -257,9 +257,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3" +checksum = "91a847dd9eb0bacd7836ac63b3475c68b2210c2c96d0ec1b808237b973bd5d73" dependencies = [ "ahash", "arrow-array", @@ -272,15 +272,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e" +checksum = "54df8c47918eb634c20e29286e69494fdc20cafa5173eb6dad49c7f6acece733" [[package]] name = "arrow-select" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221" +checksum = "941dbe481da043c4bd40c805a19ec2fc008846080c4953171b62bcad5ee5f7fb" dependencies = [ "arrow-array", "arrow-buffer", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af" +checksum = "359b2cd9e071d5a3bcf44679f9d85830afebc5b9c98a08019a570a65ae933e0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -2262,9 +2262,9 @@ dependencies = [ [[package]] name = "parquet" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750" +checksum = "49f9739b984380582bdb7749ae5b5d28839bce899212cf16465c1ac1f8b65d79" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 4906916e674e..376f2533d860 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -29,7 +29,7 @@ rust-version = "1.70" readme = "README.md" [dependencies] -arrow = "43.0.0" +arrow = "45.0.0" async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index a3953063fd06..8d8c0e4a3941 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -18,8 +18,9 @@ //! Print format variants use arrow::csv::writer::WriterBuilder; use arrow::json::{ArrayWriter, LineDelimitedWriter}; +use arrow::util::pretty::pretty_format_batches_with_options; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::arrow::util::pretty; +use datafusion::common::format::DEFAULT_FORMAT_OPTIONS; use datafusion::error::{DataFusionError, Result}; use std::str::FromStr; @@ -75,7 +76,12 @@ impl PrintFormat { match self { Self::Csv => println!("{}", print_batches_with_sep(batches, b',')?), Self::Tsv => println!("{}", print_batches_with_sep(batches, b'\t')?), - Self::Table => pretty::print_batches(batches)?, + Self::Table => { + println!( + "{}", + pretty_format_batches_with_options(batches, &DEFAULT_FORMAT_OPTIONS)? + ) + } Self::Json => println!("{}", batches_to_json!(ArrayWriter, batches)), Self::NdJson => { println!("{}", batches_to_json!(LineDelimitedWriter, batches)) diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs new file mode 100644 index 000000000000..d5421c36cd73 --- /dev/null +++ b/datafusion/common/src/format.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::util::display::{DurationFormat, FormatOptions}; + +/// The default [`FormatOptions`] to use within DataFusion +pub const DEFAULT_FORMAT_OPTIONS: FormatOptions<'static> = + FormatOptions::new().with_duration_format(DurationFormat::Pretty); diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 7a46f28b5073..e6116029d65e 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -23,6 +23,7 @@ pub mod delta; mod dfschema; pub mod display; mod error; +pub mod format; mod functional_dependencies; mod join_type; pub mod parsers; diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 96ec5bc76f62..0e016e9372bf 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -46,8 +46,7 @@ use arrow::{ DECIMAL128_MAX_PRECISION, }, }; -use arrow_array::timezone::Tz; -use arrow_array::ArrowNativeTypeOp; +use arrow_array::{timezone::Tz, ArrowNativeTypeOp}; use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime}; // Constants we use throughout this file: @@ -773,55 +772,21 @@ macro_rules! impl_op { ($LHS:expr, $RHS:expr, -) => { match ($LHS, $RHS) { ( - ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), - ) => { - let err = || { - DataFusionError::Execution( - "Overflow while converting seconds to milliseconds".to_string(), - ) - }; - ts_sub_to_interval::( - ts_lhs.checked_mul(1_000).ok_or_else(err)?, - ts_rhs.checked_mul(1_000).ok_or_else(err)?, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ) - }, + ScalarValue::TimestampSecond(Some(ts_lhs), _), + ScalarValue::TimestampSecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationSecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), ( - ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), - ) => ts_sub_to_interval::( - *ts_lhs, - *ts_rhs, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ), + ScalarValue::TimestampMillisecond(Some(ts_lhs), _), + ScalarValue::TimestampMillisecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationMillisecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), ( - ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), - ) => { - let err = || { - DataFusionError::Execution( - "Overflow while converting microseconds to nanoseconds".to_string(), - ) - }; - ts_sub_to_interval::( - ts_lhs.checked_mul(1_000).ok_or_else(err)?, - ts_rhs.checked_mul(1_000).ok_or_else(err)?, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ) - }, + ScalarValue::TimestampMicrosecond(Some(ts_lhs), _), + ScalarValue::TimestampMicrosecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationMicrosecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), ( - ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), - ) => ts_sub_to_interval::( - *ts_lhs, - *ts_rhs, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ), + ScalarValue::TimestampNanosecond(Some(ts_lhs), _), + ScalarValue::TimestampNanosecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationNanosecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), _ => impl_op_arithmetic!($LHS, $RHS, -) } }; @@ -1178,49 +1143,6 @@ pub const MDN_MODE: i8 = 2; pub const MILLISECOND_MODE: bool = false; pub const NANOSECOND_MODE: bool = true; -/// This function computes subtracts `rhs_ts` from `lhs_ts`, taking timezones -/// into account when given. Units of the resulting interval is specified by -/// the constant `TIME_MODE`. -/// The default behavior of Datafusion is the following: -/// - When subtracting timestamps at seconds/milliseconds precision, the output -/// interval will have the type [`IntervalDayTimeType`]. -/// - When subtracting timestamps at microseconds/nanoseconds precision, the -/// output interval will have the type [`IntervalMonthDayNanoType`]. -fn ts_sub_to_interval( - lhs_ts: i64, - rhs_ts: i64, - lhs_tz: Option<&str>, - rhs_tz: Option<&str>, -) -> Result { - let parsed_lhs_tz = parse_timezones(lhs_tz)?; - let parsed_rhs_tz = parse_timezones(rhs_tz)?; - - let (naive_lhs, naive_rhs) = - calculate_naives::(lhs_ts, parsed_lhs_tz, rhs_ts, parsed_rhs_tz)?; - let delta_secs = naive_lhs.signed_duration_since(naive_rhs); - - match TIME_MODE { - MILLISECOND_MODE => { - let as_millisecs = delta_secs.num_milliseconds(); - Ok(ScalarValue::new_interval_dt( - (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, - (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, - )) - } - NANOSECOND_MODE => { - let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { - DataFusionError::Execution(String::from( - "Can not compute timestamp differences with nanosecond precision", - )) - })?; - Ok(ScalarValue::new_interval_mdn( - 0, - (as_nanosecs / NANOSECS_IN_ONE_DAY) as i32, - as_nanosecs % NANOSECS_IN_ONE_DAY, - )) - } - } -} /// This function parses the timezone from string to Tz. /// If it cannot parse or timezone field is [`None`], it returns [`None`]. @@ -1455,6 +1377,14 @@ where ScalarValue::IntervalDayTime(Some(i)) => add_day_time(prior, *i, sign), ScalarValue::IntervalYearMonth(Some(i)) => shift_months(prior, *i, sign), ScalarValue::IntervalMonthDayNano(Some(i)) => add_m_d_nano(prior, *i, sign), + ScalarValue::DurationSecond(Some(v)) => prior.add(Duration::seconds(*v)), + ScalarValue::DurationMillisecond(Some(v)) => { + prior.add(Duration::milliseconds(*v)) + } + ScalarValue::DurationMicrosecond(Some(v)) => { + prior.add(Duration::microseconds(*v)) + } + ScalarValue::DurationNanosecond(Some(v)) => prior.add(Duration::nanoseconds(*v)), other => Err(DataFusionError::Execution(format!( "DateIntervalExpr does not support non-interval type {other:?}" )))?, @@ -1932,6 +1862,16 @@ impl ScalarValue { DataType::Interval(IntervalUnit::MonthDayNano) => { ScalarValue::IntervalMonthDayNano(Some(0)) } + DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None), + DataType::Duration(TimeUnit::Millisecond) => { + ScalarValue::DurationMillisecond(None) + } + DataType::Duration(TimeUnit::Microsecond) => { + ScalarValue::DurationMicrosecond(None) + } + DataType::Duration(TimeUnit::Nanosecond) => { + ScalarValue::DurationNanosecond(None) + } _ => { return Err(DataFusionError::NotImplemented(format!( "Can't create a zero scalar from data_type \"{datatype:?}\"" @@ -3309,6 +3249,20 @@ impl ScalarValue { IntervalMonthDayNano ) } + + DataType::Duration(TimeUnit::Second) => { + typed_cast!(array, index, DurationSecondArray, DurationSecond) + } + DataType::Duration(TimeUnit::Millisecond) => { + typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond) + } + DataType::Duration(TimeUnit::Microsecond) => { + typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond) + } + DataType::Duration(TimeUnit::Nanosecond) => { + typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond) + } + other => { return Err(DataFusionError::NotImplemented(format!( "Can't create a scalar from array of type \"{other:?}\"" @@ -3851,6 +3805,18 @@ impl TryFrom<&DataType> for ScalarValue { DataType::Interval(IntervalUnit::MonthDayNano) => { ScalarValue::IntervalMonthDayNano(None) } + + DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None), + DataType::Duration(TimeUnit::Millisecond) => { + ScalarValue::DurationMillisecond(None) + } + DataType::Duration(TimeUnit::Microsecond) => { + ScalarValue::DurationMicrosecond(None) + } + DataType::Duration(TimeUnit::Nanosecond) => { + ScalarValue::DurationNanosecond(None) + } + DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary( index_type.clone(), Box::new(value_type.as_ref().try_into()?), @@ -4098,7 +4064,7 @@ mod tests { use std::sync::Arc; use arrow::compute::kernels; - use arrow::compute::{self, concat, is_null}; + use arrow::compute::{concat, is_null}; use arrow::datatypes::ArrowPrimitiveType; use arrow::util::pretty::pretty_format_columns; use arrow_array::ArrowNumericType; @@ -4227,7 +4193,7 @@ mod tests { let right_array = right.to_array(); let arrow_left_array = left_array.as_primitive::(); let arrow_right_array = right_array.as_primitive::(); - let arrow_result = compute::add_checked(arrow_left_array, arrow_right_array); + let arrow_result = kernels::numeric::add(arrow_left_array, arrow_right_array); assert_eq!(scalar_result.is_ok(), arrow_result.is_ok()); } @@ -5945,25 +5911,11 @@ mod tests { } } - #[test] - fn timestamp_op_tests() { - // positive interval, edge cases - let test_data = get_timestamp_test_data(1); - for (lhs, rhs, expected) in test_data.into_iter() { - assert_eq!(expected, lhs.sub(rhs).unwrap()) - } - - // negative interval, edge cases - let test_data = get_timestamp_test_data(-1); - for (rhs, lhs, expected) in test_data.into_iter() { - assert_eq!(expected, lhs.sub(rhs).unwrap()); - } - } #[test] fn timestamp_op_random_tests() { // timestamp1 + (or -) interval = timestamp2 // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 1000000; + let sample_size = 1000; let timestamps1 = get_random_timestamps(sample_size); let intervals = get_random_intervals(sample_size); // ts(sec) + interval(ns) = ts(sec); however, @@ -5972,18 +5924,12 @@ mod tests { for (idx, ts1) in timestamps1.iter().enumerate() { if idx % 2 == 0 { let timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); - assert_eq!( - intervals[idx], - timestamp2.sub(ts1).unwrap(), - "index:{idx}, operands: {timestamp2:?} (-) {ts1:?}" - ); + let back = timestamp2.sub(intervals[idx].clone()).unwrap(); + assert_eq!(ts1, &back); } else { let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); - assert_eq!( - intervals[idx], - ts1.sub(timestamp2.clone()).unwrap(), - "index:{idx}, operands: {ts1:?} (-) {timestamp2:?}" - ); + let back = timestamp2.add(intervals[idx].clone()).unwrap(); + assert_eq!(ts1, &back); }; } } @@ -6068,285 +6014,6 @@ mod tests { check_array(array); } - fn get_timestamp_test_data( - sign: i32, - ) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { - vec![ - ( - // 1st test case, having the same time but different with timezones - // Since they are timestamps with nanosecond precision, expected type is - // [`IntervalMonthDayNanoType`] - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_nano_opt(12, 0, 0, 000_000_000) - .unwrap() - .timestamp_nanos(), - ), - Some("+12:00".into()), - ), - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 000_000_000) - .unwrap() - .timestamp_nanos(), - ), - Some("+00:00".into()), - ), - ScalarValue::new_interval_mdn(0, 0, 0), - ), - // 2nd test case, january with 31 days plus february with 28 days, with timezone - ( - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_micro_opt(2, 0, 0, 000_000) - .unwrap() - .timestamp_micros(), - ), - Some("+01:00".into()), - ), - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_micro_opt(0, 0, 0, 000_000) - .unwrap() - .timestamp_micros(), - ), - Some("-01:00".into()), - ), - ScalarValue::new_interval_mdn(0, sign * 59, 0), - ), - // 3rd test case, 29-days long february minus previous, year with timezone - ( - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2024, 2, 29) - .unwrap() - .and_hms_milli_opt(10, 10, 0, 000) - .unwrap() - .timestamp_millis(), - ), - Some("+10:10".into()), - ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 12, 31) - .unwrap() - .and_hms_milli_opt(1, 0, 0, 000) - .unwrap() - .timestamp_millis(), - ), - Some("+01:00".into()), - ), - ScalarValue::new_interval_dt(sign * 60, 0), - ), - // 4th test case, leap years occur mostly every 4 years, but every 100 years - // we skip a leap year unless the year is divisible by 400, so 31 + 28 = 59 - ( - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2100, 3, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - Some("-11:59".into()), - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2100, 1, 1) - .unwrap() - .and_hms_opt(23, 58, 0) - .unwrap() - .timestamp(), - ), - Some("+11:59".into()), - ), - ScalarValue::new_interval_dt(sign * 59, 0), - ), - // 5th test case, without timezone positively seemed, but with timezone, - // negative resulting interval - ( - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_milli_opt(6, 00, 0, 000) - .unwrap() - .timestamp_millis(), - ), - Some("+06:00".into()), - ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_milli_opt(0, 0, 0, 000) - .unwrap() - .timestamp_millis(), - ), - Some("-12:00".into()), - ), - ScalarValue::new_interval_dt(0, sign * -43_200_000), - ), - // 6th test case, no problem before unix epoch beginning - ( - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(1970, 1, 1) - .unwrap() - .and_hms_micro_opt(1, 2, 3, 15) - .unwrap() - .timestamp_micros(), - ), - None, - ), - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(1969, 1, 1) - .unwrap() - .and_hms_micro_opt(0, 0, 0, 000_000) - .unwrap() - .timestamp_micros(), - ), - None, - ), - ScalarValue::new_interval_mdn( - 0, - 365 * sign, - sign as i64 * 3_723_000_015_000, - ), - ), - // 7th test case, no problem with big intervals - ( - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2100, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 0) - .unwrap() - .timestamp_nanos(), - ), - None, - ), - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 000_000_000) - .unwrap() - .timestamp_nanos(), - ), - None, - ), - ScalarValue::new_interval_mdn(0, sign * 36525, 0), - ), - // 8th test case, no problem detecting 366-days long years - ( - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2041, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - None, - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2040, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - None, - ), - ScalarValue::new_interval_dt(sign * 366, 0), - ), - // 9th test case, no problem with unrealistic timezones - ( - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 3) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - Some("+23:59".into()), - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_opt(0, 2, 0) - .unwrap() - .timestamp(), - ), - Some("-23:59".into()), - ), - ScalarValue::new_interval_dt(0, 0), - ), - // 10th test case, parsing different types of timezone input - ( - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 17) - .unwrap() - .and_hms_opt(14, 10, 0) - .unwrap() - .timestamp(), - ), - Some("Europe/Istanbul".into()), - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 17) - .unwrap() - .and_hms_opt(4, 10, 0) - .unwrap() - .timestamp(), - ), - Some("America/Los_Angeles".into()), - ), - ScalarValue::new_interval_dt(0, 0), - ), - // 11th test case, negative results - ( - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 17) - .unwrap() - .and_hms_milli_opt(4, 10, 0, 0) - .unwrap() - .timestamp_millis(), - ), - None, - ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 17) - .unwrap() - .and_hms_milli_opt(4, 10, 0, 1) - .unwrap() - .timestamp_millis(), - ), - None, - ), - ScalarValue::new_interval_dt(0, -sign), - ), - ] - } - fn get_random_timestamps(sample_size: u64) -> Vec { let vector_size = sample_size; let mut timestamp = vec![]; diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 7f73e00dca57..d306ecd10158 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -61,9 +61,12 @@ macro_rules! assert_batches_eq { let expected_lines: Vec = $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) - .unwrap() - .to_string(); + let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( + $CHUNKS, + &$crate::common::format::DEFAULT_FORMAT_OPTIONS, + ) + .unwrap() + .to_string(); let actual_lines: Vec<&str> = formatted.trim().lines().collect(); @@ -97,9 +100,12 @@ macro_rules! assert_batches_sorted_eq { expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() } - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) - .unwrap() - .to_string(); + let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( + $CHUNKS, + &$crate::common::format::DEFAULT_FORMAT_OPTIONS, + ) + .unwrap() + .to_string(); // fix for windows: \r\n --> let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 60f602b68d63..0482f07795ad 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -576,8 +576,8 @@ async fn timestamp_add_interval_months() -> Result<()> { let res2 = actual[0][1].as_str(); let format = "%Y-%m-%dT%H:%M:%S%.6fZ"; - let t1_naive = chrono::NaiveDateTime::parse_from_str(res1, format).unwrap(); - let t2_naive = chrono::NaiveDateTime::parse_from_str(res2, format).unwrap(); + let t1_naive = NaiveDateTime::parse_from_str(res1, format).unwrap(); + let t2_naive = NaiveDateTime::parse_from_str(res2, format).unwrap(); let year = t1_naive.year() + (t1_naive.month0() as i32 + 17) / 12; let month = (t1_naive.month0() + 17) % 12 + 1; @@ -1035,13 +1035,13 @@ async fn timestamp_sub_with_tz() -> Result<()> { let sql = "SELECT val, ts1 - ts2 AS ts_diff FROM table_a ORDER BY ts2 - ts1"; let actual = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+-----+---------------------------------------------------+", - "| val | ts_diff |", - "+-----+---------------------------------------------------+", - "| 3 | 0 years 0 mons 0 days 10 hours 0 mins 30.000 secs |", - "| 1 | 0 years 0 mons 0 days 10 hours 0 mins 20.000 secs |", - "| 2 | 0 years 0 mons 0 days 10 hours 0 mins 10.000 secs |", - "+-----+---------------------------------------------------+", + "+-----+-----------------------------------+", + "| val | ts_diff |", + "+-----+-----------------------------------+", + "| 3 | 0 days 0 hours 0 mins 30.000 secs |", + "| 1 | 0 days 0 hours 0 mins 20.000 secs |", + "| 2 | 0 days 0 hours 0 mins 10.000 secs |", + "+-----+-----------------------------------+", ]; assert_batches_eq!(expected, &actual); diff --git a/datafusion/core/tests/sql/udf.rs b/datafusion/core/tests/sql/udf.rs index 2907d468066e..86ff6ebac228 100644 --- a/datafusion/core/tests/sql/udf.rs +++ b/datafusion/core/tests/sql/udf.rs @@ -16,7 +16,7 @@ // under the License. use super::*; -use arrow::compute::add; +use arrow::compute::kernels::numeric::add; use datafusion::{ execution::registry::FunctionRegistry, physical_plan::{expressions::AvgAccumulator, functions::make_scalar_function}, diff --git a/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt b/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt index 5c82c7e0091d..e3d316a43ff1 100644 --- a/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt +++ b/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt @@ -309,9 +309,11 @@ select arrow_cast('30 minutes', 'Interval(MonthDayNano)'); ## Duration -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nThis feature is not implemented: Can't create a scalar from array of type "Duration\(Second\)" +query ? --- select arrow_cast(interval '30 minutes', 'Duration(Second)'); +---- +0 days 0 hours 30 mins 0 secs query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Duration\(Second\) select arrow_cast('30 minutes', 'Duration(Second)'); diff --git a/datafusion/core/tests/sqllogictests/test_files/binary.slt b/datafusion/core/tests/sqllogictests/test_files/binary.slt index 54499e29787b..d3a7e8c19334 100644 --- a/datafusion/core/tests/sqllogictests/test_files/binary.slt +++ b/datafusion/core/tests/sqllogictests/test_files/binary.slt @@ -127,14 +127,19 @@ ff0102 FixedSizeBinary(3) 000102 FixedSizeBinary(3) # Comparison -# https://github.com/apache/arrow-rs/pull/4492 -query error DataFusion error: Arrow error: Compute error: eq_dyn_binary_scalar only supports Binary or LargeBinary arrays +query ??BB SELECT column1, column2, column1 = arrow_cast(X'000102', 'FixedSizeBinary(3)'), column1 = column2 FROM t +---- +000102 000102 true true +003102 000102 false false +NULL 000102 NULL NULL +ff0102 000102 false false +000102 000102 true true # Comparison to different sized field diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt b/datafusion/core/tests/sqllogictests/test_files/dates.slt index c35f16bc0343..a93a7ff7e73c 100644 --- a/datafusion/core/tests/sqllogictests/test_files/dates.slt +++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt @@ -91,17 +91,19 @@ where d3_date > now() + '5 days'; # DATE minus DATE # https://github.com/apache/arrow-rs/issues/4383 -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nArrow error: Cast error: Cannot perform arithmetic operation between array of type Date32 and array of type Date32 +query ? SELECT DATE '2023-04-09' - DATE '2023-04-02'; +---- +7 days 0 hours 0 mins 0 secs # DATE minus Timestamp query ? SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp; ---- -0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs +8499 days 0 hours 0 mins 0.000000000 secs # Timestamp minus DATE query ? SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- -0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs +730 days 0 hours 0 mins 0.000000000 secs diff --git a/datafusion/core/tests/sqllogictests/test_files/decimal.slt b/datafusion/core/tests/sqllogictests/test_files/decimal.slt index 8fd08f87c849..ec2bb3edaba6 100644 --- a/datafusion/core/tests/sqllogictests/test_files/decimal.slt +++ b/datafusion/core/tests/sqllogictests/test_files/decimal.slt @@ -365,7 +365,7 @@ select c1*c5 from decimal_simple; query T select arrow_typeof(c1/cast(0.00001 as decimal(5,5))) from decimal_simple limit 1; ---- -Decimal128(21, 12) +Decimal128(19, 10) query R rowsort @@ -391,27 +391,27 @@ select c1/cast(0.00001 as decimal(5,5)) from decimal_simple; query T select arrow_typeof(c1/c5) from decimal_simple limit 1; ---- -Decimal128(30, 19) +Decimal128(21, 10) query R rowsort select c1/c5 from decimal_simple; ---- 0.5 -0.641025641026 -0.714285714286 -0.735294117647 +0.641025641 +0.7142857142 +0.7352941176 0.8 -0.857142857143 -0.909090909091 -0.909090909091 +0.8571428571 +0.909090909 +0.909090909 0.9375 -0.961538461538 +0.9615384615 1 1 -1.052631578947 -1.515151515152 -2.727272727273 +1.0526315789 +1.5151515151 +2.7272727272 query T @@ -610,8 +610,10 @@ create table foo (a DECIMAL(38, 20), b DECIMAL(38, 0)); statement ok insert into foo VALUES (1, 5); -query error DataFusion error: Arrow error: Compute error: Overflow happened on: 100000000000000000000 \* 100000000000000000000000000000000000000 +query R select a / b from foo; +---- +0.2 statement ok create table t as values (arrow_cast(123, 'Decimal256(5,2)')); diff --git a/datafusion/core/tests/sqllogictests/test_files/interval.slt b/datafusion/core/tests/sqllogictests/test_files/interval.slt index 043f63958d1b..500876f76221 100644 --- a/datafusion/core/tests/sqllogictests/test_files/interval.slt +++ b/datafusion/core/tests/sqllogictests/test_files/interval.slt @@ -276,6 +276,12 @@ create table t (i interval) as values ('5 days 3 nanoseconds'::interval); statement ok insert into t values ('6 days 7 nanoseconds'::interval) +query ? rowsort +select -i from t order by 1; +---- +0 years 0 mons -5 days 0 hours 0 mins -0.000000003 secs +0 years 0 mons -6 days 0 hours 0 mins -0.000000007 secs + query ?T rowsort select i, diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt index 18445078071a..72f168e01412 100644 --- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt +++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt @@ -1182,10 +1182,10 @@ create table bar (val int, i1 interval, i2 interval) as values query I? SELECT val, ts1 - ts2 FROM foo ORDER BY ts2 - ts1; ---- -4 0 years 0 mons -15250 days -13 hours -28 mins -44.999876545 secs -3 0 years 0 mons 15952 days 23 hours 22 mins 12.667123455 secs -2 0 years 0 mons 8406 days 1 hours 1 mins 54.877123455 secs -1 0 years 0 mons 53 days 16 hours 0 mins 20.000000024 secs +3 15952 days 23 hours 22 mins 12.667123455 secs +2 8406 days 1 hours 1 mins 54.877123455 secs +1 53 days 16 hours 0 mins 20.000000024 secs +4 -15250 days -13 hours -28 mins -44.999876545 secs # Interval - Interval query ? @@ -1233,7 +1233,7 @@ SELECT ts1 + i FROM foo; 2003-07-12T01:31:15.000123463 # Timestamp + Timestamp => error -query error DataFusion error: Arrow error: Cast error: Cannot perform arithmetic operation between array of type Timestamp\(Nanosecond, None\) and array of type Timestamp\(Nanosecond, None\) +query error DataFusion error: Error during planning: Cannot get result type for temporal operation Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\): Invalid argument error: Invalid timestamp arithmetic operation: Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\) SELECT ts1 + ts2 FROM foo; @@ -1241,16 +1241,16 @@ FROM foo; query ? SELECT '2000-01-01T00:00:00'::timestamp - '2000-01-01T00:00:00'::timestamp; ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs +0 days 0 hours 0 mins 0.000000000 secs # large timestamp - small timestamp query ? SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp; ---- -0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs +-3653 days 0 hours 0 mins 0.000000000 secs # Interval - Timestamp => error -# statement error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to +# statement error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types # TODO: This query should raise error # query P # SELECT i - ts1 from FOO; diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 602448f1a2ff..6df4dc5e4bfc 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -17,6 +17,7 @@ //! Coercion rules for matching argument types for binary operators +use arrow::array::{new_empty_array, Array}; use arrow::compute::can_cast_types; use arrow::datatypes::{ DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, @@ -127,8 +128,23 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result Operator::Multiply | Operator::Divide| Operator::Modulo => { - // TODO: this logic would be easier to follow if the functions were inlined - if let Some(ret) = mathematics_temporal_result_type(lhs, rhs, op) { + let get_result = |lhs, rhs| { + use arrow::compute::kernels::numeric::*; + let l = new_empty_array(lhs); + let r = new_empty_array(rhs); + + let result = match op { + Operator::Plus => add_wrapping(&l, &r), + Operator::Minus => sub_wrapping(&l, &r), + Operator::Multiply => mul_wrapping(&l, &r), + Operator::Divide => div(&l, &r), + Operator::Modulo => rem(&l, &r), + _ => unreachable!(), + }; + result.map(|x| x.data_type().clone()) + }; + + if let Ok(ret) = get_result(lhs, rhs) { // Temporal arithmetic, e.g. Date32 + Interval Ok(Signature{ lhs: lhs.clone(), @@ -138,9 +154,9 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result } else if let Some(coerced) = temporal_coercion(lhs, rhs) { // Temporal arithmetic by first coercing to a common time representation // e.g. Date32 - Timestamp - let ret = mathematics_temporal_result_type(&coerced, &coerced, op).ok_or_else(|| { + let ret = get_result(&coerced, &coerced).map_err(|e| { DataFusionError::Plan(format!( - "Cannot get result type for temporal operation {coerced} {op} {coerced}" + "Cannot get result type for temporal operation {coerced} {op} {coerced}: {e}" )) })?; Ok(Signature{ @@ -150,9 +166,9 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result }) } else if let Some((lhs, rhs)) = math_decimal_coercion(lhs, rhs) { // Decimal arithmetic, e.g. Decimal(10, 2) + Decimal(10, 0) - let ret = decimal_op_mathematics_type(op, &lhs, &rhs).ok_or_else(|| { + let ret = get_result(&lhs, &rhs).map_err(|e| { DataFusionError::Plan(format!( - "Cannot get result type for decimal operation {lhs} {op} {rhs}" + "Cannot get result type for decimal operation {lhs} {op} {rhs}: {e}" )) })?; Ok(Signature{ @@ -172,46 +188,6 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result } } -/// Returns the result type of applying mathematics operations such as -/// `+` to arguments of `lhs_type` and `rhs_type`. -fn mathematics_temporal_result_type( - lhs_type: &DataType, - rhs_type: &DataType, - op: &Operator, -) -> Option { - use arrow::datatypes::DataType::*; - use arrow::datatypes::IntervalUnit::*; - use arrow::datatypes::TimeUnit::*; - - match (lhs_type, rhs_type) { - // datetime +/- interval - (Timestamp(_, _) | Date32 | Date64, Interval(_)) => Some(lhs_type.clone()), - (Interval(_), Timestamp(_, _) | Date32 | Date64) => { - if matches!(op, Operator::Plus) { - Some(rhs_type.clone()) - } else { - None - } - } - // interval +/- - (Interval(l), Interval(h)) if l == h => Some(lhs_type.clone()), - (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)), - // timestamp - timestamp - (Timestamp(Second, _), Timestamp(Second, _)) - | (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => { - Some(Interval(DayTime)) - } - (Timestamp(Microsecond, _), Timestamp(Microsecond, _)) - | (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => { - Some(Interval(MonthDayNano)) - } - // date - date - (Date32, Date32) => Some(Interval(DayTime)), - (Date64, Date64) => Some(Interval(MonthDayNano)), - _ => None, - } -} - /// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types pub fn get_result_type( lhs: &DataType, @@ -583,107 +559,6 @@ fn create_decimal256_type(precision: u8, scale: i8) -> DataType { ) } -/// Returns the coerced type of applying mathematics operations on decimal types. -/// Two sides of the mathematics operation will be coerced to the same type. Note -/// that we don't coerce the decimal operands in analysis phase, but do it in the -/// execution phase because this is not idempotent. -pub fn coercion_decimal_mathematics_type( - mathematics_op: &Operator, - left_decimal_type: &DataType, - right_decimal_type: &DataType, -) -> Option { - // TODO: Move this logic into kernel implementations - use arrow::datatypes::DataType::*; - match (left_decimal_type, right_decimal_type) { - // The promotion rule from spark - // https://github.com/apache/spark/blob/c20af535803a7250fef047c2bf0fe30be242369d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala#L35 - (Decimal128(_, _), Decimal128(_, _)) => match mathematics_op { - Operator::Plus | Operator::Minus => decimal_op_mathematics_type( - mathematics_op, - left_decimal_type, - right_decimal_type, - ), - Operator::Divide | Operator::Modulo => { - get_wider_decimal_type(left_decimal_type, right_decimal_type) - } - _ => None, - }, - _ => None, - } -} - -/// Returns the output type of applying mathematics operations on two decimal types. -/// The rule is from spark. Note that this is different to the coerced type applied -/// to two sides of the arithmetic operation. -pub fn decimal_op_mathematics_type( - mathematics_op: &Operator, - left_decimal_type: &DataType, - right_decimal_type: &DataType, -) -> Option { - use arrow::datatypes::DataType::*; - match (left_decimal_type, right_decimal_type) { - // The coercion rule from spark - // https://github.com/apache/spark/blob/c20af535803a7250fef047c2bf0fe30be242369d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala#L35 - (Decimal128(p1, s1), Decimal128(p2, s2)) => { - match mathematics_op { - Operator::Plus | Operator::Minus => { - // max(s1, s2) - let result_scale = *s1.max(s2); - // max(s1, s2) + max(p1-s1, p2-s2) + 1 - let result_precision = - result_scale + (*p1 as i8 - *s1).max(*p2 as i8 - *s2) + 1; - Some(create_decimal_type(result_precision as u8, result_scale)) - } - Operator::Multiply => { - // s1 + s2 - let result_scale = *s1 + *s2; - // p1 + p2 + 1 - let result_precision = *p1 + *p2 + 1; - Some(create_decimal_type(result_precision, result_scale)) - } - Operator::Divide => { - // max(6, s1 + p2 + 1) - let result_scale = 6.max(*s1 + *p2 as i8 + 1); - // p1 - s1 + s2 + max(6, s1 + p2 + 1) - let result_precision = result_scale + *p1 as i8 - *s1 + *s2; - Some(create_decimal_type(result_precision as u8, result_scale)) - } - Operator::Modulo => { - // max(s1, s2) - let result_scale = *s1.max(s2); - // min(p1-s1, p2-s2) + max(s1, s2) - let result_precision = - result_scale + (*p1 as i8 - *s1).min(*p2 as i8 - *s2); - Some(create_decimal_type(result_precision as u8, result_scale)) - } - _ => None, - } - } - (Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => { - decimal_op_mathematics_type( - mathematics_op, - lhs_value_type.as_ref(), - rhs_value_type.as_ref(), - ) - } - (Dictionary(key_type, value_type), _) => { - let value_type = decimal_op_mathematics_type( - mathematics_op, - value_type.as_ref(), - right_decimal_type, - ); - value_type - .map(|value_type| Dictionary(key_type.clone(), Box::new(value_type))) - } - (_, Dictionary(_, value_type)) => decimal_op_mathematics_type( - mathematics_op, - left_decimal_type, - value_type.as_ref(), - ), - _ => None, - } -} - /// Determine if at least of one of lhs and rhs is numeric, and the other must be NULL or numeric fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) -> bool { use arrow::datatypes::DataType::*; @@ -904,8 +779,8 @@ fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { mod tests { use arrow::datatypes::DataType; - use datafusion_common::DataFusionError; use datafusion_common::Result; + use datafusion_common::{assert_contains, DataFusionError}; use crate::Operator; @@ -1003,53 +878,6 @@ mod tests { coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(), DataType::Decimal128(30, 15) ); - - let op = Operator::Plus; - let left_decimal_type = DataType::Decimal128(10, 3); - let right_decimal_type = DataType::Decimal128(20, 4); - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(21, 4), result.unwrap()); - let op = Operator::Minus; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(21, 4), result.unwrap()); - let op = Operator::Multiply; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(None, result); - let result = - decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); - assert_eq!(DataType::Decimal128(31, 7), result.unwrap()); - let op = Operator::Divide; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); - let result = - decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); - assert_eq!(DataType::Decimal128(35, 24), result.unwrap()); - let op = Operator::Modulo; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); - let result = - decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); - assert_eq!(DataType::Decimal128(11, 4), result.unwrap()); } #[test] @@ -1112,11 +940,14 @@ mod tests { assert_eq!(lhs.to_string(), "Timestamp(Millisecond, None)"); assert_eq!(rhs.to_string(), "Timestamp(Millisecond, None)"); - let (lhs, rhs) = - get_input_types(&DataType::Date32, &Operator::Plus, &DataType::Date64) - .unwrap(); - assert_eq!(lhs.to_string(), "Date64"); - assert_eq!(rhs.to_string(), "Date64"); + let err = get_input_types(&DataType::Date32, &Operator::Plus, &DataType::Date64) + .unwrap_err() + .to_string(); + + assert_contains!( + &err, + "Cannot get result type for temporal operation Date64 + Date64" + ); Ok(()) } @@ -1315,26 +1146,13 @@ mod tests { fn test_math_decimal_coercion_rule( lhs_type: DataType, rhs_type: DataType, - mathematics_op: Operator, expected_lhs_type: DataType, expected_rhs_type: DataType, - expected_coerced_type: Option, - expected_output_type: DataType, ) { // The coerced types for lhs and rhs, if any of them is not decimal let (lhs_type, rhs_type) = math_decimal_coercion(&lhs_type, &rhs_type).unwrap(); assert_eq!(lhs_type, expected_lhs_type); assert_eq!(rhs_type, expected_rhs_type); - - // The coerced type of decimal math expression, applied during expression evaluation - let coerced_type = - coercion_decimal_mathematics_type(&mathematics_op, &lhs_type, &rhs_type); - assert_eq!(coerced_type, expected_coerced_type); - - // The output type of decimal math expression - let output_type = - decimal_op_mathematics_type(&mathematics_op, &lhs_type, &rhs_type).unwrap(); - assert_eq!(output_type, expected_output_type); } #[test] @@ -1342,61 +1160,43 @@ mod tests { test_math_decimal_coercion_rule( DataType::Decimal128(10, 2), DataType::Decimal128(10, 2), - Operator::Plus, DataType::Decimal128(10, 2), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(11, 2)), - DataType::Decimal128(11, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Plus, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(13, 2)), - DataType::Decimal128(13, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Minus, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(13, 2)), - DataType::Decimal128(13, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Multiply, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - None, - DataType::Decimal128(21, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Divide, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(12, 2)), - DataType::Decimal128(23, 11), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Modulo, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(12, 2)), - DataType::Decimal128(10, 2), ); Ok(()) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 34633f6e1dc3..3e2a3d418398 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -23,13 +23,7 @@ use std::hash::{Hash, Hasher}; use std::{any::Any, sync::Arc}; use arrow::array::*; -use arrow::compute::kernels::arithmetic::{ - add_dyn, add_scalar_dyn as add_dyn_scalar, divide_dyn_checked, - divide_scalar_dyn as divide_dyn_scalar, modulus_dyn, - modulus_scalar_dyn as modulus_dyn_scalar, multiply_dyn, - multiply_scalar_dyn as multiply_dyn_scalar, subtract_dyn, - subtract_scalar_dyn as subtract_dyn_scalar, -}; +use arrow::compute::cast; use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene}; use arrow::compute::kernels::comparison::regexp_is_match_utf8; use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar; @@ -49,50 +43,39 @@ use arrow::compute::kernels::comparison::{ eq_dyn_utf8_scalar, gt_dyn_utf8_scalar, gt_eq_dyn_utf8_scalar, lt_dyn_utf8_scalar, lt_eq_dyn_utf8_scalar, neq_dyn_utf8_scalar, }; -use arrow::compute::kernels::concat_elements::concat_elements_utf8; -use arrow::compute::{cast, CastOptions}; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn}; +use arrow::compute::kernels::concat_elements::concat_elements_utf8; + use kernels::{ bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar, bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn, bitwise_shift_right_dyn_scalar, bitwise_xor_dyn, bitwise_xor_dyn_scalar, }; use kernels_arrow::{ - add_decimal_dyn_scalar, add_dyn_decimal, add_dyn_temporal, divide_decimal_dyn_scalar, - divide_dyn_checked_decimal, is_distinct_from, is_distinct_from_binary, - is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_f32, - is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8, - is_not_distinct_from, is_not_distinct_from_binary, is_not_distinct_from_bool, - is_not_distinct_from_decimal, is_not_distinct_from_f32, is_not_distinct_from_f64, - is_not_distinct_from_null, is_not_distinct_from_utf8, modulus_decimal_dyn_scalar, - modulus_dyn_decimal, multiply_decimal_dyn_scalar, multiply_dyn_decimal, - subtract_decimal_dyn_scalar, subtract_dyn_decimal, subtract_dyn_temporal, + is_distinct_from, is_distinct_from_binary, is_distinct_from_bool, + is_distinct_from_decimal, is_distinct_from_f32, is_distinct_from_f64, + is_distinct_from_null, is_distinct_from_utf8, is_not_distinct_from, + is_not_distinct_from_binary, is_not_distinct_from_bool, is_not_distinct_from_decimal, + is_not_distinct_from_f32, is_not_distinct_from_f64, is_not_distinct_from_null, + is_not_distinct_from_utf8, }; -use self::kernels_arrow::{ - add_dyn_temporal_left_scalar, add_dyn_temporal_right_scalar, - subtract_dyn_temporal_left_scalar, subtract_dyn_temporal_right_scalar, -}; use crate::array_expressions::{ array_append, array_concat, array_has_all, array_prepend, }; -use crate::expressions::cast_column; use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison}; use crate::intervals::{apply_operator, Interval}; use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; +use arrow_array::{Datum, Scalar}; use datafusion_common::cast::as_boolean_array; -use datafusion_common::plan_err; use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::type_coercion::binary::{ - coercion_decimal_mathematics_type, get_result_type, -}; -use datafusion_expr::type_coercion::{is_decimal, is_timestamp, is_utf8_or_large_utf8}; +use datafusion_expr::type_coercion::binary::get_result_type; use datafusion_expr::{ColumnarValue, Operator}; /// Binary expression @@ -369,47 +352,6 @@ macro_rules! compute_op_dyn_scalar { }}; } -/// Invoke a dyn compute kernel on a data array and a scalar value -/// LEFT is Primitive or Dictionary array of numeric values, RIGHT is scalar value -/// OP_TYPE is the return type of scalar function -/// SCALAR_TYPE is the type of the scalar value -/// Different to `compute_op_dyn_scalar`, this calls the `_dyn_scalar` functions that -/// take a `SCALAR_TYPE`. -macro_rules! compute_primitive_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr, $SCALAR_TYPE:ident) => {{ - // generate the scalar function name, such as lt_dyn_scalar, from the $OP parameter - // (which could have a value of lt_dyn) and the suffix _scalar - if let Some(value) = $RIGHT { - Ok(Arc::new(paste::expr! {[<$OP _dyn_scalar>]::<$SCALAR_TYPE>}( - $LEFT, - value, - )?)) - } else { - // when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE - Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len()))) - } - }}; -} - -/// Invoke a dyn decimal compute kernel on a data array and a scalar value -/// LEFT is Decimal or Dictionary array of decimal values, RIGHT is scalar value -/// OP_TYPE is the return type of scalar function -macro_rules! compute_primitive_decimal_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr, $RET_TYPE:expr) => {{ - // generate the scalar function name, such as add_decimal_dyn_scalar, - // from the $OP parameter (which could have a value of add) and the - // suffix _decimal_dyn_scalar - if let Some(value) = $RIGHT { - Ok(paste::expr! {[<$OP _decimal_dyn_scalar>]}( - $LEFT, value, $RET_TYPE, - )?) - } else { - // when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE - Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len()))) - } - }}; -} - /// Invoke a compute kernel on array(s) macro_rules! compute_op { // invoke binary operator @@ -447,58 +389,6 @@ macro_rules! binary_string_array_op { }}; } -/// Invoke a compute kernel on a pair of arrays -/// The binary_primitive_array_op macro only evaluates for primitive types -/// like integers and floats. -macro_rules! binary_primitive_array_op_dyn { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $RET_TYPE:expr) => {{ - match $LEFT.data_type() { - DataType::Decimal128(_, _) => { - Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT, $RET_TYPE)?) - } - DataType::Dictionary(_, value_type) - if matches!(value_type.as_ref(), &DataType::Decimal128(_, _)) => - { - Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT, $RET_TYPE)?) - } - _ => Ok(Arc::new( - $OP(&$LEFT, &$RIGHT).map_err(|err| DataFusionError::ArrowError(err))?, - )), - } - }}; -} - -/// Invoke a compute dyn kernel on an array and a scalar -/// The binary_primitive_array_op_dyn_scalar macro only evaluates for primitive -/// types like integers and floats. -macro_rules! binary_primitive_array_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $RET_TYPE:expr) => {{ - // unwrap underlying (non dictionary) value - let right = unwrap_dict_value($RIGHT); - let op_type = $LEFT.data_type(); - - let result: Result> = match right { - ScalarValue::Decimal128(v, _, _) => compute_primitive_decimal_op_dyn_scalar!($LEFT, v, $OP, op_type, $RET_TYPE), - ScalarValue::Int8(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int8Type), - ScalarValue::Int16(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int16Type), - ScalarValue::Int32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int32Type), - ScalarValue::Int64(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int64Type), - ScalarValue::UInt8(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt8Type), - ScalarValue::UInt16(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt16Type), - ScalarValue::UInt32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt32Type), - ScalarValue::UInt64(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt64Type), - ScalarValue::Float32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Float32Type), - ScalarValue::Float64(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Float64Type), - other => Err(DataFusionError::Internal(format!( - "Data type {:?} not supported for scalar operation '{}' on dyn array", - other, stringify!($OP))) - ) - }; - - Some(result) - }} -} - /// The binary_array_op macro includes types that extend beyond the primitive, /// such as Utf8 strings. #[macro_export] @@ -691,21 +581,29 @@ impl PhysicalExpr for BinaryExpr { let schema = batch.schema(); let input_schema = schema.as_ref(); - // Coerce decimal types to the same scale and precision - let coerced_type = coercion_decimal_mathematics_type( - &self.op, - &left_data_type, - &right_data_type, - ); - let (left_value, right_value) = if let Some(coerced_type) = coerced_type { - let options = CastOptions::default(); - let left_value = cast_column(&left_value, &coerced_type, Some(&options))?; - let right_value = cast_column(&right_value, &coerced_type, Some(&options))?; - (left_value, right_value) - } else { - // No need to coerce if it is not decimal or not math operation - (left_value, right_value) - }; + if self.op.is_numerical_operators() { + return match (&left_value, &right_value) { + (ColumnarValue::Array(left), ColumnarValue::Array(right)) => { + self.evaluate_datum(&left.as_ref(), &right.as_ref()) + } + (ColumnarValue::Scalar(left), ColumnarValue::Array(right)) => { + let left = left.to_array(); + self.evaluate_datum(&Scalar::new(left.as_ref()), &right.as_ref()) + } + (ColumnarValue::Array(left), ColumnarValue::Scalar(right)) => { + let right = right.to_array(); + self.evaluate_datum(&left.as_ref(), &Scalar::new(right.as_ref())) + } + (ColumnarValue::Scalar(left), ColumnarValue::Scalar(right)) => { + let left = left.to_array(); + let right = right.to_array(); + self.evaluate_datum( + &Scalar::new(left.as_ref()), + &Scalar::new(right.as_ref()), + ) + } + }; + } let result_type = self.data_type(input_schema)?; @@ -713,10 +611,9 @@ impl PhysicalExpr for BinaryExpr { let scalar_result = match (&left_value, &right_value) { (ColumnarValue::Array(array), ColumnarValue::Scalar(scalar)) => { // if left is array and right is literal - use scalar operations - self.evaluate_array_scalar(array, scalar.clone(), &result_type)? - .map(|r| { - r.and_then(|a| to_result_type_array(&self.op, a, &result_type)) - }) + self.evaluate_array_scalar(array, scalar.clone())?.map(|r| { + r.and_then(|a| to_result_type_array(&self.op, a, &result_type)) + }) } (ColumnarValue::Scalar(scalar), ColumnarValue::Array(array)) => { // if right is literal and left is array - reverse operator and parameters @@ -734,14 +631,8 @@ impl PhysicalExpr for BinaryExpr { left_value.into_array(batch.num_rows()), right_value.into_array(batch.num_rows()), ); - self.evaluate_with_resolved_args( - left, - &left_data_type, - right, - &right_data_type, - &result_type, - ) - .map(|a| ColumnarValue::Array(a)) + self.evaluate_with_resolved_args(left, &left_data_type, right, &right_data_type) + .map(|a| ColumnarValue::Array(a)) } fn children(&self) -> Vec> { @@ -915,13 +806,35 @@ fn to_result_type_array( } impl BinaryExpr { + /// Evaluate the expression using [`Datum`] + fn evaluate_datum( + &self, + left: &dyn Datum, + right: &dyn Datum, + ) -> Result { + use arrow::compute::kernels::numeric::*; + let array = match self.op { + Operator::Plus => add_wrapping(left, right)?, + Operator::Minus => sub_wrapping(left, right)?, + Operator::Multiply => mul_wrapping(left, right)?, + Operator::Divide => div(left, right)?, + Operator::Modulo => rem(left, right)?, + _ => unreachable!(), + }; + + if left.get().1 && right.get().1 { + let scalar = ScalarValue::try_from_array(array.as_ref(), 0)?; + return Ok(ColumnarValue::Scalar(scalar)); + } + Ok(ColumnarValue::Array(array)) + } + /// Evaluate the expression of the left input is an array and /// right is literal - use scalar operations fn evaluate_array_scalar( &self, array: &dyn Array, scalar: ScalarValue, - result_type: &DataType, ) -> Result>> { use Operator::*; let bool_type = &DataType::Boolean; @@ -932,26 +845,8 @@ impl BinaryExpr { GtEq => binary_array_op_dyn_scalar!(array, scalar, gt_eq, bool_type), Eq => binary_array_op_dyn_scalar!(array, scalar, eq, bool_type), NotEq => binary_array_op_dyn_scalar!(array, scalar, neq, bool_type), - Plus => { - binary_primitive_array_op_dyn_scalar!(array, scalar, add, result_type) - } - Minus => binary_primitive_array_op_dyn_scalar!( - array, - scalar, - subtract, - result_type - ), - Multiply => binary_primitive_array_op_dyn_scalar!( - array, - scalar, - multiply, - result_type - ), - Divide => { - binary_primitive_array_op_dyn_scalar!(array, scalar, divide, result_type) - } - Modulo => { - binary_primitive_array_op_dyn_scalar!(array, scalar, modulus, result_type) + Plus | Minus | Multiply | Divide | Modulo => { + unreachable!() } RegexMatch => binary_string_array_flag_op_scalar!( array, @@ -1021,7 +916,6 @@ impl BinaryExpr { left_data_type: &DataType, right: Arc, right_data_type: &DataType, - result_type: &DataType, ) -> Result { use Operator::*; match &self.op { @@ -1042,24 +936,7 @@ impl BinaryExpr { } } IsNotDistinctFrom => binary_array_op!(left, right, is_not_distinct_from), - Plus => binary_primitive_array_op_dyn!(left, right, add_dyn, result_type), - Minus => { - binary_primitive_array_op_dyn!(left, right, subtract_dyn, result_type) - } - Multiply => { - binary_primitive_array_op_dyn!(left, right, multiply_dyn, result_type) - } - Divide => { - binary_primitive_array_op_dyn!( - left, - right, - divide_dyn_checked, - result_type - ) - } - Modulo => { - binary_primitive_array_op_dyn!(left, right, modulus_dyn, result_type) - } + Plus | Minus | Multiply | Divide | Modulo => unreachable!(), And => { if left_data_type == &DataType::Boolean { boolean_op!(&left, &right, and_kleene) @@ -1118,56 +995,11 @@ pub fn binary( lhs: Arc, op: Operator, rhs: Arc, - input_schema: &Schema, + _input_schema: &Schema, ) -> Result> { - let lhs_type = &lhs.data_type(input_schema)?; - let rhs_type = &rhs.data_type(input_schema)?; - if (is_utf8_or_large_utf8(lhs_type) && is_timestamp(rhs_type)) - || (is_timestamp(lhs_type) && is_utf8_or_large_utf8(rhs_type)) - { - return plan_err!( - "The type of {lhs_type} {op:?} {rhs_type} of binary physical should be same" - ); - } - if !lhs_type.eq(rhs_type) && (!is_decimal(lhs_type) && !is_decimal(rhs_type)) { - return Err(DataFusionError::Internal(format!( - "The type of {lhs_type} {op:?} {rhs_type} of binary physical should be same" - ))); - } Ok(Arc::new(BinaryExpr::new(lhs, op, rhs))) } -pub fn resolve_temporal_op( - lhs: &ArrayRef, - sign: i32, - rhs: &ArrayRef, -) -> Result { - match sign { - 1 => add_dyn_temporal(lhs, rhs), - -1 => subtract_dyn_temporal(lhs, rhs), - other => Err(DataFusionError::Internal(format!( - "Undefined operation for temporal types {other}" - ))), - } -} - -pub fn resolve_temporal_op_scalar( - arr: &ArrayRef, - sign: i32, - scalar: &ScalarValue, - swap: bool, -) -> Result { - match (sign, swap) { - (1, false) => add_dyn_temporal_right_scalar(arr, scalar), - (1, true) => add_dyn_temporal_left_scalar(scalar, arr), - (-1, false) => subtract_dyn_temporal_right_scalar(arr, scalar), - (-1, true) => subtract_dyn_temporal_left_scalar(scalar, arr), - _ => Err(DataFusionError::Internal( - "Undefined operation for temporal types".to_string(), - )), - } -} - #[cfg(test)] mod tests { use super::*; @@ -2434,14 +2266,14 @@ mod tests { Operator::Divide, create_decimal_array( &[ - Some(99193548387), // 0.99193548387 + Some(9919), // 0.9919 None, None, - Some(100813008130), // 1.0081300813 - Some(100000000000), // 1.0 + Some(10081), // 1.0081 + Some(10000), // 1.0 ], - 21, - 11, + 14, + 4, ), )?; @@ -2520,15 +2352,9 @@ mod tests { let a = DictionaryArray::try_new(keys, decimal_array)?; let decimal_array = Arc::new(create_decimal_array( - &[ - Some(6150000000000), - Some(6100000000000), - None, - Some(6200000000000), - Some(6150000000000), - ], - 21, - 11, + &[Some(615000), Some(610000), None, Some(620000), Some(615000)], + 14, + 4, )); apply_arithmetic_scalar( @@ -3964,14 +3790,9 @@ mod tests { Field::new("b", DataType::Decimal128(10, 2), true), ])); let expect = Arc::new(create_decimal_array( - &[ - Some(10000000000000), - None, - Some(10081967213114), - Some(10000000000000), - ], - 23, - 11, + &[Some(1000000), None, Some(1008196), Some(1000000)], + 16, + 4, )) as ArrayRef; apply_decimal_arithmetic_op( &schema, diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs index 9c6645c30371..26ff7c369e5d 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs @@ -18,41 +18,8 @@ //! This module contains computation kernels that are eventually //! destined for arrow-rs but are in datafusion until they are ported. -use arrow::compute::{ - add_dyn, add_scalar_dyn, divide_dyn_checked, divide_scalar_dyn, modulus_dyn, - modulus_scalar_dyn, multiply_fixed_point, multiply_scalar_checked_dyn, - multiply_scalar_dyn, subtract_dyn, subtract_scalar_dyn, try_unary, -}; -use arrow::datatypes::{Date32Type, Date64Type, Decimal128Type}; use arrow::{array::*, datatypes::ArrowNumericType}; -use arrow_array::ArrowNativeTypeOp; -use arrow_schema::{DataType, IntervalUnit}; -use chrono::{Days, Duration, Months, NaiveDate, NaiveDateTime}; -use datafusion_common::cast::{as_date32_array, as_date64_array, as_decimal128_array}; -use datafusion_common::scalar::{date32_op, date64_op}; -use datafusion_common::{DataFusionError, Result, ScalarValue}; -use std::ops::Add; -use std::sync::Arc; - -use arrow::compute::unary; -use arrow::datatypes::*; - -use arrow_array::temporal_conversions::{MILLISECONDS_IN_DAY, NANOSECONDS_IN_DAY}; -use datafusion_common::delta::shift_months; -use datafusion_common::scalar::{ - calculate_naives, microseconds_add, microseconds_sub, milliseconds_add, - milliseconds_sub, nanoseconds_add, nanoseconds_sub, op_dt, op_dt_mdn, op_mdn, op_ym, - op_ym_dt, op_ym_mdn, parse_timezones, seconds_add, MILLISECOND_MODE, NANOSECOND_MODE, -}; - -use arrow::datatypes::TimeUnit; - -use datafusion_common::cast::{ - as_interval_dt_array, as_interval_mdn_array, as_interval_ym_array, - as_timestamp_microsecond_array, as_timestamp_millisecond_array, - as_timestamp_nanosecond_array, as_timestamp_second_array, -}; -use datafusion_common::scalar::*; +use datafusion_common::Result; // Simple (low performance) kernels until optimized kernels are added to arrow // See https://github.com/apache/arrow-rs/issues/960 @@ -303,1748 +270,9 @@ pub(crate) fn is_not_distinct_from_decimal( .collect()) } -pub(crate) fn add_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = add_dyn(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn add_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let array = add_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn add_dyn_temporal(left: &ArrayRef, right: &ArrayRef) -> Result { - match (left.data_type(), right.data_type()) { - (DataType::Timestamp(..), DataType::Timestamp(..)) => ts_array_op(left, right), - (DataType::Interval(..), DataType::Interval(..)) => { - interval_array_op(left, right, 1) - } - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_interval_array_op(left, 1, right) - } - (DataType::Interval(..), DataType::Timestamp(..)) => { - ts_interval_array_op(right, 1, left) - } - _ => { - // fall back to kernels in arrow-rs - Ok(add_dyn(left, right)?) - } - } -} - -pub(crate) fn add_dyn_temporal_right_scalar( - left: &ArrayRef, - right: &ScalarValue, -) -> Result { - match (left.data_type(), right.get_datatype()) { - // Date32 + Interval - (DataType::Date32, DataType::Interval(..)) => { - let left = as_date32_array(&left)?; - let ret = Arc::new(try_unary::(left, |days| { - Ok(date32_op(days, right, 1)?) - })?) as _; - Ok(ret) - } - // Date64 + Interval - (DataType::Date64, DataType::Interval(..)) => { - let left = as_date64_array(&left)?; - let ret = Arc::new(try_unary::(left, |ms| { - Ok(date64_op(ms, right, 1)?) - })?) as _; - Ok(ret) - } - // Interval + Interval - (DataType::Interval(..), DataType::Interval(..)) => { - interval_op_scalar_interval(left, 1, right) - } - // Timestamp + Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_op_scalar_interval(left, 1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(add_dyn(left, &right.to_array())?) - } - } -} - -pub(crate) fn add_dyn_temporal_left_scalar( - left: &ScalarValue, - right: &ArrayRef, -) -> Result { - match (left.get_datatype(), right.data_type()) { - // Date32 + Interval - (DataType::Date32, DataType::Interval(..)) => { - if let ScalarValue::Date32(Some(left)) = left { - scalar_date32_array_interval_op( - *left, - right, - NaiveDate::checked_add_days, - NaiveDate::checked_add_months, - ) - } else { - Err(DataFusionError::Internal( - "Date32 value is None".to_string(), - )) - } - } - // Date64 + Interval - (DataType::Date64, DataType::Interval(..)) => { - if let ScalarValue::Date64(Some(left)) = left { - scalar_date64_array_interval_op( - *left, - right, - NaiveDate::checked_add_days, - NaiveDate::checked_add_months, - ) - } else { - Err(DataFusionError::Internal( - "Date64 value is None".to_string(), - )) - } - } - // Interval + Interval - (DataType::Interval(..), DataType::Interval(..)) => { - scalar_interval_op_interval(left, 1, right) - } - // Timestamp + Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - scalar_ts_op_interval(left, 1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(add_dyn(&left.to_array(), right)?) - } - } -} - -pub(crate) fn subtract_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let array = subtract_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn subtract_dyn_temporal( - left: &ArrayRef, - right: &ArrayRef, -) -> Result { - match (left.data_type(), right.data_type()) { - (DataType::Timestamp(..), DataType::Timestamp(..)) => ts_array_op(left, right), - (DataType::Interval(..), DataType::Interval(..)) => { - interval_array_op(left, right, -1) - } - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_interval_array_op(left, -1, right) - } - (DataType::Interval(..), DataType::Timestamp(..)) => { - ts_interval_array_op(right, -1, left) - } - _ => { - // fall back to kernels in arrow-rs - Ok(subtract_dyn(left, right)?) - } - } -} - -pub(crate) fn subtract_dyn_temporal_right_scalar( - left: &ArrayRef, - right: &ScalarValue, -) -> Result { - match (left.data_type(), right.get_datatype()) { - // Date32 - Interval - (DataType::Date32, DataType::Interval(..)) => { - let left = as_date32_array(&left)?; - let ret = Arc::new(try_unary::(left, |days| { - Ok(date32_op(days, right, -1)?) - })?) as _; - Ok(ret) - } - // Date64 - Interval - (DataType::Date64, DataType::Interval(..)) => { - let left = as_date64_array(&left)?; - let ret = Arc::new(try_unary::(left, |ms| { - Ok(date64_op(ms, right, -1)?) - })?) as _; - Ok(ret) - } - // Timestamp - Timestamp - (DataType::Timestamp(..), DataType::Timestamp(..)) => { - ts_sub_scalar_ts(left, right) - } - // Interval - Interval - (DataType::Interval(..), DataType::Interval(..)) => { - interval_op_scalar_interval(left, -1, right) - } - // Timestamp - Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_op_scalar_interval(left, -1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(subtract_dyn(left, &right.to_array())?) - } - } -} - -pub(crate) fn subtract_dyn_temporal_left_scalar( - left: &ScalarValue, - right: &ArrayRef, -) -> Result { - match (left.get_datatype(), right.data_type()) { - // Date32 - Interval - (DataType::Date32, DataType::Interval(..)) => { - if let ScalarValue::Date32(Some(left)) = left { - scalar_date32_array_interval_op( - *left, - right, - NaiveDate::checked_sub_days, - NaiveDate::checked_sub_months, - ) - } else { - Err(DataFusionError::Internal( - "Date32 value is None".to_string(), - )) - } - } - // Date64 - Interval - (DataType::Date64, DataType::Interval(..)) => { - if let ScalarValue::Date64(Some(left)) = left { - scalar_date64_array_interval_op( - *left, - right, - NaiveDate::checked_sub_days, - NaiveDate::checked_sub_months, - ) - } else { - Err(DataFusionError::Internal( - "Date64 value is None".to_string(), - )) - } - } - // Timestamp - Timestamp - (DataType::Timestamp(..), DataType::Timestamp(..)) => { - scalar_ts_sub_ts(left, right) - } - // Interval - Interval - (DataType::Interval(..), DataType::Interval(..)) => { - scalar_interval_op_interval(left, -1, right) - } - // Timestamp - Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - scalar_ts_op_interval(left, -1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(subtract_dyn(&left.to_array(), right)?) - } - } -} - -fn scalar_date32_array_interval_op( - left: i32, - right: &ArrayRef, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1) - .ok_or_else(|| DataFusionError::Execution("Invalid Date entered".to_string()))?; - let prior = epoch.add(Duration::days(left as i64)); - match right.data_type() { - DataType::Interval(IntervalUnit::YearMonth) => { - date32_interval_ym_op(right, &epoch, &prior, month_op) - } - DataType::Interval(IntervalUnit::DayTime) => { - date32_interval_dt_op(right, &epoch, &prior, day_op) - } - DataType::Interval(IntervalUnit::MonthDayNano) => { - date32_interval_mdn_op(right, &epoch, &prior, day_op, month_op) - } - _ => Err(DataFusionError::Internal(format!( - "Expected type is an interval, but {} is found", - right.data_type() - ))), - } -} - -fn scalar_date64_array_interval_op( - left: i64, - right: &ArrayRef, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1) - .ok_or_else(|| DataFusionError::Execution("Invalid Date entered".to_string()))?; - let prior = epoch.add(Duration::milliseconds(left)); - match right.data_type() { - DataType::Interval(IntervalUnit::YearMonth) => { - date64_interval_ym_op(right, &epoch, &prior, month_op) - } - DataType::Interval(IntervalUnit::DayTime) => { - date64_interval_dt_op(right, &epoch, &prior, day_op) - } - DataType::Interval(IntervalUnit::MonthDayNano) => { - date64_interval_mdn_op(right, &epoch, &prior, day_op, month_op) - } - _ => Err(DataFusionError::Internal(format!( - "Expected type is an interval, but {} is found", - right.data_type() - ))), - } -} - -fn get_precision_scale(data_type: &DataType) -> Result<(u8, i8)> { - match data_type { - DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), - DataType::Dictionary(_, value_type) => match value_type.as_ref() { - DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), - _ => Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )), - }, - _ => Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )), - } -} - -fn decimal_array_with_precision_scale( - array: ArrayRef, - precision: u8, - scale: i8, -) -> Result { - let array = array.as_ref(); - let decimal_array = match array.data_type() { - DataType::Decimal128(_, _) => { - let array = as_decimal128_array(array)?; - Arc::new(array.clone().with_precision_and_scale(precision, scale)?) - as ArrayRef - } - _ => { - return Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )) - } - }; - Ok(decimal_array) -} - -pub(crate) fn multiply_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = multiply_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn divide_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let mul = 10_i128.pow(scale as u32); - let array = multiply_scalar_checked_dyn::(left, mul)?; - - let array = divide_scalar_dyn::(&array, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn subtract_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = subtract_dyn(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -/// Remove this once arrow-rs provides `multiply_fixed_point_dyn`. -/// -fn multiply_fixed_point_dyn( - left: &dyn Array, - right: &dyn Array, - required_scale: i8, -) -> Result { - match (left.data_type(), right.data_type()) { - (DataType::Decimal128(_, _), DataType::Decimal128(_, _)) => { - let left = left.as_primitive::(); - let right = right.as_primitive::(); - - Ok(multiply_fixed_point(left, right, required_scale) - .map(|a| Arc::new(a) as ArrayRef)?) - } - (_, _) => Err(DataFusionError::Internal(format!( - "Unsupported data type {}, {}", - left.data_type(), - right.data_type() - ))), - } -} - -pub(crate) fn multiply_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = multiply_fixed_point_dyn(left, right, scale)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn divide_dyn_checked_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let mul = 10_i128.pow(scale as u32); - let array = multiply_scalar_checked_dyn::(left, mul)?; - - // Restore to original precision and scale (metadata only) - let (org_precision, org_scale) = get_precision_scale(right.data_type())?; - let array = decimal_array_with_precision_scale(array, org_precision, org_scale)?; - let array = divide_dyn_checked(&array, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn modulus_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = modulus_dyn(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn modulus_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let array = modulus_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -macro_rules! sub_timestamp_macro { - ($array:expr, $rhs:expr, $caster:expr, $interval_type:ty, $opt_tz_lhs:expr, $multiplier:expr, - $opt_tz_rhs:expr, $unit_sub:expr, $naive_sub_fn:expr, $counter:expr) => {{ - let prim_array = $caster(&$array)?; - let ret: PrimitiveArray<$interval_type> = try_unary(prim_array, |lhs| { - let (parsed_lhs_tz, parsed_rhs_tz) = - (parse_timezones($opt_tz_lhs)?, parse_timezones($opt_tz_rhs)?); - let (naive_lhs, naive_rhs) = calculate_naives::<$unit_sub>( - lhs.mul_wrapping($multiplier), - parsed_lhs_tz, - $rhs.mul_wrapping($multiplier), - parsed_rhs_tz, - )?; - Ok($naive_sub_fn($counter(&naive_lhs), $counter(&naive_rhs))) - })?; - Arc::new(ret) as _ - }}; -} - -macro_rules! sub_timestamp_left_scalar_macro { - ($array:expr, $lhs:expr, $caster:expr, $interval_type:ty, $opt_tz_lhs:expr, $multiplier:expr, - $opt_tz_rhs:expr, $unit_sub:expr, $naive_sub_fn:expr, $counter:expr) => {{ - let prim_array = $caster(&$array)?; - let ret: PrimitiveArray<$interval_type> = try_unary(prim_array, |rhs| { - let (parsed_lhs_tz, parsed_rhs_tz) = - (parse_timezones($opt_tz_lhs)?, parse_timezones($opt_tz_rhs)?); - let (naive_lhs, naive_rhs) = calculate_naives::<$unit_sub>( - $lhs.mul_wrapping($multiplier), - parsed_lhs_tz, - rhs.mul_wrapping($multiplier), - parsed_rhs_tz, - )?; - Ok($naive_sub_fn($counter(&naive_lhs), $counter(&naive_rhs))) - })?; - Arc::new(ret) as _ - }}; -} - -macro_rules! op_timestamp_interval_macro { - ($array:expr, $as_timestamp:expr, $ts_type:ty, $fn_op:expr, $scalar:expr, $sign:expr, $tz:expr) => {{ - let array = $as_timestamp(&$array)?; - let ret: PrimitiveArray<$ts_type> = - try_unary::<$ts_type, _, $ts_type>(array, |ts_s| { - Ok($fn_op(ts_s, $scalar, $sign)?) - })?; - Arc::new(ret.with_timezone_opt($tz.clone())) as _ - }}; -} - -macro_rules! scalar_ts_op_interval_macro { - ($ts:ident, $tz:ident, $interval:ident, $sign:ident, - $caster1:expr, $type1:ty, $type2:ty, $op:expr, $back_caster:expr) => {{ - let interval = $caster1(&$interval)?; - let ret: PrimitiveArray<$type1> = - try_unary::<$type2, _, $type1>(interval, |e| { - let prior = $ts.ok_or_else(|| { - DataFusionError::Internal("Timestamp is out-of-range".to_string()) - })?; - Ok($back_caster(&$op(prior, e, $sign))) - })?; - Arc::new(ret.with_timezone_opt($tz.clone())) as _ - }}; -} - -macro_rules! op_interval_macro { - ($array:expr, $as_interval:expr, $interval_type:ty, $fn_op:expr, $scalar:expr, $sign:expr) => {{ - let array = $as_interval(&$array)?; - let ret: PrimitiveArray<$interval_type> = - unary(array, |lhs| $fn_op(lhs, *$scalar, $sign)); - Arc::new(ret) as _ - }}; -} - -macro_rules! op_interval_cross_macro { - ($array:expr, $as_interval:expr, $commute:expr, $fn_op:expr, $scalar:expr, $sign:expr, $t1:ty, $t2:ty) => {{ - let array = $as_interval(&$array)?; - let ret: PrimitiveArray = if $commute { - unary(array, |lhs| { - $fn_op(*$scalar as $t1, lhs as $t2, $sign, $commute) - }) - } else { - unary(array, |lhs| { - $fn_op(lhs as $t1, *$scalar as $t2, $sign, $commute) - }) - }; - Arc::new(ret) as _ - }}; -} - -macro_rules! ts_sub_op { - ($lhs:ident, $rhs:ident, $lhs_tz:ident, $rhs_tz:ident, $coef:expr, $caster:expr, $op:expr, $ts_unit:expr, $mode:expr, $type_out:ty) => {{ - let prim_array_lhs = $caster(&$lhs)?; - let prim_array_rhs = $caster(&$rhs)?; - let ret: PrimitiveArray<$type_out> = - arrow::compute::try_binary(prim_array_lhs, prim_array_rhs, |ts1, ts2| { - let (parsed_lhs_tz, parsed_rhs_tz) = ( - parse_timezones($lhs_tz.as_deref())?, - parse_timezones($rhs_tz.as_deref())?, - ); - let (naive_lhs, naive_rhs) = calculate_naives::<$mode>( - ts1.mul_wrapping($coef), - parsed_lhs_tz, - ts2.mul_wrapping($coef), - parsed_rhs_tz, - )?; - Ok($op($ts_unit(&naive_lhs), $ts_unit(&naive_rhs))) - })?; - Arc::new(ret) as _ - }}; -} - -macro_rules! interval_op { - ($lhs:ident, $rhs:ident, $caster:expr, $op:expr, $sign:ident, $type_in:ty) => {{ - let prim_array_lhs = $caster(&$lhs)?; - let prim_array_rhs = $caster(&$rhs)?; - Arc::new(arrow::compute::binary::<$type_in, $type_in, _, $type_in>( - prim_array_lhs, - prim_array_rhs, - |interval1, interval2| $op(interval1, interval2, $sign), - )?) as _ - }}; -} - -macro_rules! interval_cross_op { - ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $commute:ident, $type_in1:ty, $type_in2:ty) => {{ - let prim_array_lhs = $caster1(&$lhs)?; - let prim_array_rhs = $caster2(&$rhs)?; - Arc::new(arrow::compute::binary::< - $type_in1, - $type_in2, - _, - IntervalMonthDayNanoType, - >( - prim_array_lhs, - prim_array_rhs, - |interval1, interval2| $op(interval1, interval2, $sign, $commute), - )?) as _ - }}; -} - -macro_rules! ts_interval_op { - ($lhs:ident, $rhs:ident, $tz:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $type_in1:ty, $type_in2:ty) => {{ - let prim_array_lhs = $caster1(&$lhs)?; - let prim_array_rhs = $caster2(&$rhs)?; - let ret: PrimitiveArray<$type_in1> = arrow::compute::try_binary( - prim_array_lhs, - prim_array_rhs, - |ts, interval| Ok($op(ts, interval as i128, $sign)?), - )?; - Arc::new(ret.with_timezone_opt($tz.clone())) as _ - }}; -} - -/// This function handles timestamp - timestamp operations where the former is -/// an array and the latter is a scalar, resulting in an array. -pub fn ts_sub_scalar_ts(array: &ArrayRef, scalar: &ScalarValue) -> Result { - let ret = match (array.data_type(), scalar) { - ( - DataType::Timestamp(TimeUnit::Second, opt_tz_lhs), - ScalarValue::TimestampSecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_second_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - seconds_sub, - NaiveDateTime::timestamp - ) - } - ( - DataType::Timestamp(TimeUnit::Millisecond, opt_tz_lhs), - ScalarValue::TimestampMillisecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_millisecond_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - milliseconds_sub, - NaiveDateTime::timestamp_millis - ) - } - ( - DataType::Timestamp(TimeUnit::Microsecond, opt_tz_lhs), - ScalarValue::TimestampMicrosecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_microsecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - microseconds_sub, - NaiveDateTime::timestamp_micros - ) - } - ( - DataType::Timestamp(TimeUnit::Nanosecond, opt_tz_lhs), - ScalarValue::TimestampNanosecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_nanosecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - nanoseconds_sub, - NaiveDateTime::timestamp_nanos - ) - } - (_, _) => { - return Err(DataFusionError::Internal(format!( - "Invalid array - scalar types for Timestamp subtraction: {:?} - {:?}", - array.data_type(), - scalar.get_datatype() - ))); - } - }; - Ok(ret) -} - -/// This function handles timestamp - timestamp operations where the former is -/// a scalar and the latter is an array, resulting in an array. -pub fn scalar_ts_sub_ts(scalar: &ScalarValue, array: &ArrayRef) -> Result { - let ret = match (scalar, array.data_type()) { - ( - ScalarValue::TimestampSecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Second, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_second_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - seconds_sub, - NaiveDateTime::timestamp - ) - } - ( - ScalarValue::TimestampMillisecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Millisecond, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_millisecond_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - milliseconds_sub, - NaiveDateTime::timestamp_millis - ) - } - ( - ScalarValue::TimestampMicrosecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Microsecond, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_microsecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - microseconds_sub, - NaiveDateTime::timestamp_micros - ) - } - ( - ScalarValue::TimestampNanosecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Nanosecond, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_nanosecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - nanoseconds_sub, - NaiveDateTime::timestamp_nanos - ) - } - (_, _) => { - return Err(DataFusionError::Internal(format!( - "Invalid scalar - array types for Timestamp subtraction: {:?} - {:?}", - scalar.get_datatype(), - array.data_type() - ))); - } - }; - Ok(ret) -} - -/// This function handles timestamp +/- interval operations where the former is -/// an array and the latter is a scalar, resulting in an array. -pub fn ts_op_scalar_interval( - array: &ArrayRef, - sign: i32, - scalar: &ScalarValue, -) -> Result { - let ret = match array.data_type() { - DataType::Timestamp(TimeUnit::Second, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_second_array, - TimestampSecondType, - seconds_add, - scalar, - sign, - tz - ) - } - DataType::Timestamp(TimeUnit::Millisecond, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_millisecond_array, - TimestampMillisecondType, - milliseconds_add, - scalar, - sign, - tz - ) - } - DataType::Timestamp(TimeUnit::Microsecond, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_microsecond_array, - TimestampMicrosecondType, - microseconds_add, - scalar, - sign, - tz - ) - } - DataType::Timestamp(TimeUnit::Nanosecond, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_nanosecond_array, - TimestampNanosecondType, - nanoseconds_add, - scalar, - sign, - tz - ) - } - _ => Err(DataFusionError::Internal(format!( - "Invalid lhs type for Timestamp vs Interval operations: {}", - array.data_type() - )))?, - }; - Ok(ret) -} - -/// This function handles timestamp +/- interval operations where the former is -/// a scalar and the latter is an array, resulting in an array. -pub fn scalar_ts_op_interval( - scalar: &ScalarValue, - sign: i32, - array: &ArrayRef, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use ScalarValue::*; - let ret = match (scalar, array.data_type()) { - // Second op YearMonth - (TimestampSecond(Some(ts_sec), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_opt(*ts_sec, 0); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Millisecond op YearMonth - (TimestampMillisecond(Some(ts_ms), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_millis(*ts_ms); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Microsecond op YearMonth - (TimestampMicrosecond(Some(ts_us), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_micros(*ts_us); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Nanosecond op YearMonth - (TimestampNanosecond(Some(ts_ns), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_opt( - ts_ns.div_euclid(1_000_000_000), - ts_ns.rem_euclid(1_000_000_000).try_into().map_err(|_| { - DataFusionError::Internal("Overflow of divison".to_string()) - })?, - ); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Second op DayTime - (TimestampSecond(Some(ts_sec), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_opt(*ts_sec, 0); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampSecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp - ) - } - // Millisecond op DayTime - (TimestampMillisecond(Some(ts_ms), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_millis(*ts_ms); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampMillisecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp_millis - ) - } - // Microsecond op DayTime - (TimestampMicrosecond(Some(ts_us), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_micros(*ts_us); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampMicrosecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp_micros - ) - } - // Nanosecond op DayTime - (TimestampNanosecond(Some(ts_ns), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_opt( - ts_ns.div_euclid(1_000_000_000), - ts_ns.rem_euclid(1_000_000_000).try_into().map_err(|_| { - DataFusionError::Internal("Overflow of divison".to_string()) - })?, - ); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampNanosecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp_nanos - ) - } - // Second op MonthDayNano - (TimestampSecond(Some(ts_sec), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_opt(*ts_sec, 0); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampSecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp - ) - } - // Millisecond op MonthDayNano - (TimestampMillisecond(Some(ts_ms), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_millis(*ts_ms); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampMillisecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp_millis - ) - } - // Microsecond op MonthDayNano - (TimestampMicrosecond(Some(ts_us), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_micros(*ts_us); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampMicrosecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp_micros - ) - } - - // Nanosecond op MonthDayNano - (TimestampNanosecond(Some(ts_ns), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_opt( - ts_ns.div_euclid(1_000_000_000), - ts_ns.rem_euclid(1_000_000_000).try_into().map_err(|_| { - DataFusionError::Internal("Overflow of divison".to_string()) - })?, - ); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampNanosecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp_nanos - ) - } - _ => Err(DataFusionError::Internal( - "Invalid types for Timestamp vs Interval operations".to_string(), - ))?, - }; - Ok(ret) -} - -/// This function handles interval +/- interval operations where the former is -/// an array and the latter is a scalar, resulting in an interval array. -pub fn interval_op_scalar_interval( - array: &ArrayRef, - sign: i32, - scalar: &ScalarValue, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use ScalarValue::*; - let ret = match (array.data_type(), scalar) { - (Interval(YearMonth), IntervalYearMonth(Some(rhs))) => { - op_interval_macro!( - array, - as_interval_ym_array, - IntervalYearMonthType, - op_ym, - rhs, - sign - ) - } - (Interval(YearMonth), IntervalDayTime(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_ym_array, - false, - op_ym_dt, - rhs, - sign, - i32, - i64 - ) - } - (Interval(YearMonth), IntervalMonthDayNano(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_ym_array, - false, - op_ym_mdn, - rhs, - sign, - i32, - i128 - ) - } - (Interval(DayTime), IntervalYearMonth(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_dt_array, - true, - op_ym_dt, - rhs, - sign, - i32, - i64 - ) - } - (Interval(DayTime), IntervalDayTime(Some(rhs))) => { - op_interval_macro!( - array, - as_interval_dt_array, - IntervalDayTimeType, - op_dt, - rhs, - sign - ) - } - (Interval(DayTime), IntervalMonthDayNano(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_dt_array, - false, - op_dt_mdn, - rhs, - sign, - i64, - i128 - ) - } - (Interval(MonthDayNano), IntervalYearMonth(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_mdn_array, - true, - op_ym_mdn, - rhs, - sign, - i32, - i128 - ) - } - (Interval(MonthDayNano), IntervalDayTime(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_mdn_array, - true, - op_dt_mdn, - rhs, - sign, - i64, - i128 - ) - } - (Interval(MonthDayNano), IntervalMonthDayNano(Some(rhs))) => { - op_interval_macro!( - array, - as_interval_mdn_array, - IntervalMonthDayNanoType, - op_mdn, - rhs, - sign - ) - } - _ => Err(DataFusionError::Internal(format!( - "Invalid operands for Interval vs Interval operations: {} - {}", - array.data_type(), - scalar.get_datatype(), - )))?, - }; - Ok(ret) -} - -/// This function handles interval +/- interval operations where the former is -/// a scalar and the latter is an array, resulting in an interval array. -pub fn scalar_interval_op_interval( - scalar: &ScalarValue, - sign: i32, - array: &ArrayRef, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use ScalarValue::*; - let ret = match (scalar, array.data_type()) { - // YearMonth op YearMonth - (IntervalYearMonth(Some(lhs)), Interval(YearMonth)) => { - let array = as_interval_ym_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym(*lhs, rhs, sign)); - Arc::new(ret) as _ - } - // DayTime op YearMonth - (IntervalDayTime(Some(lhs)), Interval(YearMonth)) => { - let array = as_interval_ym_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_dt(rhs, *lhs, sign, true)); - Arc::new(ret) as _ - } - // MonthDayNano op YearMonth - (IntervalMonthDayNano(Some(lhs)), Interval(YearMonth)) => { - let array = as_interval_ym_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_mdn(rhs, *lhs, sign, true)); - Arc::new(ret) as _ - } - // YearMonth op DayTime - (IntervalYearMonth(Some(lhs)), Interval(DayTime)) => { - let array = as_interval_dt_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_dt(*lhs, rhs, sign, false)); - Arc::new(ret) as _ - } - // DayTime op DayTime - (IntervalDayTime(Some(lhs)), Interval(DayTime)) => { - let array = as_interval_dt_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_dt(*lhs, rhs, sign)); - Arc::new(ret) as _ - } - // MonthDayNano op DayTime - (IntervalMonthDayNano(Some(lhs)), Interval(DayTime)) => { - let array = as_interval_dt_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_dt_mdn(rhs, *lhs, sign, true)); - Arc::new(ret) as _ - } - // YearMonth op MonthDayNano - (IntervalYearMonth(Some(lhs)), Interval(MonthDayNano)) => { - let array = as_interval_mdn_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_mdn(*lhs, rhs, sign, false)); - Arc::new(ret) as _ - } - // DayTime op MonthDayNano - (IntervalDayTime(Some(lhs)), Interval(MonthDayNano)) => { - let array = as_interval_mdn_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_dt_mdn(*lhs, rhs, sign, false)); - Arc::new(ret) as _ - } - // MonthDayNano op MonthDayNano - (IntervalMonthDayNano(Some(lhs)), Interval(MonthDayNano)) => { - let array = as_interval_mdn_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_mdn(*lhs, rhs, sign)); - Arc::new(ret) as _ - } - _ => Err(DataFusionError::Internal(format!( - "Invalid operands for Interval vs Interval operations: {} - {}", - scalar.get_datatype(), - array.data_type(), - )))?, - }; - Ok(ret) -} - -/// Performs a timestamp subtraction operation on two arrays and returns the resulting array. -pub fn ts_array_op(array_lhs: &ArrayRef, array_rhs: &ArrayRef) -> Result { - use DataType::*; - use TimeUnit::*; - match (array_lhs.data_type(), array_rhs.data_type()) { - (Timestamp(Second, opt_tz_lhs), Timestamp(Second, opt_tz_rhs)) => Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1000i64, - as_timestamp_second_array, - seconds_sub, - NaiveDateTime::timestamp, - MILLISECOND_MODE, - IntervalDayTimeType - )), - (Timestamp(Millisecond, opt_tz_lhs), Timestamp(Millisecond, opt_tz_rhs)) => { - Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1i64, - as_timestamp_millisecond_array, - milliseconds_sub, - NaiveDateTime::timestamp_millis, - MILLISECOND_MODE, - IntervalDayTimeType - )) - } - (Timestamp(Microsecond, opt_tz_lhs), Timestamp(Microsecond, opt_tz_rhs)) => { - Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1000i64, - as_timestamp_microsecond_array, - microseconds_sub, - NaiveDateTime::timestamp_micros, - NANOSECOND_MODE, - IntervalMonthDayNanoType - )) - } - (Timestamp(Nanosecond, opt_tz_lhs), Timestamp(Nanosecond, opt_tz_rhs)) => { - Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1i64, - as_timestamp_nanosecond_array, - nanoseconds_sub, - NaiveDateTime::timestamp_nanos, - NANOSECOND_MODE, - IntervalMonthDayNanoType - )) - } - (_, _) => Err(DataFusionError::Execution(format!( - "Invalid array types for Timestamp subtraction: {} - {}", - array_lhs.data_type(), - array_rhs.data_type() - ))), - } -} -/// Performs an interval operation on two arrays and returns the resulting array. -/// The operation sign determines whether to perform addition or subtraction. -/// The data type and unit of the two input arrays must match the supported combinations. -pub fn interval_array_op( - array_lhs: &ArrayRef, - array_rhs: &ArrayRef, - sign: i32, -) -> Result { - use DataType::*; - use IntervalUnit::*; - match (array_lhs.data_type(), array_rhs.data_type()) { - (Interval(YearMonth), Interval(YearMonth)) => Ok(interval_op!( - array_lhs, - array_rhs, - as_interval_ym_array, - op_ym, - sign, - IntervalYearMonthType - )), - (Interval(YearMonth), Interval(DayTime)) => Ok(interval_cross_op!( - array_lhs, - array_rhs, - as_interval_ym_array, - as_interval_dt_array, - op_ym_dt, - sign, - false, - IntervalYearMonthType, - IntervalDayTimeType - )), - (Interval(YearMonth), Interval(MonthDayNano)) => Ok(interval_cross_op!( - array_lhs, - array_rhs, - as_interval_ym_array, - as_interval_mdn_array, - op_ym_mdn, - sign, - false, - IntervalYearMonthType, - IntervalMonthDayNanoType - )), - (Interval(DayTime), Interval(YearMonth)) => Ok(interval_cross_op!( - array_rhs, - array_lhs, - as_interval_ym_array, - as_interval_dt_array, - op_ym_dt, - sign, - true, - IntervalYearMonthType, - IntervalDayTimeType - )), - (Interval(DayTime), Interval(DayTime)) => Ok(interval_op!( - array_lhs, - array_rhs, - as_interval_dt_array, - op_dt, - sign, - IntervalDayTimeType - )), - (Interval(DayTime), Interval(MonthDayNano)) => Ok(interval_cross_op!( - array_lhs, - array_rhs, - as_interval_dt_array, - as_interval_mdn_array, - op_dt_mdn, - sign, - false, - IntervalDayTimeType, - IntervalMonthDayNanoType - )), - (Interval(MonthDayNano), Interval(YearMonth)) => Ok(interval_cross_op!( - array_rhs, - array_lhs, - as_interval_ym_array, - as_interval_mdn_array, - op_ym_mdn, - sign, - true, - IntervalYearMonthType, - IntervalMonthDayNanoType - )), - (Interval(MonthDayNano), Interval(DayTime)) => Ok(interval_cross_op!( - array_rhs, - array_lhs, - as_interval_dt_array, - as_interval_mdn_array, - op_dt_mdn, - sign, - true, - IntervalDayTimeType, - IntervalMonthDayNanoType - )), - (Interval(MonthDayNano), Interval(MonthDayNano)) => Ok(interval_op!( - array_lhs, - array_rhs, - as_interval_mdn_array, - op_mdn, - sign, - IntervalMonthDayNanoType - )), - (_, _) => Err(DataFusionError::Execution(format!( - "Invalid array types for Interval operation: {} {} {}", - array_lhs.data_type(), - sign, - array_rhs.data_type() - ))), - } -} - -/// Performs a timestamp/interval operation on two arrays and returns the resulting array. -/// The operation sign determines whether to perform addition or subtraction. -/// The data type and unit of the two input arrays must match the supported combinations. -pub fn ts_interval_array_op( - array_lhs: &ArrayRef, - sign: i32, - array_rhs: &ArrayRef, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use TimeUnit::*; - match (array_lhs.data_type(), array_rhs.data_type()) { - (Timestamp(Second, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_second_array, - as_interval_ym_array, - seconds_add_array::, - sign, - TimestampSecondType, - IntervalYearMonthType - )), - (Timestamp(Second, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_second_array, - as_interval_dt_array, - seconds_add_array::, - sign, - TimestampSecondType, - IntervalDayTimeType - )), - (Timestamp(Second, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_second_array, - as_interval_mdn_array, - seconds_add_array::, - sign, - TimestampSecondType, - IntervalMonthDayNanoType - )), - (Timestamp(Millisecond, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_millisecond_array, - as_interval_ym_array, - milliseconds_add_array::, - sign, - TimestampMillisecondType, - IntervalYearMonthType - )), - (Timestamp(Millisecond, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_millisecond_array, - as_interval_dt_array, - milliseconds_add_array::, - sign, - TimestampMillisecondType, - IntervalDayTimeType - )), - (Timestamp(Millisecond, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_millisecond_array, - as_interval_mdn_array, - milliseconds_add_array::, - sign, - TimestampMillisecondType, - IntervalMonthDayNanoType - )), - (Timestamp(Microsecond, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_microsecond_array, - as_interval_ym_array, - microseconds_add_array::, - sign, - TimestampMicrosecondType, - IntervalYearMonthType - )), - (Timestamp(Microsecond, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_microsecond_array, - as_interval_dt_array, - microseconds_add_array::, - sign, - TimestampMicrosecondType, - IntervalDayTimeType - )), - (Timestamp(Microsecond, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_microsecond_array, - as_interval_mdn_array, - microseconds_add_array::, - sign, - TimestampMicrosecondType, - IntervalMonthDayNanoType - )), - (Timestamp(Nanosecond, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_nanosecond_array, - as_interval_ym_array, - nanoseconds_add_array::, - sign, - TimestampNanosecondType, - IntervalYearMonthType - )), - (Timestamp(Nanosecond, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_nanosecond_array, - as_interval_dt_array, - nanoseconds_add_array::, - sign, - TimestampNanosecondType, - IntervalDayTimeType - )), - (Timestamp(Nanosecond, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_nanosecond_array, - as_interval_mdn_array, - nanoseconds_add_array::, - sign, - TimestampNanosecondType, - IntervalMonthDayNanoType - )), - (_, _) => Err(DataFusionError::Execution(format!( - "Invalid array types for Timestamp Interval operation: {} {} {}", - array_lhs.data_type(), - sign, - array_rhs.data_type() - ))), - } -} - -#[inline] -pub fn date32_interval_ym_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |ym| { - let months = Months::new(ym.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let value = month_op(*prior, months).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - Ok((value - *epoch).num_days() as i32) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date32_interval_dt_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |dt| { - let (days, millis) = IntervalDayTimeType::to_parts(dt); - let days = Days::new(days.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let value = day_op(*prior, days).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - let milli_days = millis as i64 / MILLISECONDS_IN_DAY; - Ok(((value - *epoch).num_days() - milli_days) as i32) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date32_interval_mdn_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let cast_err = |_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - }; - let out_of_range = - || DataFusionError::Internal("Resulting date is out of range".to_string()); - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |mdn| { - let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(mdn); - let months_obj = Months::new(months.try_into().map_err(cast_err)?); - let month_diff = month_op(*prior, months_obj).ok_or_else(out_of_range)?; - let days_obj = Days::new(days.try_into().map_err(cast_err)?); - let value = day_op(month_diff, days_obj).ok_or_else(out_of_range)?; - let nano_days = nanos / NANOSECONDS_IN_DAY; - Ok(((value - *epoch).num_days() - nano_days) as i32) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date64_interval_ym_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |ym| { - let months_obj = Months::new(ym.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let date = month_op(*prior, months_obj).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - Ok((date - *epoch).num_milliseconds()) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date64_interval_dt_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |dt| { - let (days, millis) = IntervalDayTimeType::to_parts(dt); - let days_obj = Days::new(days.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let date = day_op(*prior, days_obj).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - Ok((date - *epoch).num_milliseconds() - millis as i64) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date64_interval_mdn_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let cast_err = |_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - }; - let out_of_range = - || DataFusionError::Internal("Resulting date is out of range".to_string()); - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |mdn| { - let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(mdn); - let months_obj = Months::new(months.try_into().map_err(cast_err)?); - let month_diff = month_op(*prior, months_obj).ok_or_else(out_of_range)?; - let days_obj = Days::new(days.try_into().map_err(cast_err)?); - let value = day_op(month_diff, days_obj).ok_or_else(out_of_range)?; - Ok((value - *epoch).num_milliseconds() - nanos / 1_000_000) - }, - )?) as _; - Ok(ret) -} - #[cfg(test)] mod tests { use super::*; - use datafusion_expr::type_coercion::binary::decimal_op_mathematics_type; - use datafusion_expr::Operator; fn create_decimal_array( array: &[Option], @@ -2111,194 +339,6 @@ mod tests { Ok(()) } - #[test] - fn arithmetic_decimal_op_test() -> Result<()> { - let value_i128: i128 = 123; - let left_decimal_array = create_decimal_array( - &[ - Some(value_i128), - None, - Some(value_i128 - 1), - Some(value_i128 + 1), - ], - 25, - 3, - ); - let right_decimal_array = create_decimal_array( - &[ - Some(value_i128), - Some(value_i128), - Some(value_i128), - Some(value_i128), - ], - 25, - 3, - ); - // add - let result_type = decimal_op_mathematics_type( - &Operator::Plus, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = - add_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(246), None, Some(245), Some(247)], 26, 3); - assert_eq!(&expect, result); - let result = add_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(133), None, Some(132), Some(134)], 26, 3); - assert_eq!(&expect, result); - // subtract - let result_type = decimal_op_mathematics_type( - &Operator::Minus, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = subtract_dyn_decimal( - &left_decimal_array, - &right_decimal_array, - &result_type, - )?; - let result = as_decimal128_array(&result)?; - let expect = create_decimal_array(&[Some(0), None, Some(-1), Some(1)], 26, 3); - assert_eq!(&expect, result); - let result = subtract_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(113), None, Some(112), Some(114)], 26, 3); - assert_eq!(&expect, result); - // multiply - let result_type = decimal_op_mathematics_type( - &Operator::Multiply, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = multiply_dyn_decimal( - &left_decimal_array, - &right_decimal_array, - &result_type, - )?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(15129), None, Some(15006), Some(15252)], 38, 6); - assert_eq!(&expect, result); - let result = multiply_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(1230), None, Some(1220), Some(1240)], 38, 6); - assert_eq!(&expect, result); - // divide - let result_type = decimal_op_mathematics_type( - &Operator::Divide, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let left_decimal_array = create_decimal_array( - &[ - Some(1234567), - None, - Some(1234567), - Some(1234567), - Some(1234567), - ], - 25, - 3, - ); - let right_decimal_array = create_decimal_array( - &[Some(10), Some(100), Some(55), Some(-123), None], - 25, - 3, - ); - let result = divide_dyn_checked_decimal( - &left_decimal_array, - &right_decimal_array, - &result_type, - )?; - let result = as_decimal128_array(&result)?; - let expect = create_decimal_array( - &[ - Some(12345670000000000000000000000000000), - None, - Some(2244667272727272727272727272727272), - Some(-1003713008130081300813008130081300), - None, - ], - 38, - 29, - ); - assert_eq!(&expect, result); - let result = divide_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = create_decimal_array( - &[ - Some(12345670000000000000000000000000000), - None, - Some(12345670000000000000000000000000000), - Some(12345670000000000000000000000000000), - Some(12345670000000000000000000000000000), - ], - 38, - 29, - ); - assert_eq!(&expect, result); - // modulus - let result_type = decimal_op_mathematics_type( - &Operator::Modulo, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = - modulus_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(7), None, Some(37), Some(16), None], 25, 3); - assert_eq!(&expect, result); - let result = modulus_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(7), None, Some(7), Some(7), Some(7)], 25, 3); - assert_eq!(&expect, result); - - Ok(()) - } - - #[test] - fn arithmetic_decimal_divide_by_zero() { - let left_decimal_array = create_decimal_array(&[Some(101)], 10, 1); - let right_decimal_array = create_decimal_array(&[Some(0)], 1, 1); - - let result_type = decimal_op_mathematics_type( - &Operator::Divide, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let err = - divide_decimal_dyn_scalar(&left_decimal_array, 0, &result_type).unwrap_err(); - assert_eq!("Arrow error: Divide by zero error", err.to_string()); - let result_type = decimal_op_mathematics_type( - &Operator::Modulo, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let err = - modulus_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type) - .unwrap_err(); - assert_eq!("Arrow error: Divide by zero error", err.to_string()); - let err = - modulus_decimal_dyn_scalar(&left_decimal_array, 0, &result_type).unwrap_err(); - assert_eq!("Arrow error: Divide by zero error", err.to_string()); - } - #[test] fn is_distinct_from_non_nulls() -> Result<()> { let left_int_array = @@ -2361,75 +401,4 @@ mod tests { ); Ok(()) } - - #[test] - fn test_decimal_multiply_fixed_point_dyn() { - // [123456789] - let a = Decimal128Array::from(vec![123456789000000000000000000]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // [10] - let b = Decimal128Array::from(vec![10000000000000000000]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // Avoid overflow by reducing the scale. - let result = multiply_fixed_point_dyn(&a, &b, 28).unwrap(); - // [1234567890] - let expected = Arc::new( - Decimal128Array::from(vec![12345678900000000000000000000000000000]) - .with_precision_and_scale(38, 28) - .unwrap(), - ) as ArrayRef; - - assert_eq!(&expected, &result); - assert_eq!( - result.as_primitive::().value_as_string(0), - "1234567890.0000000000000000000000000000" - ); - - // [123456789, 10, 10] - let a = Decimal128Array::from(vec![ - 123456789000000000000000000, - 10000000000000000000, - 10000000000000000000, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // [10, 123456789, 12] - let b = Decimal128Array::from(vec![ - 10000000000000000000, - 123456789000000000000000000, - 12000000000000000000, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); - - let result = multiply_fixed_point_dyn(&a, &b, 28).unwrap(); - let expected = Arc::new( - Decimal128Array::from(vec![ - Some(12345678900000000000000000000000000000), - Some(12345678900000000000000000000000000000), - Some(1200000000000000000000000000000), - ]) - .with_precision_and_scale(38, 28) - .unwrap(), - ) as ArrayRef; - - assert_eq!(&expected, &result); - assert_eq!( - result.as_primitive::().value_as_string(0), - "1234567890.0000000000000000000000000000" - ); - assert_eq!( - result.as_primitive::().value_as_string(1), - "1234567890.0000000000000000000000000000" - ); - assert_eq!( - result.as_primitive::().value_as_string(2), - "120.0000000000000000000000000000" - ); - } } diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index b6c3536a1e96..c260a427a84f 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -28,17 +28,15 @@ use arrow::compute::{kernels, CastOptions}; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use compute::can_cast_types; +use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; -/// provide DataFusion default cast options -fn default_cast_options() -> CastOptions<'static> { - CastOptions { - safe: false, - format_options: Default::default(), - } -} +const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions { + safe: false, + format_options: DEFAULT_FORMAT_OPTIONS, +}; /// CAST expression casts an expression to a specific data type and returns a runtime error on invalid cast #[derive(Debug, Clone)] @@ -61,7 +59,7 @@ impl CastExpr { Self { expr, cast_type, - cast_options: cast_options.unwrap_or_else(default_cast_options), + cast_options: cast_options.unwrap_or(DEFAULT_CAST_OPTIONS), } } @@ -163,7 +161,7 @@ pub fn cast_column( cast_type: &DataType, cast_options: Option<&CastOptions<'static>>, ) -> Result { - let cast_options = cast_options.cloned().unwrap_or_else(default_cast_options); + let cast_options = cast_options.cloned().unwrap_or(DEFAULT_CAST_OPTIONS); match value { ColumnarValue::Array(array) => Ok(ColumnarValue::Array( kernels::cast::cast_with_options(array, cast_type, &cast_options)?, diff --git a/datafusion/physical-expr/src/expressions/datetime.rs b/datafusion/physical-expr/src/expressions/datetime.rs deleted file mode 100644 index 4d0ee5cc7dbc..000000000000 --- a/datafusion/physical-expr/src/expressions/datetime.rs +++ /dev/null @@ -1,931 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison}; -use crate::intervals::{apply_operator, Interval}; -use crate::physical_expr::down_cast_any_ref; -use crate::PhysicalExpr; -use arrow::datatypes::{DataType, Schema}; -use arrow::record_batch::RecordBatch; - -use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::type_coercion::binary::get_result_type; -use datafusion_expr::{ColumnarValue, Operator}; -use std::any::Any; -use std::fmt::{Display, Formatter}; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -use super::binary::{resolve_temporal_op, resolve_temporal_op_scalar}; - -/// Perform DATE/TIME/TIMESTAMP +/ INTERVAL math -#[derive(Debug, Hash)] -pub struct DateTimeIntervalExpr { - lhs: Arc, - op: Operator, - rhs: Arc, -} - -impl DateTimeIntervalExpr { - /// Create a new instance of DateIntervalExpr - pub fn new( - lhs: Arc, - op: Operator, - rhs: Arc, - ) -> Self { - Self { lhs, op, rhs } - } - - /// Get the left-hand side expression - pub fn lhs(&self) -> &Arc { - &self.lhs - } - - /// Get the operator - pub fn op(&self) -> &Operator { - &self.op - } - - /// Get the right-hand side expression - pub fn rhs(&self) -> &Arc { - &self.rhs - } -} - -impl Display for DateTimeIntervalExpr { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{} {} {}", self.lhs, self.op, self.rhs) - } -} - -impl PhysicalExpr for DateTimeIntervalExpr { - fn as_any(&self) -> &dyn Any { - self - } - - fn data_type(&self, input_schema: &Schema) -> Result { - get_result_type( - &self.lhs.data_type(input_schema)?, - &Operator::Minus, - &self.rhs.data_type(input_schema)?, - ) - } - - fn nullable(&self, input_schema: &Schema) -> Result { - self.lhs.nullable(input_schema) - } - - fn evaluate(&self, batch: &RecordBatch) -> Result { - let lhs_value = self.lhs.evaluate(batch)?; - let rhs_value = self.rhs.evaluate(batch)?; - // Invert sign for subtraction - let sign = match self.op { - Operator::Plus => 1, - Operator::Minus => -1, - _ => { - // this should be unreachable because we check the operators in `try_new` - let msg = "Invalid operator for DateIntervalExpr"; - return Err(DataFusionError::Internal(msg.to_string())); - } - }; - // RHS is first checked. If it is a Scalar, there are 2 options: - // Either LHS is also a Scalar and matching operation is applied, - // or LHS is an Array and unary operations for related types are - // applied in evaluate_array function. If RHS is an Array, then - // LHS must also be, moreover; they must be the same Timestamp type. - match (lhs_value, rhs_value) { - (ColumnarValue::Scalar(operand_lhs), ColumnarValue::Scalar(operand_rhs)) => { - Ok(ColumnarValue::Scalar(if sign > 0 { - operand_lhs.add(&operand_rhs)? - } else { - operand_lhs.sub(&operand_rhs)? - })) - } - // This function evaluates temporal array vs scalar operations, such as timestamp - timestamp, - // interval + interval, timestamp + interval, and interval + timestamp. It takes one array and one scalar as input - // and an integer sign representing the operation (+1 for addition and -1 for subtraction). - (ColumnarValue::Array(arr), ColumnarValue::Scalar(scalar)) => { - Ok(ColumnarValue::Array(resolve_temporal_op_scalar( - &arr, sign, &scalar, false, - )?)) - } - // This function evaluates operations between a scalar value and an array of temporal - // values. One example is calculating the duration between a scalar timestamp and an - // array of timestamps (i.e. `now() - some_column`). - (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr)) => { - Ok(ColumnarValue::Array(resolve_temporal_op_scalar( - &arr, sign, &scalar, true, - )?)) - } - // This function evaluates temporal array operations, such as timestamp - timestamp, interval + interval, - // timestamp + interval, and interval + timestamp. It takes two arrays as input and an integer sign representing - // the operation (+1 for addition and -1 for subtraction). - (ColumnarValue::Array(array_lhs), ColumnarValue::Array(array_rhs)) => Ok( - ColumnarValue::Array(resolve_temporal_op(&array_lhs, sign, &array_rhs)?), - ), - } - } - - fn evaluate_bounds(&self, children: &[&Interval]) -> Result { - // Get children intervals: - let left_interval = children[0]; - let right_interval = children[1]; - // Calculate current node's interval: - apply_operator(&self.op, left_interval, right_interval) - } - - fn propagate_constraints( - &self, - interval: &Interval, - children: &[&Interval], - ) -> Result>> { - // Get children intervals. Graph brings - let left_interval = children[0]; - let right_interval = children[1]; - let (left, right) = if self.op.is_comparison_operator() { - if interval == &Interval::CERTAINLY_FALSE { - // TODO: We will handle strictly false clauses by negating - // the comparison operator (e.g. GT to LE, LT to GE) - // once open/closed intervals are supported. - return Ok(vec![]); - } - // Propagate the comparison operator. - propagate_comparison(&self.op, left_interval, right_interval)? - } else { - // Propagate the arithmetic operator. - propagate_arithmetic(&self.op, interval, left_interval, right_interval)? - }; - Ok(vec![left, right]) - } - - fn children(&self) -> Vec> { - vec![self.lhs.clone(), self.rhs.clone()] - } - - fn with_new_children( - self: Arc, - children: Vec>, - ) -> Result> { - Ok(Arc::new(DateTimeIntervalExpr::new( - children[0].clone(), - self.op, - children[1].clone(), - ))) - } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for DateTimeIntervalExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.lhs.eq(&x.lhs) && self.op == x.op && self.rhs.eq(&x.rhs)) - .unwrap_or(false) - } -} - -/// create a DateIntervalExpr -pub fn date_time_interval_expr( - lhs: Arc, - op: Operator, - rhs: Arc, - input_schema: &Schema, -) -> Result> { - match ( - lhs.data_type(input_schema)?, - op, - rhs.data_type(input_schema)?, - ) { - ( - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) - | (DataType::Timestamp(_, _), Operator::Minus, DataType::Timestamp(_, _)) - | (DataType::Interval(_), Operator::Plus, DataType::Timestamp(_, _)) - | ( - DataType::Interval(_), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) => Ok(Arc::new(DateTimeIntervalExpr::new(lhs, op, rhs))), - (lhs, _, rhs) => Err(DataFusionError::Execution(format!( - "Invalid operation {op} between '{lhs}' and '{rhs}' for DateIntervalExpr" - ))), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::create_physical_expr; - use crate::execution_props::ExecutionProps; - use arrow::array::{ArrayRef, Date32Builder}; - use arrow::datatypes::*; - use arrow_array::IntervalMonthDayNanoArray; - use chrono::{Duration, NaiveDate}; - use datafusion_common::{Column, Result, ScalarValue, ToDFSchema}; - use datafusion_expr::Expr; - use std::ops::Add; - - #[test] - fn add_32_day_time() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(1, 0)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1970-01-02"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn sub_32_year_month() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Minus; - let interval = Expr::Literal(ScalarValue::IntervalYearMonth(Some(13))); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1968-12-01"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_64_day_time() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date64(Some(0))); - let op = Operator::Plus; - let interval = - Expr::Literal(ScalarValue::new_interval_dt(-15, -24 * 60 * 60 * 1000)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date64(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::milliseconds(d)); - assert_eq!(format!("{res:?}").as_str(), "1969-12-16"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_32_year_month() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::IntervalYearMonth(Some(1))); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1970-02-01"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_32_month_day_nano() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_mdn(-12, -15, -42)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1968-12-17"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_1_millisecond() -> Result<()> { - // setup - let now_ts_ns = chrono::Utc::now().timestamp_nanos(); - let dt = Expr::Literal(ScalarValue::TimestampNanosecond(Some(now_ts_ns), None)); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(0, 1)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_ns + 1_000_000); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn add_2_hours() -> Result<()> { - // setup - let now_ts_s = chrono::Utc::now().timestamp(); - let dt = Expr::Literal(ScalarValue::TimestampSecond(Some(now_ts_s), None)); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(0, 2 * 3600 * 1_000)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_s + 2 * 3600); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn sub_4_hours() -> Result<()> { - // setup - let now_ts_s = chrono::Utc::now().timestamp(); - let dt = Expr::Literal(ScalarValue::TimestampSecond(Some(now_ts_s), None)); - let op = Operator::Minus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(0, 4 * 3600 * 1_000)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_s - 4 * 3600); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn add_8_days() -> Result<()> { - // setup - let now_ts_ns = chrono::Utc::now().timestamp_nanos(); - let dt = Expr::Literal(ScalarValue::TimestampNanosecond(Some(now_ts_ns), None)); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(8, 0)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_ns + 8 * 86400 * 1_000_000_000); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn sub_16_days() -> Result<()> { - // setup - let now_ts_ns = chrono::Utc::now().timestamp_nanos(); - let dt = Expr::Literal(ScalarValue::TimestampNanosecond(Some(now_ts_ns), None)); - let op = Operator::Minus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(16, 0)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_ns - 16 * 86400 * 1_000_000_000); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn array_add_26_days() -> Result<()> { - let mut builder = Date32Builder::with_capacity(8); - builder.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7]); - let a: ArrayRef = Arc::new(builder.finish()); - - let schema = Schema::new(vec![Field::new("a", DataType::Date32, false)]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![a])?; - let dfs = schema.clone().to_dfschema()?; - let props = ExecutionProps::new(); - - let dt = Expr::Column(Column::from_name("a")); - let interval = Expr::Literal(ScalarValue::new_interval_dt(26, 0)); - let op = Operator::Plus; - - let lhs = create_physical_expr(&dt, &dfs, &schema, &props)?; - let rhs = create_physical_expr(&interval, &dfs, &schema, &props)?; - - let cut = date_time_interval_expr(lhs, op, rhs, &schema)?; - let res = cut.evaluate(&batch)?; - - let mut builder = Date32Builder::with_capacity(8); - builder.append_slice(&[26, 27, 28, 29, 30, 31, 32, 33]); - let expected: ArrayRef = Arc::new(builder.finish()); - - // assert - match res { - ColumnarValue::Array(array) => { - assert_eq!(&array, &expected) - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn invalid_interval() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::Null); - - // exercise - let res = exercise(&dt, op, &interval); - assert!(res.is_err(), "Can't add a NULL interval"); - - Ok(()) - } - - #[test] - fn invalid_date() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Null); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(0))); - - // exercise - let res = exercise(&dt, op, &interval); - assert!(res.is_err(), "Can't add to NULL date"); - - Ok(()) - } - - #[test] - fn invalid_op() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Eq; - let interval = Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(0))); - - // exercise - let res = exercise(&dt, op, &interval); - assert!(res.is_err(), "Can't add dates with == operator"); - - Ok(()) - } - - fn exercise(dt: &Expr, op: Operator, interval: &Expr) -> Result { - let mut builder = Date32Builder::with_capacity(1); - builder.append_value(0); - let a: ArrayRef = Arc::new(builder.finish()); - let schema = Schema::new(vec![Field::new("a", DataType::Date32, false)]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![a])?; - - let dfs = schema.clone().to_dfschema()?; - let props = ExecutionProps::new(); - - let lhs = create_physical_expr(dt, &dfs, &schema, &props)?; - let rhs = create_physical_expr(interval, &dfs, &schema, &props)?; - - let lhs_str = format!("{lhs}"); - let rhs_str = format!("{rhs}"); - - let cut = DateTimeIntervalExpr::new(lhs, op, rhs); - - assert_eq!(lhs_str, format!("{}", cut.lhs())); - assert_eq!(op, cut.op().clone()); - assert_eq!(rhs_str, format!("{}", cut.rhs())); - - let res = cut.evaluate(&batch)?; - Ok(res) - } - - // In this test, ArrayRef of one element arrays is evaluated with some ScalarValues, - // aiming that resolve_temporal_op_scalar function is working properly and shows the same - // behavior with ScalarValue arithmetic. - fn experiment( - timestamp_scalar: ScalarValue, - interval_scalar: ScalarValue, - ) -> Result<()> { - let timestamp_array = timestamp_scalar.to_array(); - let interval_array = interval_scalar.to_array(); - - // timestamp + interval - let res1 = - resolve_temporal_op_scalar(×tamp_array, 1, &interval_scalar, false)?; - let res2 = timestamp_scalar.add(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} + Interval Scalar={interval_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(×tamp_array, 1, &interval_scalar, true)?; - let res2 = interval_scalar.add(×tamp_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} + Interval Scalar={interval_scalar}" - ); - - // timestamp - interval - let res1 = - resolve_temporal_op_scalar(×tamp_array, -1, &interval_scalar, false)?; - let res2 = timestamp_scalar.sub(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} - Interval Scalar={interval_scalar}" - ); - - // timestamp - timestamp - let res1 = - resolve_temporal_op_scalar(×tamp_array, -1, ×tamp_scalar, false)?; - let res2 = timestamp_scalar.sub(×tamp_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} - Timestamp Scalar={timestamp_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(×tamp_array, -1, ×tamp_scalar, true)?; - let res2 = timestamp_scalar.sub(×tamp_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} - Timestamp Scalar={timestamp_scalar}" - ); - - // interval - interval - let res1 = - resolve_temporal_op_scalar(&interval_array, -1, &interval_scalar, false)?; - let res2 = interval_scalar.sub(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} - Interval Scalar={interval_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(&interval_array, -1, &interval_scalar, true)?; - let res2 = interval_scalar.sub(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} - Interval Scalar={interval_scalar}" - ); - - // interval + interval - let res1 = - resolve_temporal_op_scalar(&interval_array, 1, &interval_scalar, false)?; - let res2 = interval_scalar.add(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} + Interval Scalar={interval_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(&interval_array, 1, &interval_scalar, true)?; - let res2 = interval_scalar.add(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} + Interval Scalar={interval_scalar}" - ); - - Ok(()) - } - #[test] - fn test_evalute_with_scalar() -> Result<()> { - // Timestamp (sec) & Interval (DayTime) - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (millisec) & Interval (DayTime) - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_milli_opt(0, 0, 0, 0) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (nanosec) & Interval (MonthDayNano) - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 0) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(0, 0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (nanosec) & Interval (MonthDayNano), negatively resulting cases - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(1970, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 000) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - - Arc::new(IntervalMonthDayNanoArray::from(vec![1_000])); // 1 us - let interval_scalar = ScalarValue::new_interval_mdn(0, 0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (sec) & Interval (YearMonth) - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - // More test with all matchings of timestamps and intervals - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_opt(23, 59, 59) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_opt(23, 59, 59) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_opt(23, 59, 59) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_milli_opt(23, 59, 59, 909) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_milli_opt(23, 59, 59, 909) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_milli_opt(23, 59, 59, 909) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_micro_opt(23, 59, 59, 987654) - .unwrap() - .timestamp_micros(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_micro_opt(23, 59, 59, 987654) - .unwrap() - .timestamp_micros(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_micro_opt(23, 59, 59, 987654) - .unwrap() - .timestamp_micros(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_nano_opt(23, 59, 59, 999999999) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_nano_opt(23, 59, 59, 999999999) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_nano_opt(23, 59, 59, 999999999) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - Ok(()) - } -} diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index b7e9d2cd8010..022e0ae02ed3 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -22,7 +22,6 @@ mod binary; mod case; mod cast; mod column; -mod datetime; mod get_indexed_field; mod in_list; mod is_not_null; @@ -81,7 +80,6 @@ pub use binary::{binary, BinaryExpr}; pub use case::{case, CaseExpr}; pub use cast::{cast, cast_column, cast_with_options, CastExpr}; pub use column::{col, Column, UnKnownColumn}; -pub use datetime::{date_time_interval_expr, DateTimeIntervalExpr}; pub use get_indexed_field::{GetFieldAccessExpr, GetIndexedFieldExpr}; pub use in_list::{in_list, InListExpr}; pub use is_not_null::{is_not_null, IsNotNullExpr}; diff --git a/datafusion/physical-expr/src/expressions/negative.rs b/datafusion/physical-expr/src/expressions/negative.rs index 7f1bd43fec70..dc45d6dbdd95 100644 --- a/datafusion/physical-expr/src/expressions/negative.rs +++ b/datafusion/physical-expr/src/expressions/negative.rs @@ -21,14 +21,9 @@ use std::any::Any; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use arrow::array::ArrayRef; -use arrow::compute::kernels::arithmetic::negate; use arrow::{ - array::{ - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, - }, - datatypes::{DataType, IntervalUnit, Schema}, + compute::kernels::numeric::neg_wrapping, + datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -40,18 +35,6 @@ use datafusion_expr::{ ColumnarValue, }; -/// Invoke a compute kernel on array(s) -macro_rules! compute_op { - // invoke unary operator - ($OPERAND:expr, $OP:ident, $DT:ident) => {{ - let operand = $OPERAND - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - Ok(Arc::new($OP(&operand)?)) - }}; -} - /// Negative expression #[derive(Debug, Hash)] pub struct NegativeExpr { @@ -95,23 +78,8 @@ impl PhysicalExpr for NegativeExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => { - let result: Result = match array.data_type() { - DataType::Int8 => compute_op!(array, negate, Int8Array), - DataType::Int16 => compute_op!(array, negate, Int16Array), - DataType::Int32 => compute_op!(array, negate, Int32Array), - DataType::Int64 => compute_op!(array, negate, Int64Array), - DataType::Float32 => compute_op!(array, negate, Float32Array), - DataType::Float64 => compute_op!(array, negate, Float64Array), - DataType::Interval(IntervalUnit::YearMonth) => compute_op!(array, negate, IntervalYearMonthArray), - DataType::Interval(IntervalUnit::DayTime) => compute_op!(array, negate, IntervalDayTimeArray), - DataType::Interval(IntervalUnit::MonthDayNano) => compute_op!(array, negate, IntervalMonthDayNanoArray), - _ => Err(DataFusionError::Internal(format!( - "(- '{:?}') can't be evaluated because the expression's type is {:?}, not signed numeric", - self, - array.data_type(), - ))), - }; - result.map(|a| ColumnarValue::Array(a)) + let result = neg_wrapping(array.as_ref())?; + Ok(ColumnarValue::Array(result)) } ColumnarValue::Scalar(scalar) => { Ok(ColumnarValue::Scalar((scalar.arithmetic_negate())?)) @@ -174,7 +142,6 @@ mod tests { use arrow::array::*; use arrow::datatypes::*; use arrow_schema::DataType::{Float32, Float64, Int16, Int32, Int64, Int8}; - use arrow_schema::IntervalUnit::{DayTime, MonthDayNano, YearMonth}; use datafusion_common::{cast::as_primitive_array, Result}; use paste::paste; @@ -203,31 +170,6 @@ mod tests { }; } - macro_rules! test_array_negative_op_intervals { - ($DATA_TY:tt, $($VALUE:expr),* ) => { - let schema = Schema::new(vec![Field::new("a", DataType::Interval(IntervalUnit::$DATA_TY), true)]); - let expr = negative(col("a", &schema)?, &schema)?; - assert_eq!(expr.data_type(&schema)?, DataType::Interval(IntervalUnit::$DATA_TY)); - assert!(expr.nullable(&schema)?); - let mut arr = Vec::new(); - let mut arr_expected = Vec::new(); - $( - arr.push(Some($VALUE)); - arr_expected.push(Some(-$VALUE)); - )+ - arr.push(None); - arr_expected.push(None); - let input = paste!{[]::from(arr)}; - let expected = &paste!{[]::from(arr_expected)}; - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(input)])?; - let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); - let result = - as_primitive_array(&result).expect(format!("failed to downcast to {:?}Array", $DATA_TY).as_str()); - assert_eq!(result, expected); - }; - } - #[test] fn array_negative_op() -> Result<()> { test_array_negative_op!(Int8, 2i8, 1i8); @@ -236,9 +178,6 @@ mod tests { test_array_negative_op!(Int64, 23456i64, 12345i64); test_array_negative_op!(Float32, 2345.0f32, 1234.0f32); test_array_negative_op!(Float64, 23456.0f64, 12345.0f64); - test_array_negative_op_intervals!(YearMonth, 2345i32, 1234i32); - test_array_negative_op_intervals!(DayTime, 23456i64, 12345i64); - test_array_negative_op_intervals!(MonthDayNano, 234567i128, 123456i128); Ok(()) } } diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs index 92ffaa1a8842..4450fc0df5cf 100644 --- a/datafusion/physical-expr/src/expressions/try_cast.rs +++ b/datafusion/physical-expr/src/expressions/try_cast.rs @@ -23,10 +23,11 @@ use std::sync::Arc; use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::compute; -use arrow::compute::kernels; +use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use compute::can_cast_types; +use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; @@ -79,14 +80,18 @@ impl PhysicalExpr for TryCastExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { let value = self.expr.evaluate(batch)?; + let options = CastOptions { + safe: true, + format_options: DEFAULT_FORMAT_OPTIONS, + }; match value { - ColumnarValue::Array(array) => Ok(ColumnarValue::Array(kernels::cast::cast( - &array, - &self.cast_type, - )?)), + ColumnarValue::Array(array) => { + let cast = cast_with_options(&array, &self.cast_type, &options)?; + Ok(ColumnarValue::Array(cast)) + } ColumnarValue::Scalar(scalar) => { - let scalar_array = scalar.to_array(); - let cast_array = kernels::cast::cast(&scalar_array, &self.cast_type)?; + let array = scalar.to_array(); + let cast_array = cast_with_options(&array, &self.cast_type, &options)?; let cast_scalar = ScalarValue::try_from_array(&cast_array, 0)?; Ok(ColumnarValue::Scalar(cast_scalar)) } diff --git a/datafusion/physical-expr/src/intervals/test_utils.rs b/datafusion/physical-expr/src/intervals/test_utils.rs index 8e695c255696..075b8240353d 100644 --- a/datafusion/physical-expr/src/intervals/test_utils.rs +++ b/datafusion/physical-expr/src/intervals/test_utils.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use crate::expressions::{date_time_interval_expr, BinaryExpr, Literal}; +use crate::expressions::{binary, BinaryExpr, Literal}; use crate::PhysicalExpr; use arrow_schema::Schema; use datafusion_common::{DataFusionError, ScalarValue}; @@ -78,22 +78,10 @@ pub fn gen_conjunctive_temporal_expr( d: ScalarValue, schema: &Schema, ) -> Result, DataFusionError> { - let left_and_1 = date_time_interval_expr( - left_col.clone(), - op_1, - Arc::new(Literal::new(a)), - schema, - )?; - let left_and_2 = date_time_interval_expr( - right_col.clone(), - op_2, - Arc::new(Literal::new(b)), - schema, - )?; - let right_and_1 = - date_time_interval_expr(left_col, op_3, Arc::new(Literal::new(c)), schema)?; - let right_and_2 = - date_time_interval_expr(right_col, op_4, Arc::new(Literal::new(d)), schema)?; + let left_and_1 = binary(left_col.clone(), op_1, Arc::new(Literal::new(a)), schema)?; + let left_and_2 = binary(right_col.clone(), op_2, Arc::new(Literal::new(b)), schema)?; + let right_and_1 = binary(left_col, op_3, Arc::new(Literal::new(c)), schema)?; + let right_and_2 = binary(right_col, op_4, Arc::new(Literal::new(d)), schema)?; let left_expr = Arc::new(BinaryExpr::new(left_and_1, Operator::Gt, left_and_2)); let right_expr = Arc::new(BinaryExpr::new(right_and_1, Operator::Lt, right_and_2)); Ok(Arc::new(BinaryExpr::new( diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index f2211701fa1a..3a698eb8c7b9 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -15,18 +15,16 @@ // specific language governing permissions and limitations // under the License. +use crate::expressions::GetFieldAccessExpr; use crate::var_provider::is_system_variables; use crate::{ execution_props::ExecutionProps, - expressions::{ - self, binary, date_time_interval_expr, like, Column, GetFieldAccessExpr, - GetIndexedFieldExpr, Literal, - }, + expressions::{self, binary, like, Column, GetIndexedFieldExpr, Literal}, functions, udf, var_provider::VarType, PhysicalExpr, }; -use arrow::datatypes::{DataType, Schema}; +use arrow::datatypes::Schema; use datafusion_common::plan_err; use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue}; use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction, ScalarUDF}; @@ -182,45 +180,14 @@ pub fn create_physical_expr( input_schema, execution_props, )?; - // Match the data types and operator to determine the appropriate expression, if - // they are supported temporal types and operations, create DateTimeIntervalExpr, - // else create BinaryExpr. - match ( - lhs.data_type(input_schema)?, - op, - rhs.data_type(input_schema)?, - ) { - ( - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) => Ok(date_time_interval_expr(lhs, *op, rhs, input_schema)?), - ( - DataType::Interval(_), - Operator::Plus | Operator::Minus, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), - ) => Ok(date_time_interval_expr(rhs, *op, lhs, input_schema)?), - ( - DataType::Timestamp(_, _), - Operator::Minus, - DataType::Timestamp(_, _), - ) => Ok(date_time_interval_expr(lhs, *op, rhs, input_schema)?), - ( - DataType::Interval(_), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) => Ok(date_time_interval_expr(lhs, *op, rhs, input_schema)?), - _ => { - // Note that the logical planner is responsible - // for type coercion on the arguments (e.g. if one - // argument was originally Int32 and one was - // Int64 they will both be coerced to Int64). - // - // There should be no coercion during physical - // planning. - binary(lhs, *op, rhs, input_schema) - } - } + // Note that the logical planner is responsible + // for type coercion on the arguments (e.g. if one + // argument was originally Int32 and one was + // Int64 they will both be coerced to Int64). + // + // There should be no coercion during physical + // planning. + binary(lhs, *op, rhs, input_schema) } Expr::Like(Like { negated, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 2254a8cd3f30..1081fca2e1fb 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1115,6 +1115,9 @@ message PhysicalExtensionNode { // physical expressions message PhysicalExprNode { + // Was date_time_interval_expr + reserved 17; + oneof ExprType { // column references PhysicalColumn column = 1; @@ -1145,8 +1148,6 @@ message PhysicalExprNode { PhysicalScalarUdfNode scalar_udf = 16; - PhysicalDateTimeIntervalExprNode date_time_interval_expr = 17; - PhysicalLikeExprNode like_expr = 18; PhysicalGetIndexedFieldExprNode get_indexed_field_expr = 19; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index d65e45d51d42..8691487c7282 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -15013,9 +15013,6 @@ impl serde::Serialize for PhysicalExprNode { physical_expr_node::ExprType::ScalarUdf(v) => { struct_ser.serialize_field("scalarUdf", v)?; } - physical_expr_node::ExprType::DateTimeIntervalExpr(v) => { - struct_ser.serialize_field("dateTimeIntervalExpr", v)?; - } physical_expr_node::ExprType::LikeExpr(v) => { struct_ser.serialize_field("likeExpr", v)?; } @@ -15061,8 +15058,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { "windowExpr", "scalar_udf", "scalarUdf", - "date_time_interval_expr", - "dateTimeIntervalExpr", "like_expr", "likeExpr", "get_indexed_field_expr", @@ -15087,7 +15082,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { TryCast, WindowExpr, ScalarUdf, - DateTimeIntervalExpr, LikeExpr, GetIndexedFieldExpr, } @@ -15127,7 +15121,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { "tryCast" | "try_cast" => Ok(GeneratedField::TryCast), "windowExpr" | "window_expr" => Ok(GeneratedField::WindowExpr), "scalarUdf" | "scalar_udf" => Ok(GeneratedField::ScalarUdf), - "dateTimeIntervalExpr" | "date_time_interval_expr" => Ok(GeneratedField::DateTimeIntervalExpr), "likeExpr" | "like_expr" => Ok(GeneratedField::LikeExpr), "getIndexedFieldExpr" | "get_indexed_field_expr" => Ok(GeneratedField::GetIndexedFieldExpr), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -15262,13 +15255,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { return Err(serde::de::Error::duplicate_field("scalarUdf")); } expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(physical_expr_node::ExprType::ScalarUdf) -; - } - GeneratedField::DateTimeIntervalExpr => { - if expr_type__.is_some() { - return Err(serde::de::Error::duplicate_field("dateTimeIntervalExpr")); - } - expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(physical_expr_node::ExprType::DateTimeIntervalExpr) ; } GeneratedField::LikeExpr => { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 867853b128fc..87371ba2772c 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1521,7 +1521,7 @@ pub struct PhysicalExtensionNode { pub struct PhysicalExprNode { #[prost( oneof = "physical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19" )] pub expr_type: ::core::option::Option, } @@ -1567,10 +1567,6 @@ pub mod physical_expr_node { WindowExpr(::prost::alloc::boxed::Box), #[prost(message, tag = "16")] ScalarUdf(super::PhysicalScalarUdfNode), - #[prost(message, tag = "17")] - DateTimeIntervalExpr( - ::prost::alloc::boxed::Box, - ), #[prost(message, tag = "18")] LikeExpr(::prost::alloc::boxed::Box), #[prost(message, tag = "19")] @@ -1675,10 +1671,10 @@ pub struct PhysicalBinaryExprNode { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct PhysicalDateTimeIntervalExprNode { - #[prost(message, optional, boxed, tag = "1")] - pub l: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag = "2")] - pub r: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "1")] + pub l: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub r: ::core::option::Option, #[prost(string, tag = "3")] pub op: ::prost::alloc::string::String, } diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index e084188ccd04..d9553f9e769e 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -29,10 +29,8 @@ use datafusion::execution::context::ExecutionProps; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::window_function::WindowFunction; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; -use datafusion::physical_plan::expressions::{ - date_time_interval_expr, GetFieldAccessExpr, GetIndexedFieldExpr, -}; use datafusion::physical_plan::expressions::{in_list, LikeExpr}; +use datafusion::physical_plan::expressions::{GetFieldAccessExpr, GetIndexedFieldExpr}; use datafusion::physical_plan::{ expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, IsNotNullExpr, IsNullExpr, Literal, @@ -125,22 +123,6 @@ pub fn parse_physical_expr( input_schema, )?, )), - ExprType::DateTimeIntervalExpr(expr) => date_time_interval_expr( - parse_required_physical_expr( - expr.l.as_deref(), - registry, - "left", - input_schema, - )?, - logical_plan::from_proto::from_proto_binary_op(&expr.op)?, - parse_required_physical_expr( - expr.r.as_deref(), - registry, - "right", - input_schema, - )?, - input_schema, - )?, ExprType::AggregateExpr(_) => { return Err(DataFusionError::NotImplemented( "Cannot convert aggregate expr node to physical expression".to_owned(), diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index fdb2ef88cb9e..7fe193ffbb4b 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -1397,12 +1397,10 @@ mod roundtrip_tests { use datafusion::logical_expr::create_udf; use datafusion::logical_expr::{BuiltinScalarFunction, Volatility}; use datafusion::physical_expr::expressions::in_list; + use datafusion::physical_expr::expressions::GetFieldAccessExpr; use datafusion::physical_expr::ScalarFunctionExpr; use datafusion::physical_plan::aggregates::PhysicalGroupBy; - use datafusion::physical_plan::expressions::{ - date_time_interval_expr, like, BinaryExpr, GetFieldAccessExpr, - GetIndexedFieldExpr, - }; + use datafusion::physical_plan::expressions::{like, BinaryExpr, GetIndexedFieldExpr}; use datafusion::physical_plan::functions::make_scalar_function; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::{functions, udaf}; @@ -1485,7 +1483,7 @@ mod roundtrip_tests { let date_expr = col("some_date", &schema)?; let literal_expr = col("some_interval", &schema)?; let date_time_interval_expr = - date_time_interval_expr(date_expr, Operator::Plus, literal_expr, &schema)?; + binary(date_expr, Operator::Plus, literal_expr, &schema)?; let plan = Arc::new(ProjectionExec::try_new( vec![(date_time_interval_expr, "result".to_string())], input, diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 45c38b5ad5e0..243e035f3806 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -48,9 +48,7 @@ use crate::protobuf::{ ScalarValue, }; use datafusion::logical_expr::BuiltinScalarFunction; -use datafusion::physical_expr::expressions::{ - DateTimeIntervalExpr, GetFieldAccessExpr, GetIndexedFieldExpr, -}; +use datafusion::physical_expr::expressions::{GetFieldAccessExpr, GetIndexedFieldExpr}; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::joins::utils::JoinSide; use datafusion::physical_plan::udaf::AggregateFunctionExpr; @@ -365,20 +363,6 @@ impl TryFrom> for protobuf::PhysicalExprNode { )), }) } - } else if let Some(expr) = expr.downcast_ref::() { - let dti_expr = Box::new(protobuf::PhysicalDateTimeIntervalExprNode { - l: Some(Box::new(expr.lhs().to_owned().try_into()?)), - r: Some(Box::new(expr.rhs().to_owned().try_into()?)), - op: format!("{:?}", expr.op()), - }); - - Ok(protobuf::PhysicalExprNode { - expr_type: Some( - protobuf::physical_expr_node::ExprType::DateTimeIntervalExpr( - dti_expr, - ), - ), - }) } else if let Some(expr) = expr.downcast_ref::() { Ok(protobuf::PhysicalExprNode { expr_type: Some(protobuf::physical_expr_node::ExprType::LikeExpr( diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index 954926ae3310..85cd6fcd5422 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. +use arrow::util::display::ArrayFormatter; use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch}; +use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; use datafusion_common::DFField; use datafusion_common::DataFusionError; use lazy_static::lazy_static; @@ -223,7 +225,10 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { DataType::Utf8 => { Ok(varchar_to_str(get_row_value!(array::StringArray, col, row))) } - _ => arrow::util::display::array_value_to_string(col, row), + _ => { + let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS); + Ok(f.unwrap().value(row).to_string()) + } } .map_err(DFSqlLogicTestError::Arrow) }