From 65851a94f93dd3c8d8b0f1fe11d5a2228af08dba Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 15 Apr 2024 14:37:38 -0700 Subject: [PATCH] Apply dictionary coercion change --- .../expr/src/type_coercion/functions.rs | 62 ++++++++++++------- 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index d4095a72fe3e..cc33128a7a0e 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -311,17 +311,30 @@ fn coerced_from<'a>( type_from: &'a DataType, ) -> Option { use self::DataType::*; - - match type_into { + // match Dictionary first + match (type_into, type_from) { + // coerced dictionary first + (_, Dictionary(_, value_type)) + if coerced_from(type_into, value_type).is_some() => + { + Some(type_into.clone()) + } + (Dictionary(_, value_type), _) + if coerced_from(value_type, type_from).is_some() => + { + Some(type_into.clone()) + } // coerced into type_into - Int8 if matches!(type_from, Null | Int8) => Some(type_into.clone()), - Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => { + (Int8, _) if matches!(type_from, Null | Int8) => Some(type_into.clone()), + (Int16, _) if matches!(type_from, Null | Int8 | Int16 | UInt8) => { Some(type_into.clone()) } - Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => { + (Int32, _) + if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => + { Some(type_into.clone()) } - Int64 + (Int64, _) if matches!( type_from, Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 @@ -329,15 +342,17 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - UInt8 if matches!(type_from, Null | UInt8) => Some(type_into.clone()), - UInt16 if matches!(type_from, Null | UInt8 | UInt16) => Some(type_into.clone()), - UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => { + (UInt8, _) if matches!(type_from, Null | UInt8) => Some(type_into.clone()), + (UInt16, _) if matches!(type_from, Null | UInt8 | UInt16) => { Some(type_into.clone()) } - UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) => { + (UInt32, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => { Some(type_into.clone()) } - Float32 + (UInt64, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) => { + Some(type_into.clone()) + } + (Float32, _) if matches!( type_from, Null | Int8 @@ -353,7 +368,7 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - Float64 + (Float64, _) if matches!( type_from, Null | Int8 @@ -371,7 +386,7 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - Timestamp(TimeUnit::Nanosecond, None) + (Timestamp(TimeUnit::Nanosecond, None), _) if matches!( type_from, Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8 @@ -379,23 +394,27 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => Some(type_into.clone()), + (Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => { + Some(type_into.clone()) + } // Any type can be coerced into strings - Utf8 | LargeUtf8 => Some(type_into.clone()), - Null if can_cast_types(type_from, type_into) => Some(type_into.clone()), + (Utf8 | LargeUtf8, _) => Some(type_into.clone()), + (Null, _) if can_cast_types(type_from, type_into) => Some(type_into.clone()), - List(_) if matches!(type_from, FixedSizeList(_, _)) => Some(type_into.clone()), + (List(_), _) if matches!(type_from, FixedSizeList(_, _)) => { + Some(type_into.clone()) + } // Only accept list and largelist with the same number of dimensions unless the type is Null. // List or LargeList with different dimensions should be handled in TypeSignature or other places before this - List(_) | LargeList(_) + (List(_) | LargeList(_), _) if datafusion_common::utils::base_type(type_from).eq(&Null) || list_ndims(type_from) == list_ndims(type_into) => { Some(type_into.clone()) } // should be able to coerce wildcard fixed size list to non wildcard fixed size list - FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD) => match type_from { + (FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD), _) => match type_from { FixedSizeList(f_from, size_from) => { match coerced_from(f_into.data_type(), f_from.data_type()) { Some(data_type) if &data_type != f_into.data_type() => { @@ -410,7 +429,7 @@ fn coerced_from<'a>( _ => None, }, - Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => { + (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => { match type_from { Timestamp(_, Some(from_tz)) => { Some(Timestamp(unit.clone(), Some(from_tz.clone()))) @@ -422,7 +441,7 @@ fn coerced_from<'a>( _ => None, } } - Timestamp(_, Some(_)) + (Timestamp(_, Some(_)), _) if matches!( type_from, Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8 @@ -430,7 +449,6 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - // More coerce rules. // Note that not all rules in `comparison_coercion` can be reused here. // For example, all numeric types can be coerced into Utf8 for comparison,