From d78d71157bd90ad4dabd4609d7ddf80a26b78454 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 7 Jan 2025 15:41:48 -0500 Subject: [PATCH] Encapsulate fields of `EquivalenceProperties` --- .../core/src/physical_optimizer/sanity_checker.rs | 2 +- .../core/tests/fuzz_cases/equivalence/ordering.rs | 8 ++++---- .../tests/fuzz_cases/equivalence/projection.rs | 8 ++++---- .../tests/fuzz_cases/equivalence/properties.rs | 4 ++-- .../core/tests/fuzz_cases/equivalence/utils.rs | 6 +++--- .../physical-expr/src/equivalence/ordering.rs | 14 ++++++++------ .../physical-expr/src/equivalence/properties.rs | 14 +++++++------- datafusion/physical-plan/src/memory.rs | 4 +++- datafusion/physical-plan/src/union.rs | 4 ++-- 9 files changed, 34 insertions(+), 30 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/sanity_checker.rs b/datafusion/core/src/physical_optimizer/sanity_checker.rs index f4b0f7c6069b..8e8787aec96b 100644 --- a/datafusion/core/src/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/src/physical_optimizer/sanity_checker.rs @@ -144,7 +144,7 @@ pub fn check_plan_sanity( plan_str, format_physical_sort_requirement_list(&sort_req), idx, - child_eq_props.oeq_class + child_eq_props.oeq_class() ); } } diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 525baadd14a5..ecf267185bae 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -68,8 +68,8 @@ fn test_ordering_satisfy_with_equivalence_random() -> Result<()> { table_data_with_properties.clone(), )?; let err_msg = format!( - "Error in test case requirement:{:?}, expected: {:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}", - requirement, expected, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants + "Error in test case requirement:{:?}, expected: {:?}, eq_properties {}", + requirement, expected, eq_properties ); // Check whether ordering_satisfy API result and // experimental result matches. @@ -141,8 +141,8 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { table_data_with_properties.clone(), )?; let err_msg = format!( - "Error in test case requirement:{:?}, expected: {:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}", - requirement, expected, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants + "Error in test case requirement:{:?}, expected: {:?}, eq_properties: {}", + requirement, expected, eq_properties, ); // Check whether ordering_satisfy API result and // experimental result matches. diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index 3df3e0348e42..f71df50fce2f 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -82,8 +82,8 @@ fn project_orderings_random() -> Result<()> { // Make sure each ordering after projection is valid. for ordering in projected_eq.oeq_class().iter() { let err_msg = format!( - "Error in test case ordering:{:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}, proj_exprs: {:?}", - ordering, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants, proj_exprs + "Error in test case ordering:{:?}, eq_properties {}, proj_exprs: {:?}", + ordering, eq_properties, proj_exprs, ); // Since ordered section satisfies schema, we expect // that result will be same after sort (e.g sort was unnecessary). @@ -179,8 +179,8 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { projected_batch.clone(), )?; let err_msg = format!( - "Error in test case requirement:{:?}, expected: {:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}, projected_eq.oeq_class: {:?}, projected_eq.eq_group: {:?}, projected_eq.constants: {:?}, projection_mapping: {:?}", - requirement, expected, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants, projected_eq.oeq_class, projected_eq.eq_group, projected_eq.constants, projection_mapping + "Error in test case requirement:{:?}, expected: {:?}, eq_properties: {}, projected_eq: {}, projection_mapping: {:?}", + requirement, expected, eq_properties, projected_eq, projection_mapping ); // Check whether ordering_satisfy API result and // experimental result matches. diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index 82586bd79eda..fc21c620a711 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -83,8 +83,8 @@ fn test_find_longest_permutation_random() -> Result<()> { ); let err_msg = format!( - "Error in test case ordering:{:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}", - ordering, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants + "Error in test case ordering:{:?}, eq_properties: {}", + ordering, eq_properties ); assert_eq!(ordering.len(), indices.len(), "{}", err_msg); // Since ordered section satisfies schema, we expect diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index e18dab35fc91..f8d0ea8200c3 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -373,7 +373,7 @@ pub fn generate_table_for_eq_properties( }; // Fill constant columns - for constant in &eq_properties.constants { + for constant in eq_properties.constants() { let col = constant.expr().as_any().downcast_ref::().unwrap(); let (idx, _field) = schema.column_with_name(col.name()).unwrap(); let arr = @@ -382,7 +382,7 @@ pub fn generate_table_for_eq_properties( } // Fill columns based on ordering equivalences - for ordering in eq_properties.oeq_class.iter() { + for ordering in eq_properties.oeq_class().iter() { let (sort_columns, indices): (Vec<_>, Vec<_>) = ordering .iter() .map(|PhysicalSortExpr { expr, options }| { @@ -406,7 +406,7 @@ pub fn generate_table_for_eq_properties( } // Fill columns based on equivalence groups - for eq_group in eq_properties.eq_group.iter() { + for eq_group in eq_properties.eq_group().iter() { let representative_array = get_representative_arr(eq_group, &schema_vec, Arc::clone(schema)) .unwrap_or_else(|| generate_random_array(n_elem, n_distinct)); diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index 24e2fc7dbaf5..0ae5f4af8f08 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -291,15 +291,17 @@ mod tests { }, ]); // finer ordering satisfies, crude ordering should return true - let mut eq_properties_finer = - EquivalenceProperties::new(Arc::clone(&input_schema)); - eq_properties_finer.oeq_class.push(finer.clone()); + let eq_properties_finer = EquivalenceProperties::new_with_orderings( + Arc::clone(&input_schema), + &[finer.clone()], + ); assert!(eq_properties_finer.ordering_satisfy(crude.as_ref())); // Crude ordering doesn't satisfy finer ordering. should return false - let mut eq_properties_crude = - EquivalenceProperties::new(Arc::clone(&input_schema)); - eq_properties_crude.oeq_class.push(crude); + let eq_properties_crude = EquivalenceProperties::new_with_orderings( + Arc::clone(&input_schema), + &[crude.clone()], + ); assert!(!eq_properties_crude.ordering_satisfy(finer.as_ref())); Ok(()) } diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index c3d458103285..d2eeccda2cae 100755 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -124,15 +124,15 @@ use itertools::Itertools; /// ``` #[derive(Debug, Clone)] pub struct EquivalenceProperties { - /// Collection of equivalence classes that store expressions with the same - /// value. - pub eq_group: EquivalenceGroup, - /// Equivalent sort expressions for this table. - pub oeq_class: OrderingEquivalenceClass, - /// Expressions whose values are constant throughout the table. + /// Distinct equivalence classes (exprs known to have the same expressions) + eq_group: EquivalenceGroup, + /// Equivalent sort expressions + oeq_class: OrderingEquivalenceClass, + /// Expressions whose values are constant + /// /// TODO: We do not need to track constants separately, they can be tracked /// inside `eq_groups` as `Literal` expressions. - pub constants: Vec, + constants: Vec, /// Schema associated with this object. schema: SchemaRef, } diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 521008ce9b02..c61a1f0ae5da 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -260,7 +260,9 @@ impl MemoryExec { ProjectionMapping::try_new(&proj_exprs, &self.original_schema())?; sort_information = base_eqp .project(&projection_mapping, self.schema()) - .oeq_class + .oeq_class() + // TODO add a take / into to avoid the clone + .clone() .orderings; } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 6e768a3d87bc..cfa919425c54 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -843,9 +843,9 @@ mod tests { ) { // Check whether orderings are same. let lhs_orderings = lhs.oeq_class(); - let rhs_orderings = &rhs.oeq_class.orderings; + let rhs_orderings = rhs.oeq_class(); assert_eq!(lhs_orderings.len(), rhs_orderings.len(), "{}", err_msg); - for rhs_ordering in rhs_orderings { + for rhs_ordering in rhs_orderings.iter() { assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg); } }