Skip to content

Commit

Permalink
Introduce HashMap and HashSet type aliases (#13236)
Browse files Browse the repository at this point in the history
* Unite all references to hashbrown::HashMap by using a common type definition

* Replace some use of std::collections::HashMap with hashbrown::HashMap

* Replace some use of std::collections::HashMap with hashbrown::HashMap

* Replace some use of std::collections::HashMap with hashbrown::HashMap

* Unite all references to hashbrown::HashSet by using a common type definition

* Replace some use of std::collections::HashSet with hashbrown::HashSet
  • Loading branch information
drauschenbach authored Nov 5, 2024
1 parent eeb9d58 commit 2e52580
Show file tree
Hide file tree
Showing 41 changed files with 67 additions and 79 deletions.
3 changes: 1 addition & 2 deletions datafusion/common/src/functional_dependencies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@
//! FunctionalDependencies keeps track of functional dependencies
//! inside DFSchema.
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
use std::ops::Deref;
use std::vec::IntoIter;

use crate::utils::{merge_and_order_indices, set_difference};
use crate::{DFSchema, JoinType};
use crate::{DFSchema, HashSet, JoinType};

/// This object defines a constraint on a table.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
Expand Down
5 changes: 5 additions & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub use functional_dependencies::{
get_target_functional_dependencies, Constraint, Constraints, Dependency,
FunctionalDependence, FunctionalDependencies,
};
use hashbrown::hash_map::DefaultHashBuilder;
pub use join_type::{JoinConstraint, JoinSide, JoinType};
pub use param_value::ParamValues;
pub use scalar::{ScalarType, ScalarValue};
Expand All @@ -87,6 +88,10 @@ pub use error::{
_substrait_datafusion_err,
};

// The HashMap and HashSet implementations that should be used as the uniform defaults

/// Uniform default hash map: an alias for `hashbrown::HashMap`.
///
/// The hasher parameter `S` defaults to hashbrown's `DefaultHashBuilder`, so
/// `HashMap<K, V>` can be written without naming a hasher, while a custom
/// hasher can still be supplied as the third type argument.
pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
/// Uniform default hash set: an alias for `hashbrown::HashSet`.
///
/// The hasher parameter `S` defaults to hashbrown's `DefaultHashBuilder`, so
/// `HashSet<T>` can be written without naming a hasher, while a custom
/// hasher can still be supplied as the second type argument.
pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;

/// Downcast an Arrow Array to a concrete type, returning a `DataFusionError::Internal` if the cast is
/// not possible. In normal usage of DataFusion the downcast should always succeed.
///
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/src/bin/print_functions_docs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@
// under the License.

use datafusion::execution::SessionStateDefaults;
use datafusion_common::{not_impl_err, Result};
use datafusion_common::{not_impl_err, HashSet, Result};
use datafusion_expr::{
aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF,
DocSection, Documentation, ScalarUDF, WindowUDF,
};
use hashbrown::HashSet;
use itertools::Itertools;
use std::env::args;
use std::fmt::Write as _;
Expand Down
6 changes: 4 additions & 2 deletions datafusion/core/src/catalog_common/listing_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@
//! [`ListingSchemaProvider`]: [`SchemaProvider`] that scans ObjectStores for tables automatically
use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::path::Path;
use std::sync::{Arc, Mutex};

use crate::catalog::{SchemaProvider, TableProvider, TableProviderFactory};
use crate::execution::context::SessionState;

use datafusion_common::{Constraints, DFSchema, DataFusionError, TableReference};
use datafusion_common::{
Constraints, DFSchema, DataFusionError, HashMap, TableReference,
};
use datafusion_expr::CreateExternalTable;

use async_trait::async_trait;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ use datafusion_physical_plan::metrics::MetricsSet;

use async_trait::async_trait;
use bytes::Bytes;
use hashbrown::HashMap;
use datafusion_common::HashMap;
use log::debug;
use object_store::buffered::BufWriter;
use parquet::arrow::arrow_writer::{
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/src/datasource/listing/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@

//! Helper functions for the table implementation
use std::collections::HashMap;
use std::mem;
use std::sync::Arc;

use super::ListingTableUrl;
use super::PartitionedFile;
use crate::execution::context::SessionState;
use datafusion_common::internal_err;
use datafusion_common::{Result, ScalarValue};
use datafusion_common::{HashMap, Result, ScalarValue};
use datafusion_expr::{BinaryExpr, Operator};

use arrow::{
Expand Down
4 changes: 1 addition & 3 deletions datafusion/core/src/physical_optimizer/sort_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
use datafusion_common::tree_node::{
ConcreteTreeNode, Transformed, TreeNode, TreeNodeRecursion,
};
use datafusion_common::{plan_err, JoinSide, Result};
use datafusion_common::{plan_err, HashSet, JoinSide, Result};
use datafusion_expr::JoinType;
use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr::utils::collect_columns;
Expand All @@ -41,8 +41,6 @@ use datafusion_physical_expr_common::sort_expr::{
LexOrdering, LexOrderingRef, LexRequirement,
};

use hashbrown::HashSet;

/// This is a "data class" we use within the [`EnforceSorting`] rule to push
/// down [`SortExec`] in the plan. In some cases, we can reduce the total
/// computational cost by pushing down `SortExec`s through some executors. The
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ use test_utils::{add_empty_batches, StringBatchGenerator};
use crate::fuzz_cases::aggregation_fuzzer::{
AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
};
use datafusion_common::HashMap;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tokio::task::JoinSet;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/window_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
use test_utils::add_empty_batches;

use datafusion::functions_window::row_number::row_number_udwf;
use datafusion_common::HashMap;
use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf};
use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf};
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;
use rand::distributions::Alphanumeric;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
// under the License.

use std::any::Any;
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::sync::Arc;

Expand All @@ -39,7 +38,8 @@ use datafusion_common::cast::{as_float64_array, as_int32_array};
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{
assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err,
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, HashMap, Result,
ScalarValue,
};
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/execution/src/memory_pool/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
// under the License.

use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation};
use datafusion_common::HashMap;
use datafusion_common::{resources_datafusion_err, DataFusionError, Result};
use hashbrown::HashMap;
use log::debug;
use parking_lot::Mutex;
use std::{
Expand Down
3 changes: 1 addition & 2 deletions datafusion/expr/src/conditional_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
use crate::expr::Case;
use crate::{expr_schema::ExprSchemable, Expr};
use arrow::datatypes::DataType;
use datafusion_common::{plan_err, DFSchema, Result};
use std::collections::HashSet;
use datafusion_common::{plan_err, DFSchema, HashSet, Result};

/// Helper struct for building [Expr::Case]
pub struct CaseBuilder {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/execution_props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::var_provider::{VarProvider, VarType};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::alias::AliasGenerator;
use std::collections::HashMap;
use datafusion_common::HashMap;
use std::sync::Arc;

/// Holds per-query execution properties and data (such as statement
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Logical Expressions: [`Expr`]
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt::{self, Display, Formatter, Write};
use std::hash::{Hash, Hasher};
use std::mem;
Expand All @@ -39,7 +39,7 @@ use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
};
use datafusion_common::{
plan_err, Column, DFSchema, Result, ScalarValue, TableReference,
plan_err, Column, DFSchema, HashMap, Result, ScalarValue, TableReference,
};
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use sqlparser::ast::{
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
use crate::expr_rewriter::FunctionRewrite;
use crate::planner::ExprPlanner;
use crate::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
use std::collections::{HashMap, HashSet};
use datafusion_common::{not_impl_err, plan_datafusion_err, HashMap, Result};
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::Arc;

Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Expression utilities
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -36,7 +36,7 @@ use datafusion_common::tree_node::{
use datafusion_common::utils::get_at_indices;
use datafusion_common::{
internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef,
DataFusionError, Result, TableReference,
DataFusionError, HashMap, Result, TableReference,
};

use indexmap::IndexSet;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions-aggregate/src/median.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.

use std::collections::HashSet;
use std::fmt::{Debug, Formatter};
use std::mem::{size_of, size_of_val};
use std::sync::{Arc, OnceLock};
Expand All @@ -33,7 +32,7 @@ use arrow::array::Array;
use arrow::array::ArrowNativeTypeOp;
use arrow::datatypes::ArrowNativeType;

use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_common::{DataFusionError, HashSet, Result, ScalarValue};
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
use datafusion_expr::function::StateFieldsArgs;
use datafusion_expr::{
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions-aggregate/src/regr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ use arrow::{
datatypes::DataType,
datatypes::Field,
};
use datafusion_common::{downcast_value, plan_err, unwrap_or_internal_err, ScalarValue};
use datafusion_common::{DataFusionError, Result};
use datafusion_common::{
downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, HashMap, Result,
ScalarValue,
};
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::type_coercion::aggregates::NUMERICS;
Expand All @@ -34,7 +36,6 @@ use datafusion_expr::{
Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
};
use std::any::Any;
use std::collections::HashMap;
use std::fmt::Debug;
use std::mem::size_of_val;
use std::sync::OnceLock;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions-nested/src/except.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@ use arrow_array::cast::AsArray;
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, FieldRef};
use datafusion_common::{exec_err, internal_err, Result};
use datafusion_common::{exec_err, internal_err, HashSet, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::collections::HashSet;
use std::sync::{Arc, OnceLock};

make_udf_expr_and_func!(
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-nested/src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::collections::{HashSet, VecDeque};
use std::collections::VecDeque;
use std::sync::{Arc, OnceLock};

use arrow::array::ArrayData;
Expand All @@ -25,7 +25,7 @@ use arrow_buffer::{Buffer, ToByteSlice};
use arrow_schema::{DataType, Field, SchemaBuilder};

use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_common::{exec_err, HashSet, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP;
use datafusion_expr::{
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/core/named_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@

use arrow::array::StructArray;
use arrow::datatypes::{DataType, Field, Fields};
use datafusion_common::{exec_err, internal_err, Result, ScalarValue};
use datafusion_common::{exec_err, internal_err, HashSet, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRUCT;
use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use hashbrown::HashSet;
use std::any::Any;
use std::sync::{Arc, OnceLock};

Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/unicode/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait,
};
use arrow::datatypes::DataType;
use hashbrown::HashMap;
use datafusion_common::HashMap;
use unicode_segmentation::UnicodeSegmentation;

use crate::utils::{make_scalar_function, utf8_to_str_type};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/decorrelate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! [`PullUpCorrelatedExpr`] converts correlated subqueries to `Joins`
use std::collections::{BTreeSet, HashMap};
use std::collections::BTreeSet;
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -27,7 +27,7 @@ use crate::utils::collect_subquery_cols;
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
};
use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue};
use datafusion_common::{plan_err, Column, DFSchemaRef, HashMap, Result, ScalarValue};
use datafusion_expr::expr::Alias;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/optimize_projections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
mod required_indices;

use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::sync::Arc;

use crate::optimizer::ApplyOrder;
use crate::{OptimizerConfig, OptimizerRule};

use datafusion_common::{
get_required_group_by_exprs_indices, internal_datafusion_err, internal_err, Column,
JoinType, Result,
HashMap, JoinType, Result,
};
use datafusion_expr::expr::Alias;
use datafusion_expr::Unnest;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

//! [`Optimizer`] and [`OptimizerRule`]
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::Arc;

Expand All @@ -29,7 +28,7 @@ use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::ConfigOptions;
use datafusion_common::instant::Instant;
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
use datafusion_common::{internal_err, DFSchema, DataFusionError, HashSet, Result};
use datafusion_expr::logical_plan::LogicalPlan;

use crate::common_subexpr_eliminate::CommonSubexprEliminate;
Expand Down
6 changes: 3 additions & 3 deletions datafusion/optimizer/src/single_distinct_to_groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ use std::sync::Arc;
use crate::optimizer::ApplyOrder;
use crate::{OptimizerConfig, OptimizerRule};

use datafusion_common::{internal_err, tree_node::Transformed, DataFusionError, Result};
use datafusion_common::{
internal_err, tree_node::Transformed, DataFusionError, HashSet, Result,
};
use datafusion_expr::builder::project;
use datafusion_expr::{
col,
Expand All @@ -31,8 +33,6 @@ use datafusion_expr::{
Expr,
};

use hashbrown::HashSet;

/// single distinct to group by optimizer rule
/// ```text
/// Before:
Expand Down
Loading

0 comments on commit 2e52580

Please sign in to comment.