Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce HashMap and HashSet type aliases #13236

Merged
merged 6 commits into from
Nov 5, 2024
3 changes: 1 addition & 2 deletions datafusion/common/src/functional_dependencies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@
//! FunctionalDependencies keeps track of functional dependencies
//! inside DFSchema.
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
use std::ops::Deref;
use std::vec::IntoIter;

use crate::utils::{merge_and_order_indices, set_difference};
use crate::{DFSchema, JoinType};
use crate::{DFSchema, HashSet, JoinType};

/// This object defines a constraint on a table.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
Expand Down
5 changes: 5 additions & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub use functional_dependencies::{
get_target_functional_dependencies, Constraint, Constraints, Dependency,
FunctionalDependence, FunctionalDependencies,
};
use hashbrown::hash_map::DefaultHashBuilder;
pub use join_type::{JoinConstraint, JoinSide, JoinType};
pub use param_value::ParamValues;
pub use scalar::{ScalarType, ScalarValue};
Expand All @@ -87,6 +88,10 @@ pub use error::{
_substrait_datafusion_err,
};

// The HashMap and HashSet implementations that should be used as the uniform defaults
/// Alias for [`hashbrown::HashMap`] whose hasher parameter `S` defaults to
/// hashbrown's `DefaultHashBuilder`; callers may still supply another `S`.
pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
/// Alias for [`hashbrown::HashSet`] whose hasher parameter `S` defaults to
/// hashbrown's `DefaultHashBuilder`; callers may still supply another `S`.
pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;

/// Downcast an Arrow Array to a concrete type, returning a `DataFusionError::Internal` if the cast is
/// not possible. In normal usage of DataFusion the downcast should always succeed.
///
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/src/bin/print_functions_docs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@
// under the License.

use datafusion::execution::SessionStateDefaults;
use datafusion_common::{not_impl_err, Result};
use datafusion_common::{not_impl_err, HashSet, Result};
use datafusion_expr::{
aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF,
DocSection, Documentation, ScalarUDF, WindowUDF,
};
use hashbrown::HashSet;
use itertools::Itertools;
use std::env::args;
use std::fmt::Write as _;
Expand Down
6 changes: 4 additions & 2 deletions datafusion/core/src/catalog_common/listing_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@
//! [`ListingSchemaProvider`]: [`SchemaProvider`] that scans ObjectStores for tables automatically
use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::path::Path;
use std::sync::{Arc, Mutex};

use crate::catalog::{SchemaProvider, TableProvider, TableProviderFactory};
use crate::execution::context::SessionState;

use datafusion_common::{Constraints, DFSchema, DataFusionError, TableReference};
use datafusion_common::{
Constraints, DFSchema, DataFusionError, HashMap, TableReference,
};
use datafusion_expr::CreateExternalTable;

use async_trait::async_trait;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ use datafusion_physical_plan::metrics::MetricsSet;

use async_trait::async_trait;
use bytes::Bytes;
use hashbrown::HashMap;
use datafusion_common::HashMap;
use log::debug;
use object_store::buffered::BufWriter;
use parquet::arrow::arrow_writer::{
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/src/datasource/listing/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@

//! Helper functions for the table implementation
use std::collections::HashMap;
use std::mem;
use std::sync::Arc;

use super::ListingTableUrl;
use super::PartitionedFile;
use crate::execution::context::SessionState;
use datafusion_common::internal_err;
use datafusion_common::{Result, ScalarValue};
use datafusion_common::{HashMap, Result, ScalarValue};
use datafusion_expr::{BinaryExpr, Operator};

use arrow::{
Expand Down
4 changes: 1 addition & 3 deletions datafusion/core/src/physical_optimizer/sort_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
use datafusion_common::tree_node::{
ConcreteTreeNode, Transformed, TreeNode, TreeNodeRecursion,
};
use datafusion_common::{plan_err, JoinSide, Result};
use datafusion_common::{plan_err, HashSet, JoinSide, Result};
use datafusion_expr::JoinType;
use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr::utils::collect_columns;
Expand All @@ -41,8 +41,6 @@ use datafusion_physical_expr_common::sort_expr::{
LexOrdering, LexOrderingRef, LexRequirement,
};

use hashbrown::HashSet;

/// This is a "data class" we use within the [`EnforceSorting`] rule to push
/// down [`SortExec`] in the plan. In some cases, we can reduce the total
/// computational cost by pushing down `SortExec`s through some executors. The
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ use test_utils::{add_empty_batches, StringBatchGenerator};
use crate::fuzz_cases::aggregation_fuzzer::{
AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
};
use datafusion_common::HashMap;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tokio::task::JoinSet;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/window_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
use test_utils::add_empty_batches;

use datafusion::functions_window::row_number::row_number_udwf;
use datafusion_common::HashMap;
use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf};
use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf};
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;
use rand::distributions::Alphanumeric;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
// under the License.

use std::any::Any;
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::sync::Arc;

Expand All @@ -39,7 +38,8 @@ use datafusion_common::cast::{as_float64_array, as_int32_array};
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{
assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err,
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, HashMap, Result,
ScalarValue,
};
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/execution/src/memory_pool/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
// under the License.

use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation};
use datafusion_common::HashMap;
use datafusion_common::{resources_datafusion_err, DataFusionError, Result};
use hashbrown::HashMap;
use log::debug;
use parking_lot::Mutex;
use std::{
Expand Down
3 changes: 1 addition & 2 deletions datafusion/expr/src/conditional_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
use crate::expr::Case;
use crate::{expr_schema::ExprSchemable, Expr};
use arrow::datatypes::DataType;
use datafusion_common::{plan_err, DFSchema, Result};
use std::collections::HashSet;
use datafusion_common::{plan_err, DFSchema, HashSet, Result};

/// Helper struct for building [Expr::Case]
pub struct CaseBuilder {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/execution_props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::var_provider::{VarProvider, VarType};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::alias::AliasGenerator;
use std::collections::HashMap;
use datafusion_common::HashMap;
use std::sync::Arc;

/// Holds per-query execution properties and data (such as statement
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Logical Expressions: [`Expr`]
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt::{self, Display, Formatter, Write};
use std::hash::{Hash, Hasher};
use std::mem;
Expand All @@ -39,7 +39,7 @@ use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
};
use datafusion_common::{
plan_err, Column, DFSchema, Result, ScalarValue, TableReference,
plan_err, Column, DFSchema, HashMap, Result, ScalarValue, TableReference,
};
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use sqlparser::ast::{
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
use crate::expr_rewriter::FunctionRewrite;
use crate::planner::ExprPlanner;
use crate::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
use std::collections::{HashMap, HashSet};
use datafusion_common::{not_impl_err, plan_datafusion_err, HashMap, Result};
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::Arc;

Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Expression utilities
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -36,7 +36,7 @@ use datafusion_common::tree_node::{
use datafusion_common::utils::get_at_indices;
use datafusion_common::{
internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef,
DataFusionError, Result, TableReference,
DataFusionError, HashMap, Result, TableReference,
};

use indexmap::IndexSet;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions-aggregate/src/median.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.

use std::collections::HashSet;
use std::fmt::{Debug, Formatter};
use std::mem::{size_of, size_of_val};
use std::sync::{Arc, OnceLock};
Expand All @@ -33,7 +32,7 @@ use arrow::array::Array;
use arrow::array::ArrowNativeTypeOp;
use arrow::datatypes::ArrowNativeType;

use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_common::{DataFusionError, HashSet, Result, ScalarValue};
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
use datafusion_expr::function::StateFieldsArgs;
use datafusion_expr::{
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions-aggregate/src/regr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ use arrow::{
datatypes::DataType,
datatypes::Field,
};
use datafusion_common::{downcast_value, plan_err, unwrap_or_internal_err, ScalarValue};
use datafusion_common::{DataFusionError, Result};
use datafusion_common::{
downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, HashMap, Result,
ScalarValue,
};
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::type_coercion::aggregates::NUMERICS;
Expand All @@ -34,7 +36,6 @@ use datafusion_expr::{
Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
};
use std::any::Any;
use std::collections::HashMap;
use std::fmt::Debug;
use std::mem::size_of_val;
use std::sync::OnceLock;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions-nested/src/except.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@ use arrow_array::cast::AsArray;
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, FieldRef};
use datafusion_common::{exec_err, internal_err, Result};
use datafusion_common::{exec_err, internal_err, HashSet, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::collections::HashSet;
use std::sync::{Arc, OnceLock};

make_udf_expr_and_func!(
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-nested/src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::collections::{HashSet, VecDeque};
use std::collections::VecDeque;
use std::sync::{Arc, OnceLock};

use arrow::array::ArrayData;
Expand All @@ -25,7 +25,7 @@ use arrow_buffer::{Buffer, ToByteSlice};
use arrow_schema::{DataType, Field, SchemaBuilder};

use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_common::{exec_err, HashSet, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP;
use datafusion_expr::{
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/core/named_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@

use arrow::array::StructArray;
use arrow::datatypes::{DataType, Field, Fields};
use datafusion_common::{exec_err, internal_err, Result, ScalarValue};
use datafusion_common::{exec_err, internal_err, HashSet, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRUCT;
use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use hashbrown::HashSet;
use std::any::Any;
use std::sync::{Arc, OnceLock};

Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/unicode/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait,
};
use arrow::datatypes::DataType;
use hashbrown::HashMap;
use datafusion_common::HashMap;
use unicode_segmentation::UnicodeSegmentation;

use crate::utils::{make_scalar_function, utf8_to_str_type};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/decorrelate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! [`PullUpCorrelatedExpr`] converts correlated subqueries to `Joins`
use std::collections::{BTreeSet, HashMap};
use std::collections::BTreeSet;
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -27,7 +27,7 @@ use crate::utils::collect_subquery_cols;
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
};
use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue};
use datafusion_common::{plan_err, Column, DFSchemaRef, HashMap, Result, ScalarValue};
use datafusion_expr::expr::Alias;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/optimize_projections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@

mod required_indices;

use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::sync::Arc;

use crate::optimizer::ApplyOrder;
use crate::{OptimizerConfig, OptimizerRule};

use datafusion_common::{
get_required_group_by_exprs_indices, internal_datafusion_err, internal_err, Column,
JoinType, Result,
HashMap, JoinType, Result,
};
use datafusion_expr::expr::Alias;
use datafusion_expr::Unnest;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

//! [`Optimizer`] and [`OptimizerRule`]
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::Arc;

Expand All @@ -29,7 +28,7 @@ use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::ConfigOptions;
use datafusion_common::instant::Instant;
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
use datafusion_common::{internal_err, DFSchema, DataFusionError, HashSet, Result};
use datafusion_expr::logical_plan::LogicalPlan;

use crate::common_subexpr_eliminate::CommonSubexprEliminate;
Expand Down
6 changes: 3 additions & 3 deletions datafusion/optimizer/src/single_distinct_to_groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ use std::sync::Arc;
use crate::optimizer::ApplyOrder;
use crate::{OptimizerConfig, OptimizerRule};

use datafusion_common::{internal_err, tree_node::Transformed, DataFusionError, Result};
use datafusion_common::{
internal_err, tree_node::Transformed, DataFusionError, HashSet, Result,
};
use datafusion_expr::builder::project;
use datafusion_expr::{
col,
Expand All @@ -31,8 +33,6 @@ use datafusion_expr::{
Expr,
};

use hashbrown::HashSet;

/// single distinct to group by optimizer rule
/// ```text
/// Before:
Expand Down
Loading
Loading