Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce HashMap and HashSet type aliases #13236

Merged
merged 6 commits into from
Nov 5, 2024
4 changes: 4 additions & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub use functional_dependencies::{
get_target_functional_dependencies, Constraint, Constraints, Dependency,
FunctionalDependence, FunctionalDependencies,
};
use hashbrown::hash_map::DefaultHashBuilder;
pub use join_type::{JoinConstraint, JoinSide, JoinType};
pub use param_value::ParamValues;
pub use scalar::{ScalarType, ScalarValue};
Expand All @@ -87,6 +88,9 @@ pub use error::{
_substrait_datafusion_err,
};

// The HashMap implementation that should be the uniform default
pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;

/// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is
/// not possible. In normal usage of DataFusion the downcast should always succeed.
///
Expand Down
6 changes: 4 additions & 2 deletions datafusion/core/src/catalog_common/listing_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@
//! [`ListingSchemaProvider`]: [`SchemaProvider`] that scans ObjectStores for tables automatically

use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::path::Path;
use std::sync::{Arc, Mutex};

use crate::catalog::{SchemaProvider, TableProvider, TableProviderFactory};
use crate::execution::context::SessionState;

use datafusion_common::{Constraints, DFSchema, DataFusionError, TableReference};
use datafusion_common::{
Constraints, DFSchema, DataFusionError, HashMap, TableReference,
};
use datafusion_expr::CreateExternalTable;

use async_trait::async_trait;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ use datafusion_physical_plan::metrics::MetricsSet;

use async_trait::async_trait;
use bytes::Bytes;
use hashbrown::HashMap;
use datafusion_common::HashMap;
use log::debug;
use object_store::buffered::BufWriter;
use parquet::arrow::arrow_writer::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ use test_utils::{add_empty_batches, StringBatchGenerator};
use crate::fuzz_cases::aggregation_fuzzer::{
AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
};
use datafusion_common::HashMap;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tokio::task::JoinSet;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/window_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
use test_utils::add_empty_batches;

use datafusion::functions_window::row_number::row_number_udwf;
use datafusion_common::HashMap;
use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf};
use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf};
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;
use rand::distributions::Alphanumeric;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
// under the License.

use std::any::Any;
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::sync::Arc;

Expand All @@ -39,7 +38,8 @@ use datafusion_common::cast::{as_float64_array, as_int32_array};
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{
assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err,
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, HashMap, Result,
ScalarValue,
};
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/execution/src/memory_pool/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
// under the License.

use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation};
use datafusion_common::HashMap;
use datafusion_common::{resources_datafusion_err, DataFusionError, Result};
use hashbrown::HashMap;
use log::debug;
use parking_lot::Mutex;
use std::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/execution_props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::var_provider::{VarProvider, VarType};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::alias::AliasGenerator;
use std::collections::HashMap;
use datafusion_common::HashMap;
use std::sync::Arc;

/// Holds per-query execution properties and data (such as statement
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
use crate::expr_rewriter::FunctionRewrite;
use crate::planner::ExprPlanner;
use crate::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
use std::collections::{HashMap, HashSet};
use datafusion_common::{not_impl_err, plan_datafusion_err, HashMap, Result};
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::Arc;

Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Expression utilities
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -36,7 +36,7 @@ use datafusion_common::tree_node::{
use datafusion_common::utils::get_at_indices;
use datafusion_common::{
internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef,
DataFusionError, Result, TableReference,
DataFusionError, HashMap, Result, TableReference,
};

use indexmap::IndexSet;
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions-aggregate/src/regr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ use arrow::{
datatypes::DataType,
datatypes::Field,
};
use datafusion_common::{downcast_value, plan_err, unwrap_or_internal_err, ScalarValue};
use datafusion_common::{DataFusionError, Result};
use datafusion_common::{
downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, HashMap, Result,
ScalarValue,
};
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::type_coercion::aggregates::NUMERICS;
Expand All @@ -34,7 +36,6 @@ use datafusion_expr::{
Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
};
use std::any::Any;
use std::collections::HashMap;
use std::fmt::Debug;
use std::mem::size_of_val;
use std::sync::OnceLock;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/unicode/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait,
};
use arrow::datatypes::DataType;
use hashbrown::HashMap;
use datafusion_common::HashMap;
use unicode_segmentation::UnicodeSegmentation;

use crate::utils::{make_scalar_function, utf8_to_str_type};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/decorrelate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! [`PullUpCorrelatedExpr`] converts correlated subqueries to `Joins`

use std::collections::{BTreeSet, HashMap};
use std::collections::BTreeSet;
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -27,7 +27,7 @@ use crate::utils::collect_subquery_cols;
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
};
use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue};
use datafusion_common::{plan_err, Column, DFSchemaRef, HashMap, Result, ScalarValue};
use datafusion_expr::expr::Alias;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr-common/src/binary_view_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ where
#[cfg(test)]
mod tests {
use arrow::array::{BinaryViewArray, GenericByteViewArray, StringViewArray};
use hashbrown::HashMap;
use datafusion_common::HashMap;

use super::*;

Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/expressions/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ use datafusion_expr::ColumnarValue;
use datafusion_physical_expr_common::datum::compare_with_eq;

use ahash::RandomState;
use datafusion_common::HashMap;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;

/// InList
pub struct InListExpr {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/physical-expr/src/utils/guarantee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@

use crate::utils::split_disjunction;
use crate::{split_conjunction, PhysicalExpr};
use datafusion_common::{Column, ScalarValue};
use datafusion_common::{Column, HashMap, ScalarValue};
use datafusion_expr::Operator;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt::{self, Display, Formatter};
use std::sync::Arc;

Expand Down
3 changes: 1 addition & 2 deletions datafusion/physical-expr/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ pub use guarantee::{Guarantee, LiteralGuarantee};
use hashbrown::HashSet;

use std::borrow::Borrow;
use std::collections::HashMap;
use std::sync::Arc;

use crate::expressions::{BinaryExpr, Column};
Expand All @@ -32,7 +31,7 @@ use arrow::datatypes::SchemaRef;
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
};
use datafusion_common::Result;
use datafusion_common::{HashMap, Result};
use datafusion_expr::Operator;

use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::{
sync::Arc,
};

use hashbrown::HashMap;
use datafusion_common::HashMap;

// public exports
pub use baseline::{BaselineMetrics, RecordOutput};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/repartition/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ use datafusion_execution::TaskContext;
use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr};

use crate::execution_plan::CardinalityEffect;
use datafusion_common::HashMap;
use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};
use futures::stream::Stream;
use futures::{FutureExt, StreamExt, TryStreamExt};
use hashbrown::HashMap;
use log::trace;
use parking_lot::Mutex;

Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/topk/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ use std::{cmp::Ordering, collections::BinaryHeap, sync::Arc};
use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream};
use arrow_array::{Array, ArrayRef, RecordBatch};
use arrow_schema::SchemaRef;
use datafusion_common::HashMap;
use datafusion_common::Result;
use datafusion_execution::{
memory_pool::{MemoryConsumer, MemoryReservation},
runtime_env::RuntimeEnv,
};
use datafusion_physical_expr::PhysicalSortExpr;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use hashbrown::HashMap;

use super::metrics::{BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder};

Expand Down
3 changes: 1 addition & 2 deletions datafusion/physical-plan/src/unnest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
//! Define a plan for unnesting values in columns that contain a list type.

use std::cmp::{self, Ordering};
use std::collections::HashMap;
use std::{any::Any, sync::Arc};

use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
Expand All @@ -40,7 +39,7 @@ use arrow::record_batch::RecordBatch;
use arrow_array::{Int64Array, Scalar, StructArray};
use arrow_ord::cmp::lt;
use datafusion_common::{
exec_datafusion_err, exec_err, internal_err, Result, UnnestOptions,
exec_datafusion_err, exec_err, internal_err, HashMap, Result, UnnestOptions,
};
use datafusion_execution::TaskContext;
use datafusion_physical_expr::EquivalenceProperties;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

use std::any::Any;
use std::cmp::{min, Ordering};
use std::collections::{HashMap, VecDeque};
use std::collections::VecDeque;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
Expand Down Expand Up @@ -51,7 +51,9 @@ use datafusion_common::stats::Precision;
use datafusion_common::utils::{
evaluate_partition_ranges, get_at_indices, get_row_at_idx,
};
use datafusion_common::{arrow_datafusion_err, exec_err, DataFusionError, Result};
use datafusion_common::{
arrow_datafusion_err, exec_err, DataFusionError, HashMap, Result,
};
use datafusion_execution::TaskContext;
use datafusion_expr::window_state::{PartitionBatchState, WindowAggState};
use datafusion_expr::ColumnarValue;
Expand Down
7 changes: 2 additions & 5 deletions datafusion/sql/src/unparser/rewrite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use std::{collections::HashSet, sync::Arc};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you could do the same to HashSet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


use arrow_schema::Schema;
use datafusion_common::{
tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
Column, Result, TableReference,
Column, HashMap, Result, TableReference,
};
use datafusion_expr::{expr::Alias, tree_node::transform_sort_vec};
use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr};
Expand Down
5 changes: 2 additions & 3 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

//! SQL Utility Functions

use std::collections::HashMap;
use std::vec;

use arrow_schema::{
Expand All @@ -27,8 +26,8 @@ use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
};
use datafusion_common::{
exec_err, internal_err, plan_err, Column, DFSchemaRef, DataFusionError, Result,
ScalarValue,
exec_err, internal_err, plan_err, Column, DFSchemaRef, DataFusionError, HashMap,
Result, ScalarValue,
};
use datafusion_expr::builder::get_struct_unnested_columns;
use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction};
Expand Down
3 changes: 1 addition & 2 deletions datafusion/substrait/src/extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use datafusion::common::{plan_err, DataFusionError};
use std::collections::HashMap;
use datafusion::common::{plan_err, DataFusionError, HashMap};
use substrait::proto::extensions::simple_extension_declaration::{
ExtensionFunction, ExtensionType, ExtensionTypeVariation, MappingType,
};
Expand Down
Loading