From 0ca7f55ebbe127d598e7fe7845f73b3b3bb3202e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Wed, 31 Jan 2024 15:25:40 -0500 Subject: [PATCH] Add benchmarks for to_timestamp and make_date functions (#9086) * Make date benchmark. * Make date benchmark update, added to_timestamp benchmark test. * cargo tomlfmt --- datafusion/physical-expr/Cargo.toml | 8 ++ datafusion/physical-expr/benches/make_date.rs | 115 ++++++++++++++++ .../physical-expr/benches/to_timestamp.rs | 125 ++++++++++++++++++ 3 files changed, 248 insertions(+) create mode 100644 datafusion/physical-expr/benches/make_date.rs create mode 100644 datafusion/physical-expr/benches/to_timestamp.rs diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 61eba042f939..dc3ecdb14fb5 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -79,3 +79,11 @@ rstest = { workspace = true } [[bench]] harness = false name = "in_list" + +[[bench]] +harness = false +name = "make_date" + +[[bench]] +harness = false +name = "to_timestamp" diff --git a/datafusion/physical-expr/benches/make_date.rs b/datafusion/physical-expr/benches/make_date.rs new file mode 100644 index 000000000000..819d9539f2ce --- /dev/null +++ b/datafusion/physical-expr/benches/make_date.rs @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use std::sync::Arc;
+
+use arrow_array::{ArrayRef, Int32Array};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::rngs::ThreadRng;
+use rand::Rng;
+
+use datafusion_common::ScalarValue;
+use datafusion_expr::ColumnarValue;
+use datafusion_physical_expr::datetime_expressions::make_date;
+
+/// Builds an `Int32Array` of 1000 random values, each drawn from `rng` within
+/// `range`.
+fn random_int32_array(rng: &mut ThreadRng, range: std::ops::Range<i32>) -> Int32Array {
+    let values: Vec<i32> = (0..1000).map(|_| rng.gen_range(range.clone())).collect();
+    Int32Array::from(values)
+}
+
+/// Returns 1000 random years in `1900..2050`.
+fn years(rng: &mut ThreadRng) -> Int32Array {
+    random_int32_array(rng, 1900..2050)
+}
+
+/// Returns 1000 random months in `1..13`.
+fn months(rng: &mut ThreadRng) -> Int32Array {
+    random_int32_array(rng, 1..13)
+}
+
+/// Returns 1000 random days in `1..29` (1 through 28, valid in every month).
+fn days(rng: &mut ThreadRng) -> Int32Array {
+    random_int32_array(rng, 1..29)
+}
+
+/// Benchmarks `make_date` with four mixes of array and scalar arguments.
+///
+/// Each argument slice is built once, outside `b.iter`, so the timed closure
+/// calls `make_date` without cloning its inputs on every iteration.
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("make_date_col_col_col_1000", |b| {
+        let mut rng = rand::thread_rng();
+        let args = [
+            ColumnarValue::Array(Arc::new(years(&mut rng)) as ArrayRef),
+            ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef),
+            ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef),
+        ];
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("make_date_scalar_col_col_1000", |b| {
+        let mut rng = rand::thread_rng();
+        let args = [
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))),
+            ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef),
+            ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef),
+        ];
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("make_date_scalar_scalar_col_1000", |b| {
+        let mut rng = rand::thread_rng();
+        let args = [
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(11))),
+            ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef),
+        ];
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("make_date_scalar_scalar_scalar", |b| {
+        let args = [
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(11))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(26))),
+        ];
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/physical-expr/benches/to_timestamp.rs b/datafusion/physical-expr/benches/to_timestamp.rs
new file mode 100644
index 000000000000..1934f69ef11c
--- /dev/null
+++ b/datafusion/physical-expr/benches/to_timestamp.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use std::sync::Arc;
+
+use arrow_array::builder::StringBuilder;
+use arrow_array::ArrayRef;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use datafusion_expr::ColumnarValue;
+use datafusion_physical_expr::datetime_expressions::to_timestamp;
+
+/// Benchmarks `to_timestamp` on ten string rows, without formats and with three
+/// format arrays. Argument slices are built once, outside `b.iter`, so the
+/// timed closure does not clone its inputs on every iteration.
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("to_timestamp_no_formats", |b| {
+        let mut inputs = StringBuilder::new();
+        inputs.append_value("1997-01-31T09:26:56.123Z");
+        inputs.append_value("1997-01-31T09:26:56.123-05:00");
+        inputs.append_value("1997-01-31 09:26:56.123-05:00");
+        inputs.append_value("2023-01-01 04:05:06.789 -08");
+        inputs.append_value("1997-01-31T09:26:56.123");
+        inputs.append_value("1997-01-31 09:26:56.123");
+        inputs.append_value("1997-01-31 09:26:56");
+        inputs.append_value("1997-01-31 13:26:56");
+        inputs.append_value("1997-01-31 13:26:56+04:00");
+        inputs.append_value("1997-01-31");
+        let args = [ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef)];
+
+        b.iter(|| {
+            black_box(
+                to_timestamp(&args).expect("to_timestamp should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("to_timestamp_with_formats", |b| {
+        let mut inputs = StringBuilder::new();
+        let mut format1_builder = StringBuilder::with_capacity(2, 10);
+        let mut format2_builder = StringBuilder::with_capacity(2, 10);
+        let mut format3_builder = StringBuilder::with_capacity(2, 10);
+
+        inputs.append_value("1997-01-31T09:26:56.123Z");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%Z");
+
+        inputs.append_value("1997-01-31T09:26:56.123-05:00");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%z");
+
+        inputs.append_value("1997-01-31 09:26:56.123-05:00");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f%Z");
+
+        inputs.append_value("2023-01-01 04:05:06.789 -08");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f %#z");
+
+        inputs.append_value("1997-01-31T09:26:56.123");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f");
+
+        inputs.append_value("1997-01-31 09:26:56.123");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f");
+
+        inputs.append_value("1997-01-31 09:26:56");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d %H:%M:%S");
+
+        inputs.append_value("1997-01-31 092656");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d %H%M%S");
+
+        inputs.append_value("1997-01-31 092656+04:00");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d %H%M%S%:z");
+
+        inputs.append_value("Sun Jul 8 00:34:60 2001");
+        format1_builder.append_value("%+");
+        format2_builder.append_value("%c");
+        format3_builder.append_value("%Y-%m-%d 00:00:00");
+
+        let args = [
+            ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef),
+            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
+            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
+            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
+        ];
+        b.iter(|| {
+            black_box(
+                to_timestamp(&args).expect("to_timestamp should work on valid values"),
+            )
+        })
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);