
merge main and resolve conflicts
JasonLi-cn committed Apr 12, 2024
2 parents fd04d4a + 118eecd commit f6f2ea2
Showing 193 changed files with 7,784 additions and 4,662 deletions.
3 changes: 0 additions & 3 deletions .github/actions/setup-rust-runtime/action.yaml
@@ -30,12 +30,9 @@ runs:
#
# Set debuginfo=line-tables-only as debuginfo=0 causes immensely slow build
# See for more details: https://github.com/rust-lang/rust/issues/119560
#
# set RUST_MIN_STACK to avoid rust stack overflows on tpc-ds tests
run: |
echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV
echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV
echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
echo "RUST_MIN_STACK=3000000" >> $GITHUB_ENV
echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV
8 changes: 6 additions & 2 deletions Cargo.toml
@@ -23,9 +23,11 @@ members = [
"datafusion/core",
"datafusion/expr",
"datafusion/execution",
"datafusion/functions-aggregate",
"datafusion/functions",
"datafusion/functions-array",
"datafusion/optimizer",
"datafusion/physical-expr-common",
"datafusion/physical-expr",
"datafusion/physical-plan",
"datafusion/proto",
@@ -48,7 +50,7 @@ homepage = "https://github.com/apache/arrow-datafusion"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.72"
rust-version = "1.73"
version = "37.0.0"

[workspace.dependencies]
@@ -77,9 +79,11 @@ datafusion-common-runtime = { path = "datafusion/common-runtime", version = "37.
datafusion-execution = { path = "datafusion/execution", version = "37.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "37.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "37.0.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "37.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "37.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "37.0.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "37.0.0", default-features = false }
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "37.0.0", default-features = false }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "37.0.0" }
datafusion-proto = { path = "datafusion/proto", version = "37.0.0" }
datafusion-sql = { path = "datafusion/sql", version = "37.0.0" }
@@ -97,7 +101,7 @@ object_store = { version = "0.9.1", default-features = false }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
rstest = "0.18.0"
rstest = "0.19.0"
serde_json = "1"
sqlparser = { version = "0.44.0", features = ["visitor"] }
tempfile = "3"
4 changes: 2 additions & 2 deletions benchmarks/queries/clickbench/README.md
@@ -63,7 +63,7 @@ LIMIT 10;
Here are some interesting statistics about the data used in the queries
Max length of `"SearchPhrase"` is 1113 characters
```sql
select min(length("SearchPhrase")) as "SearchPhrase_len_min", max(length("SearchPhrase")) "SearchPhrase_len_max" from 'hits.parquet' limit 10;
> select min(length("SearchPhrase")) as "SearchPhrase_len_min", max(length("SearchPhrase")) "SearchPhrase_len_max" from 'hits.parquet' limit 10;
+----------------------+----------------------+
| SearchPhrase_len_min | SearchPhrase_len_max |
+----------------------+----------------------+
@@ -74,7 +74,7 @@ Max length of `"SearchPhrase"` is 1113 characters

Here is the schema of the data
```sql
describe 'hits.parquet';
> describe 'hits.parquet';
+-----------------------+-----------+-------------+
| column_name | data_type | is_nullable |
+-----------------------+-----------+-------------+
30 changes: 28 additions & 2 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default.

3 changes: 1 addition & 2 deletions datafusion-cli/Cargo.toml
@@ -26,7 +26,7 @@ license = "Apache-2.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
# Specify MSRV here as `cargo msrv` doesn't support workspace version
rust-version = "1.72"
rust-version = "1.73"
readme = "README.md"

[dependencies]
@@ -45,7 +45,6 @@ datafusion = { path = "../datafusion/core", version = "37.0.0", features = [
"unicode_expressions",
"compression",
] }
datafusion-common = { path = "../datafusion/common" }
dirs = "4.0.0"
env_logger = "0.9"
futures = "0.3"
2 changes: 1 addition & 1 deletion datafusion-cli/Dockerfile
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM rust:1.72-bullseye as builder
FROM rust:1.73-bullseye as builder

COPY . /usr/src/arrow-datafusion
COPY ./datafusion /usr/src/arrow-datafusion/datafusion
2 changes: 1 addition & 1 deletion datafusion-cli/src/command.rs
@@ -26,9 +26,9 @@ use datafusion::arrow::array::{ArrayRef, StringArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::common::exec_err;
use datafusion::common::instant::Instant;
use datafusion::error::{DataFusionError, Result};
use datafusion::prelude::SessionContext;
use datafusion_common::instant::Instant;
use std::fs::File;
use std::io::BufReader;
use std::str::FromStr;
8 changes: 4 additions & 4 deletions datafusion-cli/src/exec.rs
@@ -41,7 +41,7 @@ use datafusion::prelude::SessionContext;
use datafusion::sql::parser::{DFParser, Statement};
use datafusion::sql::sqlparser::dialect::dialect_from_str;

use datafusion_common::FileType;
use datafusion::common::FileType;
use rustyline::error::ReadlineError;
use rustyline::Editor;
use tokio::signal;
@@ -131,7 +131,7 @@ pub async fn exec_from_repl(
rl.load_history(".history").ok();

loop {
match rl.readline(" ") {
match rl.readline("> ") {
Ok(line) if line.starts_with('\\') => {
rl.add_history_entry(line.trim_end())?;
let command = line.split_whitespace().collect::<Vec<_>>().join(" ");
@@ -350,8 +350,8 @@ pub(crate) async fn register_object_store_and_config_extensions(
mod tests {
use super::*;

use datafusion_common::config::FormatOptions;
use datafusion_common::plan_err;
use datafusion::common::config::FormatOptions;
use datafusion::common::plan_err;

use url::Url;

9 changes: 4 additions & 5 deletions datafusion-cli/src/helper.rs
@@ -20,7 +20,7 @@
use std::borrow::Cow;

use datafusion::common::sql_err;
use datafusion::common::sql_datafusion_err;
use datafusion::error::DataFusionError;
use datafusion::sql::parser::{DFParser, Statement};
use datafusion::sql::sqlparser::dialect::dialect_from_str;
@@ -189,10 +189,9 @@ pub fn unescape_input(input: &str) -> datafusion::error::Result<String> {
't' => '\t',
'\\' => '\\',
_ => {
return sql_err!(ParserError::TokenizerError(format!(
"unsupported escape char: '\\{}'",
next_char
),))
return Err(sql_datafusion_err!(ParserError::TokenizerError(
format!("unsupported escape char: '\\{}'", next_char)
)))
}
});
}
6 changes: 3 additions & 3 deletions datafusion-cli/src/object_storage.rs
@@ -19,13 +19,13 @@ use std::any::Any;
use std::fmt::{Debug, Display};
use std::sync::Arc;

use datafusion::common::config::{
ConfigEntry, ConfigExtension, ConfigField, ExtensionOptions, TableOptions, Visit,
};
use datafusion::common::{exec_datafusion_err, exec_err, internal_err};
use datafusion::error::{DataFusionError, Result};
use datafusion::execution::context::SessionState;
use datafusion::prelude::SessionContext;
use datafusion_common::config::{
ConfigEntry, ConfigExtension, ConfigField, ExtensionOptions, TableOptions, Visit,
};

use async_trait::async_trait;
use aws_credential_types::provider::ProvideCredentials;
2 changes: 1 addition & 1 deletion datafusion-cli/src/print_options.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use datafusion_common::instant::Instant;
use datafusion::common::instant::Instant;
use std::fmt::{Display, Formatter};
use std::io::Write;
use std::pin::Pin;
2 changes: 1 addition & 1 deletion datafusion-examples/examples/rewrite_expr.rs
@@ -59,7 +59,7 @@ pub fn main() -> Result<()> {

// then run the optimizer with our custom rule
let optimizer = Optimizer::with_rules(vec![Arc::new(MyOptimizerRule {})]);
let optimized_plan = optimizer.optimize(&analyzed_plan, &config, observe)?;
let optimized_plan = optimizer.optimize(analyzed_plan, &config, observe)?;
println!(
"Optimized Logical Plan:\n\n{}\n",
optimized_plan.display_indent()
29 changes: 13 additions & 16 deletions datafusion/common/src/dfschema.rs
@@ -319,7 +319,7 @@ impl DFSchema {
&self,
qualifier: Option<&TableReference>,
name: &str,
) -> Result<Option<usize>> {
) -> Option<usize> {
let mut matches = self
.iter()
.enumerate()
@@ -345,19 +345,19 @@
(None, Some(_)) | (None, None) => f.name() == name,
})
.map(|(idx, _)| idx);
Ok(matches.next())
matches.next()
}

/// Find the index of the column with the given qualifier and name
pub fn index_of_column(&self, col: &Column) -> Result<usize> {
self.index_of_column_by_name(col.relation.as_ref(), &col.name)?
self.index_of_column_by_name(col.relation.as_ref(), &col.name)
.ok_or_else(|| field_not_found(col.relation.clone(), &col.name, self))
}

/// Check if the column is in the current schema
pub fn is_column_from_schema(&self, col: &Column) -> Result<bool> {
pub fn is_column_from_schema(&self, col: &Column) -> bool {
self.index_of_column_by_name(col.relation.as_ref(), &col.name)
.map(|idx| idx.is_some())
.is_some()
}

/// Find the field with the given name
@@ -381,7 +381,7 @@
) -> Result<(Option<&TableReference>, &Field)> {
if let Some(qualifier) = qualifier {
let idx = self
.index_of_column_by_name(Some(qualifier), name)?
.index_of_column_by_name(Some(qualifier), name)
.ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?;
Ok((self.field_qualifiers[idx].as_ref(), self.field(idx)))
} else {
@@ -519,7 +519,7 @@ impl DFSchema {
name: &str,
) -> Result<&Field> {
let idx = self
.index_of_column_by_name(Some(qualifier), name)?
.index_of_column_by_name(Some(qualifier), name)
.ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?;

Ok(self.field(idx))
@@ -1190,11 +1190,8 @@ mod tests {
.to_string(),
expected_help
);
assert!(schema.index_of_column_by_name(None, "y").unwrap().is_none());
assert!(schema
.index_of_column_by_name(None, "t1.c0")
.unwrap()
.is_none());
assert!(schema.index_of_column_by_name(None, "y").is_none());
assert!(schema.index_of_column_by_name(None, "t1.c0").is_none());

Ok(())
}
@@ -1284,28 +1281,28 @@ mod tests {
{
let col = Column::from_qualified_name("t1.c0");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(schema.is_column_from_schema(&col)?);
assert!(schema.is_column_from_schema(&col));
}

// qualified not exists
{
let col = Column::from_qualified_name("t1.c2");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(!schema.is_column_from_schema(&col)?);
assert!(!schema.is_column_from_schema(&col));
}

// unqualified exists
{
let col = Column::from_name("c0");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(schema.is_column_from_schema(&col)?);
assert!(schema.is_column_from_schema(&col));
}

// unqualified not exists
{
let col = Column::from_name("c2");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(!schema.is_column_from_schema(&col)?);
assert!(!schema.is_column_from_schema(&col));
}

Ok(())
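For orientation, here is a minimal sketch (not part of this commit) of how the simplified `DFSchema` API above reads from calling code, assuming `arrow` and `datafusion-common` 37.0.0 as dependencies; the fixture names are hypothetical and only loosely follow the tests shown in the diff:

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::{Column, DFSchema, Result};

fn main() -> Result<()> {
    // Hypothetical single-column schema qualified by table "t1".
    let arrow_schema = Schema::new(vec![Field::new("c0", DataType::Int32, true)]);
    let schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)?;

    let col = Column::from_qualified_name("t1.c0");

    // is_column_from_schema now returns a plain bool instead of Result<bool>,
    // so membership checks no longer need `?` or `.unwrap()`.
    assert!(schema.is_column_from_schema(&col));

    // index_of_column still returns Result<usize>; a missing column is mapped
    // to a "field not found" error inside the method rather than at the call site.
    let idx = schema.index_of_column(&col)?;
    assert_eq!(idx, 0);

    Ok(())
}
```

The behavior is unchanged; only the error plumbing at call sites gets shorter.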
