From 3482be30561505e664f623c9e821869488daa896 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Wed, 18 Oct 2023 11:46:43 -0400 Subject: [PATCH] sql: Suggest other columns names when one can't be found (#22446) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR updates `PlanError::UnknownColumn` to suggest similarly named columns if the one specified cannot be found. ### Motivation Closes https://github.com/MaterializeInc/materialize/issues/22427 ### Checklist - [ ] This PR has adequate test coverage / QA involvement has been duly considered. - [ ] This PR has an associated up-to-date [design doc](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/README.md), is a design doc ([template](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/00000000_template.md)), or is sufficiently small to not require a design. - [ ] If this PR evolves [an existing `$T ⇔ Proto$T` mapping](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/command-and-response-binary-encoding.md) (possibly in a backwards-incompatible way), then it is tagged with a `T-proto` label. - [ ] If this PR will require changes to cloud orchestration or tests, there is a companion cloud PR to account for those changes that is tagged with the release-blocker label ([example](https://github.com/MaterializeInc/cloud/pull/5021)). - [x] This PR includes the following [user-facing behavior changes](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/guide-changes.md#what-changes-require-a-release-note): - Updates an error message to be more helpful. --- src/repr/src/relation.rs | 14 +++++++ src/sql/src/names.rs | 2 + src/sql/src/plan/error.rs | 14 ++++++- src/sql/src/plan/query.rs | 32 +++++++++----- src/sql/src/plan/scope.rs | 34 +++++++++++---- src/sql/src/plan/statement/dml.rs | 1 + src/sql/src/pure/postgres.rs | 12 +++++- test/pg-cdc/pg-cdc.td | 11 +++++ .../cockroach/case_sensitive_names.slt | 42 ++++++++++++++++++- 9 files changed, 138 insertions(+), 24 deletions(-) diff --git a/src/repr/src/relation.rs b/src/repr/src/relation.rs index ba8ea1918e1bf..390d8e4a23903 100644 --- a/src/repr/src/relation.rs +++ b/src/repr/src/relation.rs @@ -289,6 +289,11 @@ impl ColumnName { pub fn as_mut_str(&mut self) -> &mut String { &mut self.0 } + + /// Returns if this [`ColumnName`] is similar to the provided one. + pub fn is_similar(&self, other: &ColumnName) -> bool { + self.0.to_lowercase() == other.as_str().to_lowercase() + } } impl fmt::Display for ColumnName { @@ -496,6 +501,15 @@ impl RelationDesc { self.names.iter() } + /// Returns an iterator over the names of the columns in this relation that are "similar" to + /// the provided `name`. + pub fn iter_similar_names<'a>( + &'a self, + name: &'a ColumnName, + ) -> impl Iterator { + self.iter_names().filter(|n| n.is_similar(name)) + } + /// Finds a column by name. /// /// Returns the index and type of the column named `name`. If no column with diff --git a/src/sql/src/names.rs b/src/sql/src/names.rs index ec294623e86cd..04ef3372c9022 100644 --- a/src/sql/src/names.rs +++ b/src/sql/src/names.rs @@ -1764,9 +1764,11 @@ impl<'a> Fold for NameResolver<'a> { let name = normalize::column_name(column_name.column); let Some((index, _typ)) = desc.get_by_name(&name) else { if self.status.is_ok() { + let similar = desc.iter_similar_names(&name).cloned().collect(); self.status = Err(PlanError::UnknownColumn { table: Some(full_name.clone().into()), column: name, + similar, }) } return ResolvedColumnName::Error; diff --git a/src/sql/src/plan/error.rs b/src/sql/src/plan/error.rs index 0605fb9fa31dd..11906b4db21da 100644 --- a/src/sql/src/plan/error.rs +++ b/src/sql/src/plan/error.rs @@ -58,6 +58,7 @@ pub enum PlanError { UnknownColumn { table: Option, column: ColumnName, + similar: Box<[ColumnName]>, }, UngroupedColumn { table: Option, @@ -340,6 +341,17 @@ impl PlanError { Self::KafkaSourcePurification(e) => e.hint(), Self::TestScriptSourcePurification(e) => e.hint(), Self::LoadGeneratorSourcePurification(e) => e.hint(), + Self::UnknownColumn { table, similar, .. } => { + let suffix = "Make sure to surround case sensitive names in double quotes."; + match &similar[..] { + [] => None, + [column] => Some(format!("The similarly named column {} does exist. {suffix}", ColumnDisplay { table, column })), + names => { + let similar = names.into_iter().map(|column| ColumnDisplay { table, column }).join(", "); + Some(format!("There are similarly named columns that do exist: {similar}. {suffix}")) + } + } + } _ => None, } } @@ -362,7 +374,7 @@ impl fmt::Display for PlanError { } Ok(()) } - Self::UnknownColumn { table, column } => write!( + Self::UnknownColumn { table, column, similar: _ } => write!( f, "column {} does not exist", ColumnDisplay { table, column } diff --git a/src/sql/src/plan/query.rs b/src/sql/src/plan/query.rs index c168b6f9c70d9..d8817e72d80f1 100644 --- a/src/sql/src/plan/query.rs +++ b/src/sql/src/plan/query.rs @@ -2104,14 +2104,20 @@ fn plan_view_select( // Checks if an unknown column error was the result of not including that // column in the GROUP BY clause and produces a friendlier error instead. let check_ungrouped_col = |e| match e { - PlanError::UnknownColumn { table, column } => { - match from_scope.resolve(&qcx.outer_scopes, table.as_ref(), &column) { - Ok(ColumnRef { level: 0, column }) => { - PlanError::ungrouped_column(&from_scope.items[column]) - } - _ => PlanError::UnknownColumn { table, column }, + PlanError::UnknownColumn { + table, + column, + similar, + } => match from_scope.resolve(&qcx.outer_scopes, table.as_ref(), &column) { + Ok(ColumnRef { level: 0, column }) => { + PlanError::ungrouped_column(&from_scope.items[column]) } - } + _ => PlanError::UnknownColumn { + table, + column, + similar, + }, + }, e => e, }; @@ -2453,6 +2459,7 @@ fn plan_group_by_expr<'a>( Err(PlanError::UnknownColumn { table: None, column, + similar, }) => { // The expression was a simple identifier that did not match an // input column. See if it matches an output column. @@ -2469,6 +2476,7 @@ fn plan_group_by_expr<'a>( Err(PlanError::UnknownColumn { table: None, column, + similar, }) } } @@ -4627,12 +4635,13 @@ fn plan_identifier(ecx: &ExprContext, names: &[Ident]) -> Result return Ok(HirScalarExpr::Column(i)), - Err(PlanError::UnknownColumn { .. }) => (), + Err(PlanError::UnknownColumn { similar, .. }) => similar, Err(e) => return Err(e), - } + }; // The name doesn't refer to a column. Check if it is a whole-row reference // to a table. @@ -4649,6 +4658,7 @@ fn plan_identifier(ecx: &ExprContext, names: &[Ident]) -> Result Err(PlanError::UnknownColumn { table: None, column: col_name, + similar: similar_names, }), // The name refers to a table that is the result of a function that // returned a single column. Per PostgreSQL, this is a special case diff --git a/src/sql/src/plan/scope.rs b/src/sql/src/plan/scope.rs index d8debca360e5d..4beecd584793c 100644 --- a/src/sql/src/plan/scope.rs +++ b/src/sql/src/plan/scope.rs @@ -269,6 +269,10 @@ impl Scope { Ok(items) } + /// Returns a matching [`ColumnRef`], if one exists. + /// + /// Filters all visible items against the provided `matches` closure, and then matches this + /// filtered set against the provided `column_name`. fn resolve_internal<'a, M>( &'a self, outer_scopes: &[Scope], @@ -281,12 +285,26 @@ impl Scope { { let mut results = self .all_items(outer_scopes) - .filter(|(column, lat_level, item)| (matches)(*column, *lat_level, item)); + .filter(|(column, lat_level, item)| { + (matches)(*column, *lat_level, item) && item.column_name == *column_name + }); match results.next() { - None => Err(PlanError::UnknownColumn { - table: table_name.cloned(), - column: column_name.clone(), - }), + None => { + let similar = self + .all_items(outer_scopes) + .filter(|(column, lat_level, item)| (matches)(*column, *lat_level, item)) + .filter_map(|(_col, _lat, item)| { + item.column_name + .is_similar(column_name) + .then(|| item.column_name.clone()) + }) + .collect(); + Err(PlanError::UnknownColumn { + table: table_name.cloned(), + column: column_name.clone(), + similar, + }) + } Some((column, lat_level, item)) => { if results .find(|(_column, lat_level2, _item)| lat_level == *lat_level2) @@ -321,9 +339,7 @@ impl Scope { let table_name = None; self.resolve_internal( outer_scopes, - |_column, _lat_level, item| { - item.allow_unqualified_references && item.column_name == *column_name - }, + |_column, _lat_level, item| item.allow_unqualified_references, table_name, column_name, ) @@ -368,7 +384,7 @@ impl Scope { } if item.table_name.as_ref().map(|n| n.matches(table_name)) == Some(true) { seen_at_level = Some(lat_level); - item.column_name == *column_name + true } else { false } diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index 68d1349b6bb44..30afe6459a3bf 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -673,6 +673,7 @@ pub fn plan_subscribe( Err(PlanError::UnknownColumn { table: None, column, + similar: _, }) if &column == &mz_diff => { // mz_diff is being used in an expression. Since mz_diff isn't part of the table // it looks like an unknown column. Instead, return a better error diff --git a/src/sql/src/pure/postgres.rs b/src/sql/src/pure/postgres.rs index a708edfddc189..5d3f8bad43327 100644 --- a/src/sql/src/pure/postgres.rs +++ b/src/sql/src/pure/postgres.rs @@ -146,6 +146,15 @@ pub(super) fn generate_text_columns( })?; if !desc.columns.iter().any(|column| column.name == col) { + let column = mz_repr::ColumnName::from(col); + let similar = desc + .columns + .iter() + .filter_map(|c| { + let c_name = mz_repr::ColumnName::from(c.name.clone()); + c_name.is_similar(&column).then_some(c_name) + }) + .collect(); return Err(PlanError::InvalidOptionValue { option_name: option_name.to_string(), err: Box::new(PlanError::UnknownColumn { @@ -153,7 +162,8 @@ pub(super) fn generate_text_columns( normalize::unresolved_item_name(fully_qualified_name) .expect("known to be of valid len"), ), - column: mz_repr::ColumnName::from(col), + column, + similar, }), }); } diff --git a/test/pg-cdc/pg-cdc.td b/test/pg-cdc/pg-cdc.td index db211d3018e02..e14f5f92a0fc3 100644 --- a/test/pg-cdc/pg-cdc.td +++ b/test/pg-cdc/pg-cdc.td @@ -599,6 +599,17 @@ DELETE FROM conflict_schema.conflict_table; ); contains: invalid TEXT COLUMNS option value: column "pk_table.col_dne" does not exist +! CREATE SOURCE case_sensitive_names + FROM POSTGRES CONNECTION pg ( + PUBLICATION 'mz_source', + TEXT COLUMNS [pk_table."F2"] + ) + FOR TABLES ( + "enum_table" + ); +contains: invalid TEXT COLUMNS option value: column "pk_table.F2" does not exist +hint: The similarly named column "pk_table.f2" does exist. + ! CREATE SOURCE enum_source FROM POSTGRES CONNECTION pg ( PUBLICATION 'mz_source', diff --git a/test/sqllogictest/cockroach/case_sensitive_names.slt b/test/sqllogictest/cockroach/case_sensitive_names.slt index c1859293d3141..646a516fc75a9 100644 --- a/test/sqllogictest/cockroach/case_sensitive_names.slt +++ b/test/sqllogictest/cockroach/case_sensitive_names.slt @@ -204,10 +204,10 @@ SELECT x, X, "Y" FROM foo ---- x x Y -statement error column "X" does not exist +statement error db error: ERROR: column "X" does not exist\nHINT: The similarly named column "x" does exist. SELECT "X" FROM foo -statement error column "y" does not exist +statement error column "y" does not exist\nHINT: The similarly named column "Y" does exist. SELECT Y FROM foo # The following should not be ambiguous. @@ -216,6 +216,44 @@ SELECT Y, "Y" FROM (SELECT x as y, "Y" FROM foo) ---- y Y +statement ok +CREATE TABLE foo_multi_case ("cAsE" INT, "CASE" INT) + +statement error ERROR: column "foo_multi_case.case" does not exist\nHINT: There are similarly named columns that do exist: "foo_multi_case.cAsE", "foo_multi_case.CASE". Make sure to surround case sensitive names in double quotes. +SELECT foo_multi_case.case FROM foo_multi_case; + +statement ok +CREATE TABLE j1 ("X" int); + +statement ok +INSERT INTO j1 VALUES (1), (2), (3); + +statement ok +CREATE TABLE j2 ("X" int, "Y" int); + +statement ok +INSERT INTO j2 VALUES (1, 5), (2, 7), (8, 9); + +statement ok +CREATE TABLE j3 ("X" int, "Z" int); + +statement ok +INSERT INTO j3 VALUES (1, 7), (10, 11), (12, 13); + +statement error db error: ERROR: column "Y" does not exist\nHINT: The similarly named column "y" does exist. +SELECT "Y" FROM ( SELECT "X" as y FROM j1 NATURAL JOIN j2 ); + +query I +SELECT y FROM ( SELECT "X" as y FROM j1 NATURAL JOIN j2 ); +---- +1 +2 + +# Even though the column "Y" exists, it should not get suggested since it's not in scope. + +statement error db error: ERROR: column "y" does not exist +SELECT "X", y FROM j1 LEFT JOIN ( SELECT "X", "Z" FROM j2 LEFT JOIN j3 USING ("X") ) USING ("X"); + # Case sensitivity of view names. mode standard