Merge branch 'remove-brackets' of github.com:jkosh44/materialize into 0dt-tests
MaterializeInc · Jul 17, 2024 · b5db35e · b5db35e
2 parents 300c87d + 715f9e4
commit b5db35e
Show file tree

Hide file tree

Showing 21 changed files with 408 additions and 229 deletions.
diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml
@@ -1080,7 +1080,7 @@ steps:
         plugins:
           - ./ci/plugins/mzcompose:
               composition: version-consistency
-              args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=dataflow_rendering"]
+              args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=dataflow_rendering", "--other-tag=common-ancestor"]
 
       - id: output-consistency-version-ctf
         label: "Output consistency (version for CTF)"
@@ -1091,7 +1091,7 @@ steps:
         plugins:
           - ./ci/plugins/mzcompose:
               composition: version-consistency
-              args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=constant_folding"]
+              args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=constant_folding", "--other-tag=common-ancestor"]
 
 
   - group: SQLsmith

diff --git a/doc/developer/feature-benchmark.md b/doc/developer/feature-benchmark.md
@@ -59,13 +59,8 @@ interference of any defaults that may be in effect and that can change over time
 
 ## Running manually in Buildkite
 
-Go to the [Buildkite Nightly Job](https://buildkite.com/materialize/nightly), click the down arrow button
-at the top right and select `New Build`. Put the **full SHA** of your commit in `Commit` and the name
-of your branch in `Branch` including the Github username you forked with, e.g. `username:branch`.
-Click `Create Build` and wait for the build start, at which point you will
-have the opportunity to select `feature-benchmark` from the list.
-
-If you want to run a specific senario only, click `Options` and put `MZCOMPOSE_SCENARIO=...` in the text box.
+Go to [Trigger CI](https://trigger-ci.dev.materialize.com/), enter your pull request, and select Feature Benchmark to run only that test.
+If you want to run a specific scenario only, enter a Feature Benchmark Scenario.
 For example, to run all scenarios that are subclasses of `Kafka`, use `MZCOMPOSE_SCENARIO=Kafka`.
 
 # Output

diff --git a/misc/python/materialize/checks/all_checks/cluster.py b/misc/python/materialize/checks/all_checks/cluster.py
@@ -80,7 +80,7 @@ def validate(self) -> Testdrive:
         )
 
 
-class AlterClusterToManaged(Check):
+class AlterCluster(Check):
     def manipulate(self) -> list[Testdrive]:
         return [
             Testdrive(dedent(s))
@@ -103,6 +103,8 @@ def manipulate(self) -> list[Testdrive]:
                 """,
                 """
                 > ALTER CLUSTER alter_cluster1 SET (MANAGED);
+
+                >[version>10600] ALTER CLUSTER alter_cluster1 SET (introspection debugging = TRUE, introspection interval = '45s');
                 """,
             ]
         ]
@@ -126,7 +128,10 @@ def validate(self) -> Testdrive:
                 123
 
                 >[version>10600] SHOW CREATE CLUSTER alter_cluster1;
-                alter_cluster1 "CREATE CLUSTER \\"alter_cluster1\\" (DISK = true, INTROSPECTION DEBUGGING = false, INTROSPECTION INTERVAL = INTERVAL '00:00:01', MANAGED = true, REPLICATION FACTOR = 1, SIZE = '2-2', SCHEDULE = MANUAL)"
+                alter_cluster1 "CREATE CLUSTER \\"alter_cluster1\\" (DISK = true, INTROSPECTION DEBUGGING = true, INTROSPECTION INTERVAL = INTERVAL '00:00:45', MANAGED = true, REPLICATION FACTOR = 1, SIZE = '2-2', SCHEDULE = MANUAL)"
+
+                >[version>10600] SELECT name, introspection_debugging, introspection_interval FROM mz_catalog.mz_clusters WHERE name = 'alter_cluster1';
+                alter_cluster1 true "00:00:45"
            """
             )
         )

diff --git a/misc/python/materialize/cli/ci_annotate_errors.py b/misc/python/materialize/cli/ci_annotate_errors.py
@@ -24,8 +24,8 @@
 
 from junitparser.junitparser import Error, Failure, JUnitXml
 
-from materialize import ci_util, ui
-from materialize.buildkite import add_annotation_raw, get_artifact_url
+from materialize import buildkite, ci_util, ui
+from materialize.buildkite import BuildkiteEnvVar, add_annotation_raw, get_artifact_url
 from materialize.buildkite_insights.buildkite_api import builds_api, generic_api
 from materialize.buildkite_insights.buildkite_api.buildkite_constants import (
     BUILDKITE_RELEVANT_COMPLETED_BUILD_STEP_STATES,
@@ -352,9 +352,10 @@ def main() -> int:
 
         return_code = annotate_logged_errors(args.log_files, test_analytics)
     except Exception as e:
+        step_key = buildkite.get_var(BuildkiteEnvVar.BUILDKITE_STEP_KEY)
         add_annotation_raw(
             style="error",
-            markdown=f"ci_annotate_errors failed, report this to #team-testing:\n```\n{e}\n```",
+            markdown=f"ci_annotate_errors failed in step {step_key}, report this to #team-testing:\n```\n{e}\n```",
         )
         raise
 

diff --git a/misc/python/materialize/output_consistency/expression/expression_characteristics.py b/misc/python/materialize/output_consistency/expression/expression_characteristics.py
@@ -32,6 +32,8 @@ class ExpressionCharacteristics(Enum):
 
     INTERVAL_WITH_MONTHS = 130
     """time interval containing months or years"""
+    DATE_WITH_SHORT_YEAR = 131
+    INCOMPLETE_TIME_VALUE = 132
 
     STRING_EMPTY = 140
     STRING_WITH_SPECIAL_SPACE_CHARS = 141

diff --git a/...terialize/output_consistency/ignore_filter/internal_output_inconsistency_ignore_filter.py b/...terialize/output_consistency/ignore_filter/internal_output_inconsistency_ignore_filter.py
@@ -44,10 +44,15 @@
 )
 from materialize.output_consistency.query.query_result import QueryFailure
 from materialize.output_consistency.query.query_template import QueryTemplate
+from materialize.output_consistency.selection.selection import (
+    ALL_QUERY_COLUMNS_BY_INDEX_SELECTION,
+)
 from materialize.output_consistency.validation.validation_message import (
     ValidationError,
 )
 
+AGGREGATION_SHORTCUT_FUNCTION_NAMES = {"count", "string_agg"}
+
 
 class InternalOutputInconsistencyIgnoreFilter(GenericInconsistencyIgnoreFilter):
     """Allows specifying and excluding expressions with known output inconsistencies"""
@@ -192,55 +197,46 @@ def _shall_ignore_error_mismatch(
         query_template: QueryTemplate,
         contains_aggregation: bool,
     ) -> IgnoreVerdict:
+        all_characteristics = query_template.get_involved_characteristics(
+            ALL_QUERY_COLUMNS_BY_INDEX_SELECTION
+        )
+
         if self._uses_shortcut_optimization(
             query_template.select_expressions, contains_aggregation
         ):
-            return YesIgnore("#17189")
+            return YesIgnore("#17189: evaluation order")
 
         if self._uses_eager_evaluation(query_template):
-            return YesIgnore("#17189")
+            return YesIgnore("#17189: evaluation order")
 
         if query_template.where_expression is not None:
             # The error message may depend on the evaluation order of the where expression.
-            return YesIgnore("#17189")
+            return YesIgnore("#17189: evaluation order")
 
-        return NoIgnore()
-
-    def _uses_shortcut_optimization(
-        self, expressions: list[Expression], contains_aggregation: bool
-    ) -> bool:
-        if self._uses_aggregation_shortcut_optimization(
-            expressions, contains_aggregation
+        if (
+            ExpressionCharacteristics.INFINITY in all_characteristics
+            and ExpressionCharacteristics.MAX_VALUE in all_characteristics
         ):
-            return True
-        if self._might_use_null_shortcut_optimization(expressions):
-            return True
+            return YesIgnore("#17189: evaluation order")
 
-        return False
+        return NoIgnore()
 
-    def _uses_aggregation_shortcut_optimization(
+    def _uses_shortcut_optimization(
         self, expressions: list[Expression], contains_aggregation: bool
     ) -> bool:
-        if not contains_aggregation:
-            # all current known optimizations causing issues involve aggregations
-            return False
-
-        def is_function_taking_shortcut(expression: Expression) -> bool:
-            functions_taking_shortcuts = {"count", "string_agg"}
-
-            if isinstance(expression, ExpressionWithArgs):
-                operation = expression.operation
-                return (
-                    isinstance(operation, DbFunction)
-                    and operation.function_name_in_lower_case
-                    in functions_taking_shortcuts
-                )
-            return False
-
         for expression in expressions:
-            if expression.contains(is_function_taking_shortcut, True):
+            if expression.matches(
+                partial(
+                    uses_aggregation_shortcut_optimization,
+                    contains_aggregation=contains_aggregation,
+                ),
+                True,
+            ):
                 return True
 
+        if self._might_use_null_shortcut_optimization(expressions):
+            return True
+
         return False
 
     def _might_use_null_shortcut_optimization(
@@ -271,3 +267,19 @@ def _uses_eager_evaluation(self, query_template: QueryTemplate) -> bool:
             ),
             True,
         )
+
+
+def uses_aggregation_shortcut_optimization(
+    expression: Expression, contains_aggregation: bool
+) -> bool:
+    if not contains_aggregation:
+        # all current known optimizations causing issues involve aggregations
+        return False
+
+    return expression.matches(
+        partial(
+            matches_fun_by_any_name,
+            function_names_in_lower_case=AGGREGATION_SHORTCUT_FUNCTION_NAMES,
+        ),
+        True,
+    )
diff --git a/misc/python/materialize/output_consistency/input_data/types/date_time_types_provider.py b/misc/python/materialize/output_consistency/input_data/types/date_time_types_provider.py
@@ -26,7 +26,7 @@ def __init__(
         type_name: str,
         min_value: str,
         max_value: str,
-        further_values: list[str],
+        further_values: list[tuple[str, set[ExpressionCharacteristics]]],
         further_values_with_fixed_timezone: list[str] = [],
         has_time_zone: bool = False,
         is_pg_compatible: bool = True,
@@ -65,19 +65,40 @@ def resolve_return_type_spec(
     # BC, AD not working, see: https://github.com/MaterializeInc/materialize/issues/19637
     "0001-01-01",
     "99999-12-31",
-    ["2023-06-01", "2024-02-29"],
+    [
+        ("2023-06-01", set()),
+        ("2024-02-29", set()),
+        ("01-02-03", {ExpressionCharacteristics.DATE_WITH_SHORT_YEAR}),
+    ],
     is_max_value_pg_compatible=False,
 )
 TIME_TYPE = DateTimeDataType(
-    TIME_TYPE_IDENTIFIER, "TIME", "00:00:00", "23:59:59.999999", ["01:02:03.000001"]
+    TIME_TYPE_IDENTIFIER,
+    "TIME",
+    "00:00:00",
+    "23:59:59.999999",
+    [
+        ("01:02:03.000001", set()),
+        ("11:", {ExpressionCharacteristics.INCOMPLETE_TIME_VALUE}),
+    ],
 )
 TIMESTAMP_TYPE = DateTimeDataType(
     TIMESTAMP_TYPE_IDENTIFIER,
     "TIMESTAMP",
     # BC, AD not working, see: https://github.com/MaterializeInc/materialize/issues/19637
     "0001-01-01 00:00:00",
     "99999-12-31 23:59:59",
-    ["2023-02-28 11:22:33.44444", "2024-02-29 23:50:00"],
+    [
+        ("2023-02-28 11:22:33.44444", set()),
+        ("2024-02-29 23:50:00", set()),
+        (
+            "01-02-03 11:",
+            {
+                ExpressionCharacteristics.DATE_WITH_SHORT_YEAR,
+                ExpressionCharacteristics.INCOMPLETE_TIME_VALUE,
+            },
+        ),
+    ],
     is_max_value_pg_compatible=False,
 )
 TIMESTAMPTZ_TYPE = DateTimeDataType(
@@ -86,7 +107,7 @@ def resolve_return_type_spec(
     # BC, AD not working, see: https://github.com/MaterializeInc/materialize/issues/19637
     "0001-01-01 00:00:00",
     "99999-12-31 23:59:59",
-    further_values=["2023-06-01 11:22:33.44444"],
+    further_values=[("2023-06-01 11:22:33.44444", set())],
     further_values_with_fixed_timezone=[
         # leap year
         "2024-02-29 11:50:00 EST",
@@ -105,11 +126,11 @@ def resolve_return_type_spec(
     "-178956970 years -8 months -2147483648 days -2562047788:00:54.775808",
     "178956970 years 7 months 2147483647 days 2562047788:00:54.775807",
     [
-        "2 years 3 months 4 days 11:22:33.456789",
-        "100 months 100 days",
-        "44:45:45",
-        "45 minutes",
-        "70 minutes",
+        ("2 years 3 months 4 days 11:22:33.456789", set()),
+        ("100 months 100 days", set()),
+        ("44:45:45", set()),
+        ("45 minutes", set()),
+        ("70 minutes", set()),
     ],
     # type is compatible but causes too many issues for now
     is_pg_compatible=False,

diff --git a/misc/python/materialize/output_consistency/input_data/values/date_time_values_provider.py b/misc/python/materialize/output_consistency/input_data/values/date_time_values_provider.py
@@ -50,11 +50,15 @@ def __create_values(
             is_pg_compatible=_date_time_data_type.is_max_value_pg_compatible,
         )
 
-        for index, value in enumerate(_date_time_data_type.further_values):
+        for index, value_and_characteristics in enumerate(
+            _date_time_data_type.further_values
+        ):
+            value = value_and_characteristics[0]
+            characteristics = value_and_characteristics[1]
             _values_of_type.add_raw_value(
                 f"'{value}{timezone_value_suffix}'",
                 f"VAL_{index + 1}{timezone_column_suffix}",
-                set(),
+                characteristics,
             )
 
     if _timezone is None:

diff --git a/misc/python/materialize/output_consistency/output/reproduction_code_printer.py b/misc/python/materialize/output_consistency/output/reproduction_code_printer.py
@@ -11,9 +11,6 @@
 from materialize.output_consistency.execution.evaluation_strategy import (
     EvaluationStrategy,
 )
-from materialize.output_consistency.expression.expression_characteristics import (
-    ExpressionCharacteristics,
-)
 from materialize.output_consistency.input_data.test_input_data import (
     ConsistencyTestInputData,
 )
@@ -108,8 +105,8 @@ def print_reproduction_code_of_error(self, error: ValidationError) -> None:
         )
         self.print_separator_line()
 
-        characteristics = self.__get_involved_characteristics(
-            query_template, query_column_selection
+        characteristics = query_template.get_involved_characteristics(
+            query_column_selection
         )
         characteristic_names = ", ".join([char.name for char in characteristics])
         self._print_text(
@@ -182,23 +179,3 @@ def __get_involved_column_names(
                 column_names.add(leaf_expression.column_name)
 
         return column_names
-
-    def __get_involved_characteristics(
-        self,
-        query_template: QueryTemplate,
-        query_column_selection: QueryColumnByIndexSelection,
-    ) -> set[ExpressionCharacteristics]:
-        all_involved_characteristics: set[ExpressionCharacteristics] = set()
-
-        for index, expression in enumerate(query_template.select_expressions):
-            if not query_column_selection.is_included(index):
-                continue
-
-            characteristics = expression.recursively_collect_involved_characteristics(
-                query_template.row_selection
-            )
-            all_involved_characteristics = all_involved_characteristics.union(
-                characteristics
-            )
-
-        return all_involved_characteristics
diff --git a/misc/python/materialize/output_consistency/query/query_template.py b/misc/python/materialize/output_consistency/query/query_template.py
@@ -19,6 +19,9 @@
     ValueStorageLayout,
 )
 from materialize.output_consistency.expression.expression import Expression
+from materialize.output_consistency.expression.expression_characteristics import (
+    ExpressionCharacteristics,
+)
 from materialize.output_consistency.query.query_format import QueryOutputFormat
 from materialize.output_consistency.selection.selection import (
     DataRowSelection,
@@ -230,3 +233,22 @@ def matches_specific_select_or_filter_expression(
             self.where_expression is not None
             and self.where_expression.matches(predicate, check_recursively)
         )
+
+    def get_involved_characteristics(
+        self,
+        query_column_selection: QueryColumnByIndexSelection,
+    ) -> set[ExpressionCharacteristics]:
+        all_involved_characteristics: set[ExpressionCharacteristics] = set()
+
+        for index, expression in enumerate(self.select_expressions):
+            if not query_column_selection.is_included(index):
+                continue
+
+            characteristics = expression.recursively_collect_involved_characteristics(
+                self.row_selection
+            )
+            all_involved_characteristics = all_involved_characteristics.union(
+                characteristics
+            )
+
+        return all_involved_characteristics