Skip to content

Commit

Permalink
Merge branch 'remove-brackets' of github.com:jkosh44/materialize into…
Browse files Browse the repository at this point in the history
… 0dt-tests
  • Loading branch information
jkosh44 committed Jul 17, 2024
2 parents 300c87d + 715f9e4 commit b5db35e
Show file tree
Hide file tree
Showing 21 changed files with 408 additions and 229 deletions.
4 changes: 2 additions & 2 deletions ci/nightly/pipeline.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ steps:
plugins:
- ./ci/plugins/mzcompose:
composition: version-consistency
args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=dataflow_rendering"]
args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=dataflow_rendering", "--other-tag=common-ancestor"]

- id: output-consistency-version-ctf
label: "Output consistency (version for CTF)"
Expand All @@ -1091,7 +1091,7 @@ steps:
plugins:
- ./ci/plugins/mzcompose:
composition: version-consistency
args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=constant_folding"]
args: ["--seed=$BUILDKITE_JOB_ID", "--max-runtime-in-sec=1200", "--evaluation-strategy=constant_folding", "--other-tag=common-ancestor"]


- group: SQLsmith
Expand Down
9 changes: 2 additions & 7 deletions doc/developer/feature-benchmark.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,8 @@ interference of any defaults that may be in effect and that can change over time

## Running manually in Buildkite

Go to the [Buildkite Nightly Job](https://buildkite.com/materialize/nightly), click the down arrow button
at the top right and select `New Build`. Put the **full SHA** of your commit in `Commit` and the name
of your branch in `Branch` including the Github username you forked with, e.g. `username:branch`.
Click `Create Build` and wait for the build to start, at which point you will
have the opportunity to select `feature-benchmark` from the list.

If you want to run a specific scenario only, click `Options` and put `MZCOMPOSE_SCENARIO=...` in the text box.
Go to [Trigger CI](https://trigger-ci.dev.materialize.com/) and enter your pull request, then select Feature Benchmark to run only that test.
If you want to run a specific scenario only, enter a Feature Benchmark Scenario.
For example, to run all scenarios that are subclasses of `Kafka`, use `MZCOMPOSE_SCENARIO=Kafka`.

# Output
Expand Down
9 changes: 7 additions & 2 deletions misc/python/materialize/checks/all_checks/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def validate(self) -> Testdrive:
)


class AlterClusterToManaged(Check):
class AlterCluster(Check):
def manipulate(self) -> list[Testdrive]:
return [
Testdrive(dedent(s))
Expand All @@ -103,6 +103,8 @@ def manipulate(self) -> list[Testdrive]:
""",
"""
> ALTER CLUSTER alter_cluster1 SET (MANAGED);
>[version>10600] ALTER CLUSTER alter_cluster1 SET (introspection debugging = TRUE, introspection interval = '45s');
""",
]
]
Expand All @@ -126,7 +128,10 @@ def validate(self) -> Testdrive:
123
>[version>10600] SHOW CREATE CLUSTER alter_cluster1;
alter_cluster1 "CREATE CLUSTER \\"alter_cluster1\\" (DISK = true, INTROSPECTION DEBUGGING = false, INTROSPECTION INTERVAL = INTERVAL '00:00:01', MANAGED = true, REPLICATION FACTOR = 1, SIZE = '2-2', SCHEDULE = MANUAL)"
alter_cluster1 "CREATE CLUSTER \\"alter_cluster1\\" (DISK = true, INTROSPECTION DEBUGGING = true, INTROSPECTION INTERVAL = INTERVAL '00:00:45', MANAGED = true, REPLICATION FACTOR = 1, SIZE = '2-2', SCHEDULE = MANUAL)"
>[version>10600] SELECT name, introspection_debugging, introspection_interval FROM mz_catalog.mz_clusters WHERE name = 'alter_cluster1';
alter_cluster1 true "00:00:45"
"""
)
)
Expand Down
7 changes: 4 additions & 3 deletions misc/python/materialize/cli/ci_annotate_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

from junitparser.junitparser import Error, Failure, JUnitXml

from materialize import ci_util, ui
from materialize.buildkite import add_annotation_raw, get_artifact_url
from materialize import buildkite, ci_util, ui
from materialize.buildkite import BuildkiteEnvVar, add_annotation_raw, get_artifact_url
from materialize.buildkite_insights.buildkite_api import builds_api, generic_api
from materialize.buildkite_insights.buildkite_api.buildkite_constants import (
BUILDKITE_RELEVANT_COMPLETED_BUILD_STEP_STATES,
Expand Down Expand Up @@ -352,9 +352,10 @@ def main() -> int:

return_code = annotate_logged_errors(args.log_files, test_analytics)
except Exception as e:
step_key = buildkite.get_var(BuildkiteEnvVar.BUILDKITE_STEP_KEY)
add_annotation_raw(
style="error",
markdown=f"ci_annotate_errors failed, report this to #team-testing:\n```\n{e}\n```",
markdown=f"ci_annotate_errors failed in step {step_key}, report this to #team-testing:\n```\n{e}\n```",
)
raise

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class ExpressionCharacteristics(Enum):

INTERVAL_WITH_MONTHS = 130
"""time interval containing months or years"""
DATE_WITH_SHORT_YEAR = 131
INCOMPLETE_TIME_VALUE = 132

STRING_EMPTY = 140
STRING_WITH_SPECIAL_SPACE_CHARS = 141
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,15 @@
)
from materialize.output_consistency.query.query_result import QueryFailure
from materialize.output_consistency.query.query_template import QueryTemplate
from materialize.output_consistency.selection.selection import (
ALL_QUERY_COLUMNS_BY_INDEX_SELECTION,
)
from materialize.output_consistency.validation.validation_message import (
ValidationError,
)

AGGREGATION_SHORTCUT_FUNCTION_NAMES = {"count", "string_agg"}


class InternalOutputInconsistencyIgnoreFilter(GenericInconsistencyIgnoreFilter):
"""Allows specifying and excluding expressions with known output inconsistencies"""
Expand Down Expand Up @@ -192,55 +197,46 @@ def _shall_ignore_error_mismatch(
query_template: QueryTemplate,
contains_aggregation: bool,
) -> IgnoreVerdict:
all_characteristics = query_template.get_involved_characteristics(
ALL_QUERY_COLUMNS_BY_INDEX_SELECTION
)

if self._uses_shortcut_optimization(
query_template.select_expressions, contains_aggregation
):
return YesIgnore("#17189")
return YesIgnore("#17189: evaluation order")

if self._uses_eager_evaluation(query_template):
return YesIgnore("#17189")
return YesIgnore("#17189: evaluation order")

if query_template.where_expression is not None:
# The error message may depend on the evaluation order of the where expression.
return YesIgnore("#17189")
return YesIgnore("#17189: evaluation order")

return NoIgnore()

def _uses_shortcut_optimization(
self, expressions: list[Expression], contains_aggregation: bool
) -> bool:
if self._uses_aggregation_shortcut_optimization(
expressions, contains_aggregation
if (
ExpressionCharacteristics.INFINITY in all_characteristics
and ExpressionCharacteristics.MAX_VALUE in all_characteristics
):
return True
if self._might_use_null_shortcut_optimization(expressions):
return True
return YesIgnore("#17189: evaluation order")

return False
return NoIgnore()

def _uses_aggregation_shortcut_optimization(
def _uses_shortcut_optimization(
self, expressions: list[Expression], contains_aggregation: bool
) -> bool:
if not contains_aggregation:
# all current known optimizations causing issues involve aggregations
return False

def is_function_taking_shortcut(expression: Expression) -> bool:
functions_taking_shortcuts = {"count", "string_agg"}

if isinstance(expression, ExpressionWithArgs):
operation = expression.operation
return (
isinstance(operation, DbFunction)
and operation.function_name_in_lower_case
in functions_taking_shortcuts
)
return False

for expression in expressions:
if expression.contains(is_function_taking_shortcut, True):
if expression.matches(
partial(
uses_aggregation_shortcut_optimization,
contains_aggregation=contains_aggregation,
),
True,
):
return True

if self._might_use_null_shortcut_optimization(expressions):
return True

return False

def _might_use_null_shortcut_optimization(
Expand Down Expand Up @@ -271,3 +267,19 @@ def _uses_eager_evaluation(self, query_template: QueryTemplate) -> bool:
),
True,
)


def uses_aggregation_shortcut_optimization(
    expression: Expression, contains_aggregation: bool
) -> bool:
    """Tell whether ``expression`` involves an aggregation shortcut function.

    The expression is tested against ``matches_fun_by_any_name`` bound to the
    names in ``AGGREGATION_SHORTCUT_FUNCTION_NAMES``.

    Args:
        expression: the expression to inspect.
        contains_aggregation: whether the surrounding query aggregates; when
            false the expression is not inspected at all.

    Returns:
        ``False`` if ``contains_aggregation`` is false, otherwise the result
        of ``expression.matches`` for the bound predicate.
    """
    if not contains_aggregation:
        # all current known optimizations causing issues involve aggregations
        return False

    is_shortcut_function = partial(
        matches_fun_by_any_name,
        function_names_in_lower_case=AGGREGATION_SHORTCUT_FUNCTION_NAMES,
    )
    # NOTE(review): the second argument is presumably the "check recursively"
    # flag of Expression.matches - confirm against its definition.
    return expression.matches(is_shortcut_function, True)
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(
type_name: str,
min_value: str,
max_value: str,
further_values: list[str],
further_values: list[tuple[str, set[ExpressionCharacteristics]]],
further_values_with_fixed_timezone: list[str] = [],
has_time_zone: bool = False,
is_pg_compatible: bool = True,
Expand Down Expand Up @@ -65,19 +65,40 @@ def resolve_return_type_spec(
# BC, AD not working, see: https://github.com/MaterializeInc/materialize/issues/19637
"0001-01-01",
"99999-12-31",
["2023-06-01", "2024-02-29"],
[
("2023-06-01", set()),
("2024-02-29", set()),
("01-02-03", {ExpressionCharacteristics.DATE_WITH_SHORT_YEAR}),
],
is_max_value_pg_compatible=False,
)
TIME_TYPE = DateTimeDataType(
TIME_TYPE_IDENTIFIER, "TIME", "00:00:00", "23:59:59.999999", ["01:02:03.000001"]
TIME_TYPE_IDENTIFIER,
"TIME",
"00:00:00",
"23:59:59.999999",
[
("01:02:03.000001", set()),
("11:", {ExpressionCharacteristics.INCOMPLETE_TIME_VALUE}),
],
)
TIMESTAMP_TYPE = DateTimeDataType(
TIMESTAMP_TYPE_IDENTIFIER,
"TIMESTAMP",
# BC, AD not working, see: https://github.com/MaterializeInc/materialize/issues/19637
"0001-01-01 00:00:00",
"99999-12-31 23:59:59",
["2023-02-28 11:22:33.44444", "2024-02-29 23:50:00"],
[
("2023-02-28 11:22:33.44444", set()),
("2024-02-29 23:50:00", set()),
(
"01-02-03 11:",
{
ExpressionCharacteristics.DATE_WITH_SHORT_YEAR,
ExpressionCharacteristics.INCOMPLETE_TIME_VALUE,
},
),
],
is_max_value_pg_compatible=False,
)
TIMESTAMPTZ_TYPE = DateTimeDataType(
Expand All @@ -86,7 +107,7 @@ def resolve_return_type_spec(
# BC, AD not working, see: https://github.com/MaterializeInc/materialize/issues/19637
"0001-01-01 00:00:00",
"99999-12-31 23:59:59",
further_values=["2023-06-01 11:22:33.44444"],
further_values=[("2023-06-01 11:22:33.44444", set())],
further_values_with_fixed_timezone=[
# leap year
"2024-02-29 11:50:00 EST",
Expand All @@ -105,11 +126,11 @@ def resolve_return_type_spec(
"-178956970 years -8 months -2147483648 days -2562047788:00:54.775808",
"178956970 years 7 months 2147483647 days 2562047788:00:54.775807",
[
"2 years 3 months 4 days 11:22:33.456789",
"100 months 100 days",
"44:45:45",
"45 minutes",
"70 minutes",
("2 years 3 months 4 days 11:22:33.456789", set()),
("100 months 100 days", set()),
("44:45:45", set()),
("45 minutes", set()),
("70 minutes", set()),
],
# type is compatible but causes too many issues for now
is_pg_compatible=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,15 @@ def __create_values(
is_pg_compatible=_date_time_data_type.is_max_value_pg_compatible,
)

for index, value in enumerate(_date_time_data_type.further_values):
for index, value_and_characteristics in enumerate(
_date_time_data_type.further_values
):
value = value_and_characteristics[0]
characteristics = value_and_characteristics[1]
_values_of_type.add_raw_value(
f"'{value}{timezone_value_suffix}'",
f"VAL_{index + 1}{timezone_column_suffix}",
set(),
characteristics,
)

if _timezone is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
from materialize.output_consistency.execution.evaluation_strategy import (
EvaluationStrategy,
)
from materialize.output_consistency.expression.expression_characteristics import (
ExpressionCharacteristics,
)
from materialize.output_consistency.input_data.test_input_data import (
ConsistencyTestInputData,
)
Expand Down Expand Up @@ -108,8 +105,8 @@ def print_reproduction_code_of_error(self, error: ValidationError) -> None:
)
self.print_separator_line()

characteristics = self.__get_involved_characteristics(
query_template, query_column_selection
characteristics = query_template.get_involved_characteristics(
query_column_selection
)
characteristic_names = ", ".join([char.name for char in characteristics])
self._print_text(
Expand Down Expand Up @@ -182,23 +179,3 @@ def __get_involved_column_names(
column_names.add(leaf_expression.column_name)

return column_names

def __get_involved_characteristics(
self,
query_template: QueryTemplate,
query_column_selection: QueryColumnByIndexSelection,
) -> set[ExpressionCharacteristics]:
all_involved_characteristics: set[ExpressionCharacteristics] = set()

for index, expression in enumerate(query_template.select_expressions):
if not query_column_selection.is_included(index):
continue

characteristics = expression.recursively_collect_involved_characteristics(
query_template.row_selection
)
all_involved_characteristics = all_involved_characteristics.union(
characteristics
)

return all_involved_characteristics
22 changes: 22 additions & 0 deletions misc/python/materialize/output_consistency/query/query_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
ValueStorageLayout,
)
from materialize.output_consistency.expression.expression import Expression
from materialize.output_consistency.expression.expression_characteristics import (
ExpressionCharacteristics,
)
from materialize.output_consistency.query.query_format import QueryOutputFormat
from materialize.output_consistency.selection.selection import (
DataRowSelection,
Expand Down Expand Up @@ -230,3 +233,22 @@ def matches_specific_select_or_filter_expression(
self.where_expression is not None
and self.where_expression.matches(predicate, check_recursively)
)

def get_involved_characteristics(
    self,
    query_column_selection: QueryColumnByIndexSelection,
) -> set[ExpressionCharacteristics]:
    """Collect the characteristics of all selected select-expressions.

    Args:
        query_column_selection: decides by index which entries of
            ``self.select_expressions`` are taken into account.

    Returns:
        The union of the characteristics reported by
        ``recursively_collect_involved_characteristics`` (called with
        ``self.row_selection``) for every included select expression; an
        empty set if no expression is included.
    """
    collected: set[ExpressionCharacteristics] = set()

    for position, select_expression in enumerate(self.select_expressions):
        if query_column_selection.is_included(position):
            collected.update(
                select_expression.recursively_collect_involved_characteristics(
                    self.row_selection
                )
            )

    return collected
Loading

0 comments on commit b5db35e

Please sign in to comment.