From a7ce7c93b93c588c472a0543fb8460bec82d1091 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 14:51:55 +0300 Subject: [PATCH] Fix smart statement timeout update logic and aqo_stat_store Note: due to a mix of absolute and relative time in set_timeout_if_need function, smart statement timeout feature doesn't currently work since its timeouts are set in the past. This commit changes checked precondition for smart statement timeout change to fix array indexing bug, but the feature itself remains broken. This commit also fixes arithmetic errors in aqo_stat_store in the case of fully filled arrays. --- expected/aqo_query_stat.out | 155 ++++++++++++++++++++++++++++++++++++ postprocessing.c | 21 +++-- regress_schedule | 1 + sql/aqo_query_stat.sql | 74 +++++++++++++++++ storage.c | 18 +++-- 5 files changed, 254 insertions(+), 15 deletions(-) create mode 100644 expected/aqo_query_stat.out create mode 100644 sql/aqo_query_stat.sql diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 00000000..2478b4e5 --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/postprocessing.c b/postprocessing.c index 99c48646..452876f4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -30,6 +30,8 @@ #include "machine_learning.h" #include "storage.h" +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + bool aqo_learn_statement_timeout = false; @@ -762,7 +764,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); - double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -828,18 +829,22 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (stat != NULL) { - /* Store all learn data into the AQO service relations. */ - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); - - error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - - if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } + /* Store all learn data into the AQO service relations. */ + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); + pfree(stat); } } diff --git a/regress_schedule b/regress_schedule index 96b2cb93..f3084fc8 100644 --- a/regress_schedule +++ b/regress_schedule @@ -23,3 +23,4 @@ test: look_a_like test: feature_subspace test: eclasses test: eclasses_mchar +test: aqo_query_stat diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 00000000..a9228b5e --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 79b1b11d..a65ce463 100644 --- a/storage.c +++ b/storage.c @@ -233,7 +233,9 @@ reset_deactivated_queries(void) /* * Update AQO statistics. * - * Add a record (or update an existed) to stat storage for the query class. + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. * If stat hash table is full, return NULL and log this fact. @@ -312,19 +314,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, if (use_aqo) { Assert(entry->cur_stat_slot_aqo >= 0); - pos = entry->cur_stat_slot_aqo; - if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) entry->cur_stat_slot_aqo++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); - Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); } + pos = entry->cur_stat_slot_aqo - 1; entry->execs_with_aqo++; entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; @@ -333,19 +336,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, else { Assert(entry->cur_stat_slot >= 0); - pos = entry->cur_stat_slot; - if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) entry->cur_stat_slot++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); - Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + memmove(entry->plan_time, &entry->plan_time[1], sz); memmove(entry->exec_time, &entry->exec_time[1], sz); memmove(entry->est_error, &entry->est_error[1], sz); } + pos = entry->cur_stat_slot - 1; entry->execs_without_aqo++; entry->plan_time[pos] = *stat_arg->plan_time; entry->exec_time[pos] = *stat_arg->exec_time;