diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 0000000..2478b4e --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | 
executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; 
+ count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/postprocessing.c b/postprocessing.c index 99c4864..452876f 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -30,6 +30,8 @@ #include "machine_learning.h" #include "storage.h" +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + bool aqo_learn_statement_timeout = false; @@ -762,7 +764,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); - double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -828,18 +829,22 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (stat != NULL) { - /* Store all learn data into the AQO service relations. 
*/ - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); - - error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - - if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } + /* Store all learn data into the AQO service relations. */ + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); + pfree(stat); } } diff --git a/regress_schedule b/regress_schedule index 96b2cb9..f3084fc 100644 --- a/regress_schedule +++ b/regress_schedule @@ -23,3 +23,4 @@ test: look_a_like test: feature_subspace test: eclasses test: eclasses_mchar +test: aqo_query_stat diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 0000000..a9228b5 --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT 
round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 79b1b11..a65ce46 100644 --- a/storage.c +++ b/storage.c @@ -233,7 +233,9 @@ reset_deactivated_queries(void) /* * Update AQO statistics. * - * Add a record (or update an existed) to stat storage for the query class. + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing one) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. * If stat hash table is full, return NULL and log this fact. 
@@ -312,19 +314,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, if (use_aqo) { Assert(entry->cur_stat_slot_aqo >= 0); - pos = entry->cur_stat_slot_aqo; - if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) entry->cur_stat_slot_aqo++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); - Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); } + pos = entry->cur_stat_slot_aqo - 1; entry->execs_with_aqo++; entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; @@ -333,19 +336,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, else { Assert(entry->cur_stat_slot >= 0); - pos = entry->cur_stat_slot; - if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) entry->cur_stat_slot++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); - Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + memmove(entry->plan_time, &entry->plan_time[1], sz); memmove(entry->exec_time, &entry->exec_time[1], sz); memmove(entry->est_error, &entry->est_error[1], sz); } + pos = entry->cur_stat_slot - 1; entry->execs_without_aqo++; entry->plan_time[pos] = *stat_arg->plan_time; entry->exec_time[pos] = *stat_arg->exec_time;