Skip to content

Commit

Permalink
Fix build_knn_matrix (now called update_knn_matrix)
Browse files Browse the repository at this point in the history
The previous version of build_knn_matrix had an unreachable branch (`if
(features!=NULL)`), which led to use_wide_search having no effect.
There was also a memory bug: a memory area was being copied into itself.

predict_for_relation was fixed with the interaction between the
use_wide_search and predict_with_few_neighbors features in mind. Additions
to the look_a_like regression test reflect those changes.

This commit also removes unused arguments from several functions and
fixes a couple of typos.
  • Loading branch information
Artem Fadeev committed Sep 30, 2024
1 parent 7ff9d8b commit 09cd836
Show file tree
Hide file tree
Showing 11 changed files with 248 additions and 77 deletions.
23 changes: 11 additions & 12 deletions cardinality_estimation.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,17 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns,
&ncols, &features);
data = OkNNr_allocate(ncols);

if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL))
if (load_aqo_data(query_context.fspace_hash, *fss, data, false) &&
data->rows >= (aqo_predict_with_few_neighbors ? 1 : aqo_k))
result = OkNNr_predict(data, features);
/* Try to search in surrounding feature spaces for the same node */
else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true))
{
elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour "
"includes %d feature(s) and %d fact(s).",
(int64) *fss, data->cols, data->rows);
result = OkNNr_predict(data, features);
}
else
{
/*
Expand All @@ -91,17 +100,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns,
* small part of paths was used for AQO learning and stored into
* the AQO knowledge base.
*/

/* Try to search in surrounding feature spaces for the same node */
if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features))
result = -1;
else
{
elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour "
"includes %d feature(s) and %d fact(s).",
*fss, data->cols, data->rows);
result = OkNNr_predict(data, features);
}
result = -1;
}

#ifdef AQO_DEBUG_PRINT
Expand Down
2 changes: 1 addition & 1 deletion cardinality_hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs,
*fss = get_grouped_exprs_hash(child_fss, group_exprs);
memset(&data, 0, sizeof(OkNNrdata));

if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL))
if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false))
return -1;

Assert(data.rows == 1);
Expand Down
1 change: 1 addition & 0 deletions expected/gucs.out
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,5 @@ SELECT count(*) FROM aqo_query_stat;
0
(1 row)

DROP TABLE t;
DROP EXTENSION aqo;
125 changes: 124 additions & 1 deletion expected/look_a_like.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ SELECT true AS success FROM aqo_reset();
SET aqo.wide_search = 'on';
SET aqo.mode = 'learn';
SET aqo.show_details = 'on';
set aqo.show_hash = 'off';
SET aqo.show_hash = 'off';
SET aqo.min_neighbors_for_predicting = 1;
SET aqo.predict_with_few_neighbors = 'off';
SET enable_nestloop = 'off';
SET enable_mergejoin = 'off';
SET enable_material = 'off';
Expand Down Expand Up @@ -553,9 +554,131 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L
JOINS: 2
(24 rows)

-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote
-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll
-- refer to it as fs0.
-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on',
-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here.
SELECT str AS result
FROM expln('
SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
result
----------------------------------------------------------------------
Seq Scan on public.a (actual rows=100 loops=1)
AQO: rows=20, error=-400%
Output: x1, x2, x3
Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10))
Using aqo: true
AQO mode: LEARN
JOINS: 0
(7 rows)

-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid)
-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors
-- is 'off', AQO is obliged to use both data records for fss0.
SET aqo.min_neighbors_for_predicting = 2;
SELECT str AS result
FROM expln('
SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
result
--------------------------------------------------------
Seq Scan on public.a (actual rows=80 loops=1)
AQO: rows=77, error=-4%
Output: x1, x2, x3
Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10))
Rows Removed by Filter: 20
Using aqo: true
AQO mode: LEARN
JOINS: 0
(8 rows)

-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid)
-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use
-- three data records from two neighbors for this one.
SET aqo.min_neighbors_for_predicting = 3;
SELECT str AS result
FROM expln('
SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
result
--------------------------------------------------------------
HashAggregate (actual rows=6 loops=1)
AQO not used
Output: x2
Group Key: a.x2
-> Seq Scan on public.a (actual rows=60 loops=1)
AQO: rows=71, error=15%
Output: x1, x2, x3
Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10))
Rows Removed by Filter: 40
Using aqo: true
AQO mode: LEARN
JOINS: 0
(12 rows)

-----
DROP TABLE IF EXISTS t;
NOTICE: table "t" does not exist, skipping
CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x;
ANALYZE t;
SELECT true AS success FROM aqo_reset();
success
---------
t
(1 row)

-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid)
-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation
-- than neighbors' records.
SELECT str AS result
FROM expln('
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
result
------------------------------------------------------------------
Seq Scan on public.t (actual rows=10000 loops=1)
AQO not used
Output: x, y, z
Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000))
Using aqo: true
AQO mode: LEARN
JOINS: 0
(7 rows)

DO
$$
BEGIN
for counter in 1..20 loop
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter);
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter);
EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter);
end loop;
END;
$$ LANGUAGE PLPGSQL;
-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used.
SELECT str AS result
FROM expln('
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
result
------------------------------------------------------------------
Seq Scan on public.t (actual rows=10000 loops=1)
AQO: rows=9987, error=-0%
Output: x, y, z
Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000))
Using aqo: true
AQO mode: LEARN
JOINS: 0
(7 rows)

RESET aqo.wide_search;
RESET aqo.predict_with_few_neighbors;
RESET aqo.min_neighbors_for_predicting;
DROP EXTENSION aqo CASCADE;
DROP TABLE a;
DROP TABLE b;
DROP TABLE c;
DROP TABLE t;
DROP FUNCTION expln;
1 change: 1 addition & 0 deletions expected/unsupported.out
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ $$ LANGUAGE PLPGSQL;
SET aqo.mode = 'learn';
SET aqo.show_details = 'on';
DROP TABLE IF EXISTS t;
NOTICE: table "t" does not exist, skipping
CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs;
ANALYZE t;
CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y
Expand Down
1 change: 1 addition & 0 deletions machine_learning.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ OkNNr_predict(OkNNrdata *data, double *features)

if (!aqo_predict_with_few_neighbors && data->rows < aqo_k)
return -1.;
Assert(data->rows > 0);

for (i = 0; i < data->rows; ++i)
distances[i] = fs_distance(data->matrix[i], features, data->cols);
Expand Down
2 changes: 1 addition & 1 deletion postprocessing.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data,
double *features, double target, double rfactor,
List *reloids)
{
if (!load_fss_ext(fs, fss, data, NULL))
if (!load_aqo_data(fs, fss, data, false))
data->rows = 0;

data->rows = OkNNr_learn(data, features, target, rfactor);
Expand Down
1 change: 1 addition & 0 deletions sql/gucs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,5 @@ SELECT count(*) FROM aqo_query_stat;
SELECT true AS success FROM aqo_reset();
SELECT count(*) FROM aqo_query_stat;

DROP TABLE t;
DROP EXTENSION aqo;
66 changes: 65 additions & 1 deletion sql/look_a_like.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ SET aqo.wide_search = 'on';

SET aqo.mode = 'learn';
SET aqo.show_details = 'on';
set aqo.show_hash = 'off';
SET aqo.show_hash = 'off';
SET aqo.min_neighbors_for_predicting = 1;
SET aqo.predict_with_few_neighbors = 'off';
SET enable_nestloop = 'off';
SET enable_mergejoin = 'off';
SET enable_material = 'off';
Expand Down Expand Up @@ -142,10 +143,73 @@ FROM expln('
SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';


-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote
-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll
-- refer to it as fs0.

-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on',
-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here.
SELECT str AS result
FROM expln('
SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid)

-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors
-- is 'off', AQO is obliged to use both data records for fss0.
SET aqo.min_neighbors_for_predicting = 2;
SELECT str AS result
FROM expln('
SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid)

-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use
-- three data records from two neighbors for this one.
SET aqo.min_neighbors_for_predicting = 3;
SELECT str AS result
FROM expln('
SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';

-----
DROP TABLE IF EXISTS t;
CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x;
ANALYZE t;
SELECT true AS success FROM aqo_reset();

-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid)
-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation
-- than neighbors' records.
SELECT str AS result
FROM expln('
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
DO
$$
BEGIN
for counter in 1..20 loop
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter);
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter);
EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter);
end loop;
END;
$$ LANGUAGE PLPGSQL;
-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used.
SELECT str AS result
FROM expln('
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';


RESET aqo.wide_search;
RESET aqo.predict_with_few_neighbors;
RESET aqo.min_neighbors_for_predicting;
DROP EXTENSION aqo CASCADE;

DROP TABLE a;
DROP TABLE b;
DROP TABLE c;
DROP TABLE t;
DROP FUNCTION expln;
Loading

0 comments on commit 09cd836

Please sign in to comment.