diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp index 92a6801c..bc3d921f 100644 --- a/src/pgduckdb_hooks.cpp +++ b/src/pgduckdb_hooks.cpp @@ -106,6 +106,13 @@ ContainsDuckdbItems(Node *node, void *context) { } } + if (IsA(node, Aggref)) { + Aggref *func = castNode(Aggref, node); + if (pgduckdb::IsDuckdbOnlyFunction(func->aggfnoid)) { + return true; + } + } + #if PG_VERSION_NUM >= 160000 return expression_tree_walker(node, ContainsDuckdbItems, context); #else @@ -118,6 +125,27 @@ NeedsDuckdbExecution(Query *query) { return ContainsDuckdbItems((Node *)query, NULL); } +/* + * We only check ContainsFromClause if duckdb.force_execution is set to true. + * + * If there's no FROM clause, we're only selecting constants. From a + * performance perspective there's not really a point in using DuckDB. If we + * forward all of such queries to DuckDB anyway, then many queries that are + * used to inspect postgres will throw a warning or return incorrect results. + * For example: + * + * SELECT current_setting('work_mem'); + * + * So even if a user sets duckdb.force_execution = true, we still won't + * forward such queries to DuckDB. With one exception: If the query + * requires duckdb e.g. due to a duckdb-only function being used, we'll + * still executing this in DuckDB. + */ +static bool +ContainsFromClause(Query *query) { + return query->rtable; +} + static bool IsAllowedStatement(Query *query, bool throw_error = false) { int elevel = throw_error ? ERROR : DEBUG4; @@ -142,15 +170,6 @@ IsAllowedStatement(Query *query, bool throw_error = false) { } } - /* - * If there's no rtable, we're only selecting constants. There's no point - * in using DuckDB for that. - */ - if (!query->rtable) { - elog(elevel, "DuckDB usage requires at least one table"); - return false; - } - /* * If any table is from pg_catalog, we don't want to use DuckDB. This is * because DuckDB has its own pg_catalog tables that contain different data @@ -172,7 +191,7 @@ DuckdbPlannerHook_Cpp(Query *parse, const char *query_string, int cursor_options IsAllowedStatement(parse, true); return DuckdbPlanNode(parse, query_string, cursor_options, bound_params, true); - } else if (duckdb_force_execution && IsAllowedStatement(parse)) { + } else if (duckdb_force_execution && IsAllowedStatement(parse) && ContainsFromClause(parse)) { PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, query_string, cursor_options, bound_params, false); if (duckdbPlan) { return duckdbPlan; diff --git a/test/regression/expected/approx_count_distinct.out b/test/regression/expected/approx_count_distinct.out index 27dc2254..ddec3dfc 100644 --- a/test/regression/expected/approx_count_distinct.out +++ b/test/regression/expected/approx_count_distinct.out @@ -30,4 +30,17 @@ SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a; 5 | 1 (8 rows) +SELECT approx_count_distinct(1); + approx_count_distinct +----------------------- + 1 +(1 row) + +SET duckdb.force_execution = false; +SELECT approx_count_distinct(1); + approx_count_distinct +----------------------- + 1 +(1 row) + DROP TABLE t; diff --git a/test/regression/sql/approx_count_distinct.sql b/test/regression/sql/approx_count_distinct.sql index 0942e044..7ce369f4 100644 --- a/test/regression/sql/approx_count_distinct.sql +++ b/test/regression/sql/approx_count_distinct.sql @@ -4,4 +4,7 @@ INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t; SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a; SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a; +SELECT approx_count_distinct(1); +SET duckdb.force_execution = false; +SELECT approx_count_distinct(1); DROP TABLE t;