-- Patch summary (git unified diff covering SQL, C++ and regression-test files):
--   * sql/pg_duckdb--0.2.0--0.3.0.sql: creates approx_count_distinct_sfunc(bigint, anyelement),
--     a plpgsql state function whose body only raises an exception saying the aggregate works
--     solely with DuckDB execution, and the aggregate approx_count_distinct(anyelement) that
--     uses it (stype bigint, initcond 0).
--   * src/pgduckdb_metadata_cache.cpp: appends "approx_count_distinct" to the function_names
--     array (the hunk header places this inside BuildDuckdbOnlyFunctions()).
--   * test/regression: adds the approx_count_distinct test (sql script and expected output),
--     registers it in the schedule, and appends DROP TABLE t; to the transactions test and
--     its expected output.
-- NOTE(review): the line breaks of this patch appear to have been collapsed into spaces;
-- presumably the original newlines and column padding must be restored before it can be
-- applied -- confirm against the upstream commit.
diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index e69de29b..f64f71b7 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -0,0 +1,16 @@ +CREATE FUNCTION @extschema@.approx_count_distinct_sfunc(bigint, anyelement) +RETURNS bigint LANGUAGE 'plpgsql' +SET search_path = pg_catalog, pg_temp +AS +$func$ +BEGIN + RAISE EXCEPTION 'Aggregate `approx_count_distinct(ANYELEMENT)` only works with Duckdb execution.'; +END; +$func$; + +CREATE AGGREGATE @extschema@.approx_count_distinct(anyelement) +( + sfunc = @extschema@.approx_count_distinct_sfunc, + stype = bigint, + initcond = 0 +); diff --git a/src/pgduckdb_metadata_cache.cpp b/src/pgduckdb_metadata_cache.cpp index 76558548..4d812930 100644 --- a/src/pgduckdb_metadata_cache.cpp +++ b/src/pgduckdb_metadata_cache.cpp @@ -110,7 +110,7 @@ BuildDuckdbOnlyFunctions() { * caching its OID as a DuckDB-only function. */ const char *function_names[] = {"read_parquet", "read_csv", "iceberg_scan", "iceberg_metadata", - "iceberg_snapshots", "delta_scan", "read_json"}; + "iceberg_snapshots", "delta_scan", "read_json", "approx_count_distinct"}; for (uint32_t i = 0; i < lengthof(function_names); i++) { CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i])); diff --git a/test/regression/expected/approx_count_distinct.out b/test/regression/expected/approx_count_distinct.out new file mode 100644 index 00000000..27dc2254 --- /dev/null +++ b/test/regression/expected/approx_count_distinct.out @@ -0,0 +1,33 @@ +CREATE TABLE t (a int, b text); +INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); +INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); +SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t; + approx_count_distinct | approx_count_distinct +-----------------------+----------------------- + 5 | 9 +(1 row) + +SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a; + a | approx_count_distinct 
+---+----------------------- + 1 | 1 + 2 | 2 + 3 | 2 + 4 | 2 + 5 | 1 +(5 rows) + +SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a; + a | approx_count_distinct +---+----------------------- + 1 | 1 + 2 | 2 + 2 | 2 + 3 | 2 + 3 | 2 + 4 | 2 + 4 | 2 + 5 | 1 +(8 rows) + +DROP TABLE t; diff --git a/test/regression/expected/transactions.out b/test/regression/expected/transactions.out index 4ebebfe7..ffcd0e04 100644 --- a/test/regression/expected/transactions.out +++ b/test/regression/expected/transactions.out @@ -159,3 +159,4 @@ FETCH PRIOR FROM c; COMMIT; DROP FUNCTION f, f2; +DROP TABLE t; diff --git a/test/regression/schedule b/test/regression/schedule index 896686b6..51d16a7b 100644 --- a/test/regression/schedule +++ b/test/regression/schedule @@ -26,3 +26,4 @@ test: transaction_errors test: secrets test: prepare test: function +test: approx_count_distinct diff --git a/test/regression/sql/approx_count_distinct.sql b/test/regression/sql/approx_count_distinct.sql new file mode 100644 index 00000000..0942e044 --- /dev/null +++ b/test/regression/sql/approx_count_distinct.sql @@ -0,0 +1,7 @@ +CREATE TABLE t (a int, b text); +INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); +INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); +SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t; +SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a; +SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a; +DROP TABLE t; diff --git a/test/regression/sql/transactions.sql b/test/regression/sql/transactions.sql index 244b7b3e..70b52014 100644 --- a/test/regression/sql/transactions.sql +++ b/test/regression/sql/transactions.sql @@ -115,3 +115,4 @@ FETCH PRIOR FROM c; COMMIT; DROP FUNCTION f, f2; +DROP TABLE t;