duckdb · JelteF · Jan 2, 2025 · Dec 17, 2024
diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
@@ -0,0 +1,22 @@
+-- Placeholder state-transition function for approx_count_distinct.
+-- It unconditionally raises: per its error message, the aggregate only
+-- works with DuckDB execution.
+CREATE FUNCTION @extschema@.approx_count_distinct_sfunc(bigint, anyelement)
+RETURNS bigint LANGUAGE 'plpgsql'
+SET search_path = pg_catalog, pg_temp
+AS
+$func$
+BEGIN
+    RAISE EXCEPTION 'Aggregate `approx_count_distinct(ANYELEMENT)` only works with Duckdb execution.';
+END;
+$func$;
+
+-- Aggregate declaration wired to the placeholder transition function
+-- above (bigint state, initial value 0); every transition call raises
+-- the error defined there.
+CREATE AGGREGATE @extschema@.approx_count_distinct(anyelement)
+(
+    sfunc = @extschema@.approx_count_distinct_sfunc,
+    stype = bigint,
+    initcond = 0
+);
diff --git a/src/pgduckdb_metadata_cache.cpp b/src/pgduckdb_metadata_cache.cpp
@@ -110,7 +110,7 @@ BuildDuckdbOnlyFunctions() {
 	 * caching its OID as a DuckDB-only function.
 	 */
 	const char *function_names[] = {"read_parquet",      "read_csv",   "iceberg_scan", "iceberg_metadata",
-	                                "iceberg_snapshots", "delta_scan", "read_json"};
+	                                "iceberg_snapshots", "delta_scan", "read_json",    "approx_count_distinct"};
 
 	for (uint32_t i = 0; i < lengthof(function_names); i++) {
 		CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i]));

diff --git a/test/regression/expected/approx_count_distinct.out b/test/regression/expected/approx_count_distinct.out
@@ -0,0 +1,33 @@
+CREATE TABLE t (a int, b text);
+INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e');
+INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h');
+SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t;
+ approx_count_distinct | approx_count_distinct 
+-----------------------+-----------------------
+                     5 |                     9
+(1 row)
+
+SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a;
+ a | approx_count_distinct 
+---+-----------------------
+ 1 |                     1
+ 2 |                     2
+ 3 |                     2
+ 4 |                     2
+ 5 |                     1
+(5 rows)
+
+SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a;
+ a | approx_count_distinct 
+---+-----------------------
+ 1 |                     1
+ 2 |                     2
+ 2 |                     2
+ 3 |                     2
+ 3 |                     2
+ 4 |                     2
+ 4 |                     2
+ 5 |                     1
+(8 rows)
+
+DROP TABLE t;
diff --git a/test/regression/expected/transactions.out b/test/regression/expected/transactions.out
@@ -159,3 +159,4 @@ FETCH PRIOR FROM c;
 
 COMMIT;
 DROP FUNCTION f, f2;
+DROP TABLE t;
diff --git a/test/regression/schedule b/test/regression/schedule
@@ -26,3 +26,4 @@ test: transaction_errors
 test: secrets
 test: prepare
 test: function
+test: approx_count_distinct
diff --git a/test/regression/sql/approx_count_distinct.sql b/test/regression/sql/approx_count_distinct.sql
@@ -0,0 +1,7 @@
+CREATE TABLE t (a int, b text);
+INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e');
+INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h');
+SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t;
+SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a;
+SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a;
+DROP TABLE t;
diff --git a/test/regression/sql/transactions.sql b/test/regression/sql/transactions.sql
@@ -115,3 +115,4 @@ FETCH PRIOR FROM c;
 COMMIT;
 
 DROP FUNCTION f, f2;
+DROP TABLE t;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -159,3 +159,4 @@ FETCH PRIOR FROM c;

		COMMIT;
		DROP FUNCTION f, f2;
		DROP TABLE t;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -115,3 +115,4 @@ FETCH PRIOR FROM c;
		COMMIT;

		DROP FUNCTION f, f2;
		DROP TABLE t;