From 94030bb73d43673c26221592d9a0eddeeebb53a7 Mon Sep 17 00:00:00 2001 From: Cheng Chen Date: Tue, 17 Dec 2024 01:08:47 +0000 Subject: [PATCH] Support approx_count_distinct (#55) --- sql/pg_mooncake--0.0.1.sql | 10 ++++++++++ src/pgduckdb/pgduckdb_metadata_cache.cpp | 2 +- test/expected/approx_count_distinct.out | 10 ++++++++++ test/sql/approx_count_distinct.sql | 5 +++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 test/expected/approx_count_distinct.out create mode 100644 test/sql/approx_count_distinct.sql diff --git a/sql/pg_mooncake--0.0.1.sql b/sql/pg_mooncake--0.0.1.sql index 51f23d4..6348467 100644 --- a/sql/pg_mooncake--0.0.1.sql +++ b/sql/pg_mooncake--0.0.1.sql @@ -233,6 +233,16 @@ BEGIN END; $func$; +CREATE FUNCTION mooncake.approx_count_distinct(a anyelement) +RETURNS bigint LANGUAGE 'plpgsql' +SET search_path = pg_catalog, pg_temp +AS +$func$ +BEGIN + RAISE EXCEPTION 'Function `approx_count_distinct(ANYELEMENT)` only works with Duckdb execution.'; +END; +$func$; + CREATE TABLE mooncake.secrets ( name TEXT NOT NULL, type TEXT NOT NULL, diff --git a/src/pgduckdb/pgduckdb_metadata_cache.cpp b/src/pgduckdb/pgduckdb_metadata_cache.cpp index 47e88e0..165b257 100644 --- a/src/pgduckdb/pgduckdb_metadata_cache.cpp +++ b/src/pgduckdb/pgduckdb_metadata_cache.cpp @@ -110,7 +110,7 @@ BuildDuckdbOnlyFunctions() { * caching its OID as a DuckDB-only function. */ const char *function_names[] = {"read_parquet", "read_csv", "iceberg_scan", "iceberg_metadata", - "iceberg_snapshots", "delta_scan", "read_json"}; + "iceberg_snapshots", "delta_scan", "read_json", "approx_count_distinct"}; for (uint32_t i = 0; i < lengthof(function_names); i++) { CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i])); diff --git a/test/expected/approx_count_distinct.out b/test/expected/approx_count_distinct.out new file mode 100644 index 0000000..8afd10a --- /dev/null +++ b/test/expected/approx_count_distinct.out @@ -0,0 +1,10 @@ +CREATE TABLE t (a int, b text) USING columnstore; +INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); +INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); +SELECT mooncake.approx_count_distinct(a), mooncake.approx_count_distinct(b) FROM t; + approx_count_distinct | approx_count_distinct +-----------------------+----------------------- + 5 | 9 +(1 row) + +DROP TABLE t; diff --git a/test/sql/approx_count_distinct.sql b/test/sql/approx_count_distinct.sql new file mode 100644 index 0000000..ae42664 --- /dev/null +++ b/test/sql/approx_count_distinct.sql @@ -0,0 +1,5 @@ +CREATE TABLE t (a int, b text) USING columnstore; +INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); +INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); +SELECT mooncake.approx_count_distinct(a), mooncake.approx_count_distinct(b) FROM t; +DROP TABLE t;