From 6a726f9eb8c1de4bea8f51dd53aec2e5be73e092 Mon Sep 17 00:00:00 2001 From: mkaruza Date: Tue, 7 Jan 2025 12:08:08 +0100 Subject: [PATCH] Simplify max_workers_per_postgres_scan * Setting this variable to `0` disables parallelization * Tables with a cardinality of at most 65536 use only a single parallel process * Higher cardinality will try to use `max_workers_per_postgres_scan` parallel processes, with an upper limit of `max_parallel_workers` --- src/pgduckdb.cpp | 3 ++- src/scan/postgres_table_reader.cpp | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/pgduckdb.cpp b/src/pgduckdb.cpp index 0718d126..763c6969 100644 --- a/src/pgduckdb.cpp +++ b/src/pgduckdb.cpp @@ -4,6 +4,7 @@ extern "C" { #include "postgres.h" #include "miscadmin.h" #include "utils/guc.h" +#include "postmaster/bgworker_internals.h" } #include "pgduckdb/pgduckdb.h" @@ -161,7 +162,7 @@ DuckdbInitGUC(void) { DefineCustomVariable("duckdb.max_workers_per_postgres_scan", "Maximum number of PostgreSQL workers used for a single Postgres scan", - &duckdb_max_workers_per_postgres_scan, 2, 8); + &duckdb_max_workers_per_postgres_scan, 0, MAX_PARALLEL_WORKER_LIMIT); DefineCustomVariable("duckdb.postgres_role", "Which postgres role should be allowed to use DuckDB execution, use the secrets and create " diff --git a/src/scan/postgres_table_reader.cpp b/src/scan/postgres_table_reader.cpp index 2443296b..e5741562 100644 --- a/src/scan/postgres_table_reader.cpp +++ b/src/scan/postgres_table_reader.cpp @@ -156,12 +156,26 @@ PostgresTableReader::PostgresTableReaderCleanup() { table_scan_query_desc = nullptr; } + +/* + * The logic is straightforward: if `duckdb_max_workers_per_postgres_scan` is set to 0 we don't want any + * parallelization. For cardinality less than or equal to 2^16 we only try to run one parallel process. 
When cardinality + * is bigger than that, we should spawn the number of parallel processes set by `duckdb_max_workers_per_postgres_scan` but + * no more than `max_parallel_workers`. + */ + int PostgresTableReader::ParallelWorkerNumber(Cardinality cardinality) { - static const int base_log = 8; - int cardinality_log = std::log2(cardinality); - int base = cardinality_log / base_log; - return std::max(1, std::min(base, std::max(duckdb_max_workers_per_postgres_scan, max_parallel_workers))); + static const int cardinality_threshold = 1 << 16; + /* No parallel scan wanted */ + if (!duckdb_max_workers_per_postgres_scan) { + return 0; + } + /* */ + if (cardinality <= cardinality_threshold) { + return 1; + } + return std::min(duckdb_max_workers_per_postgres_scan, max_parallel_workers); } const char *