Skip to content

Commit

Permalink
Simplify max_workers_per_postgres_scan
Browse files Browse the repository at this point in the history
* Setting this variable to `0` disables parallelization
* Tables with a cardinality of 65536 or less use only a single parallel process
* Higher cardinality will try to use `max_workers_per_postgres_scan`
  parallel processes with upper limit of `max_parallel_workers`
  • Loading branch information
mkaruza committed Jan 7, 2025
1 parent ef00ee9 commit 6a726f9
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
3 changes: 2 additions & 1 deletion src/pgduckdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ extern "C" {
#include "postgres.h"
#include "miscadmin.h"
#include "utils/guc.h"
#include "postmaster/bgworker_internals.h"
}

#include "pgduckdb/pgduckdb.h"
Expand Down Expand Up @@ -161,7 +162,7 @@ DuckdbInitGUC(void) {

DefineCustomVariable("duckdb.max_workers_per_postgres_scan",
"Maximum number of PostgreSQL workers used for a single Postgres scan",
&duckdb_max_workers_per_postgres_scan, 2, 8);
&duckdb_max_workers_per_postgres_scan, 0, MAX_PARALLEL_WORKER_LIMIT);

DefineCustomVariable("duckdb.postgres_role",
"Which postgres role should be allowed to use DuckDB execution, use the secrets and create "
Expand Down
22 changes: 18 additions & 4 deletions src/scan/postgres_table_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,26 @@ PostgresTableReader::PostgresTableReaderCleanup() {
table_scan_query_desc = nullptr;
}


/*
 * Decide how many parallel worker processes to request for a Postgres table scan.
 *
 *  - If `duckdb_max_workers_per_postgres_scan` is set to 0, no parallelization is wanted and 0 is returned.
 *  - For a cardinality less than or equal to 2^16 only one parallel process is requested.
 *  - For a bigger cardinality the number of parallel processes set by `duckdb_max_workers_per_postgres_scan`
 *    is requested, but never more than `max_parallel_workers`.
 */
int
PostgresTableReader::ParallelWorkerNumber(Cardinality cardinality) {
	static const int cardinality_threshold = 1 << 16;

	/* No parallel scan wanted */
	if (!duckdb_max_workers_per_postgres_scan) {
		return 0;
	}

	/* Small table: a single parallel process is enough */
	if (cardinality <= cardinality_threshold) {
		return 1;
	}

	/* Large table: use the configured number of workers, capped by the server-wide limit */
	return std::min(duckdb_max_workers_per_postgres_scan, max_parallel_workers);
}

const char *
Expand Down

0 comments on commit 6a726f9

Please sign in to comment.