Skip to content

Commit

Permalink
Only do CTAS type detection using duckdb for unresolved types
Browse files Browse the repository at this point in the history
  • Loading branch information
JelteF committed Jan 14, 2025
1 parent 447cea6 commit 59d8a57
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 7 deletions.
1 change: 1 addition & 0 deletions include/pgduckdb/pgduckdb_ruleutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ bool pgduckdb_is_duckdb_row(Oid type_oid);
bool pgduckdb_is_unresolved_type(Oid type_oid);
bool pgduckdb_var_is_duckdb_row(Var *var);
bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc);
bool pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list);
Var *pgduckdb_duckdb_row_subscript_var(Expr *expr);
List *pgduckdb_star_start_vars(List *target_list);

Expand Down
36 changes: 29 additions & 7 deletions src/pgduckdb_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,21 @@ static List *ctas_original_target_list = NIL;
static bool top_level_ddl = true;
static ProcessUtility_hook_type prev_process_utility_hook = NULL;

/*
* WrapQueryInDuckdbQueryCall takes a query and wraps it in a duckdb.query(...)
* call. It then explicitly references all the columns and the types from the
* original query's target list. So a query like this:
*
* SELECT r from read_csv('file.csv') r;
*
* Would expand to:
*
* SELECT r['id']::int AS id, r['name']::text AS name
* FROM duckdb.query('SELECT * from system.main.read_csv(''file.csv'')') r;
*
*/
static Query *
WrapQueryInQueryCall(Query *query, List *target_list) {
WrapQueryInDuckdbQueryCall(Query *query, List *target_list) {
char *duckdb_query_string = pgduckdb_get_querydef(query);

StringInfo buf = makeStringInfo();
Expand Down Expand Up @@ -113,13 +126,21 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para
return;
}

// TODO: Probably we should only do this if the targetlist actually
// contains some duckdb.unresolved_type or duckdb.row columns.
// XXX: This is a huge hack. Probably we should do something different
// here. The current hack doesn't work with materialized views yet.
Query *original_query = castNode(Query, stmt->query);

// We need to do hacky things if the targetlist contains
// duckdb.unresolved_type or duckdb.row columns. In those cases we want
// to run the query through duckdb to get the actual result types for
// these queries. We also want to lock in those types by creating a new
// query that will always return them.
if (!pgduckdb_target_list_contains_unresolved_type_or_row(original_query->targetList)) {
// If the target list doesn't contain duckdb.row or
// duckdb.unresolved_type though, we are done now.
return;
}

/* NOTE: The below code is mostly copied from ExecCreateTableAs */
List *rewritten;
Query *original_query = castNode(Query, stmt->query);

Query *query = (Query *)copyObjectImpl(original_query);
/*
Expand All @@ -140,9 +161,10 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para

PlannedStmt *plan = pg_plan_query(query, query_string, CURSOR_OPT_PARALLEL_OK, params);

/* This is where our custom code starts again */
List *target_list = plan->planTree->targetlist;

stmt->query = (Node *)WrapQueryInQueryCall(rewritten_query_copy, target_list);
stmt->query = (Node *)WrapQueryInDuckdbQueryCall(rewritten_query_copy, target_list);
stmt->into->viewQuery = (Node *)copyObjectImpl(stmt->query);

} else if (IsA(parsetree, CreateSchemaStmt) && !pgduckdb::doing_motherduck_sync) {
Expand Down
15 changes: 15 additions & 0 deletions src/pgduckdb_ruleutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@ pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc) {
return pgduckdb_is_duckdb_row(func_expr->funcresulttype);
}

/*
 * Returns true if at least one entry in the given target list has an
 * expression whose type is duckdb.unresolved_type or duckdb.row. Returns false
 * otherwise (including for an empty list).
 */
bool
pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list) {
	foreach_node(TargetEntry, entry, target_list) {
		Oid expr_type = exprType((Node *)entry->expr);

		/* Either of the two special DuckDB types is enough to answer "yes". */
		if (pgduckdb_is_unresolved_type(expr_type) || pgduckdb_is_duckdb_row(expr_type)) {
			return true;
		}
	}

	return false;
}

/*
* Returns NULL if the expression is not a subscript on a duckdb row. Returns
* the Var of the duckdb row if it is.
Expand Down

0 comments on commit 59d8a57

Please sign in to comment.