From b4878e49c98a981b469cb17d875e70b14d7c9634 Mon Sep 17 00:00:00 2001 From: Ritwiz Sinha <43509699+ritwizsinha@users.noreply.github.com> Date: Thu, 2 Jan 2025 19:34:26 +0530 Subject: [PATCH] Add support for BYTEA/BLOB (#511) Fixes #464 --- src/pgduckdb_filter.cpp | 2 + src/pgduckdb_types.cpp | 49 +++++++++++++++++++ .../expected/array_type_support.out | 13 +++++ test/regression/expected/type_support.out | 28 +++++++++++ test/regression/sql/array_type_support.sql | 9 ++++ test/regression/sql/type_support.sql | 14 ++++++ 6 files changed, 115 insertions(+) diff --git a/src/pgduckdb_filter.cpp b/src/pgduckdb_filter.cpp index 6ea6e032..7b0c4bd3 100644 --- a/src/pgduckdb_filter.cpp +++ b/src/pgduckdb_filter.cpp @@ -83,6 +83,8 @@ FilterOperationSwitch(const Datum &value, const duckdb::Value &constant, Oid typ case TEXTOID: case VARCHAROID: return StringFilterOperation(value, constant, type_oid == BPCHAROID); + case BYTEAOID: + return StringFilterOperation(value, constant, false); default: throw duckdb::InvalidTypeException( duckdb::string("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: " + std::to_string(type_oid))); diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index ca234352..79255354 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -2,6 +2,7 @@ #include "duckdb/common/shared_ptr.hpp" #include "duckdb/common/extra_type_info.hpp" #include "duckdb/common/types/uuid.hpp" +#include "duckdb/common/types/blob.hpp" #include "pgduckdb/pgduckdb_types.hpp" #include "pgduckdb/pgduckdb_utils.hpp" @@ -199,6 +200,17 @@ ConvertVarCharDatum(const duckdb::Value &value) { return PointerGetDatum(result); } +static Datum +ConvertBinaryDatum(const duckdb::Value &value) { + auto str = value.GetValueUnsafe(); + auto blob_len = str.GetSize(); + auto blob = str.GetDataUnsafe(); + bytea* result = (bytea *)palloc0(blob_len + VARHDRSZ); + SET_VARSIZE(result, blob_len + VARHDRSZ); + memcpy(VARDATA(result), blob, blob_len); + return PointerGetDatum(result); +} + inline Datum ConvertDateDatum(const duckdb::Value &value) { duckdb::date_t date = value.GetValue(); @@ -505,6 +517,19 @@ struct PostgresTypeTraits { } }; +// BLOB type +template <> +struct PostgresTypeTraits { + static constexpr int16_t typlen = -1; // variable-length + static constexpr bool typbyval = false; + static constexpr char typalign = 'i'; + + static inline Datum + ToDatum(const duckdb::Value &val) { + return ConvertBinaryDatum(val); + } +}; + template struct PostgresOIDMapping { static constexpr int32_t postgres_oid = OID; @@ -545,6 +570,7 @@ using TimestampArray = PODArray>; using UUIDArray = PODArray>; using VarCharArray = PODArray>; using NumericArray = PODArray>; +using ByteArray = PODArray>; static idx_t GetDuckDBListDimensionality(const duckdb::LogicalType &list_type, idx_t depth = 0) { @@ -733,6 +759,10 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col slot->tts_values[col] = ConvertUUIDDatum(value); break; } + case BYTEAOID: { + slot->tts_values[col] = ConvertBinaryDatum(value); + break; + } case BOOLARRAYOID: { ConvertDuckToPostgresArray(slot, value, col); break; @@ -784,6 +814,10 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col ConvertDuckToPostgresArray(slot, value, col); break; } + case BYTEAARRAYOID: { + ConvertDuckToPostgresArray(slot, value, col); + break; + } default: elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid); return false; @@ -866,6 +900,9 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) { case REGCLASSOID: case REGCLASSARRAYOID: return duckdb::LogicalTypeId::UINTEGER; + case BYTEAOID: + case BYTEAARRAYOID: + return duckdb::LogicalTypeId::BLOB; default: return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")"); } @@ -920,6 +957,8 @@ GetPostgresArrayDuckDBType(const duckdb::LogicalType &type) { return NUMERICARRAYOID; case duckdb::LogicalTypeId::UUID: return UUIDARRAYOID; + case duckdb::LogicalTypeId::BLOB: + return BYTEAARRAYOID; default: { elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Unsupported `LIST` subtype %d to Postgres type", static_cast(type.id())); @@ -974,6 +1013,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) { } return GetPostgresArrayDuckDBType(*duck_type); } + case duckdb::LogicalTypeId::BLOB: + return BYTEAOID; default: { elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: %s to Postgres type", type.ToString().c_str()); @@ -1222,6 +1263,14 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i Append(result, duckdb_uuid, offset); break; } + case duckdb::LogicalTypeId::BLOB: { + const char *bytea_data = VARDATA_ANY(value); + size_t bytea_length = VARSIZE_ANY_EXHDR(value); + const duckdb::string_t s(bytea_data, bytea_length); + auto data = duckdb::FlatVector::GetData(result); + data[offset] = duckdb::StringVector::AddString(result, s); + break; + } case duckdb::LogicalTypeId::LIST: { // Convert Datum to ArrayType auto array = DatumGetArrayTypeP(value); diff --git a/test/regression/expected/array_type_support.out b/test/regression/expected/array_type_support.out index 35c31f3b..e229f4c6 100644 --- a/test/regression/expected/array_type_support.out +++ b/test/regression/expected/array_type_support.out @@ -318,6 +318,19 @@ SELECT * FROM varchar_array_2d; {{some,strings},{NULL,last}} (5 rows) +-- BYTEA (single dimension) +CREATE TABLE bytea_array_1d (a bytea[]); +INSERT INTO bytea_array_1d (a) +VALUES + (ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]), + (ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]); +SELECT * FROM bytea_array_1d; + a +------------------------------- + {"\\x01020304","\\xaabbccdd"} + {"\\x11223344","\\x55667788"} +(2 rows) + -- TIMESTAMP (two dimensions) CREATE TABLE timestamp_array_2d(a TIMESTAMP[][]); INSERT INTO timestamp_array_2d VALUES diff --git a/test/regression/expected/type_support.out b/test/regression/expected/type_support.out index c40ce762..4d509658 100644 --- a/test/regression/expected/type_support.out +++ b/test/regression/expected/type_support.out @@ -308,6 +308,33 @@ SELECT * FROM json_tbl; {} (4 rows) +-- BLOB +CREATE TABLE blob_tbl(a bytea); +INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES + ('\x'), + ('\x110102030405060708090a0b0c0d0e0f'), + (''), + ('\x00'), + ('\x07'), + (NULL) +) t(a); +SELECT * from blob_tbl; + a +------------------------------------ + \x + \x110102030405060708090a0b0c0d0e0f + \x + \x00 + \x07 + +(6 rows) + +SELECT * from blob_tbl where a = '\x07'; + a +------ + \x07 +(1 row) + -- REGCLASSOID CREATE TABLE regclass_tbl (a REGCLASS); INSERT INTO regclass_tbl VALUES (42), (3000000000); @@ -337,4 +364,5 @@ DROP TABLE bigint_numeric; DROP TABLE hugeint_numeric; DROP TABLE uuid_tbl; DROP TABLE json_tbl; +DROP TABLE blob_tbl; DROP TABLE regclass_tbl; diff --git a/test/regression/sql/array_type_support.sql b/test/regression/sql/array_type_support.sql index 79c2e487..aec79467 100644 --- a/test/regression/sql/array_type_support.sql +++ b/test/regression/sql/array_type_support.sql @@ -195,6 +195,15 @@ INSERT INTO varchar_array_2d VALUES ('{{"some","strings"},{NULL,"last"}}'); SELECT * FROM varchar_array_2d; +-- BYTEA (single dimension) +CREATE TABLE bytea_array_1d (a bytea[]); + +INSERT INTO bytea_array_1d (a) +VALUES + (ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]), + (ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]); +SELECT * FROM bytea_array_1d; + -- TIMESTAMP (two dimensions) CREATE TABLE timestamp_array_2d(a TIMESTAMP[][]); INSERT INTO timestamp_array_2d VALUES diff --git a/test/regression/sql/type_support.sql b/test/regression/sql/type_support.sql index 725ee143..b42a9eb3 100644 --- a/test/regression/sql/type_support.sql +++ b/test/regression/sql/type_support.sql @@ -154,6 +154,19 @@ INSERT INTO json_tbl SELECT CAST(a as JSON) FROM (VALUES ) t(a); SELECT * FROM json_tbl; +-- BLOB +CREATE TABLE blob_tbl(a bytea); +INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES + ('\x'), + ('\x110102030405060708090a0b0c0d0e0f'), + (''), + ('\x00'), + ('\x07'), + (NULL) +) t(a); +SELECT * from blob_tbl; +SELECT * from blob_tbl where a = '\x07'; + -- REGCLASSOID CREATE TABLE regclass_tbl (a REGCLASS); INSERT INTO regclass_tbl VALUES (42), (3000000000); @@ -178,4 +191,5 @@ DROP TABLE bigint_numeric; DROP TABLE hugeint_numeric; DROP TABLE uuid_tbl; DROP TABLE json_tbl; +DROP TABLE blob_tbl; DROP TABLE regclass_tbl;