From 9a01289ac0dbc870b5e941b17008146e47d973c8 Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Thu, 19 Oct 2023 22:02:38 +0530 Subject: [PATCH 1/3] add double and bool in mapping --- .../src/datahub/ingestion/source/redshift/redshift.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index a1b6333a3775d..f8de3bb940a11 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -216,6 +216,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): ] = { "BYTES": BytesType, "BOOL": BooleanType, + "BOOLEAN": BooleanType, + "DOUBLE": NumberType, + "DOUBLE PRECISION": NumberType, "DECIMAL": NumberType, "NUMERIC": NumberType, "BIGNUMERIC": NumberType, @@ -641,6 +644,7 @@ def gen_schema_fields(self, columns: List[RedshiftColumn]) -> List[SchemaField]: if col.sort_key: tags.append(TagAssociationClass(make_tag_urn(Constants.TAG_SORT_KEY))) + logger.debug(f"col.data_type = {col.data_type}") data_type = self.REDSHIFT_FIELD_TYPE_MAPPINGS.get(col.data_type) # We have to remove the precision part to properly parse it if data_type is None: From 74c9a3454f691efb4cbf39f5f6549518c8d628c0 Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 25 Oct 2023 12:13:29 +0530 Subject: [PATCH 2/3] added more data-types --- .../src/datahub/ingestion/source/redshift/redshift.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index f8de3bb940a11..cd02c7b047e1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -245,6 +245,13 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): "CHARACTER": StringType, "CHAR": StringType, "TIMESTAMP WITHOUT TIME ZONE": TimeType, + "REAL": NumberType, + "VARCHAR": StringType, + "TIMESTAMPTZ": TimeType, + "GEOMETRY": NullType, + "HLLSKETCH": NullType, + "TIMETZ": TimeType, + "VARBYTE": StringType, } def get_platform_instance_id(self) -> str: From 03676d8952867a5b5b731a516eaf8ff7bfc849dd Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 25 Oct 2023 12:14:20 +0530 Subject: [PATCH 3/3] address review comment --- .../src/datahub/ingestion/source/redshift/redshift.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index cd02c7b047e1f..c701e4575ed68 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -651,7 +651,6 @@ def gen_schema_fields(self, columns: List[RedshiftColumn]) -> List[SchemaField]: if col.sort_key: tags.append(TagAssociationClass(make_tag_urn(Constants.TAG_SORT_KEY))) - logger.debug(f"col.data_type = {col.data_type}") data_type = self.REDSHIFT_FIELD_TYPE_MAPPINGS.get(col.data_type) # We have to remove the precision part to properly parse it if data_type is None: