Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Super type dbt redshift #12337

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
"HLLSKETCH": NullType,
"TIMETZ": TimeType,
"VARBYTE": StringType,
"SUPER": NullType,
}

def get_platform_instance_id(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
"regtype": None,
"regrole": None,
"regnamespace": None,
"super": None,
"super": NullType,
"uuid": StringType,
"pg_lsn": None,
"tsvector": None, # text search vector
Expand Down
34 changes: 33 additions & 1 deletion metadata-ingestion/tests/unit/test_dbt_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.dbt import dbt_cloud
from datahub.ingestion.source.dbt.dbt_cloud import DBTCloudConfig
from datahub.ingestion.source.dbt.dbt_common import DBTNode
from datahub.ingestion.source.dbt.dbt_common import (
DBTNode,
DBTSourceReport,
NullTypeClass,
get_column_type,
)
from datahub.ingestion.source.dbt.dbt_core import (
DBTCoreConfig,
DBTCoreSource,
Expand Down Expand Up @@ -461,3 +466,30 @@ def test_dbt_time_parsing() -> None:
assert timestamp.tzinfo is not None and timestamp.tzinfo.utcoffset(
timestamp
) == timedelta(0)


def test_get_column_type_redshift():
    """Check Redshift column-type resolution for a known and an unknown type.

    Two cases are exercised against a single shared report:

    * ``"super"`` must resolve to ``NullTypeClass`` and leave ``report.infos``
      empty.
    * An unrecognised type name must also resolve to ``NullTypeClass``, and
      must leave exactly one entry in ``report.infos`` whose context mentions
      ``"<dataset> - <type>"`` and whose title/message match the expected
      text.
    """
    platform = "redshift"
    dataset = "test_dataset"
    source_report = DBTSourceReport()

    # Known type: 'super' is mapped explicitly, so nothing should be reported.
    super_column = get_column_type(source_report, dataset, "super", platform)
    assert isinstance(super_column.type, NullTypeClass)
    assert (
        len(source_report.infos) == 0
    ), "No warnings should be generated for known SUPER type"

    # Unknown type: still resolves to NullTypeClass, but is expected to be
    # recorded on the report.
    bogus_type = "unknown_type"
    bogus_column = get_column_type(source_report, dataset, bogus_type, platform)
    assert isinstance(bogus_column.type, NullTypeClass)

    # Select only the report entries that refer to this dataset/type pair.
    context_marker = f"{dataset} - {bogus_type}"
    matching_infos = []
    for entry in source_report.infos:
        if context_marker in str(entry.context):
            matching_infos.append(entry)
    assert len(matching_infos) == 1

    # The single entry must carry the exact title and message.
    reported = matching_infos[0]
    expected_title = "Unable to map column types to DataHub types"
    expected_message = (
        "Got an unexpected column type. "
        "The column's parsed field type will not be populated."
    )
    assert reported.title == expected_title
    assert reported.message == expected_message
Loading