Skip to content

Commit

Permalink
fix: Add option for disabling ownership extraction (datahub-project#1…
Browse files Browse the repository at this point in the history
…1970)

Co-authored-by: Mayuri Nehate <[email protected]>
  • Loading branch information
2 people authored and sleeperdeep committed Dec 17, 2024
1 parent a23fad3 commit 5f5cb3a
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
"displayName": "Dremio",
"description": "Import Spaces, Sources, Tables and statistics from Dremio.",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n stateful_ingestion:\n enabled: true"
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n ingest_owner: true\n\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:cassandra",
Expand Down
2 changes: 2 additions & 0 deletions metadata-ingestion/docs/sources/dremio/dremio_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ source:

include_query_lineage: True

ingest_owner: true

#Optional
source_mappings:
- platform: s3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def __init__(
platform: str,
ui_url: str,
env: str,
ingest_owner: bool,
domain: Optional[str] = None,
platform_instance: Optional[str] = None,
):
Expand All @@ -150,6 +151,7 @@ def __init__(
self.env = env
self.domain = domain
self.ui_url = ui_url
self.ingest_owner = ingest_owner

def get_container_key(
self, name: Optional[str], path: Optional[List[str]]
Expand Down Expand Up @@ -426,21 +428,23 @@ def _create_external_url(self, dataset: DremioDataset) -> str:
return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'

def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
    """Build the ownership aspect for ``dataset`` when owner ingestion applies.

    Args:
        dataset: Dataset whose ``owner`` and ``owner_type`` attributes are read.

    Returns:
        An ``OwnershipClass`` holding a single technical owner, or ``None``
        when ``self.ingest_owner`` is falsy or the dataset has no owner.
    """
    # Single guard up front: a disabled ``ingest_owner`` flag or a missing
    # owner must never produce an ownership aspect.
    if not (self.ingest_owner and dataset.owner):
        return None

    # Owners typed "USER" become user URNs; every other owner type is
    # emitted as a group URN.
    if dataset.owner_type == "USER":
        owner_urn = make_user_urn(dataset.owner)
    else:
        owner_urn = make_group_urn(dataset.owner)

    return OwnershipClass(
        owners=[
            OwnerClass(
                owner=owner_urn,
                type=OwnershipTypeClass.TECHNICAL_OWNER,
            )
        ]
    )

def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
return GlossaryTermsClass(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,8 @@ def is_profiling_enabled(self) -> bool:
default=False,
description="Whether to include query-based lineage information.",
)

ingest_owner: bool = Field(
default=True,
description="Ingest Owner from source. This will override Owner info entered from UI",
)
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase):
- Ownership and Glossary Terms:
- Metadata related to ownership of datasets, extracted from Dremio’s ownership model.
- Glossary terms and business metadata associated with datasets, providing additional context to the data.
- Note: Ownership information is only available for the Cloud and Enterprise editions; it is not available for the Community edition.
- Optional SQL Profiling (if enabled):
- Table, row, and column statistics can be profiled and ingested via optional SQL queries.
Expand All @@ -123,6 +124,7 @@ def __init__(self, config: DremioSourceConfig, ctx: PipelineContext):
self.dremio_aspects = DremioAspects(
platform=self.get_platform(),
domain=self.config.domain,
ingest_owner=self.config.ingest_owner,
platform_instance=self.config.platform_instance,
env=self.config.env,
ui_url=dremio_api.ui_url,
Expand Down

0 comments on commit 5f5cb3a

Please sign in to comment.