Skip to content

Commit

Permalink
feat(ingest/looker): include project name in model/explore properties (
Browse files Browse the repository at this point in the history
…#11664)

Co-authored-by: Mayuri Nehate <[email protected]>
  • Loading branch information
hsheth2 and mayurinehate authored Oct 18, 2024
1 parent 7e4d4ab commit dfd7293
Show file tree
Hide file tree
Showing 14 changed files with 135 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1205,15 +1205,19 @@ def _to_metadata_events( # noqa: C901
dataset_snapshot.aspects.append(browse_paths)
dataset_snapshot.aspects.append(StatusClass(removed=False))

custom_properties = {}
if self.label is not None:
custom_properties["looker.explore.label"] = str(self.label)
if self.source_file is not None:
custom_properties["looker.explore.file"] = str(self.source_file)
custom_properties = {
"project": self.project_name,
"model": self.model_name,
"looker.explore.label": self.label,
"looker.explore.name": self.name,
"looker.explore.file": self.source_file,
}
dataset_props = DatasetPropertiesClass(
name=str(self.label) if self.label else LookerUtil._display_name(self.name),
description=self.description,
customProperties=custom_properties,
customProperties={
k: str(v) for k, v in custom_properties.items() if v is not None
},
)
dataset_props.externalUrl = self._get_url(base_url)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,26 +139,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
"""

platform = "looker"
source_config: LookerDashboardSourceConfig
reporter: LookerDashboardSourceReport
user_registry: LookerUserRegistry
reachable_look_registry: Set[
str
] # Keep track of look-id which are reachable from Dashboard

def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext):
super().__init__(config, ctx)
self.source_config = config
self.reporter = LookerDashboardSourceReport()
self.source_config: LookerDashboardSourceConfig = config
self.reporter: LookerDashboardSourceReport = LookerDashboardSourceReport()
self.looker_api: LookerAPI = LookerAPI(self.source_config)
self.user_registry = LookerUserRegistry(self.looker_api)
self.explore_registry = LookerExploreRegistry(
self.user_registry: LookerUserRegistry = LookerUserRegistry(self.looker_api)
self.explore_registry: LookerExploreRegistry = LookerExploreRegistry(
self.looker_api, self.reporter, self.source_config
)
self.reporter._looker_explore_registry = self.explore_registry
self.reporter._looker_api = self.looker_api

self.reachable_look_registry = set()
# Keep track of look-id which are reachable from Dashboard
self.reachable_look_registry: Set[str] = set()

# (model, explore) -> list of charts/looks/dashboards that reference this explore
# The list values are used purely for debugging purposes.
Expand Down Expand Up @@ -868,21 +863,31 @@ def _make_explore_metadata_events(
) -> Iterable[
Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
]:
if self.source_config.emit_used_explores_only:
explores_to_fetch = list(self.reachable_explores.keys())
else:
if not self.source_config.emit_used_explores_only:
explores_to_fetch = list(self.list_all_explores())
else:
# We don't keep track of project names for each explore right now.
# Because project names are just used for a custom property, it's
# fine to set them to None.
# TODO: Track project names for each explore.
explores_to_fetch = [
(None, model, explore)
for (model, explore) in self.reachable_explores.keys()
]
explores_to_fetch.sort()

processed_models: List[str] = []

for model, _ in explores_to_fetch:
for project_name, model, _ in explores_to_fetch:
if model not in processed_models:
model_key = gen_model_key(self.source_config, model)
yield from gen_containers(
container_key=model_key,
name=model,
sub_types=[BIContainerSubTypes.LOOKML_MODEL],
extra_properties=(
{"project": project_name} if project_name is not None else None
),
)
yield MetadataChangeProposalWrapper(
entityUrn=model_key.as_urn(),
Expand All @@ -896,7 +901,7 @@ def _make_explore_metadata_events(
self.reporter.total_explores = len(explores_to_fetch)
for future in BackpressureAwareExecutor.map(
self.fetch_one_explore,
((model, explore) for (model, explore) in explores_to_fetch),
((model, explore) for (_project, model, explore) in explores_to_fetch),
max_workers=self.source_config.max_threads,
):
events, explore_id, start_time, end_time = future.result()
Expand All @@ -907,7 +912,7 @@ def _make_explore_metadata_events(
f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
)

def list_all_explores(self) -> Iterable[Tuple[str, str]]:
def list_all_explores(self) -> Iterable[Tuple[Optional[str], str, str]]:
# yields (project_name, model, explore) tuples; project_name may be None

for model in self.looker_api.all_lookml_models():
Expand All @@ -916,7 +921,7 @@ def list_all_explores(self) -> Iterable[Tuple[str, str]]:
for explore in model.explores:
if explore.name is None:
continue
yield (model.name, explore.name)
yield (model.project_name, model.name, explore.name)

def fetch_one_explore(
self, model: str, explore: str
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"description": "lorem ipsum",
"charts": [],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -440,7 +441,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "bogus data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/bogus data/my_view",
Expand Down Expand Up @@ -616,7 +620,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"description": "lorem ipsum",
"charts": [],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -282,7 +283,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@
"urn:li:chart:(looker,dashboard_elements.2)"
],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -520,7 +521,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "looker_hub",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@
"urn:li:chart:(looker,dashboard_elements.2)"
],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -520,7 +521,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "looker_hub",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@
"description": "third",
"charts": [],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -613,7 +614,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Loading

0 comments on commit dfd7293

Please sign in to comment.