From 5df193bc3f796072c6e02354fc13de4950f8f209 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Mon, 12 Aug 2024 18:44:19 +0530 Subject: [PATCH 1/4] include parent fields --- .../source/looker/looker_constant.py | 3 + .../source/looker/lookml_concept_context.py | 67 ++- .../refinement_include_order_golden.json | 186 ++++++ .../child_view.view.lkml | 16 + .../data.model.lkml | 4 + .../parent_view.view.lkml | 18 + .../vv_lineage_liquid_template_golden.json | 532 ++++++++++++++++++ 7 files changed, 820 insertions(+), 6 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py index 21160cc97d4a62..c150c522615098 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_constant.py @@ -1 +1,4 @@ IMPORTED_PROJECTS = "imported_projects" +DIMENSIONS = "dimensions" +MEASURES = "measures" +DIMENSION_GROUPS = "dimension_groups" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index 7805b8b7b7d9a5..534e1026fbc393 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -8,6 +8,11 @@ find_view_from_resolved_includes, ) from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition +from datahub.ingestion.source.looker.looker_constant import ( + DIMENSION_GROUPS, + DIMENSIONS, + MEASURES, +) from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader from datahub.ingestion.source.looker.lookml_config import ( @@ -251,6 +256,7 @@ def resolve_extends_view_name( def get_including_extends( self, field: str, + extends_only: bool = False, ) -> Optional[Any]: extends = list( itertools.chain.from_iterable( @@ -259,7 +265,7 @@ def get_including_extends( ) # First, check the current view. - if field in self.raw_view: + if extends_only is False and field in self.raw_view: return self.raw_view[field] # The field might be defined in another view and this view is extending that view, @@ -382,14 +388,63 @@ def _get_list_dict(self, attribute_name: str) -> List[Dict]: return ans return [] + def _include_parent_fields( + self, child_fields: List[dict], field_type: str + ) -> List[Dict]: + # Fetch the parent view fields i.e. view-name mentioned in view.extends + # and include those field in child_fields. + # The inclusion will resolve the fields as per precedence rule mentioned in lookml documentation + # https://cloud.google.com/looker/docs/reference/param-view-extends + + parent_fields: Optional[Any] = self.get_including_extends( + field=field_type, + extends_only=True, + ) + + if parent_fields is None: + return child_fields # No parent fields found + + # Create a map field-name vs field + child_field_map: dict = {} + for field in child_fields: + assert ( + NAME in field + ), "A lookml view must have a name field" # name is required field of lookml field array + + child_field_map[field[NAME]] = field + + for field in parent_fields: + assert ( + NAME in field + ), "A lookml view must have a name field" # name is required field of lookml field array + + if field[NAME] in child_field_map: + # This is an override case where the child has redefined the parent field. + # There are some additive attributes; however, we are not consuming them in metadata ingestion + # and hence not adding them to the child field. + continue + + child_fields.append(field) + + return child_fields + def dimensions(self) -> List[Dict]: - return self._get_list_dict("dimensions") + return self._include_parent_fields( + child_fields=self._get_list_dict(DIMENSIONS), + field_type=DIMENSIONS, + ) def measures(self) -> List[Dict]: - return self._get_list_dict("measures") + return self._include_parent_fields( + child_fields=self._get_list_dict(MEASURES), + field_type=MEASURES, + ) def dimension_groups(self) -> List[Dict]: - return self._get_list_dict("dimension_groups") + return self._include_parent_fields( + child_fields=self._get_list_dict(DIMENSION_GROUPS), + field_type=DIMENSION_GROUPS, + ) def is_materialized_derived_view(self) -> bool: for k in self.derived_table(): @@ -433,7 +488,7 @@ def is_sql_based_derived_case(self) -> bool: return False def is_native_derived_case(self) -> bool: - # It is pattern 5 + # It is pattern 5, mentioned in Class documentation if ( "derived_table" in self.raw_view and "explore_source" in self.raw_view["derived_table"] @@ -443,7 +498,7 @@ def is_native_derived_case(self) -> bool: return False def is_sql_based_derived_view_without_fields_case(self) -> bool: - # Pattern 6 + # Pattern 6, mentioned in Class documentation fields: List[Dict] = [] fields.extend(self.dimensions()) diff --git a/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json b/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json index 70f48953a06adb..c5b1d44772deab 100644 --- a/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json @@ -485,9 +485,195 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),issue_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),issue_date_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),count)" + ], + "confidenceScore": 1.0 + } ] } }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "extend_book", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "date", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "issue_date", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "issue_date_3", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml new file mode 100644 index 00000000000000..5d8b51527b0fe7 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/child_view.view.lkml @@ -0,0 +1,16 @@ +include: "parent_view.view.lkml" + +view: child_view { + extends: [parent_view] + + dimension: id { + primary_key: yes + type: integer + sql: ${TABLE}.id ;; + } + + dimension: child_dimension_1 { + type: string + sql: ${TABLE}.child_dimension_1 ;; + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index 2cc6ae994d245b..24ef10cbbe223b 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -7,6 +7,7 @@ include: "top_10_employee_income_source.view.lkml" include: "employee_tax_report.view.lkml" include: "employee_salary_rating.view.lkml" include: "rent_as_employee_income_source.view.lkml" +include: "child_view.view.lkml" explore: activity_logs { } @@ -27,4 +28,7 @@ explore: employee_salary_rating { } explore: rent_as_employee_income_source { +} + +explore: child_view { } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml new file mode 100644 index 00000000000000..c2f18924351c29 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/parent_view.view.lkml @@ -0,0 +1,18 @@ +view: parent_view { + sql_table_name: `dataset.table` ;; + + dimension: id { + primary_key: yes + type: string + sql: ${TABLE}.id ;; + } + + dimension: parent_dimension_1 { + type: string + sql: ${TABLE}.parent_dimension_1 ;; + } + + measure: parent_count { + type: count + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json index 2e55971b65bd43..7742904b37de5a 100644 --- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json @@ -1828,6 +1828,538 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: parent_view {\n sql_table_name: `dataset.table` ;;\n\n dimension: id {\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n\n dimension: parent_dimension_1 {\n type: string\n sql: ${TABLE}.parent_dimension_1 ;;\n }\n\n measure: parent_count {\n type: count\n }\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_dimension_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD),parent_dimension_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD),parent_count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "parent_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + }, + { + "fieldPath": "parent_dimension_1", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "parent_count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "parent_view.view.lkml", + "looker.model": "data" + }, + "name": "parent_view", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.parent_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "include: \"parent_view.view.lkml\"\n\nview: child_view {\n extends: [parent_view]\n\n dimension: id {\n primary_key: yes\n type: integer\n sql: ${TABLE}.id ;;\n }\n\n dimension: child_dimension_1 {\n type: string\n sql: ${TABLE}.child_dimension_1 ;;\n }\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),child_dimension_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),child_dimension_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_dimension_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),parent_dimension_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,.dataset.table,PROD),parent_count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD),parent_count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "child_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + }, + { + "fieldPath": "child_dimension_1", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "parent_dimension_1", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "parent_count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "child_view.view.lkml", + "looker.model": "data" + }, + "name": "child_view", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.child_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", From 7cd7814c3e3fdf586783bccdfb09746f2adda020 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Mon, 12 Aug 2024 18:53:45 +0530 Subject: [PATCH 2/4] update doc --- .../source/looker/lookml_concept_context.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index 534e1026fbc393..9ff77daeddc52a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -264,12 +264,12 @@ def get_including_extends( ) ) - # First, check the current view. + # If extends_only is false, first check the current view, and then fall back to the parent view. if extends_only is False and field in self.raw_view: return self.raw_view[field] - # The field might be defined in another view and this view is extending that view, - # so we resolve this field while taking that into account. + # The field might be defined in another view, and this view is extending that view, + # so we resolve this field while taking that into account, # following Looker's precedence rules. for extend in reversed(extends): assert extend != self.raw_view[NAME], "a view cannot extend itself" @@ -391,10 +391,9 @@ def _get_list_dict(self, attribute_name: str) -> List[Dict]: def _include_parent_fields( self, child_fields: List[dict], field_type: str ) -> List[Dict]: - # Fetch the parent view fields i.e. view-name mentioned in view.extends - # and include those field in child_fields. - # The inclusion will resolve the fields as per precedence rule mentioned in lookml documentation - # https://cloud.google.com/looker/docs/reference/param-view-extends + # Fetch the fields from the parent view, i.e., the view name mentioned in view.extends, and include those + # fields in child_fields. This inclusion will resolve the fields according to the precedence rules mentioned + # in the LookML documentation: https://cloud.google.com/looker/docs/reference/param-view-extends parent_fields: Optional[Any] = self.get_including_extends( field=field_type, From 3d7deb9074242e327256fd6582965b7e7407268f Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Fri, 16 Aug 2024 22:10:34 +0530 Subject: [PATCH 3/4] address review comments --- .../source/looker/lookml_concept_context.py | 63 +++++++++---------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index 9ff77daeddc52a..0b0fc94e90686a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -253,24 +253,18 @@ def resolve_extends_view_name( ) return None - def get_including_extends( + def _get_parent_attribute( self, - field: str, - extends_only: bool = False, + attribute_name: str, ) -> Optional[Any]: + extends = list( itertools.chain.from_iterable( self.raw_view.get("extends", self.raw_view.get("extends__all", [])) ) ) - # If extends_only is false, first check the current view, and then fall back to the parent view. - if extends_only is False and field in self.raw_view: - return self.raw_view[field] - - # The field might be defined in another view, and this view is extending that view, - # so we resolve this field while taking that into account, - # following Looker's precedence rules. + # Following Looker's precedence rules. for extend in reversed(extends): assert extend != self.raw_view[NAME], "a view cannot extend itself" extend_view = self.resolve_extends_view_name( @@ -281,8 +275,28 @@ def get_including_extends( f"failed to resolve extends view {extend} in view {self.raw_view[NAME]} of" f" file {self.view_file.absolute_file_path}" ) - if field in extend_view: - return extend_view[field] + if attribute_name in extend_view: + return extend_view[attribute_name] + + return None + + def get_including_extends( + self, + field: str, + ) -> Optional[Any]: + + if field in [DIMENSIONS, DIMENSION_GROUPS, MEASURES]: + child_fields = self._get_list_dict(field) + return self._include_parent_fields( + child_fields=child_fields, + parent_fields=self._get_parent_attribute(attribute_name=field) or [], + ) + else: + if field in self.raw_view: + return self.raw_view[field] + + # The field might be defined in another view, and this view is extending that view, + return self._get_parent_attribute(field) return None @@ -389,20 +403,12 @@ def _get_list_dict(self, attribute_name: str) -> List[Dict]: return [] def _include_parent_fields( - self, child_fields: List[dict], field_type: str + self, child_fields: List[dict], parent_fields: List[dict] ) -> List[Dict]: # Fetch the fields from the parent view, i.e., the view name mentioned in view.extends, and include those # fields in child_fields. This inclusion will resolve the fields according to the precedence rules mentioned # in the LookML documentation: https://cloud.google.com/looker/docs/reference/param-view-extends - parent_fields: Optional[Any] = self.get_including_extends( - field=field_type, - extends_only=True, - ) - - if parent_fields is None: - return child_fields # No parent fields found - # Create a map field-name vs field child_field_map: dict = {} for field in child_fields: @@ -428,22 +434,13 @@ def _include_parent_fields( return child_fields def dimensions(self) -> List[Dict]: - return self._include_parent_fields( - child_fields=self._get_list_dict(DIMENSIONS), - field_type=DIMENSIONS, - ) + return self.get_including_extends(field=DIMENSIONS) or [] def measures(self) -> List[Dict]: - return self._include_parent_fields( - child_fields=self._get_list_dict(MEASURES), - field_type=MEASURES, - ) + return self.get_including_extends(field=MEASURES) or [] def dimension_groups(self) -> List[Dict]: - return self._include_parent_fields( - child_fields=self._get_list_dict(DIMENSION_GROUPS), - field_type=DIMENSION_GROUPS, - ) + return self.get_including_extends(field=DIMENSION_GROUPS) or [] def is_materialized_derived_view(self) -> bool: for k in self.derived_table(): From d3711e77720c571a319c3541fd3068d756e0bfe1 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 20 Aug 2024 11:41:45 +0530 Subject: [PATCH 4/4] doc update --- .../source/looker/lookml_concept_context.py | 76 +++++++++++-------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index 50fa54168a0607..bf24f4b84679b1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -28,6 +28,39 @@ logger = logging.getLogger(__name__) +def merge_parent_and_child_fields( + child_fields: List[dict], parent_fields: List[dict] +) -> List[Dict]: + # Fetch the fields from the parent view, i.e., the view name mentioned in view.extends, and include those + # fields in child_fields. This inclusion will resolve the fields according to the precedence rules mentioned + # in the LookML documentation: https://cloud.google.com/looker/docs/reference/param-view-extends. + + # Create a map field-name vs field + child_field_map: dict = {} + for field in child_fields: + assert ( + NAME in field + ), "A lookml view must have a name field" # name is required field of lookml field array + + child_field_map[field[NAME]] = field + + for field in parent_fields: + assert ( + NAME in field + ), "A lookml view must have a name field" # name is required field of lookml field array + + if field[NAME] in child_field_map: + # Fields defined in the child view take higher precedence. + # This is an override case where the child has redefined the parent field. + # There are some additive attributes; however, we are not consuming them in metadata ingestion + # and hence not adding them to the child field. + continue + + child_fields.append(field) + + return child_fields + + class LookerFieldContext: raw_field: Dict[Any, Any] @@ -257,7 +290,9 @@ def _get_parent_attribute( self, attribute_name: str, ) -> Optional[Any]: - + """ + Search for the attribute_name in the parent views of the current view and return its value. + """ extends = list( itertools.chain.from_iterable( self.raw_view.get("extends", self.raw_view.get("extends__all", [])) @@ -265,6 +300,7 @@ def _get_parent_attribute( ) # Following Looker's precedence rules. + # reversed the view-names mentioned in `extends` attribute for extend in reversed(extends): assert extend != self.raw_view[NAME], "a view cannot extend itself" extend_view = self.resolve_extends_view_name( @@ -285,13 +321,18 @@ def get_including_extends( field: str, ) -> Optional[Any]: + # According to Looker's inheritance rules, we need to merge the fields(i.e. dimensions, measures and + # dimension_groups) from both the child and parent. if field in [DIMENSIONS, DIMENSION_GROUPS, MEASURES]: + # Get the child fields child_fields = self._get_list_dict(field) - return self._include_parent_fields( + # merge parent and child fields + return merge_parent_and_child_fields( child_fields=child_fields, parent_fields=self._get_parent_attribute(attribute_name=field) or [], ) else: + # Return the field from the current view if it exists. if field in self.raw_view: return self.raw_view[field] @@ -402,37 +443,6 @@ def _get_list_dict(self, attribute_name: str) -> List[Dict]: return ans return [] - def _include_parent_fields( - self, child_fields: List[dict], parent_fields: List[dict] - ) -> List[Dict]: - # Fetch the fields from the parent view, i.e., the view name mentioned in view.extends, and include those - # fields in child_fields. This inclusion will resolve the fields according to the precedence rules mentioned - # in the LookML documentation: https://cloud.google.com/looker/docs/reference/param-view-extends - - # Create a map field-name vs field - child_field_map: dict = {} - for field in child_fields: - assert ( - NAME in field - ), "A lookml view must have a name field" # name is required field of lookml field array - - child_field_map[field[NAME]] = field - - for field in parent_fields: - assert ( - NAME in field - ), "A lookml view must have a name field" # name is required field of lookml field array - - if field[NAME] in child_field_map: - # This is an override case where the child has redefined the parent field. - # There are some additive attributes; however, we are not consuming them in metadata ingestion - # and hence not adding them to the child field. - continue - - child_fields.append(field) - - return child_fields - def dimensions(self) -> List[Dict]: return self.get_including_extends(field=DIMENSIONS) or []