From 27e1130586b8c6cbf12ba56646ede1f4be27cffe Mon Sep 17 00:00:00 2001
From: Aseem Bansal
Date: Wed, 31 Jul 2024 15:48:52 +0530
Subject: [PATCH 1/2] fix(lint): apply spotless (#11050)

---
 .../graphql/types/chart/mappers/InputFieldsMapper.java | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/InputFieldsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/InputFieldsMapper.java
index a4e40750f0d659..269fb7d4ddf793 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/InputFieldsMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/InputFieldsMapper.java
@@ -11,7 +11,6 @@
 import javax.annotation.Nullable;
 import lombok.extern.slf4j.Slf4j;
 
-
 @Slf4j
 public class InputFieldsMapper {
 
@@ -40,9 +39,14 @@ public com.linkedin.datahub.graphql.generated.InputFields apply(
                   if (field.hasSchemaFieldUrn()) {
                     fieldResult.setSchemaFieldUrn(field.getSchemaFieldUrn().toString());
                     try {
-                      parentUrn = Urn.createFromString(field.getSchemaFieldUrn().getEntityKey().get(0));
+                      parentUrn =
+                          Urn.createFromString(field.getSchemaFieldUrn().getEntityKey().get(0));
                     } catch (URISyntaxException e) {
-                      log.error("Field urn resolution: failed to extract parentUrn successfully from {}. Falling back to {}", field.getSchemaFieldUrn(), entityUrn, e);
+                      log.error(
+                          "Field urn resolution: failed to extract parentUrn successfully from {}. Falling back to {}",
+                          field.getSchemaFieldUrn(),
+                          entityUrn,
+                          e);
                     }
                   }
                   if (field.hasSchemaField()) {

From f73149a05972ca67fe6fe9d11f70b2b13dd3af37 Mon Sep 17 00:00:00 2001
From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com>
Date: Wed, 31 Jul 2024 04:31:09 -0700
Subject: [PATCH 2/2] docs(airflow): example query to get datajobs for a dataflow (#11034)

---
 docs/api/graphql/getting-started.md |  1 +
 docs/lineage/airflow.md             | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/docs/api/graphql/getting-started.md b/docs/api/graphql/getting-started.md
index 98aeca196600d7..dfa556051bd4d1 100644
--- a/docs/api/graphql/getting-started.md
+++ b/docs/api/graphql/getting-started.md
@@ -27,6 +27,7 @@ For more information on, please refer to the following links."
 - [Querying for Domain of a Dataset](/docs/api/tutorials/domains.md#read-domains)
 - [Querying for Glossary Terms of a Dataset](/docs/api/tutorials/terms.md#read-terms)
 - [Querying for Deprecation of a dataset](/docs/api/tutorials/deprecation.md#read-deprecation)
+- [Querying for all DataJobs that belong to a DataFlow](/docs/lineage/airflow.md#get-all-datajobs-associated-with-a-dataflow)
 
 ### Search
 
diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md
index 8680e36e2baf38..9d838ef8a44042 100644
--- a/docs/lineage/airflow.md
+++ b/docs/lineage/airflow.md
@@ -266,6 +266,34 @@ with DAG(
 
 - ingest this DAG, and it will remove all the obsolete pipelines and tasks from the Datahub based on the `cluster` value set in the `airflow.cfg`
 
+## Get all dataJobs associated with a dataFlow
+
+If you are looking to find all tasks (aka DataJobs) that belong to a specific pipeline (aka DataFlow), you can use the following GraphQL query:
+
+```graphql
+query {
+  dataFlow(urn: "urn:li:dataFlow:(airflow,db_etl,prod)") {
+    childJobs: relationships(
+      input: {
+        types: ["IsPartOf"],
+        direction: INCOMING,
+        start: 0,
+        count: 100
+      }
+    ) {
+      total
+      relationships {
+        entity {
+          ... on DataJob {
+            urn
+          }
+        }
+      }
+    }
+  }
+}
+```
+
 ## Emit Lineage Directly
 
 If you can't use the plugin or annotate inlets/outlets, you can also emit lineage using the `DatahubEmitterOperator`.