From 63cdb81e2cfd3e3f2d5514b4664ea4117505c4e3 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Mon, 30 Dec 2024 10:57:55 -0800 Subject: [PATCH] feat(data transform): adding dataTransformLogic models (#12198) --- .../mappers/DataTransformLogicMapper.java | 73 +++++++++++ .../common/mappers/QueryPropertiesMapper.java | 61 +++++++++ .../graphql/types/datajob/DataJobType.java | 3 +- .../types/datajob/mappers/DataJobMapper.java | 24 +--- .../graphql/types/query/QueryMapper.java | 43 +------ .../src/main/resources/entity.graphql | 25 ++++ .../mappers/DataTransformLogicMapperTest.java | 103 +++++++++++++++ .../mappers/QueryPropertiesMapperTest.java | 121 ++++++++++++++++++ .../java/com/linkedin/metadata/Constants.java | 1 + .../com/linkedin/common/DataTransform.pdl | 13 ++ .../linkedin/common/DataTransformLogic.pdl | 14 ++ .../src/main/resources/entity-registry.yml | 1 + 12 files changed, 424 insertions(+), 58 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapper.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/DataTransform.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/DataTransformLogic.pdl diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapper.java new file mode 100644 index 0000000000000..04602e7ff6dde --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapper.java @@ -0,0 +1,73 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataTransform; +import com.linkedin.datahub.graphql.generated.DataTransformLogic; +import com.linkedin.datahub.graphql.generated.QueryLanguage; +import com.linkedin.datahub.graphql.generated.QueryStatement; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class DataTransformLogicMapper + implements ModelMapper< + com.linkedin.common.DataTransformLogic, + com.linkedin.datahub.graphql.generated.DataTransformLogic> { + + public static final DataTransformLogicMapper INSTANCE = new DataTransformLogicMapper(); + + public static DataTransformLogic map( + @Nullable final QueryContext context, + @Nonnull final com.linkedin.common.DataTransformLogic input) { + return INSTANCE.apply(context, input); + } + + @Override + public DataTransformLogic apply( + @Nullable final QueryContext context, + @Nonnull final com.linkedin.common.DataTransformLogic input) { + + final DataTransformLogic result = new DataTransformLogic(); + + // Map transforms array using DataTransformMapper + result.setTransforms( + input.getTransforms().stream() + .map(transform -> DataTransformMapper.map(context, transform)) + .collect(Collectors.toList())); + + return result; + } +} + +class DataTransformMapper + implements ModelMapper< + com.linkedin.common.DataTransform, com.linkedin.datahub.graphql.generated.DataTransform> { + + public static final DataTransformMapper INSTANCE = new DataTransformMapper(); + + public static DataTransform map( + @Nullable final QueryContext context, + @Nonnull final com.linkedin.common.DataTransform input) { + return INSTANCE.apply(context, input); + } + + 
@Override + public DataTransform apply( + @Nullable final QueryContext context, + @Nonnull final com.linkedin.common.DataTransform input) { + + final DataTransform result = new DataTransform(); + + // Map query statement if present + if (input.hasQueryStatement()) { + QueryStatement statement = + new QueryStatement( + input.getQueryStatement().getValue(), + QueryLanguage.valueOf(input.getQueryStatement().getLanguage().toString())); + result.setQueryStatement(statement); + } + + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapper.java new file mode 100644 index 0000000000000..e29bea5b3943c --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapper.java @@ -0,0 +1,61 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.data.template.GetMode; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.*; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.query.QueryProperties; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class QueryPropertiesMapper + implements ModelMapper< + QueryProperties, com.linkedin.datahub.graphql.generated.QueryProperties> { + + public static final QueryPropertiesMapper INSTANCE = new QueryPropertiesMapper(); + + public static com.linkedin.datahub.graphql.generated.QueryProperties map( + @Nullable final QueryContext context, @Nonnull final QueryProperties input) { + return INSTANCE.apply(context, input); + } + + @Override + public com.linkedin.datahub.graphql.generated.QueryProperties apply( + @Nullable final QueryContext context, @Nonnull final QueryProperties input) { + + final com.linkedin.datahub.graphql.generated.QueryProperties result 
= + new com.linkedin.datahub.graphql.generated.QueryProperties(); + + // Map Query Source + result.setSource(QuerySource.valueOf(input.getSource().toString())); + + // Map Query Statement + result.setStatement( + new QueryStatement( + input.getStatement().getValue(), + QueryLanguage.valueOf(input.getStatement().getLanguage().toString()))); + + // Map optional fields + result.setName(input.getName(GetMode.NULL)); + result.setDescription(input.getDescription(GetMode.NULL)); + + // Map origin if present + if (input.hasOrigin() && input.getOrigin() != null) { + result.setOrigin(UrnToEntityMapper.map(context, input.getOrigin())); + } + + // Map created audit stamp + AuditStamp created = new AuditStamp(); + created.setTime(input.getCreated().getTime()); + created.setActor(input.getCreated().getActor(GetMode.NULL).toString()); + result.setCreated(created); + + // Map last modified audit stamp + AuditStamp lastModified = new AuditStamp(); + lastModified.setTime(input.getLastModified().getTime()); + lastModified.setActor(input.getLastModified().getActor(GetMode.NULL).toString()); + result.setLastModified(lastModified); + + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java index b32832a28d5d5..8d55ca6dbf7ac 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java @@ -79,7 +79,8 @@ public class DataJobType BROWSE_PATHS_V2_ASPECT_NAME, SUB_TYPES_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, - FORMS_ASPECT_NAME); + FORMS_ASPECT_NAME, + DATA_TRANSFORM_LOGIC_ASPECT_NAME); private static final Set FACET_FIELDS = ImmutableSet.of("flow"); private final EntityClient _entityClient; diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java index 772871d77f217..ec57c95ce151e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java @@ -4,16 +4,7 @@ import static com.linkedin.metadata.Constants.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.BrowsePathsV2; -import com.linkedin.common.DataPlatformInstance; -import com.linkedin.common.Deprecation; -import com.linkedin.common.Forms; -import com.linkedin.common.GlobalTags; -import com.linkedin.common.GlossaryTerms; -import com.linkedin.common.InstitutionalMemory; -import com.linkedin.common.Ownership; -import com.linkedin.common.Status; -import com.linkedin.common.SubTypes; +import com.linkedin.common.*; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; @@ -26,15 +17,7 @@ import com.linkedin.datahub.graphql.generated.DataJobProperties; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.EntityType; -import com.linkedin.datahub.graphql.types.common.mappers.BrowsePathsV2Mapper; -import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; -import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; -import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper; -import com.linkedin.datahub.graphql.types.common.mappers.FineGrainedLineagesMapper; -import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; -import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; -import 
com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.*; import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtils; import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.form.FormsMapper; @@ -139,6 +122,9 @@ public DataJob apply( context, new StructuredProperties(data), entityUrn)); } else if (FORMS_ASPECT_NAME.equals(name)) { result.setForms(FormsMapper.map(new Forms(data), entityUrn.toString())); + } else if (DATA_TRANSFORM_LOGIC_ASPECT_NAME.equals(name)) { + result.setDataTransformLogic( + DataTransformLogicMapper.map(context, new DataTransformLogic(data))); } }); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java index e71b569e9ae23..916ebc772f545 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java @@ -5,18 +5,13 @@ import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; -import com.linkedin.data.template.GetMode; import com.linkedin.datahub.graphql.QueryContext; -import com.linkedin.datahub.graphql.generated.AuditStamp; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.QueryEntity; -import com.linkedin.datahub.graphql.generated.QueryLanguage; -import com.linkedin.datahub.graphql.generated.QuerySource; -import com.linkedin.datahub.graphql.generated.QueryStatement; import 
com.linkedin.datahub.graphql.generated.QuerySubject; -import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.datahub.graphql.types.common.mappers.QueryPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.entity.EntityResponse; @@ -48,7 +43,10 @@ public QueryEntity apply( result.setType(EntityType.QUERY); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); - mappingHelper.mapToResult(context, QUERY_PROPERTIES_ASPECT_NAME, this::mapQueryProperties); + mappingHelper.mapToResult( + QUERY_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> + entity.setProperties(QueryPropertiesMapper.map(context, new QueryProperties(dataMap)))); mappingHelper.mapToResult(QUERY_SUBJECTS_ASPECT_NAME, this::mapQuerySubjects); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, this::mapPlatform); return mappingHelper.getResult(); @@ -64,37 +62,6 @@ private void mapPlatform(@Nonnull QueryEntity query, @Nonnull DataMap dataMap) { } } - private void mapQueryProperties( - @Nullable final QueryContext context, @Nonnull QueryEntity query, @Nonnull DataMap dataMap) { - QueryProperties queryProperties = new QueryProperties(dataMap); - com.linkedin.datahub.graphql.generated.QueryProperties res = - new com.linkedin.datahub.graphql.generated.QueryProperties(); - - // Query Source must be kept in sync. 
- res.setSource(QuerySource.valueOf(queryProperties.getSource().toString())); - res.setStatement( - new QueryStatement( - queryProperties.getStatement().getValue(), - QueryLanguage.valueOf(queryProperties.getStatement().getLanguage().toString()))); - res.setName(queryProperties.getName(GetMode.NULL)); - res.setDescription(queryProperties.getDescription(GetMode.NULL)); - if (queryProperties.hasOrigin() && queryProperties.getOrigin() != null) { - res.setOrigin(UrnToEntityMapper.map(context, queryProperties.getOrigin())); - } - - AuditStamp created = new AuditStamp(); - created.setTime(queryProperties.getCreated().getTime()); - created.setActor(queryProperties.getCreated().getActor(GetMode.NULL).toString()); - res.setCreated(created); - - AuditStamp lastModified = new AuditStamp(); - lastModified.setTime(queryProperties.getLastModified().getTime()); - lastModified.setActor(queryProperties.getLastModified().getActor(GetMode.NULL).toString()); - res.setLastModified(lastModified); - - query.setProperties(res); - } - @Nonnull private void mapQuerySubjects(@Nonnull QueryEntity query, @Nonnull DataMap dataMap) { QuerySubjects querySubjects = new QuerySubjects(dataMap); diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 9abf4e16f12dd..a5cb0893a64fa 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -6569,6 +6569,11 @@ type DataJob implements EntityWithRelationships & Entity & BrowsableEntity { The forms associated with the Dataset """ forms: Forms + + """ + Data Transform Logic associated with the Data Job + """ + dataTransformLogic: DataTransformLogic } """ @@ -6786,6 +6791,26 @@ type DataJobInputOutput { fineGrainedLineages: [FineGrainedLineage!] 
} +""" +Information about a transformation applied to data assets +""" +type DataTransform { + """ + The transformation may be defined by a query statement + """ + queryStatement: QueryStatement +} + +""" +Information about transformations applied to data assets +""" +type DataTransformLogic { + """ + List of transformations applied + """ + transforms: [DataTransform!]! +} + """ Information about individual user usage of a Dataset """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapperTest.java new file mode 100644 index 0000000000000..f94738ff049ef --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapperTest.java @@ -0,0 +1,103 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +import com.linkedin.common.DataTransform; +import com.linkedin.common.DataTransformArray; +import com.linkedin.common.DataTransformLogic; +import com.linkedin.query.QueryLanguage; +import com.linkedin.query.QueryStatement; +import java.util.Arrays; +import org.testng.annotations.Test; + +public class DataTransformLogicMapperTest { + + @Test + public void testMapWithQueryStatement() throws Exception { + // Create test data + DataTransformLogic input = new DataTransformLogic(); + + // Create a transform with query statement + DataTransform transform1 = new DataTransform(); + QueryStatement statement = new QueryStatement(); + statement.setValue("SELECT * FROM source_table"); + statement.setLanguage(QueryLanguage.SQL); + transform1.setQueryStatement(statement); + + // Create another transform + DataTransform transform2 = new DataTransform(); + QueryStatement statement2 = new 
QueryStatement(); + statement2.setValue("INSERT INTO target_table SELECT * FROM temp_table"); + statement2.setLanguage(QueryLanguage.SQL); + transform2.setQueryStatement(statement2); + + // Set transforms + input.setTransforms(new DataTransformArray(Arrays.asList(transform1, transform2))); + + // Map the object + com.linkedin.datahub.graphql.generated.DataTransformLogic result = + DataTransformLogicMapper.map(null, input); + + // Verify result + assertNotNull(result); + assertEquals(result.getTransforms().size(), 2); + + // Verify first transform + com.linkedin.datahub.graphql.generated.DataTransform resultTransform1 = + result.getTransforms().get(0); + assertNotNull(resultTransform1.getQueryStatement()); + assertEquals(resultTransform1.getQueryStatement().getValue(), "SELECT * FROM source_table"); + assertEquals(resultTransform1.getQueryStatement().getLanguage().toString(), "SQL"); + + // Verify second transform + com.linkedin.datahub.graphql.generated.DataTransform resultTransform2 = + result.getTransforms().get(1); + assertNotNull(resultTransform2.getQueryStatement()); + assertEquals( + resultTransform2.getQueryStatement().getValue(), + "INSERT INTO target_table SELECT * FROM temp_table"); + assertEquals(resultTransform2.getQueryStatement().getLanguage().toString(), "SQL"); + } + + @Test + public void testMapWithoutQueryStatement() throws Exception { + // Create test data + DataTransformLogic input = new DataTransformLogic(); + + // Create a transform without query statement + DataTransform transform = new DataTransform(); + + // Set transforms + input.setTransforms(new DataTransformArray(Arrays.asList(transform))); + + // Map the object + com.linkedin.datahub.graphql.generated.DataTransformLogic result = + DataTransformLogicMapper.map(null, input); + + // Verify result + assertNotNull(result); + assertEquals(result.getTransforms().size(), 1); + + // Verify transform + com.linkedin.datahub.graphql.generated.DataTransform resultTransform = + 
result.getTransforms().get(0); + assertNull(resultTransform.getQueryStatement()); + } + + @Test + public void testMapWithEmptyTransforms() throws Exception { + // Create test data + DataTransformLogic input = new DataTransformLogic(); + input.setTransforms(new DataTransformArray(Arrays.asList())); + + // Map the object + com.linkedin.datahub.graphql.generated.DataTransformLogic result = + DataTransformLogicMapper.map(null, input); + + // Verify result + assertNotNull(result); + assertEquals(result.getTransforms().size(), 0); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java new file mode 100644 index 0000000000000..756115cf2054a --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java @@ -0,0 +1,121 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.query.QueryLanguage; +import com.linkedin.query.QueryProperties; +import com.linkedin.query.QuerySource; +import com.linkedin.query.QueryStatement; +import org.testng.annotations.Test; + +public class QueryPropertiesMapperTest { + + @Test + public void testMapWithRequiredFields() throws Exception { + // Create test data + QueryProperties input = new QueryProperties(); + + // Set required fields + QueryStatement statement = new QueryStatement(); + statement.setValue("SELECT * FROM table"); + statement.setLanguage(QueryLanguage.SQL); + input.setStatement(statement); + + input.setSource(QuerySource.MANUAL); + + Urn userUrn = Urn.createFromString("urn:li:corpuser:test"); + + AuditStamp created = 
new AuditStamp(); + created.setTime(1000L); + created.setActor(userUrn); + input.setCreated(created); + + AuditStamp lastModified = new AuditStamp(); + lastModified.setTime(2000L); + lastModified.setActor(userUrn); + input.setLastModified(lastModified); + + // Map the object + com.linkedin.datahub.graphql.generated.QueryProperties result = + QueryPropertiesMapper.map(null, input); + + // Verify required fields + assertNotNull(result); + assertEquals(result.getSource().toString(), "MANUAL"); + assertEquals(result.getStatement().getValue(), "SELECT * FROM table"); + assertEquals(result.getStatement().getLanguage().toString(), "SQL"); + + // Verify audit stamps + assertEquals(result.getCreated().getTime().longValue(), 1000L); + assertEquals(result.getCreated().getActor(), userUrn.toString()); + assertEquals(result.getLastModified().getTime().longValue(), 2000L); + assertEquals(result.getLastModified().getActor(), userUrn.toString()); + + // Verify createdOn stamp (NOTE(review): mapper never calls setCreatedOn; confirm) + assertEquals(result.getCreatedOn().getTime().longValue(), 1000L); + assertEquals(result.getCreatedOn().getActor().getUrn(), userUrn.toString()); + + // Verify optional fields are null + assertNull(result.getName()); + assertNull(result.getDescription()); + assertNull(result.getOrigin()); + } + + @Test + public void testMapWithOptionalFields() throws Exception { + // Create test data + QueryProperties input = new QueryProperties(); + + // Set required fields + QueryStatement statement = new QueryStatement(); + statement.setValue("SELECT * FROM table"); + statement.setLanguage(QueryLanguage.SQL); + input.setStatement(statement); + + input.setSource(QuerySource.SYSTEM); + + Urn userUrn = Urn.createFromString("urn:li:corpuser:test"); + Urn originUrn = Urn.createFromString("urn:li:dataset:test"); + + AuditStamp created = new AuditStamp(); + created.setTime(1000L); + created.setActor(userUrn); + input.setCreated(created); + + AuditStamp lastModified = new AuditStamp(); + lastModified.setTime(2000L); + 
lastModified.setActor(userUrn); + input.setLastModified(lastModified); + + // Set optional fields + input.setName("Test Query"); + input.setDescription("Test Description"); + input.setOrigin(originUrn); + + // Map the object + com.linkedin.datahub.graphql.generated.QueryProperties result = + QueryPropertiesMapper.map(null, input); + + // Verify required fields + assertNotNull(result); + assertEquals(result.getSource().toString(), "SYSTEM"); + assertEquals(result.getStatement().getValue(), "SELECT * FROM table"); + assertEquals(result.getStatement().getLanguage().toString(), "SQL"); + + // Verify audit stamps + assertEquals(result.getCreated().getTime().longValue(), 1000L); + assertEquals(result.getCreated().getActor(), userUrn.toString()); + assertEquals(result.getLastModified().getTime().longValue(), 2000L); + assertEquals(result.getLastModified().getActor(), userUrn.toString()); + + // Verify optional fields + assertEquals(result.getName(), "Test Query"); + assertEquals(result.getDescription(), "Test Description"); + assertNotNull(result.getOrigin()); + assertEquals(result.getOrigin().getUrn(), originUrn.toString()); + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 09f873ebf7bc9..42080e4e17596 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -127,6 +127,7 @@ public class Constants { public static final String EMBED_ASPECT_NAME = "embed"; public static final String INCIDENTS_SUMMARY_ASPECT_NAME = "incidentsSummary"; public static final String DOCUMENTATION_ASPECT_NAME = "documentation"; + public static final String DATA_TRANSFORM_LOGIC_ASPECT_NAME = "dataTransformLogic"; // User public static final String CORP_USER_KEY_ASPECT_NAME = "corpUserKey"; diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/DataTransform.pdl 
b/metadata-models/src/main/pegasus/com/linkedin/common/DataTransform.pdl new file mode 100644 index 0000000000000..adc8d693b28e2 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/DataTransform.pdl @@ -0,0 +1,13 @@ +namespace com.linkedin.common + +import com.linkedin.query.QueryStatement + +/** + * Information about a transformation. It may be defined by a query statement. + */ +record DataTransform { + /** + * The data transform may be defined by a query statement + */ + queryStatement: optional QueryStatement +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/DataTransformLogic.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/DataTransformLogic.pdl new file mode 100644 index 0000000000000..431cebf436ffb --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/DataTransformLogic.pdl @@ -0,0 +1,14 @@ +namespace com.linkedin.common + +/** + * Information about the transformations applied to one or more data assets. + */ +@Aspect = { + "name": "dataTransformLogic" +} +record DataTransformLogic { + /** + * List of transformations applied + */ + transforms: array[DataTransform], +} diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 4fe170ced69f3..0193e5e2c5c6c 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -76,6 +76,7 @@ entities: - subTypes - incidentsSummary - testResults + - dataTransformLogic - name: dataFlow category: core keyAspect: dataFlowKey