diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index c38b97d5f44bf8..70e5dd9d14d91e 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -3,17 +3,9 @@ on: push: branches: - master - paths-ignore: - - "docs-website/**" - - "docs/**" - - "**.md" pull_request: branches: - "**" - paths-ignore: - - "docs-website/**" - - "docs/**" - - "**.md" release: types: [published] diff --git a/build.gradle b/build.gradle index 62b305edf27d71..17b21757f0380c 100644 --- a/build.gradle +++ b/build.gradle @@ -73,7 +73,7 @@ plugins { id 'com.gorylenko.gradle-git-properties' version '2.4.1' id 'com.github.johnrengelman.shadow' version '8.1.1' apply false id 'com.palantir.docker' version '0.35.0' apply false - id 'com.avast.gradle.docker-compose' version '0.17.5' + id 'com.avast.gradle.docker-compose' version '0.17.6' id "com.diffplug.spotless" version "6.23.3" // https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/ // TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0" diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 641ed3378f9d0b..fdb91a749b2268 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -141,6 +141,7 @@ import com.linkedin.datahub.graphql.resolvers.entity.EntityExistsResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityPrivilegesResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchAssignFormResolver; +import com.linkedin.datahub.graphql.resolvers.form.BatchRemoveFormResolver; import com.linkedin.datahub.graphql.resolvers.form.CreateDynamicFormAssignmentResolver; import com.linkedin.datahub.graphql.resolvers.form.IsFormAssignedToMeResolver; import 
com.linkedin.datahub.graphql.resolvers.form.SubmitFormPromptResolver; @@ -1214,6 +1215,7 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { new CreateDynamicFormAssignmentResolver(this.formService)) .dataFetcher( "verifyForm", new VerifyFormResolver(this.formService, this.groupService)) + .dataFetcher("batchRemoveForm", new BatchRemoveFormResolver(this.formService)) .dataFetcher("raiseIncident", new RaiseIncidentResolver(this.entityClient)) .dataFetcher( "updateIncidentStatus", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java new file mode 100644 index 00000000000000..a6590625a9ba3c --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java @@ -0,0 +1,54 @@ +package com.linkedin.datahub.graphql.resolvers.form; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchAssignFormInput; +import com.linkedin.metadata.service.FormService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class BatchRemoveFormResolver implements DataFetcher> { + + private final FormService _formService; + + public BatchRemoveFormResolver(@Nonnull final FormService formService) { + _formService = Objects.requireNonNull(formService, "formService must not be null"); + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment 
environment) + throws Exception { + final QueryContext context = environment.getContext(); + + final BatchAssignFormInput input = + bindArgument(environment.getArgument("input"), BatchAssignFormInput.class); + final Urn formUrn = UrnUtils.getUrn(input.getFormUrn()); + final List entityUrns = input.getEntityUrns(); + final Authentication authentication = context.getAuthentication(); + + // TODO: (PRD-1062) Add permission check once permission exists + + return CompletableFuture.supplyAsync( + () -> { + try { + _formService.batchUnassignFormForEntities( + entityUrns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()), + formUrn, + authentication); + return true; + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to perform update against input %s", input), e); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java index 1a8b7734c093e5..4a0eacaf09671a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java @@ -6,6 +6,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.LineageDirection; @@ -92,9 +93,14 @@ public CompletableFuture get(DataFetchingEnvironment final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; - final List filters = - input.getFilters() != null ? 
input.getFilters() : new ArrayList<>(); - final Integer maxHops = getMaxHops(filters); + final List filters = + input.getOrFilters() != null ? input.getOrFilters() : new ArrayList<>(); + final List facetFilters = + filters.stream() + .map(AndFilterInput::getAnd) + .flatMap(List::stream) + .collect(Collectors.toList()); + final Integer maxHops = getMaxHops(facetFilters); @Nullable final Long startTimeMillis = @@ -117,7 +123,8 @@ public CompletableFuture get(DataFetchingEnvironment start, count); - final Filter filter = ResolverUtils.buildFilter(filters, input.getOrFilters()); + final Filter filter = + ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); SearchFlags searchFlags = null; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git a/datahub-graphql-core/src/main/resources/forms.graphql b/datahub-graphql-core/src/main/resources/forms.graphql index a0f84f8e3bb1a6..f5e5fa74e3dc97 100644 --- a/datahub-graphql-core/src/main/resources/forms.graphql +++ b/datahub-graphql-core/src/main/resources/forms.graphql @@ -1,3 +1,10 @@ +extend type Mutation { + """ + Remove a form from a given list of entities. + """ + batchRemoveForm(input: BatchRemoveFormInput!): Boolean! +} + """ Requirements forms that are assigned to an entity. """ @@ -376,3 +383,18 @@ input VerifyFormInput { """ entityUrn: String! } + +""" +Input for batch removing a form from different entities +""" +input BatchRemoveFormInput { + """ + The urn of the form being removed from entities + """ + formUrn: String! + + """ + The entities that this form is being removed from + """ + entityUrns: [String!]! 
+} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index b75530773c352f..ac07053e59d75a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -13,7 +13,7 @@ import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.MetadataChangeProposal; @@ -22,14 +22,14 @@ public class TestUtils { - public static EntityService getMockEntityService() { + public static EntityService getMockEntityService() { PathSpecBasedSchemaAnnotationVisitor.class .getClassLoader() .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); EntityRegistry registry = new ConfigEntityRegistry(TestUtils.class.getResourceAsStream("/test-entity-registry.yaml")); - EntityService mockEntityService = - (EntityService) Mockito.mock(EntityService.class); + EntityService mockEntityService = + (EntityService) Mockito.mock(EntityService.class); Mockito.when(mockEntityService.getEntityRegistry()).thenReturn(registry); return mockEntityService; } @@ -111,14 +111,14 @@ public static QueryContext getMockDenyContext(String actorUrn, AuthorizationRequ } public static void verifyIngestProposal( - EntityService mockService, + EntityService mockService, int numberOfInvocations, MetadataChangeProposal proposal) { verifyIngestProposal(mockService, numberOfInvocations, List.of(proposal)); } public static void verifyIngestProposal( 
- EntityService mockService, + EntityService mockService, int numberOfInvocations, List proposals) { AspectsBatchImpl batch = @@ -128,7 +128,7 @@ public static void verifyIngestProposal( } public static void verifySingleIngestProposal( - EntityService mockService, + EntityService mockService, int numberOfInvocations, MetadataChangeProposal proposal) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) @@ -136,13 +136,13 @@ public static void verifySingleIngestProposal( } public static void verifyIngestProposal( - EntityService mockService, int numberOfInvocations) { + EntityService mockService, int numberOfInvocations) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); } public static void verifySingleIngestProposal( - EntityService mockService, int numberOfInvocations) { + EntityService mockService, int numberOfInvocations) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) .ingestProposal( Mockito.any(MetadataChangeProposal.class), @@ -150,7 +150,7 @@ public static void verifySingleIngestProposal( Mockito.eq(false)); } - public static void verifyNoIngestProposal(EntityService mockService) { + public static void verifyNoIngestProposal(EntityService mockService) { Mockito.verify(mockService, Mockito.times(0)) .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolverTest.java new file mode 100644 index 00000000000000..79af61b1bc79da --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolverTest.java @@ -0,0 +1,83 @@ +package com.linkedin.datahub.graphql.resolvers.form; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; 
+import static org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchAssignFormInput; +import com.linkedin.metadata.service.FormService; +import graphql.com.google.common.collect.ImmutableList; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class BatchRemoveFormResolverTest { + + private static final String TEST_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:hive,name,PROD)"; + private static final String TEST_FORM_URN = "urn:li:form:1"; + + private static final BatchAssignFormInput TEST_INPUT = + new BatchAssignFormInput(TEST_FORM_URN, ImmutableList.of(TEST_DATASET_URN)); + + @Test + public void testGetSuccess() throws Exception { + FormService mockFormService = initMockFormService(true); + BatchRemoveFormResolver resolver = new BatchRemoveFormResolver(mockFormService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + boolean success = resolver.get(mockEnv).get(); + + assertTrue(success); + + // Validate that we called unassign on the service + Mockito.verify(mockFormService, Mockito.times(1)) + .batchUnassignFormForEntities( + Mockito.eq(ImmutableList.of(UrnUtils.getUrn(TEST_DATASET_URN))), + Mockito.eq(UrnUtils.getUrn(TEST_FORM_URN)), + Mockito.any(Authentication.class)); + } + + @Test + public void testThrowsError() throws Exception { + FormService mockFormService = initMockFormService(false); + BatchRemoveFormResolver resolver = new 
BatchRemoveFormResolver(mockFormService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + + // Validate that we called unassign on the service - but it throws an error + Mockito.verify(mockFormService, Mockito.times(1)) + .batchUnassignFormForEntities( + Mockito.eq(ImmutableList.of(UrnUtils.getUrn(TEST_DATASET_URN))), + Mockito.eq(UrnUtils.getUrn(TEST_FORM_URN)), + Mockito.any(Authentication.class)); + } + + private FormService initMockFormService(final boolean shouldSucceed) throws Exception { + FormService service = Mockito.mock(FormService.class); + + if (!shouldSucceed) { + Mockito.doThrow(new RuntimeException()) + .when(service) + .batchUnassignFormForEntities( + Mockito.any(), Mockito.any(), Mockito.any(Authentication.class)); + } + + return service; + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java index 1898753e5ae76f..b8c4ce21949373 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java @@ -18,7 +18,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.mxe.MetadataChangeProposal; import 
graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; @@ -221,7 +221,7 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java index c756407832a36e..147acc9c1e0f33 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java @@ -178,8 +178,9 @@ private void readerExecutable(ReaderWrapper reader, UpgradeContext context) { final RecordTemplate aspectRecord; try { aspectRecord = - EntityUtils.toAspectRecord( - entityName, aspectName, aspect.getMetadata(), _entityRegistry); + EntityUtils.toSystemAspect(aspect.toEntityAspect(), _entityService) + .get() + .getRecordTemplate(); } catch (Exception e) { context .report() diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java index 894075417a3498..0695dbe4b1acb0 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java @@ -2,10 +2,13 @@ import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.INDEX_BLOCKS_WRITE_SETTING; import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs; 
+import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; +import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableMap; +import com.linkedin.common.Status; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -14,14 +17,15 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.entity.AspectDao; -import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; @@ -54,24 +58,13 @@ public int retryCount() { public Function executable() { return (context) -> { try { - List reindexConfigs = - _configurationProvider.getStructuredProperties().isSystemUpdateEnabled() - ? 
getAllReindexConfigs( - _services, - _aspectDao - .streamAspects( - STRUCTURED_PROPERTY_ENTITY_NAME, - STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .map( - entityAspect -> - EntityUtils.toAspectRecord( - STRUCTURED_PROPERTY_ENTITY_NAME, - STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, - entityAspect.getMetadata(), - _entityRegistry)) - .map(recordTemplate -> (StructuredPropertyDefinition) recordTemplate) - .collect(Collectors.toSet())) - : getAllReindexConfigs(_services); + final List reindexConfigs; + if (_configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { + reindexConfigs = + getAllReindexConfigs(_services, getActiveStructuredPropertiesDefinitions(_aspectDao)); + } else { + reindexConfigs = getAllReindexConfigs(_services); + } // Get indices to update List indexConfigs = @@ -160,4 +153,31 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc return ack; } + + private static Set getActiveStructuredPropertiesDefinitions( + AspectDao aspectDao) { + Set removedStructuredPropertyUrns = + aspectDao + .streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME) + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) + .filter(status -> status.getSecond().isRemoved()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + + return aspectDao + .streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate( + StructuredPropertyDefinition.class, entityAspect.getMetadata()))) + .filter(definition -> !removedStructuredPropertyUrns.contains(definition.getKey())) + .map(Pair::getSecond) + .collect(Collectors.toSet()); + } } diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java 
b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java index 4c9e12c0ed1511..ed09a4a5aec2b9 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java @@ -49,7 +49,7 @@ public void testSystemUpdateInit() { @Test public void testSystemUpdateKafkaProducerOverride() { assertEquals(kafkaEventProducer, duheKafkaEventProducer); - assertEquals(entityService.get_producer(), duheKafkaEventProducer); + assertEquals(entityService.getProducer(), duheKafkaEventProducer); } @Test diff --git a/datahub-web-react/build.gradle b/datahub-web-react/build.gradle index 05af6871715ced..103792b20f761d 100644 --- a/datahub-web-react/build.gradle +++ b/datahub-web-react/build.gradle @@ -112,13 +112,17 @@ task yarnBuild(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { outputs.dir('dist') } +task cleanGenerate { + delete fileTree(dir: 'src', include: '**/*.generated.ts') +} + task cleanExtraDirs { delete 'node_modules/.yarn-integrity' delete 'dist' delete 'tmp' delete 'just' - delete fileTree(dir: 'src', include: '*.generated.ts') } +cleanExtraDirs.finalizedBy(cleanGenerate) clean.finalizedBy(cleanExtraDirs) configurations { diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 337b0dc87ec1c1..230dcad45468e1 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -88,8 +88,10 @@ "build": "yarn run generate && NODE_OPTIONS='--max-old-space-size=3072 --openssl-legacy-provider' CI=false vite build", "test": "vitest", "generate": "graphql-codegen --config codegen.yml", - "lint": "eslint . --ext .ts,.tsx --quiet", - "lint-fix": "eslint '*/**/*.{ts,tsx}' --quiet --fix" + "lint": "eslint . 
--ext .ts,.tsx --quiet && yarn type-check", + "lint-fix": "eslint '*/**/*.{ts,tsx}' --quiet --fix", + "type-check": "tsc --noEmit -p tsconfig.test.json", + "type-watch": "tsc -w --noEmit -p tsconfig.test.json" }, "browserslist": { "production": [ diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx index 1d1400a8cc7539..02dc70dc157bde 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx @@ -56,7 +56,7 @@ export default function EntityInfo({ formUrn }: Props) { > View Profile - + diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx index 6fe03585e6405d..b20aa26930fe92 100644 --- a/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx @@ -46,6 +46,7 @@ export default function FormByEntity({ formUrn }: Props) { dataNotCombinedWithSiblings: selectedEntityData, routeToTab: () => {}, refetch, + lineage: undefined, }} > diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/extensions/mentions/DataHubMentionsExtension.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/extensions/mentions/DataHubMentionsExtension.tsx index d4c6ace1897062..ed7881b340c437 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/extensions/mentions/DataHubMentionsExtension.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/extensions/mentions/DataHubMentionsExtension.tsx @@ -1,6 +1,6 @@ /* eslint-disable class-methods-use-this */ import React, { ComponentType } from 'react'; -import 
{ Plugin } from '@remirror/pm/state'; +import type { Plugin } from 'prosemirror-state'; import { ApplySchemaAttributes, CommandFunction, @@ -64,7 +64,7 @@ class DataHubMentionsExtension extends NodeExtension { }); } - createNodeSpec(extra: ApplySchemaAttributes, override: NodeSpecOverride): NodeExtensionSpec { + createNodeSpec(extra: ApplySchemaAttributes, override: Partial): NodeExtensionSpec { return { inline: true, marks: '', diff --git a/datahub-web-react/src/app/entity/shared/tabs/Entity/InputFieldsTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Entity/InputFieldsTab.tsx index b9d06afabb097b..cee65793a80e46 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Entity/InputFieldsTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Entity/InputFieldsTab.tsx @@ -35,7 +35,6 @@ export const InputFieldsTab = () => { { return ( - {hasMedia && ( + {hasMedia ? ( + ) : ( + )} - + Link diff --git a/datahub-web-react/tsconfig.test.json b/datahub-web-react/tsconfig.test.json new file mode 100644 index 00000000000000..bbbd6462b02c25 --- /dev/null +++ b/datahub-web-react/tsconfig.test.json @@ -0,0 +1,23 @@ +{ + "compilerOptions": { + "target": "es2017", + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "noImplicitAny": false, + "forceConsistentCasingInFileNames": true, + "noFallthroughCasesInSwitch": true, + "module": "esnext", + "moduleResolution": "node", + "resolveJsonModule": true, + "isolatedModules": false, + "noEmit": true, + "jsx": "react-jsx", + "types": ["vitest/globals"] + }, + "include": ["src", "src/conf/theme/styled-components.d.ts", "vite.config.ts", ".eslintrc.js"], + "exclude": ["**/*.test.ts*"], +} diff --git a/docs/README.md b/docs/README.md index ed84b28063065d..458bd0b2256cf6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -49,3 +49,4 @@ Example snippet: * Good: 
https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/dbt-test-logic-view.png * Bad: https://github.com/datahub-project/static-assets/blob/main/imgs/dbt-test-logic-view.png + diff --git a/docs/api/openapi/openapi-structured-properties.md b/docs/api/openapi/openapi-structured-properties.md index 521ce8789db0d4..8dd660698a0e8f 100644 --- a/docs/api/openapi/openapi-structured-properties.md +++ b/docs/api/openapi/openapi-structured-properties.md @@ -86,6 +86,50 @@ Example Response: ### Delete Property Definition +There are two types of deletion present in DataHub: `hard` and `soft` delete. As of the current release only the `soft` delete +is supported for Structured Properties. See the subsections below for more details. + +#### Soft Delete + +A `soft` deleted Structured Property does not remove any underlying data on the Structured Property entity +or the Structured Property's values written to other entities. The `soft` delete is 100% reversible with zero data loss. +When a Structured Property is `soft` deleted, a few operations are not available. + +Structured Property Soft Delete Effects: + +* Entities with a `soft` deleted Structured Property value will not return the `soft` deleted properties +* Updates to a `soft` deleted Structured Property's definition are denied +* Adding a `soft` deleted Structured Property's value to an entity is denied +* Search filters using a `soft` deleted Structured Property will be denied + +The following command will `soft` delete the test property `MyProperty01` created in this guide by writing +to the `status` aspect. 
+ +```shell +curl -X 'POST' \ + 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Amy.test.MyProperty01/status?systemMetadata=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ +"removed": true +}' | jq +``` + +Removing the `soft` delete from the Structured Property can be done by either `hard` deleting the `status` aspect or +changing the `removed` boolean to `false`. + +```shell +curl -X 'POST' \ + 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Amy.test.MyProperty01/status?systemMetadata=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ +"removed": false +}' | jq +``` + +#### Hard Delete + ⚠ **Not Implemented** ⚠ ## Applying Structured Properties diff --git a/docs/cli.md b/docs/cli.md index 3f67f1de6204d0..c4c6bd3dcc6417 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -134,7 +134,8 @@ datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yml -n --previ #### ingest deploy The `ingest deploy` command instructs the cli to upload an ingestion recipe to DataHub to be run by DataHub's [UI Ingestion](./ui-ingestion.md). -This command can also be used to schedule the ingestion while uploading or even to update existing sources. +This command can also be used to schedule the ingestion while uploading or even to update existing sources. It will upload to the remote instance the +CLI is connected to, not the sink of the recipe. Use `datahub init` to set the remote if not already set. 
To schedule a recipe called "test", to run at 5am everyday, London time with the recipe configured in a local `recipe.yaml` file: ````shell diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 6b6903b04f383e..430537bf684f63 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -35,6 +35,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe `datahub delete --platform databricks --soft` and then reingesting with latest cli version. - #9601 - The Unity Catalog(UC) ingestion source config `include_hive_metastore` is now enabled by default. This requires config `warehouse_id` to be set. You can disable `include_hive_metastore` by setting it to `False` to avoid ingesting legacy hive metastore catalog in Databricks. +- #9904 - The default Redshift `table_lineage_mode` is now MIXED, instead of `STL_SCAN_BASED`. Improved lineage generation is also available by enabling `use_lineage_v2`. This v2 implementation will become the default in a future release. 
### Potential Downtime diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 315a29e305b77c..66e4ad4b930e07 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -32,6 +32,7 @@ dependencies { testImplementation externalDependency.mockito testImplementation externalDependency.mockitoInline testCompileOnly externalDependency.lombok + testAnnotationProcessor externalDependency.lombok testImplementation externalDependency.classGraph } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java similarity index 95% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java rename to entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java index 11cd2352025efe..2ef22483da1ca5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.aspect.plugins.validation; +package com.linkedin.metadata.aspect; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java new file mode 100644 index 00000000000000..77e799f752455c --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -0,0 +1,4 @@ +package com.linkedin.metadata.aspect; + +/** Responses can be cached based on application.yaml caching configuration for the EntityClient */ +public interface CachingAspectRetriever extends AspectRetriever {} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java 
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java new file mode 100644 index 00000000000000..342b5376d8a755 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java @@ -0,0 +1,77 @@ +package com.linkedin.metadata.aspect; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.SystemMetadata; +import java.lang.reflect.InvocationTargetException; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public interface ReadItem { + /** + * The urn associated with the aspect + * + * @return + */ + @Nonnull + Urn getUrn(); + + /** + * Aspect's name + * + * @return the name + */ + @Nonnull + default String getAspectName() { + return getAspectSpec().getName(); + } + + @Nullable + RecordTemplate getRecordTemplate(); + + default <T> T getAspect(Class<T> clazz) { + return getAspect(clazz, getRecordTemplate()); + } + + static <T> T getAspect(Class<T> clazz, @Nullable RecordTemplate recordTemplate) { + if (recordTemplate != null) { + try { + return clazz.getConstructor(DataMap.class).newInstance(recordTemplate.data()); + } catch (InstantiationException + | IllegalAccessException + | InvocationTargetException + | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } else { + return null; + } + } + + /** + * System information + * + * @return the system metadata + */ + @Nullable + SystemMetadata getSystemMetadata(); + + /** + * The entity's schema + * + * @return entity specification + */ + @Nonnull + EntitySpec getEntitySpec(); + + /** + * The aspect's schema + * + * @return aspect's specification + */ + @Nonnull + AspectSpec getAspectSpec(); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java 
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java new file mode 100644 index 00000000000000..e83414c8c23a85 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java @@ -0,0 +1,25 @@ +package com.linkedin.metadata.aspect; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.UrnUtils; +import java.sql.Timestamp; +import javax.annotation.Nonnull; + +/** + * An aspect along with system metadata and creation timestamp. Represents an aspect as stored in + * primary storage. + */ +public interface SystemAspect extends ReadItem { + long getVersion(); + + Timestamp getCreatedOn(); + + String getCreatedBy(); + + @Nonnull + default AuditStamp getAuditStamp() { + return new AuditStamp() + .setActor(UrnUtils.getUrn(getCreatedBy())) + .setTime(getCreatedOn().getTime()); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index 3d803d238b4f92..ddedc96b385779 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -1,6 +1,10 @@ package com.linkedin.metadata.aspect.batch; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.ReadItem; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import java.util.Collection; @@ -20,27 +24,108 @@ public interface AspectsBatch { Collection<? extends BatchItem> getItems(); + AspectRetriever getAspectRetriever(); + /** - * Returns MCP items. Can be patch, upsert, etc. 
+ * Returns MCP items. Could be patch, upsert, etc. * * @return batch items */ - default Collection<? extends MCPBatchItem> getMCPItems() { + default Collection<? extends MCPItem> getMCPItems() { return getItems().stream() - .filter(item -> item instanceof MCPBatchItem) - .map(item -> (MCPBatchItem) item) + .filter(item -> item instanceof MCPItem) + .map(item -> (MCPItem) item) .collect(Collectors.toList()); } - Pair<Map<String, Set<String>>, List<UpsertItem>> toUpsertBatchItems( - Map<String, Map<String, SystemAspect>> latestAspects, AspectRetriever aspectRetriever); + /** + * Convert patches to upserts, apply hooks at the aspect and batch level. + * + * @param latestAspects latest version in the database + * @return The new urn/aspectnames and the uniform upserts, possibly expanded/mutated by the + * various hooks + */ + Pair<Map<String, Set<String>>, List<ChangeMCP>> toUpsertBatchItems( + Map<String, Map<String, SystemAspect>> latestAspects); + + /** + * Apply read mutations to batch + * + * @param items + */ + default void applyReadMutationHooks(Collection<ReadItem> items) { + applyReadMutationHooks(items, getAspectRetriever()); + } + + static void applyReadMutationHooks(Collection<ReadItem> items, AspectRetriever aspectRetriever) { + for (MutationHook mutationHook : aspectRetriever.getEntityRegistry().getAllMutationHooks()) { + mutationHook.applyReadMutation(items, aspectRetriever); + } + } + + /** + * Apply write mutations to batch + * + * @param changeMCPS + */ + default void applyWriteMutationHooks(Collection<ChangeMCP> changeMCPS) { + applyWriteMutationHooks(changeMCPS, getAspectRetriever()); + } + + static void applyWriteMutationHooks( + Collection<ChangeMCP> changeMCPS, AspectRetriever aspectRetriever) { + for (MutationHook mutationHook : aspectRetriever.getEntityRegistry().getAllMutationHooks()) { + mutationHook.applyWriteMutation(changeMCPS, aspectRetriever); + } + } + + default <T extends BatchItem> ValidationExceptionCollection validateProposed( + 
Collection<T> mcpItems) { + return validateProposed(mcpItems, getAspectRetriever()); + } + + static <T extends BatchItem> ValidationExceptionCollection validateProposed( + Collection<T> mcpItems, AspectRetriever aspectRetriever) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + aspectRetriever.getEntityRegistry().getAllAspectPayloadValidators().stream() + .flatMap(validator -> validator.validateProposed(mcpItems, aspectRetriever)) + .forEach(exceptions::addException); + return exceptions; + } + + default ValidationExceptionCollection validatePreCommit(Collection<ChangeMCP> changeMCPs) { + return validatePreCommit(changeMCPs, getAspectRetriever()); + } + + static ValidationExceptionCollection validatePreCommit( + Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + aspectRetriever.getEntityRegistry().getAllAspectPayloadValidators().stream() + .flatMap(validator -> validator.validatePreCommit(changeMCPs, aspectRetriever)) + .forEach(exceptions::addException); + return exceptions; + } + + default Stream<ChangeMCP> applyMCPSideEffects(Collection<ChangeMCP> items) { + return applyMCPSideEffects(items, getAspectRetriever()); + } - default Stream<UpsertItem> applyMCPSideEffects( - List<UpsertItem> items, AspectRetriever aspectRetriever) { + static Stream<ChangeMCP> applyMCPSideEffects( + Collection<ChangeMCP> items, AspectRetriever aspectRetriever) { return aspectRetriever.getEntityRegistry().getAllMCPSideEffects().stream() .flatMap(mcpSideEffect -> mcpSideEffect.apply(items, aspectRetriever)); } + default Stream<MCLItem> applyMCLSideEffects(Collection<MCLItem> items) { + return applyMCLSideEffects(items, getAspectRetriever()); + } + + static Stream<MCLItem> applyMCLSideEffects( + Collection<MCLItem> items, AspectRetriever aspectRetriever) { + return aspectRetriever.getEntityRegistry().getAllMCLSideEffects().stream() + 
.flatMap(mclSideEffect -> mclSideEffect.apply(items, aspectRetriever)); + } + default boolean containsDuplicateAspects() { return getItems().stream() .map(i -> String.format("%s_%s", i.getClass().getName(), i.hashCode())) @@ -81,7 +166,7 @@ default Map<String, Set<String>> getNewUrnAspectsMap( .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } - default <T> Map<String, Map<String, T>> merge( + static <T> Map<String, Map<String, T>> merge( @Nonnull Map<String, Map<String, T>> a, @Nonnull Map<String, Map<String, T>> b) { return Stream.concat(a.entrySet().stream(), b.entrySet().stream()) .flatMap( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java index 60033cd6919d60..a6dfbc277e12ec 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java @@ -1,45 +1,19 @@ package com.linkedin.metadata.aspect.batch; import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.mxe.SystemMetadata; +import com.linkedin.metadata.aspect.ReadItem; import javax.annotation.Nonnull; import javax.annotation.Nullable; -public interface BatchItem { - /** - * The urn associated with the aspect - * - * @return - */ - Urn getUrn(); - - /** - * Aspect's name - * - * @return the name - */ - @Nonnull - default String getAspectName() { - return getAspectSpec().getName(); - } - - /** - * System information - * - * @return the system metadata - */ - SystemMetadata getSystemMetadata(); +public interface BatchItem extends ReadItem { /** * Timestamp and actor * * @return the audit information */ + @Nullable AuditStamp 
getAuditStamp(); /** @@ -49,28 +23,4 @@ default String getAspectName() { */ @Nonnull ChangeType getChangeType(); - - /** - * The entity's schema - * - * @return entity specification - */ - @Nonnull - EntitySpec getEntitySpec(); - - /** - * The aspect's schema - * - * @return aspect's specification - */ - @Nonnull - AspectSpec getAspectSpec(); - - /** - * The aspect's record template. Null when patch - * - * @return record template if it exists - */ - @Nullable - RecordTemplate getRecordTemplate(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java new file mode 100644 index 00000000000000..94e8bbab3ceeb6 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java @@ -0,0 +1,42 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.data.DataMap; +import com.linkedin.metadata.aspect.SystemAspect; +import java.lang.reflect.InvocationTargetException; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * A proposal to write data to the primary datastore which includes system metadata and other + * related data stored along with the aspect + */ +public interface ChangeMCP extends MCPItem { + @Nonnull + SystemAspect getSystemAspect(@Nullable Long nextAspectVersion); + + @Nullable + SystemAspect getPreviousSystemAspect(); + + void setPreviousSystemAspect(@Nullable SystemAspect previousSystemAspect); + + long getNextAspectVersion(); + + void setNextAspectVersion(long nextAspectVersion); + + default <T> T getPreviousAspect(Class<T> clazz) { + if (getPreviousSystemAspect() != null) { + try { + return clazz + .getConstructor(DataMap.class) + .newInstance(getPreviousSystemAspect().getRecordTemplate().data()); + } catch (InstantiationException + | IllegalAccessException + | InvocationTargetException + | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } else 
{ + return null; + } + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java similarity index 70% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java rename to entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java index 17a910b125a34f..9fd2d6c607342f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java @@ -2,15 +2,17 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; +import java.lang.reflect.InvocationTargetException; import javax.annotation.Nonnull; import javax.annotation.Nullable; /** An item that represents a change that has been written to primary storage. 
*/ -public interface MCLBatchItem extends BatchItem { +public interface MCLItem extends BatchItem { @Nonnull MetadataChangeLog getMetadataChangeLog(); @@ -42,6 +44,21 @@ default SystemMetadata getPreviousSystemMetadata() { @Nullable RecordTemplate getPreviousRecordTemplate(); + default <T> T getPreviousAspect(Class<T> clazz) { + if (getPreviousRecordTemplate() != null) { + try { + return clazz.getConstructor(DataMap.class).newInstance(getPreviousRecordTemplate().data()); + } catch (InstantiationException + | IllegalAccessException + | InvocationTargetException + | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } else { + return null; + } + } + @Override @Nonnull default ChangeType getChangeType() { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java similarity index 83% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java rename to entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java index dd0d0ec68dac6c..8c25f3c4f44de6 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java @@ -7,10 +7,10 @@ import javax.annotation.Nullable; /** Represents a proposal to write to the primary data store which may be represented by an MCP */ -public abstract class MCPBatchItem implements BatchItem { +public interface MCPItem extends BatchItem { @Nullable - public abstract MetadataChangeProposal getMetadataChangeProposal(); + MetadataChangeProposal getMetadataChangeProposal(); /** * Validates that a change type is valid for the given aspect @@ -19,7 +19,7 @@ public abstract class MCPBatchItem implements BatchItem { * @param aspectSpec * @return */ - protected static boolean isValidChangeType(ChangeType changeType, AspectSpec aspectSpec) { + static boolean 
isValidChangeType(ChangeType changeType, AspectSpec aspectSpec) { if (aspectSpec.isTimeseries()) { // Timeseries aspects only support UPSERT return ChangeType.UPSERT.equals(changeType); @@ -32,7 +32,7 @@ protected static boolean isValidChangeType(ChangeType changeType, AspectSpec asp } } - protected static boolean supportsPatch(AspectSpec aspectSpec) { + static boolean supportsPatch(AspectSpec aspectSpec) { // Limit initial support to defined templates if (!AspectTemplateEngine.SUPPORTED_TEMPLATES.contains(aspectSpec.getName())) { // Prevent unexpected behavior for aspects that do not currently have 1st class patch support, diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java similarity index 58% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java rename to entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java index e9e30f7f2bd96f..f04133e9e1ff8b 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java @@ -2,12 +2,12 @@ import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.AspectRetriever; /** * A change proposal represented as a patch to an exiting stored object in the primary data store. 
*/ -public abstract class PatchItem extends MCPBatchItem { +public interface PatchMCP extends MCPItem { /** * Convert a Patch to an Upsert @@ -15,8 +15,7 @@ public abstract class PatchItem extends MCPBatchItem { * @param recordTemplate the current value record template * @return the upsert */ - public abstract UpsertItem applyPatch( - RecordTemplate recordTemplate, AspectRetriever aspectRetriever); + ChangeMCP applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever); - public abstract Patch getPatch(); + Patch getPatch(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java deleted file mode 100644 index 88ac902ae52fed..00000000000000 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.metadata.aspect.batch; - -import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.mxe.SystemMetadata; -import java.sql.Timestamp; - -/** - * An aspect along with system metadata and creation timestamp. Represents an aspect as stored in - * primary storage. 
- */ -public interface SystemAspect { - Urn getUrn(); - - String getAspectName(); - - long getVersion(); - - RecordTemplate getRecordTemplate(EntityRegistry entityRegistry); - - SystemMetadata getSystemMetadata(); - - Timestamp getCreatedOn(); -} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java deleted file mode 100644 index c64105637dfcc6..00000000000000 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.linkedin.metadata.aspect.batch; - -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -/** - * A proposal to write data to the primary datastore which includes system metadata and other - * related data stored along with the aspect - */ -public abstract class UpsertItem extends MCPBatchItem { - public abstract SystemAspect toLatestEntityAspect(); - - public abstract void validatePreCommit( - @Nullable RecordTemplate previous, @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException; -} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java new file mode 100644 index 00000000000000..98e90cfaa45cfc --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java @@ -0,0 +1,38 @@ +package com.linkedin.metadata.aspect.hooks; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.ReadItem; +import 
com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.structured.StructuredProperties; +import com.linkedin.util.Pair; +import java.util.Collection; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; + +public class StructuredPropertiesSoftDelete extends MutationHook { + public StructuredPropertiesSoftDelete(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + @Override + protected Stream<Pair<ReadItem, Boolean>> readMutation( + @Nonnull Collection<ReadItem> items, @Nonnull AspectRetriever aspectRetriever) { + Map<Urn, StructuredProperties> entityStructuredPropertiesMap = + items.stream() + .filter(i -> i.getRecordTemplate() != null) + .map(i -> Pair.of(i.getUrn(), i.getAspect(StructuredProperties.class))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + + // Apply filter + Map<Urn, Boolean> mutatedEntityStructuredPropertiesMap = + StructuredPropertyUtils.filterSoftDelete(entityStructuredPropertiesMap, aspectRetriever); + + return items.stream() + .map(i -> Pair.of(i, mutatedEntityStructuredPropertiesMap.getOrDefault(i.getUrn(), false))); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java index d88b05ede84548..6937070a684e29 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java @@ -4,7 +4,9 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; import 
javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; @@ -20,22 +22,34 @@ protected AspectPluginConfig getConfig() { } public boolean shouldApply( - @Nonnull ChangeType changeType, @Nonnull Urn entityUrn, @Nonnull AspectSpec aspectSpec) { + @Nullable ChangeType changeType, @Nonnull Urn entityUrn, @Nonnull AspectSpec aspectSpec) { return shouldApply(changeType, entityUrn.getEntityType(), aspectSpec); } public boolean shouldApply( - @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull AspectSpec aspectSpec) { + @Nullable ChangeType changeType, + @Nonnull EntitySpec entitySpec, + @Nonnull AspectSpec aspectSpec) { + return shouldApply(changeType, entitySpec.getName(), aspectSpec.getName()); + } + + public boolean shouldApply( + @Nullable ChangeType changeType, @Nonnull String entityName, @Nonnull AspectSpec aspectSpec) { return shouldApply(changeType, entityName, aspectSpec.getName()); } public boolean shouldApply( - @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + @Nullable ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { return getConfig().isEnabled() && isChangeTypeSupported(changeType) && isEntityAspectSupported(entityName, aspectName); } + protected boolean isEntityAspectSupported( + @Nonnull EntitySpec entitySpec, @Nonnull AspectSpec aspectSpec) { + return isEntityAspectSupported(entitySpec.getName(), aspectSpec.getName()); + } + protected boolean isEntityAspectSupported( @Nonnull String entityName, @Nonnull String aspectName) { return (getConfig().getSupportedEntityAspectNames().stream() @@ -51,8 +65,9 @@ protected boolean isAspectSupported(@Nonnull String aspectName) { .anyMatch(supported -> supported.getAspectName().equals(aspectName)); } - protected boolean isChangeTypeSupported(@Nonnull ChangeType changeType) { - return getConfig().getSupportedOperations().stream() - .anyMatch(supported -> 
changeType.toString().equals(supported)); + protected boolean isChangeTypeSupported(@Nullable ChangeType changeType) { + return (changeType == null && getConfig().getSupportedOperations().isEmpty()) + || getConfig().getSupportedOperations().stream() + .anyMatch(supported -> supported.equalsIgnoreCase(String.valueOf(changeType))); } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java index 059f133ad27760..00ebcf6b464911 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java @@ -1,7 +1,10 @@ package com.linkedin.metadata.aspect.plugins.config; +import java.util.Collections; import java.util.List; +import java.util.Objects; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -15,7 +18,7 @@ public class AspectPluginConfig { @Nonnull private String className; private boolean enabled; - @Nonnull private List<String> supportedOperations; + @Nullable private List<String> supportedOperations; @Nonnull private List<EntityAspectName> supportedEntityAspectNames; @Data @@ -27,6 +30,11 @@ public static class EntityAspectName { @Nonnull private String aspectName; } + @Nonnull + public List<String> getSupportedOperations() { + return supportedOperations != null ? 
supportedOperations : Collections.emptyList(); + } + /** * Used to determine is an earlier plugin is disabled by a subsequent plugin * @@ -44,7 +52,7 @@ private boolean isEqualExcludingEnabled(Object o) { AspectPluginConfig that = (AspectPluginConfig) o; if (!className.equals(that.className)) return false; - if (!supportedOperations.equals(that.supportedOperations)) return false; + if (!Objects.equals(supportedOperations, that.supportedOperations)) return false; return supportedEntityAspectNames.equals(that.supportedEntityAspectNames); } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java index a21f3cd2436de7..c60af636e424f1 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java @@ -1,15 +1,18 @@ package com.linkedin.metadata.aspect.plugins.hooks; -import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.plugins.PluginSpec; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import java.util.List; +import java.util.Collection; +import java.util.function.BiFunction; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; /** Given an MCL produce additional MCLs for writing */ -public abstract class MCLSideEffect extends PluginSpec { +public abstract class MCLSideEffect extends PluginSpec + implements BiFunction<Collection<MCLItem>, AspectRetriever, Stream<MCLItem>> { public MCLSideEffect(AspectPluginConfig aspectPluginConfig) { super(aspectPluginConfig); @@ -18,16 +21,19 @@ public 
MCLSideEffect(AspectPluginConfig aspectPluginConfig) { /** * Given a list of MCLs, output additional MCLs * - * @param input list + * @param batchItems list * @return additional upserts */ - public final Stream<MCLBatchItem> apply( - @Nonnull List<MCLBatchItem> input, @Nonnull AspectRetriever aspectRetriever) { - return input.stream() - .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) - .flatMap(i -> applyMCLSideEffect(i, aspectRetriever)); + @Override + public final Stream<MCLItem> apply( + @Nonnull Collection<MCLItem> batchItems, @Nonnull AspectRetriever aspectRetriever) { + return applyMCLSideEffect( + batchItems.stream() + .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) + .collect(Collectors.toList()), + aspectRetriever); } - protected abstract Stream<MCLBatchItem> applyMCLSideEffect( - @Nonnull MCLBatchItem input, @Nonnull AspectRetriever aspectRetriever); + protected abstract Stream<MCLItem> applyMCLSideEffect( + @Nonnull Collection<MCLItem> batchItems, @Nonnull AspectRetriever aspectRetriever); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java index 80cb405201c876..c346695c51d30f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java @@ -1,15 +1,18 @@ package com.linkedin.metadata.aspect.plugins.hooks; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import java.util.List; 
+import java.util.Collection; +import java.util.function.BiFunction; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; /** Given an MCP produce additional MCPs to write */ -public abstract class MCPSideEffect extends PluginSpec { +public abstract class MCPSideEffect extends PluginSpec + implements BiFunction<Collection<ChangeMCP>, AspectRetriever, Stream<ChangeMCP>> { public MCPSideEffect(AspectPluginConfig aspectPluginConfig) { super(aspectPluginConfig); @@ -18,16 +21,18 @@ public MCPSideEffect(AspectPluginConfig aspectPluginConfig) { /** * Given the list of MCP upserts, output additional upserts * - * @param input list + * @param changeMCPS list * @return additional upserts */ - public final Stream<UpsertItem> apply( - List<UpsertItem> input, @Nonnull AspectRetriever aspectRetriever) { - return input.stream() - .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) - .flatMap(i -> applyMCPSideEffect(i, aspectRetriever)); + public final Stream<ChangeMCP> apply( + Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { + return applyMCPSideEffect( + changeMCPS.stream() + .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) + .collect(Collectors.toList()), + aspectRetriever); } - protected abstract Stream<UpsertItem> applyMCPSideEffect( - UpsertItem input, @Nonnull AspectRetriever aspectRetriever); + protected abstract Stream<ChangeMCP> applyMCPSideEffect( + Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java index 730a494c03d7b9..1d6b4eeb617f5d 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java +++ 
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java @@ -1,16 +1,15 @@ package com.linkedin.metadata.aspect.plugins.hooks; -import com.linkedin.common.AuditStamp; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.ReadItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; /** Applies changes to the RecordTemplate prior to write */ public abstract class MutationHook extends PluginSpec { @@ -20,49 +19,38 @@ public MutationHook(AspectPluginConfig aspectPluginConfig) { } /** - * Mutating hook + * Mutating hook, original objects are potentially modified. 
* - * @param changeType Type of change to mutate - * @param entitySpec Entity specification - * @param aspectSpec Aspect specification - * @param oldAspectValue old aspect vale if it exists - * @param newAspectValue the new aspect - * @param oldSystemMetadata old system metadata if it exists - * @param newSystemMetadata the new system metadata - * @param auditStamp the audit stamp + * @param changeMCPS input upsert items + * @param aspectRetriever aspect retriever + * @return all items, with a boolean to indicate mutation */ - public final void applyMutation( - @Nonnull final ChangeType changeType, - @Nonnull EntitySpec entitySpec, - @Nonnull final AspectSpec aspectSpec, - @Nullable final RecordTemplate oldAspectValue, - @Nullable final RecordTemplate newAspectValue, - @Nullable final SystemMetadata oldSystemMetadata, - @Nullable final SystemMetadata newSystemMetadata, - @Nonnull AuditStamp auditStamp, - @Nonnull AspectRetriever aspectRetriever) { - if (shouldApply(changeType, entitySpec.getName(), aspectSpec)) { - mutate( - changeType, - entitySpec, - aspectSpec, - oldAspectValue, - newAspectValue, - oldSystemMetadata, - newSystemMetadata, - auditStamp, - aspectRetriever); - } + public final Stream<Pair<ChangeMCP, Boolean>> applyWriteMutation( + @Nonnull Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { + return writeMutation( + changeMCPS.stream() + .filter(i -> shouldApply(i.getChangeType(), i.getEntitySpec(), i.getAspectSpec())) + .collect(Collectors.toList()), + aspectRetriever); } - protected abstract void mutate( - @Nonnull final ChangeType changeType, - @Nonnull EntitySpec entitySpec, - @Nonnull final AspectSpec aspectSpec, - @Nullable final RecordTemplate oldAspectValue, - @Nullable final RecordTemplate newAspectValue, - @Nullable final SystemMetadata oldSystemMetadata, - @Nullable final SystemMetadata newSystemMetadata, - @Nonnull AuditStamp auditStamp, - @Nonnull AspectRetriever aspectRetriever); + // Read mutation + public 
final Stream<Pair<ReadItem, Boolean>> applyReadMutation( + @Nonnull Collection<ReadItem> items, @Nonnull AspectRetriever aspectRetriever) { + return readMutation( + items.stream() + .filter(i -> isEntityAspectSupported(i.getEntitySpec(), i.getAspectSpec())) + .collect(Collectors.toList()), + aspectRetriever); + } + + protected Stream<Pair<ReadItem, Boolean>> readMutation( + @Nonnull Collection<ReadItem> items, @Nonnull AspectRetriever aspectRetriever) { + return items.stream().map(i -> Pair.of(i, false)); + } + + protected Stream<Pair<ChangeMCP, Boolean>> writeMutation( + @Nonnull Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { + return changeMCPS.stream().map(i -> Pair.of(i, false)); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java index 656d017724571e..6e4bc45e563b90 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java @@ -1,13 +1,14 @@ package com.linkedin.metadata.aspect.plugins.validation; -import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.models.AspectSpec; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; public abstract class AspectPayloadValidator extends 
PluginSpec { @@ -19,65 +20,35 @@ public AspectPayloadValidator(AspectPluginConfig aspectPluginConfig) { * Validate a proposal for the given change type for an aspect within the context of the given * entity's urn. * - * @param changeType The change type - * @param entityUrn The parent entity for the aspect - * @param aspectSpec The aspect's specification - * @param aspectPayload The aspect's payload * @return whether the aspect proposal is valid - * @throws AspectValidationException */ - public final void validateProposed( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - if (shouldApply(changeType, entityUrn, aspectSpec)) { - validateProposedAspect(changeType, entityUrn, aspectSpec, aspectPayload, aspectRetriever); - } + public final Stream<AspectValidationException> validateProposed( + @Nonnull Collection<? extends BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever) { + return validateProposedAspects( + mcpItems.stream() + .filter(i -> shouldApply(i.getChangeType(), i.getUrn(), i.getAspectSpec())) + .collect(Collectors.toList()), + aspectRetriever); } /** * Validate the proposed aspect as its about to be written with the context of the previous * version of the aspect (if it existed) * - * @param changeType The change type - * @param entityUrn The parent entity for the aspect - * @param aspectSpec The aspect's specification - * @param previousAspect The previous version of the aspect if it exists - * @param proposedAspect The new version of the aspect * @return whether the aspect proposal is valid - * @throws AspectValidationException */ - public final void validatePreCommit( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - AspectRetriever aspectRetriever) - 
throws AspectValidationException { - if (shouldApply(changeType, entityUrn, aspectSpec)) { - validatePreCommitAspect( - changeType, entityUrn, aspectSpec, previousAspect, proposedAspect, aspectRetriever); - } + public final Stream<AspectValidationException> validatePreCommit( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { + return validatePreCommitAspects( + changeMCPs.stream() + .filter(i -> shouldApply(i.getChangeType(), i.getUrn(), i.getAspectSpec())) + .collect(Collectors.toList()), + aspectRetriever); } - protected abstract void validateProposedAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException; + protected abstract Stream<AspectValidationException> validateProposedAspects( + @Nonnull Collection<? extends BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever); - protected abstract void validatePreCommitAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - AspectRetriever aspectRetriever) - throws AspectValidationException; + protected abstract Stream<AspectValidationException> validatePreCommitAspects( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java index f858bdcf141aeb..f83642c5eed9ec 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java @@ -1,12 +1,59 @@ package 
com.linkedin.metadata.aspect.plugins.validation; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.util.Pair; +import java.util.Objects; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + public class AspectValidationException extends Exception { - public AspectValidationException(String msg) { - super(msg); + public static AspectValidationException forItem(BatchItem item, String msg) { + return forItem(item, msg, null); + } + + public static AspectValidationException forItem(BatchItem item, String msg, Exception e) { + return new AspectValidationException(item.getUrn(), item.getAspectName(), msg, e); } - public AspectValidationException(String msg, Exception e) { + @Nonnull private final Urn entityUrn; + @Nonnull private final String aspectName; + @Nullable private final String msg; + + public AspectValidationException(@Nonnull Urn entityUrn, @Nonnull String aspectName, String msg) { + this(entityUrn, aspectName, msg, null); + } + + public AspectValidationException( + @Nonnull Urn entityUrn, @Nonnull String aspectName, @Nonnull String msg, Exception e) { super(msg, e); + this.entityUrn = entityUrn; + this.aspectName = aspectName; + this.msg = msg; + } + + public Pair<Urn, String> getExceptionKey() { + return Pair.of(entityUrn, aspectName); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + AspectValidationException that = (AspectValidationException) o; + + if (!entityUrn.equals(that.entityUrn)) return false; + if (!aspectName.equals(that.aspectName)) return false; + return Objects.equals(msg, that.msg); + } + + @Override + public int hashCode() { + int result = entityUrn.hashCode(); + result = 31 * result + aspectName.hashCode(); + result = 31 * result + (msg != null ? 
msg.hashCode() : 0); + return result; } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java new file mode 100644 index 00000000000000..559fa85cff04c4 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java @@ -0,0 +1,68 @@ +package com.linkedin.metadata.aspect.plugins.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.util.Pair; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** Used to store a collection of exceptions, keyed by the URN/AspectName pair */ +public class ValidationExceptionCollection + extends HashMap<Pair<Urn, String>, Set<AspectValidationException>> { + + public static ValidationExceptionCollection newCollection() { + return new ValidationExceptionCollection(); + } + + public void addException(AspectValidationException exception) { + super.computeIfAbsent(exception.getExceptionKey(), key -> new HashSet<>()).add(exception); + } + + public void addException(BatchItem item, String message) { + addException(item, message, null); + } + + public void addException(BatchItem item, String message, Exception ex) { + super.computeIfAbsent(Pair.of(item.getUrn(), item.getAspectName()), key -> new HashSet<>()) + .add(AspectValidationException.forItem(item, message, ex)); + } + + public Stream<AspectValidationException> streamAllExceptions() { + return values().stream().flatMap(Collection::stream); + } + + public <T extends BatchItem> Collection<T> successful(Collection<T> items) { + return streamSuccessful(items.stream()).collect(Collectors.toList()); + } + + public 
<T extends BatchItem> Stream<T> streamSuccessful(Stream<T> items) { + return items.filter(i -> !this.containsKey(Pair.of(i.getUrn(), i.getAspectName()))); + } + + public <T extends BatchItem> Collection<T> exceptions(Collection<T> items) { + return streamExceptions(items.stream()).collect(Collectors.toList()); + } + + public <T extends BatchItem> Stream<T> streamExceptions(Stream<T> items) { + return items.filter(i -> this.containsKey(Pair.of(i.getUrn(), i.getAspectName()))); + } + + @Override + public String toString() { + return String.format( + "ValidationExceptionCollection{%s}", + entrySet().stream() + // sort by entity/aspect + .sorted(Comparator.comparing(p -> p.getKey().toString())) + .map( + e -> + String.format( + "EntityAspect:%s Exceptions: %s", e.getKey().toString(), e.getValue())) + .collect(Collectors.joining("; "))); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java index 5a4635da433ae4..17b66de79d113c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java @@ -1,22 +1,37 @@ package com.linkedin.metadata.aspect.validation; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static com.linkedin.structured.PropertyCardinality.*; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; 
+import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PropertyValue; import com.linkedin.structured.StructuredPropertyDefinition; +import java.net.URISyntaxException; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; public class PropertyDefinitionValidator extends AspectPayloadValidator { @@ -24,68 +39,128 @@ public PropertyDefinitionValidator(AspectPluginConfig aspectPluginConfig) { super(aspectPluginConfig); } + /** + * Prevent deletion of the definition or key aspect (only soft delete) + * + * @param mcpItems + * @param aspectRetriever + * @return + */ @Override - protected void validateProposedAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - // No-op + protected Stream<AspectValidationException> validateProposedAspects( + @Nonnull Collection<? 
extends BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever) { + final String entityKeyAspect = + aspectRetriever + .getEntityRegistry() + .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) + .getKeyAspectName(); + + return mcpItems.stream() + .filter(i -> ChangeType.DELETE.equals(i.getChangeType())) + .map( + i -> { + if (ImmutableSet.of(entityKeyAspect, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .contains(i.getAspectSpec().getName())) { + return AspectValidationException.forItem( + i, "Hard delete of Structured Property Definitions is not supported."); + } + return null; + }) + .filter(Objects::nonNull); } @Override - protected void validatePreCommitAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - AspectRetriever aspectRetriever) - throws AspectValidationException { - validate(previousAspect, proposedAspect); + protected Stream<AspectValidationException> validatePreCommitAspects( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { + return validateDefinitionUpserts( + changeMCPs.stream() + .filter( + i -> + ChangeType.UPSERT.equals(i.getChangeType()) + && STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(i.getAspectName())) + .collect(Collectors.toList()), + aspectRetriever); } - public static boolean validate( - @Nullable RecordTemplate previousAspect, @Nonnull RecordTemplate proposedAspect) - throws AspectValidationException { - if (previousAspect != null) { - StructuredPropertyDefinition previousDefinition = - (StructuredPropertyDefinition) previousAspect; - StructuredPropertyDefinition newDefinition = (StructuredPropertyDefinition) proposedAspect; - if (!newDefinition.getValueType().equals(previousDefinition.getValueType())) { - throw new AspectValidationException( - "Value type cannot be changed as this is a backwards incompatible change"); - } - if 
(newDefinition.getCardinality().equals(SINGLE) - && previousDefinition.getCardinality().equals(MULTIPLE)) { - throw new AspectValidationException( - "Property definition cardinality cannot be changed from MULTI to SINGLE"); - } - if (!newDefinition.getQualifiedName().equals(previousDefinition.getQualifiedName())) { - throw new AspectValidationException( - "Cannot change the fully qualified name of a Structured Property"); - } - // Assure new definition has only added allowed values, not removed them - if (newDefinition.getAllowedValues() != null) { - if (!previousDefinition.hasAllowedValues() - || previousDefinition.getAllowedValues() == null) { - throw new AspectValidationException( - "Cannot restrict values that were previously allowed"); + public static Stream<AspectValidationException> validateDefinitionUpserts( + @Nonnull Collection<ChangeMCP> changeMCPs, @Nonnull AspectRetriever aspectRetriever) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + + Set<Urn> propertyUrns = changeMCPs.stream().map(ChangeMCP::getUrn).collect(Collectors.toSet()); + + // Batch fetch status aspects + Map<Urn, Map<String, Aspect>> structuredPropertyAspects = + fetchPropertyStatusAspects(propertyUrns, aspectRetriever); + + for (ChangeMCP item : changeMCPs) { + // Prevent updates to the definition, if soft deleted property + softDeleteCheck( + item, + structuredPropertyAspects.getOrDefault(item.getUrn(), Collections.emptyMap()), + "Cannot mutate a soft deleted Structured Property Definition") + .ifPresent(exceptions::addException); + + if (item.getPreviousSystemAspect() != null) { + + StructuredPropertyDefinition previousDefinition = + item.getPreviousSystemAspect().getAspect(StructuredPropertyDefinition.class); + StructuredPropertyDefinition newDefinition = + item.getAspect(StructuredPropertyDefinition.class); + + if (!newDefinition.getValueType().equals(previousDefinition.getValueType())) { + exceptions.addException( + item, "Value type 
cannot be changed as this is a backwards incompatible change"); + } + if (newDefinition.getCardinality().equals(SINGLE) + && previousDefinition.getCardinality().equals(MULTIPLE)) { + exceptions.addException( + item, "Property definition cardinality cannot be changed from MULTI to SINGLE"); + } + if (!newDefinition.getQualifiedName().equals(previousDefinition.getQualifiedName())) { + exceptions.addException( + item, "Cannot change the fully qualified name of a Structured Property"); } - Set<PrimitivePropertyValue> newAllowedValues = - newDefinition.getAllowedValues().stream() - .map(PropertyValue::getValue) - .collect(Collectors.toSet()); - for (PropertyValue value : previousDefinition.getAllowedValues()) { - if (!newAllowedValues.contains(value.getValue())) { - throw new AspectValidationException( - "Cannot restrict values that were previously allowed"); + // Assure new definition has only added allowed values, not removed them + if (newDefinition.getAllowedValues() != null) { + if (!previousDefinition.hasAllowedValues() + || previousDefinition.getAllowedValues() == null) { + exceptions.addException(item, "Cannot restrict values that were previously allowed"); + } else { + Set<PrimitivePropertyValue> newAllowedValues = + newDefinition.getAllowedValues().stream() + .map(PropertyValue::getValue) + .collect(Collectors.toSet()); + for (PropertyValue value : previousDefinition.getAllowedValues()) { + if (!newAllowedValues.contains(value.getValue())) { + exceptions.addException( + item, "Cannot restrict values that were previously allowed"); + } + } } } } } - return true; + + return exceptions.streamAllExceptions(); + } + + private static Map<Urn, Map<String, Aspect>> fetchPropertyStatusAspects( + Set<Urn> structuredPropertyUrns, AspectRetriever aspectRetriever) { + try { + return aspectRetriever.getLatestAspectObjects( + structuredPropertyUrns, ImmutableSet.of(Constants.STATUS_ASPECT_NAME)); + } catch (RemoteInvocationException | URISyntaxException e) { + throw new 
RuntimeException(e); + } + } + + static <T extends BatchItem> Optional<AspectValidationException> softDeleteCheck( + T item, @Nonnull Map<String, Aspect> structuredPropertyAspects, String message) { + Aspect aspect = structuredPropertyAspects.get(STATUS_ASPECT_NAME); + if (aspect != null && new Status(aspect.data()).isRemoved()) { + return Optional.of(AspectValidationException.forItem(item, message)); + } + return Optional.empty(); } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java index efd95e0c2e3f12..8a8c13bf18e85e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java @@ -1,18 +1,25 @@ package com.linkedin.metadata.aspect.validation; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator.softDeleteCheck; + +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringArrayMap; import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import 
com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.models.LogicalValueType; import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PrimitivePropertyValueArray; import com.linkedin.structured.PropertyCardinality; @@ -23,14 +30,17 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; /** A Validator for StructuredProperties Aspect that is attached to entities like Datasets, etc. */ @@ -67,106 +77,155 @@ public static LogicalValueType getLogicalValueType(Urn valueType) { } @Override - protected void validateProposedAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - validate(aspectPayload, aspectRetriever); + protected Stream<AspectValidationException> validateProposedAspects( + @Nonnull Collection<? 
extends BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever) { + return validateProposedUpserts( + mcpItems.stream() + .filter(i -> ChangeType.UPSERT.equals(i.getChangeType())) + .collect(Collectors.toList()), + aspectRetriever); } - public static boolean validate( - @Nonnull RecordTemplate aspectPayload, @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - StructuredProperties structuredProperties = (StructuredProperties) aspectPayload; - log.warn("Validator called with {}", structuredProperties); - Map<Urn, List<StructuredPropertyValueAssignment>> structuredPropertiesMap = - structuredProperties.getProperties().stream() - .collect( - Collectors.groupingBy( - x -> x.getPropertyUrn(), - HashMap::new, - Collectors.toCollection(ArrayList::new))); - for (Map.Entry<Urn, List<StructuredPropertyValueAssignment>> entry : - structuredPropertiesMap.entrySet()) { - // There should only be one entry per structured property - List<StructuredPropertyValueAssignment> values = entry.getValue(); - if (values.size() > 1) { - throw new AspectValidationException( - "Property: " + entry.getKey() + " has multiple entries: " + values); - } - } + @Override + protected Stream<AspectValidationException> validatePreCommitAspects( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { + return Stream.empty(); + } - for (StructuredPropertyValueAssignment structuredPropertyValueAssignment : - structuredProperties.getProperties()) { - Urn propertyUrn = structuredPropertyValueAssignment.getPropertyUrn(); - String property = propertyUrn.toString(); - if (!propertyUrn.getEntityType().equals("structuredProperty")) { - throw new IllegalStateException( - "Unexpected entity type. 
Expected: structuredProperty Found: " - + propertyUrn.getEntityType()); - } - Aspect structuredPropertyDefinitionAspect = null; - try { - structuredPropertyDefinitionAspect = - aspectRetriever.getLatestAspectObject(propertyUrn, "propertyDefinition"); + public static Stream<AspectValidationException> validateProposedUpserts( + @Nonnull Collection<BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + + // Validate propertyUrns + Set<Urn> validPropertyUrns = validateStructuredPropertyUrns(mcpItems, exceptions); + + // Fetch property aspects for further validation + Map<Urn, Map<String, Aspect>> allStructuredPropertiesAspects = + fetchPropertyAspects(validPropertyUrns, aspectRetriever); + + // Validate assignments + for (BatchItem i : exceptions.successful(mcpItems)) { + for (StructuredPropertyValueAssignment structuredPropertyValueAssignment : + i.getAspect(StructuredProperties.class).getProperties()) { + + Urn propertyUrn = structuredPropertyValueAssignment.getPropertyUrn(); + Map<String, Aspect> propertyAspects = + allStructuredPropertiesAspects.getOrDefault(propertyUrn, Collections.emptyMap()); + // check definition soft delete + softDeleteCheck(i, propertyAspects, "Cannot apply a soft deleted Structured Property value") + .ifPresent(exceptions::addException); + + Aspect structuredPropertyDefinitionAspect = + propertyAspects.get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); if (structuredPropertyDefinitionAspect == null) { - throw new AspectValidationException("Unexpected null value found."); + exceptions.addException(i, "Unexpected null value found."); } - } catch (Exception e) { - log.error("Could not fetch latest aspect. 
PropertyUrn: {}", propertyUrn, e); - throw new AspectValidationException("Could not fetch latest aspect: " + e.getMessage(), e); - } - StructuredPropertyDefinition structuredPropertyDefinition = - new StructuredPropertyDefinition(structuredPropertyDefinitionAspect.data()); - log.warn( - "Retrieved property definition for {}. {}", propertyUrn, structuredPropertyDefinition); - if (structuredPropertyDefinition != null) { - PrimitivePropertyValueArray values = structuredPropertyValueAssignment.getValues(); - // Check cardinality - if (structuredPropertyDefinition.getCardinality() == PropertyCardinality.SINGLE) { - if (values.size() > 1) { - throw new AspectValidationException( - "Property: " - + property - + " has cardinality 1, but multiple values were assigned: " - + values); + StructuredPropertyDefinition structuredPropertyDefinition = + new StructuredPropertyDefinition(structuredPropertyDefinitionAspect.data()); + log.warn( + "Retrieved property definition for {}. {}", propertyUrn, structuredPropertyDefinition); + if (structuredPropertyDefinition != null) { + PrimitivePropertyValueArray values = structuredPropertyValueAssignment.getValues(); + // Check cardinality + if (structuredPropertyDefinition.getCardinality() == PropertyCardinality.SINGLE) { + if (values.size() > 1) { + exceptions.addException( + i, + "Property: " + + propertyUrn + + " has cardinality 1, but multiple values were assigned: " + + values); + } + } + + // Check values + for (PrimitivePropertyValue value : values) { + validateType(i, propertyUrn, structuredPropertyDefinition, value) + .ifPresent(exceptions::addException); + validateAllowedValues(i, propertyUrn, structuredPropertyDefinition, value) + .ifPresent(exceptions::addException); } } - // Check values - for (PrimitivePropertyValue value : values) { - validateType(propertyUrn, structuredPropertyDefinition, value); - validateAllowedValues(propertyUrn, structuredPropertyDefinition, value); + } + } + + return exceptions.streamAllExceptions(); + 
} + + private static Set<Urn> validateStructuredPropertyUrns( + Collection<BatchItem> mcpItems, ValidationExceptionCollection exceptions) { + Set<Urn> validPropertyUrns = new HashSet<>(); + + for (BatchItem i : exceptions.successful(mcpItems)) { + StructuredProperties structuredProperties = i.getAspect(StructuredProperties.class); + + log.warn("Validator called with {}", structuredProperties); + Map<Urn, List<StructuredPropertyValueAssignment>> structuredPropertiesMap = + structuredProperties.getProperties().stream() + .collect( + Collectors.groupingBy( + x -> x.getPropertyUrn(), + HashMap::new, + Collectors.toCollection(ArrayList::new))); + for (Map.Entry<Urn, List<StructuredPropertyValueAssignment>> entry : + structuredPropertiesMap.entrySet()) { + + // There should only be one entry per structured property + List<StructuredPropertyValueAssignment> values = entry.getValue(); + if (values.size() > 1) { + exceptions.addException( + i, "Property: " + entry.getKey() + " has multiple entries: " + values); + } else { + for (StructuredPropertyValueAssignment structuredPropertyValueAssignment : + structuredProperties.getProperties()) { + Urn propertyUrn = structuredPropertyValueAssignment.getPropertyUrn(); + + if (!propertyUrn.getEntityType().equals("structuredProperty")) { + exceptions.addException( + i, + "Unexpected entity type. 
Expected: structuredProperty Found: " + + propertyUrn.getEntityType()); + } else { + validPropertyUrns.add(propertyUrn); + } + } } } } - return true; + return validPropertyUrns; } - private static void validateAllowedValues( - Urn propertyUrn, StructuredPropertyDefinition definition, PrimitivePropertyValue value) - throws AspectValidationException { + private static Optional<AspectValidationException> validateAllowedValues( + BatchItem item, + Urn propertyUrn, + StructuredPropertyDefinition definition, + PrimitivePropertyValue value) { if (definition.getAllowedValues() != null) { Set<PrimitivePropertyValue> definedValues = definition.getAllowedValues().stream() .map(PropertyValue::getValue) .collect(Collectors.toSet()); if (definedValues.stream().noneMatch(definedPrimitive -> definedPrimitive.equals(value))) { - throw new AspectValidationException( - String.format( - "Property: %s, value: %s should be one of %s", propertyUrn, value, definedValues)); + return Optional.of( + AspectValidationException.forItem( + item, + String.format( + "Property: %s, value: %s should be one of %s", + propertyUrn, value, definedValues))); } } + return Optional.empty(); } - private static void validateType( - Urn propertyUrn, StructuredPropertyDefinition definition, PrimitivePropertyValue value) - throws AspectValidationException { + private static Optional<AspectValidationException> validateType( + BatchItem item, + Urn propertyUrn, + StructuredPropertyDefinition definition, + PrimitivePropertyValue value) { Urn valueType = definition.getValueType(); LogicalValueType typeDefinition = getLogicalValueType(valueType); @@ -175,16 +234,24 @@ private static void validateType( log.debug( "Property definition demands a string value. 
{}, {}", value.isString(), value.isDouble()); if (value.getString() == null) { - throw new AspectValidationException( - "Property: " + propertyUrn.toString() + ", value: " + value + " should be a string"); + return Optional.of( + AspectValidationException.forItem( + item, + "Property: " + + propertyUrn.toString() + + ", value: " + + value + + " should be a string")); } else if (typeDefinition.equals(LogicalValueType.DATE)) { if (!StructuredPropertyUtils.isValidDate(value)) { - throw new AspectValidationException( - "Property: " - + propertyUrn.toString() - + ", value: " - + value - + " should be a date with format YYYY-MM-DD"); + return Optional.of( + AspectValidationException.forItem( + item, + "Property: " + + propertyUrn.toString() + + ", value: " + + value + + " should be a date with format YYYY-MM-DD")); } } else if (typeDefinition.equals(LogicalValueType.URN)) { StringArrayMap valueTypeQualifier = definition.getTypeQualifier(); @@ -192,8 +259,11 @@ private static void validateType( try { typeValue = Urn.createFromString(value.getString()); } catch (URISyntaxException e) { - throw new AspectValidationException( - "Property: " + propertyUrn.toString() + ", value: " + value + " should be an urn", e); + return Optional.of( + AspectValidationException.forItem( + item, + "Property: " + propertyUrn.toString() + ", value: " + value + " should be an urn", + e)); } if (valueTypeQualifier != null) { if (valueTypeQualifier.containsKey("allowedTypes")) { @@ -216,13 +286,15 @@ private static void validateType( } } if (!matchedAny) { - throw new AspectValidationException( - "Property: " - + propertyUrn.toString() - + ", value: " - + value - + " is not of any supported urn types:" - + allowedTypes); + return Optional.of( + AspectValidationException.forItem( + item, + "Property: " + + propertyUrn.toString() + + ", value: " + + value + + " is not of any supported urn types:" + + allowedTypes)); } } } @@ -233,13 +305,25 @@ private static void validateType( Double doubleValue = 
value.getDouble() != null ? value.getDouble() : Double.parseDouble(value.getString()); } catch (NumberFormatException | NullPointerException e) { - throw new AspectValidationException( - "Property: " + propertyUrn.toString() + ", value: " + value + " should be a number"); + return Optional.of( + AspectValidationException.forItem( + item, + "Property: " + + propertyUrn.toString() + + ", value: " + + value + + " should be a number")); } } else { - throw new AspectValidationException( - "Validation support for type " + definition.getValueType() + " is not yet implemented."); + return Optional.of( + AspectValidationException.forItem( + item, + "Validation support for type " + + definition.getValueType() + + " is not yet implemented.")); } + + return Optional.empty(); } private static String getValueTypeId(@Nonnull final Urn valueType) { @@ -250,15 +334,19 @@ private static String getValueTypeId(@Nonnull final Urn valueType) { return valueTypeId; } - @Override - protected void validatePreCommitAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - AspectRetriever aspectRetriever) - throws AspectValidationException { - // No-op + private static Map<Urn, Map<String, Aspect>> fetchPropertyAspects( + Set<Urn> structuredPropertyUrns, AspectRetriever aspectRetriever) { + if (structuredPropertyUrns.isEmpty()) { + return Collections.emptyMap(); + } else { + try { + return aspectRetriever.getLatestAspectObjects( + structuredPropertyUrns, + ImmutableSet.of( + Constants.STATUS_ASPECT_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException(e); + } + } } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java index 
a8711429421f3b..6c720f6c83ffa5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java @@ -1,8 +1,34 @@ package com.linkedin.metadata.models; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.PrimitivePropertyValue; +import com.linkedin.structured.StructuredProperties; +import com.linkedin.structured.StructuredPropertyValueAssignment; +import com.linkedin.structured.StructuredPropertyValueAssignmentArray; +import com.linkedin.util.Pair; +import java.net.URISyntaxException; import java.sql.Date; import java.time.format.DateTimeParseException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class StructuredPropertyUtils { @@ -18,9 +44,62 @@ private StructuredPropertyUtils() {} * @param fullyQualifiedName The original fully qualified name of the property * @return The sanitized version that can be used as a field name */ - public static String sanitizeStructuredPropertyFQN(String fullyQualifiedName) { - String sanitizedName = 
fullyQualifiedName.replace('.', '_').replace(' ', '_'); - return sanitizedName; + public static String sanitizeStructuredPropertyFQN(@Nonnull String fullyQualifiedName) { + if (fullyQualifiedName.contains(" ")) { + throw new IllegalArgumentException( + "Fully qualified structured property name cannot contain spaces"); + } + return fullyQualifiedName.replace('.', '_'); + } + + public static void validateStructuredPropertyFQN( + @Nonnull Collection<String> fullyQualifiedNames, @Nonnull AspectRetriever aspectRetriever) { + Set<Urn> structuredPropertyUrns = + fullyQualifiedNames.stream() + .map(StructuredPropertyUtils::toURNFromFieldName) + .collect(Collectors.toSet()); + Set<Urn> removedUrns = getRemovedUrns(structuredPropertyUrns, aspectRetriever); + if (!removedUrns.isEmpty()) { + throw new IllegalArgumentException( + String.format("Cannot filter on deleted Structured Property %s", removedUrns)); + } + } + + public static Urn toURNFromFieldName(@Nonnull String fieldName) { + return UrnUtils.getUrn( + String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fieldName.replace('_', '.'))); + } + + public static void validateFilter( + @Nullable Filter filter, @Nonnull AspectRetriever aspectRetriever) { + + if (filter == null) { + return; + } + + Set<String> fieldNames = new HashSet<>(); + + if (filter.getCriteria() != null) { + for (Criterion c : filter.getCriteria()) { + if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + fieldNames.add(c.getField().substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + } + } + } + + if (filter.getOr() != null) { + for (ConjunctiveCriterion cc : filter.getOr()) { + for (Criterion c : cc.getAnd()) { + if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + fieldNames.add(c.getField().substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + } + } + } + } + + if (!fieldNames.isEmpty()) { + validateStructuredPropertyFQN(fieldNames, aspectRetriever); + } } public static Date 
toDate(PrimitivePropertyValue value) throws DateTimeParseException { @@ -42,4 +121,76 @@ public static boolean isValidDate(PrimitivePropertyValue value) { } return date.compareTo(MIN_DATE) >= 0 && date.compareTo(MAX_DATE) <= 0; } + + private static Set<Urn> getRemovedUrns(Set<Urn> urns, AspectRetriever aspectRetriever) { + try { + return aspectRetriever + .getLatestAspectObjects(urns, ImmutableSet.of(STATUS_ASPECT_NAME)) + .entrySet() + .stream() + .filter( + entry -> + entry.getValue().containsKey(STATUS_ASPECT_NAME) + && new Status(entry.getValue().get(STATUS_ASPECT_NAME).data()).isRemoved()) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException(e); + } + } + + /** + * Given a collection of structured properties, return the structured properties with soft deleted + * assignments removed + * + * @param properties collection of structured properties + * @param aspectRetriever typically entity service or entity client + entity registry + * @return structured properties object without value assignments for deleted structured + * properties and whether values were filtered + */ + public static Map<Urn, Boolean> filterSoftDelete( + Map<Urn, StructuredProperties> properties, AspectRetriever aspectRetriever) { + final Set<Urn> structuredPropertiesUrns = + properties.values().stream() + .flatMap(structuredProperties -> structuredProperties.getProperties().stream()) + .map(StructuredPropertyValueAssignment::getPropertyUrn) + .collect(Collectors.toSet()); + + final Set<Urn> removedUrns = getRemovedUrns(structuredPropertiesUrns, aspectRetriever); + + return properties.entrySet().stream() + .map( + entry -> + Pair.of( + entry.getKey(), filterSoftDelete(entry.getValue(), removedUrns).getSecond())) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + + private static Pair<StructuredProperties, Boolean> filterSoftDelete( + StructuredProperties structuredProperties, 
Set<Urn> softDeletedPropertyUrns) { + + Pair<StructuredPropertyValueAssignmentArray, Boolean> filtered = + filterValueAssignment(structuredProperties.getProperties(), softDeletedPropertyUrns); + + if (filtered.getSecond()) { + return Pair.of(structuredProperties.setProperties(filtered.getFirst()), true); + } else { + return Pair.of(structuredProperties, false); + } + } + + private static Pair<StructuredPropertyValueAssignmentArray, Boolean> filterValueAssignment( + StructuredPropertyValueAssignmentArray in, Set<Urn> softDeletedPropertyUrns) { + if (in.stream().noneMatch(p -> softDeletedPropertyUrns.contains(p.getPropertyUrn()))) { + return Pair.of(in, false); + } else { + return Pair.of( + new StructuredPropertyValueAssignmentArray( + in.stream() + .filter( + assignment -> !softDeletedPropertyUrns.contains(assignment.getPropertyUrn())) + .collect(Collectors.toSet())), + true); + } + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java index c2aa1fab6c2c0f..f4d9926f13ae66 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.models.registry; -import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; import com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffect; @@ -13,7 +12,6 @@ import com.linkedin.metadata.models.EventSpec; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -77,45 +75,24 @@ default String getIdentifier() { AspectTemplateEngine getAspectTemplateEngine(); /** - * Returns applicable {@link 
AspectPayloadValidator} implementations given the change type and - * entity/aspect information. + * Prefer {@link com.linkedin.metadata.aspect.batch.AspectsBatch} instead of using this method + * directly. * - * @param changeType The type of change to be validated - * @param entityName The entity name - * @param aspectName The aspect name * @return List of validator implementations */ - @Nonnull - default List<AspectPayloadValidator> getAspectPayloadValidators( - @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { - return getAllAspectPayloadValidators().stream() - .filter( - aspectPayloadValidator -> - aspectPayloadValidator.shouldApply(changeType, entityName, aspectName)) - .collect(Collectors.toList()); - } - @Nonnull default List<AspectPayloadValidator> getAllAspectPayloadValidators() { return getPluginFactory().getAspectPayloadValidators(); } /** - * Return mutation hooks for {@link com.linkedin.data.template.RecordTemplate} + * Returns mutation hooks. + * + * <p>Prefer {@link com.linkedin.metadata.aspect.batch.AspectsBatch} instead of using this method + * directly. * - * @param changeType The type of change - * @param entityName The entity name - * @param aspectName The aspect name - * @return Mutation hooks + * @return list of mutation hooks. */ - @Nonnull - default List<MutationHook> getMutationHooks( - @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { - return getAllMutationHooks().stream() - .filter(mutationHook -> mutationHook.shouldApply(changeType, entityName, aspectName)) - .collect(Collectors.toList()); - } - @Nonnull default List<MutationHook> getAllMutationHooks() { return getPluginFactory().getMutationHooks(); @@ -125,19 +102,11 @@ default List<MutationHook> getAllMutationHooks() { * Returns the side effects to apply to {@link com.linkedin.mxe.MetadataChangeProposal}. Side * effects can generate one or more additional MCPs during write operations. 
* - * @param changeType The type of change - * @param entityName The entity name - * @param aspectName The aspect name + * <p>Prefer {@link com.linkedin.metadata.aspect.batch.AspectsBatch} instead of using this method + * directly. + * * @return MCP side effects */ - @Nonnull - default List<MCPSideEffect> getMCPSideEffects( - @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { - return getAllMCPSideEffects().stream() - .filter(mcpSideEffect -> mcpSideEffect.shouldApply(changeType, entityName, aspectName)) - .collect(Collectors.toList()); - } - @Nonnull default List<MCPSideEffect> getAllMCPSideEffects() { return getPluginFactory().getMcpSideEffects(); @@ -147,19 +116,11 @@ default List<MCPSideEffect> getAllMCPSideEffects() { * Returns the side effects to apply to {@link com.linkedin.mxe.MetadataChangeLog}. Side effects * can generate one or more additional MCLs during write operations. * - * @param changeType The type of change - * @param entityName The entity name - * @param aspectName The aspect name + * <p>Prefer {@link com.linkedin.metadata.aspect.batch.AspectsBatch} instead of using this method + * directly. 
+ * * @return MCL side effects */ - @Nonnull - default List<MCLSideEffect> getMCLSideEffects( - @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { - return getAllMCLSideEffects().stream() - .filter(mclSideEffect -> mclSideEffect.shouldApply(changeType, entityName, aspectName)) - .collect(Collectors.toList()); - } - @Nonnull default List<MCLSideEffect> getAllMCLSideEffects() { return getPluginFactory().getMclSideEffects(); diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java new file mode 100644 index 00000000000000..6f4149b1031256 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java @@ -0,0 +1,96 @@ +package com.linkedin.metadata.aspect.hooks; + +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.structured.PrimitivePropertyValue; +import com.linkedin.structured.PrimitivePropertyValueArray; +import com.linkedin.structured.StructuredProperties; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.structured.StructuredPropertyValueAssignment; +import com.linkedin.structured.StructuredPropertyValueAssignmentArray; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Map; +import org.testng.annotations.Test; + +public class 
StructuredPropertiesSoftDeleteTest { + + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + + @Test + public void testSoftDeleteFilter() throws URISyntaxException, CloneNotSupportedException { + Urn propertyUrnA = + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + StructuredPropertyDefinition stringPropertyDefA = + new StructuredPropertyDefinition() + .setValueType(Urn.createFromString("urn:li:type:datahub.string")); + StructuredPropertyValueAssignment assignmentA = + new StructuredPropertyValueAssignment() + .setPropertyUrn(propertyUrnA) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(0.0))); + + Urn propertyUrnB = + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTimeDeleted"); + StructuredPropertyDefinition stringPropertyDefB = + new StructuredPropertyDefinition() + .setValueType(Urn.createFromString("urn:li:type:datahub.string")); + StructuredPropertyValueAssignment assignmentB = + new StructuredPropertyValueAssignment() + .setPropertyUrn(propertyUrnB) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(0.0))); + + StructuredPropertiesSoftDelete testHook = + new StructuredPropertiesSoftDelete( + AspectPluginConfig.builder() + .enabled(true) + .className(StructuredPropertiesSoftDelete.class.getName()) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(DATASET_ENTITY_NAME) + .aspectName(Constants.STRUCTURED_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + + StructuredProperties expectedAllValues = new StructuredProperties(); + expectedAllValues.setProperties( + new StructuredPropertyValueAssignmentArray(assignmentA, assignmentB)); + + StructuredProperties test = expectedAllValues.copy(); + testHook.readMutation( + TestMCP.ofOneBatchItemDatasetUrn(test, TEST_REGISTRY), + new MockAspectRetriever( + Map.of( + propertyUrnA, + List.of(stringPropertyDefA), + propertyUrnB, + 
List.of(stringPropertyDefB)))); + assertEquals( + test.getProperties().size(), + 2, + "Expected all values because all definitions are NOT soft deleted"); + + StructuredProperties expectedOneValue = new StructuredProperties(); + expectedOneValue.setProperties(new StructuredPropertyValueAssignmentArray(assignmentA)); + test = expectedAllValues.copy(); + testHook.readMutation( + TestMCP.ofOneBatchItemDatasetUrn(test, TEST_REGISTRY), + new MockAspectRetriever( + Map.of( + propertyUrnA, + List.of(stringPropertyDefA), + propertyUrnB, + List.of(stringPropertyDefB, new Status().setRemoved(true))))); + assertEquals( + test.getProperties().size(), 1, "Expected 1 value because 1 definition is soft deleted"); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java index f801ce7bf1ffe6..a9f903f4b7017d 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java @@ -61,27 +61,37 @@ public void testConfigEntityRegistry() throws FileNotFoundException { assertNotNull(eventSpec.getPegasusSchema()); assertEquals( - configEntityRegistry - .getAspectPayloadValidators(ChangeType.UPSERT, "chart", "status") - .size(), + configEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "chart", "status")) + .count(), 2); assertEquals( - configEntityRegistry - .getAspectPayloadValidators(ChangeType.DELETE, "chart", "status") - .size(), + configEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(validator -> validator.shouldApply(ChangeType.DELETE, "chart", "status")) + .count(), 0); assertEquals( - configEntityRegistry.getMCPSideEffects(ChangeType.UPSERT, "dataset", "datasetKey").size(), + configEntityRegistry.getAllMCPSideEffects().stream() + .filter(validator 
-> validator.shouldApply(ChangeType.UPSERT, "dataset", "datasetKey")) + .count(), 1); assertEquals( - configEntityRegistry.getMCPSideEffects(ChangeType.DELETE, "dataset", "datasetKey").size(), + configEntityRegistry.getAllMCPSideEffects().stream() + .filter(validator -> validator.shouldApply(ChangeType.DELETE, "dataset", "datasetKey")) + .count(), 0); assertEquals( - configEntityRegistry.getMutationHooks(ChangeType.UPSERT, "*", "schemaMetadata").size(), 1); + configEntityRegistry.getAllMutationHooks().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "*", "schemaMetadata")) + .count(), + 1); assertEquals( - configEntityRegistry.getMutationHooks(ChangeType.DELETE, "*", "schemaMetadata").size(), 0); + configEntityRegistry.getAllMutationHooks().stream() + .filter(validator -> validator.shouldApply(ChangeType.DELETE, "*", "schemaMetadata")) + .count(), + 0); } @Test @@ -123,27 +133,37 @@ public void testMergedEntityRegistry() throws EntityRegistryException { assertNotNull(eventSpec.getPegasusSchema()); assertEquals( - mergedEntityRegistry - .getAspectPayloadValidators(ChangeType.UPSERT, "chart", "status") - .size(), + mergedEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "chart", "status")) + .count(), 2); assertEquals( - mergedEntityRegistry - .getAspectPayloadValidators(ChangeType.DELETE, "chart", "status") - .size(), + mergedEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(validator -> validator.shouldApply(ChangeType.DELETE, "chart", "status")) + .count(), 1); assertEquals( - mergedEntityRegistry.getMCPSideEffects(ChangeType.UPSERT, "dataset", "datasetKey").size(), + mergedEntityRegistry.getAllMCPSideEffects().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "dataset", "datasetKey")) + .count(), 2); assertEquals( - mergedEntityRegistry.getMCPSideEffects(ChangeType.DELETE, "dataset", "datasetKey").size(), + 
mergedEntityRegistry.getAllMCPSideEffects().stream() + .filter(validator -> validator.shouldApply(ChangeType.DELETE, "dataset", "datasetKey")) + .count(), 1); assertEquals( - mergedEntityRegistry.getMutationHooks(ChangeType.UPSERT, "*", "schemaMetadata").size(), 2); + mergedEntityRegistry.getAllMutationHooks().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "*", "schemaMetadata")) + .count(), + 2); assertEquals( - mergedEntityRegistry.getMutationHooks(ChangeType.DELETE, "*", "schemaMetadata").size(), 1); + mergedEntityRegistry.getAllMutationHooks().stream() + .filter(validator -> validator.shouldApply(ChangeType.DELETE, "*", "schemaMetadata")) + .count(), + 1); } @Test diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java index 8ee5ff4f998206..ac2397a6aaa335 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java @@ -5,11 +5,13 @@ import com.datahub.test.TestEntityProfile; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import org.testng.annotations.BeforeTest; @@ -32,7 +34,10 @@ public void testCustomMCLSideEffect() { 
TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); List<MCLSideEffect> mclSideEffects = - configEntityRegistry.getMCLSideEffects(ChangeType.UPSERT, "chart", "chartInfo"); + configEntityRegistry.getAllMCLSideEffects().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "chart", "chartInfo")) + .collect(Collectors.toList()); + assertEquals( mclSideEffects, List.of( @@ -52,15 +57,14 @@ public void testCustomMCLSideEffect() { } public static class TestMCLSideEffect extends MCLSideEffect { - public TestMCLSideEffect(AspectPluginConfig aspectPluginConfig) { super(aspectPluginConfig); } @Override - protected Stream<MCLBatchItem> applyMCLSideEffect( - @Nonnull MCLBatchItem input, @Nonnull AspectRetriever aspectRetriever) { - return Stream.of(input); + protected Stream<MCLItem> applyMCLSideEffect( + @Nonnull Collection<MCLItem> batchItems, @Nonnull AspectRetriever aspectRetriever) { + return null; } } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java index 8522e8facf3e08..e3499861d61986 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java @@ -5,11 +5,13 @@ import com.datahub.test.TestEntityProfile; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import 
java.util.Collection; import java.util.List; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import org.testng.annotations.BeforeTest; @@ -32,7 +34,10 @@ public void testCustomMCPSideEffect() { TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); List<MCPSideEffect> mcpSideEffects = - configEntityRegistry.getMCPSideEffects(ChangeType.UPSERT, "dataset", "datasetKey"); + configEntityRegistry.getAllMCPSideEffects().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "dataset", "datasetKey")) + .collect(Collectors.toList()); + assertEquals( mcpSideEffects, List.of( @@ -58,9 +63,9 @@ public TestMCPSideEffect(AspectPluginConfig aspectPluginConfig) { } @Override - protected Stream<UpsertItem> applyMCPSideEffect( - UpsertItem input, @Nonnull AspectRetriever aspectRetriever) { - return Stream.of(input); + protected Stream<ChangeMCP> applyMCPSideEffect( + Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { + return changeMCPS.stream(); } } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java index 5094fd7fdd443d..16ea003582b180 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java @@ -3,19 +3,12 @@ import static org.testng.Assert.assertEquals; import com.datahub.test.TestEntityProfile; -import com.linkedin.common.AuditStamp; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; -import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import 
com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; -import com.linkedin.mxe.SystemMetadata; import java.util.List; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; +import java.util.stream.Collectors; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -36,7 +29,10 @@ public void testCustomMutator() { TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); List<MutationHook> mutators = - configEntityRegistry.getMutationHooks(ChangeType.UPSERT, "*", "schemaMetadata"); + configEntityRegistry.getAllMutationHooks().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "*", "schemaMetadata")) + .collect(Collectors.toList()); + assertEquals( mutators, List.of( @@ -56,21 +52,8 @@ public void testCustomMutator() { } public static class TestMutator extends MutationHook { - public TestMutator(AspectPluginConfig aspectPluginConfig) { super(aspectPluginConfig); } - - @Override - protected void mutate( - @Nonnull ChangeType changeType, - @Nonnull EntitySpec entitySpec, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate oldAspectValue, - @Nullable RecordTemplate newAspectValue, - @Nullable SystemMetadata oldSystemMetadata, - @Nullable SystemMetadata newSystemMetadata, - @Nonnull AuditStamp auditStamp, - @Nonnull AspectRetriever aspectRetriever) {} } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java index eb132836be4656..10dbeafa822945 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java +++ 
b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java @@ -3,16 +3,18 @@ import static org.testng.Assert.assertEquals; import com.datahub.test.TestEntityProfile; -import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; -import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -33,7 +35,10 @@ public void testCustomValidator() { TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); List<AspectPayloadValidator> validators = - configEntityRegistry.getAspectPayloadValidators(ChangeType.UPSERT, "chart", "status"); + configEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(validator -> validator.shouldApply(ChangeType.UPSERT, "chart", "status")) + .collect(Collectors.toList()); + assertEquals( validators, List.of( @@ -72,26 +77,16 @@ public TestValidator(AspectPluginConfig config) { } @Override - protected void validateProposedAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - AspectRetriever aspectRetriever) - throws AspectValidationException { - if (entityUrn.toString().contains("dataset")) { - throw new AspectValidationException("test error"); - } + protected 
Stream<AspectValidationException> validateProposedAspects( + @Nonnull Collection<? extends BatchItem> mcpItems, + @Nonnull AspectRetriever aspectRetriever) { + return mcpItems.stream().map(i -> AspectValidationException.forItem(i, "test error")); } @Override - protected void validatePreCommitAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - AspectRetriever aspectRetriever) - throws AspectValidationException {} + protected Stream<AspectValidationException> validatePreCommitAspects( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { + return Stream.empty(); + } } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java index 96e9fceb4a05d8..38ba87cfaae80c 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java @@ -1,20 +1,49 @@ package com.linkedin.metadata.aspect.validators; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import 
com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PropertyCardinality; import com.linkedin.structured.PropertyValue; import com.linkedin.structured.PropertyValueArray; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.structured.StructuredPropertyKey; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; import java.net.URISyntaxException; +import java.util.List; +import java.util.Set; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class PropertyDefinitionValidatorTest { + + private EntityRegistry entityRegistry; + private Urn testPropertyUrn; + private AspectRetriever mockAspectRetriever; + + @BeforeTest + public void init() { + entityRegistry = new TestEntityRegistry(); + testPropertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:foo.bar"); + mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + } + @Test public void testValidatePreCommitNoPrevious() throws URISyntaxException, AspectValidationException { @@ -28,7 +57,11 @@ public void testValidatePreCommitNoPrevious() newProperty.setQualifiedName("prop3"); newProperty.setCardinality(PropertyCardinality.MULTIPLE); newProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); - assertTrue(PropertyDefinitionValidator.validate(null, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, newProperty, entityRegistry), mockAspectRetriever) + .count(), + 0); } @Test @@ -46,7 +79,12 @@ public void testCanChangeSingleToMultiple() 
oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setCardinality(PropertyCardinality.MULTIPLE); - assertTrue(PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 0); } @Test @@ -64,9 +102,12 @@ public void testCannotChangeMultipleToSingle() oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setCardinality(PropertyCardinality.SINGLE); - assertThrows( - AspectValidationException.class, - () -> PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 1); } @Test @@ -83,9 +124,12 @@ public void testCannotChangeValueType() throws URISyntaxException, CloneNotSuppo oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); - assertThrows( - AspectValidationException.class, - () -> PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 1); } @Test @@ -103,7 +147,12 @@ public void testCanChangeDisplayName() oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setDisplayName("newProp"); - 
assertTrue(PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 0); } @Test @@ -121,9 +170,12 @@ public void testCannotChangeFullyQualifiedName() oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setQualifiedName("newProp"); - assertThrows( - AspectValidationException.class, - () -> PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 1); } @Test @@ -144,17 +196,23 @@ public void testCannotChangeRestrictAllowedValues() PropertyValue allowedValue = new PropertyValue().setValue(PrimitivePropertyValue.create(1.0)).setDescription("hello"); newProperty.setAllowedValues(new PropertyValueArray(allowedValue)); - assertThrows( - AspectValidationException.class, - () -> PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 1); // Remove allowed values from constraint case PropertyValue oldAllowedValue = new PropertyValue().setValue(PrimitivePropertyValue.create(3.0)).setDescription("hello"); oldProperty.setAllowedValues((new PropertyValueArray(allowedValue, oldAllowedValue))); - assertThrows( - AspectValidationException.class, - () -> PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 
1); } @Test @@ -175,13 +233,23 @@ public void testCanExpandAllowedValues() PropertyValue allowedValue = new PropertyValue().setValue(PrimitivePropertyValue.create(1.0)).setDescription("hello"); oldProperty.setAllowedValues(new PropertyValueArray(allowedValue)); - assertTrue(PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 0); // Add allowed values to constraint case PropertyValue newAllowedValue = new PropertyValue().setValue(PrimitivePropertyValue.create(3.0)).setDescription("hello"); newProperty.setAllowedValues((new PropertyValueArray(allowedValue, newAllowedValue))); - assertTrue(PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 0); } @Test @@ -207,6 +275,67 @@ public void testCanChangeAllowedValueDescriptions() .setValue(PrimitivePropertyValue.create(1.0)) .setDescription("hello there"); newProperty.setAllowedValues(new PropertyValueArray(newAllowedValue)); - assertTrue(PropertyDefinitionValidator.validate(oldProperty, newProperty)); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockAspectRetriever) + .count(), + 0); + } + + @Test + public void testHardDeleteBlock() { + PropertyDefinitionValidator test = + new PropertyDefinitionValidator( + AspectPluginConfig.builder() + .enabled(true) + .className(PropertyDefinitionValidator.class.getName()) + .supportedOperations(List.of("DELETE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + 
.aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .build(), + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName("structuredPropertyKey") + .build())) + .build()); + + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.DELETE) + .urn(UrnUtils.getUrn("urn:li:structuredProperty:foo.bar")) + .entitySpec(entityRegistry.getEntitySpec("structuredProperty")) + .aspectSpec( + entityRegistry + .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) + .getKeyAspectSpec()) + .recordTemplate(new StructuredPropertyKey()) + .build()), + mockAspectRetriever) + .count(), + 1); + + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.DELETE) + .urn(UrnUtils.getUrn("urn:li:structuredProperty:foo.bar")) + .entitySpec(entityRegistry.getEntitySpec("structuredProperty")) + .aspectSpec( + entityRegistry + .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) + .getAspectSpecMap() + .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) + .recordTemplate(new StructuredPropertyDefinition()) + .build()), + mockAspectRetriever) + .count(), + 1); } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java index 450b299b48b34f..5d63d8c8ba5e7b 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java @@ -1,12 +1,11 @@ package com.linkedin.metadata.aspect.validators; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; -import com.linkedin.entity.Aspect; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import 
com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PrimitivePropertyValueArray; import com.linkedin.structured.PropertyValue; @@ -15,42 +14,23 @@ import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.structured.StructuredPropertyValueAssignment; import com.linkedin.structured.StructuredPropertyValueAssignmentArray; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; import java.net.URISyntaxException; import java.util.List; -import java.util.Map; -import java.util.Set; -import javax.annotation.Nonnull; import org.testng.Assert; import org.testng.annotations.Test; public class StructuredPropertiesValidatorTest { - static class MockAspectRetriever implements AspectRetriever { - StructuredPropertyDefinition _propertyDefinition; - - MockAspectRetriever(StructuredPropertyDefinition defToReturn) { - this._propertyDefinition = defToReturn; - } - - @Nonnull - @Override - public Map<Urn, Map<String, Aspect>> getLatestAspectObjects( - Set<Urn> urns, Set<String> aspectNames) - throws RemoteInvocationException, URISyntaxException { - return Map.of( - urns.stream().findFirst().get(), - Map.of(aspectNames.stream().findFirst().get(), new Aspect(_propertyDefinition.data()))); - } - - @Nonnull - @Override - public EntityRegistry getEntityRegistry() { - return null; - } - } + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); @Test public void testValidateAspectNumberUpsert() throws URISyntaxException { + Urn propertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + 
StructuredPropertyDefinition numberPropertyDef = new StructuredPropertyDefinition() .setValueType(Urn.createFromString("urn:li:type:datahub.number")) @@ -61,40 +41,38 @@ public void testValidateAspectNumberUpsert() throws URISyntaxException { new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); - try { - StructuredPropertyValueAssignment assignment = - new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) - .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); - StructuredProperties numberPayload = - new StructuredProperties() - .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); - - boolean isValid = - StructuredPropertiesValidator.validate( - numberPayload, new MockAspectRetriever(numberPropertyDef)); - Assert.assertTrue(isValid); - } catch (AspectValidationException e) { - throw new RuntimeException(e); - } - - try { - StructuredPropertyValueAssignment assignment = - new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) - .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(0.0))); - StructuredProperties numberPayload = - new StructuredProperties() - .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); - - StructuredPropertiesValidator.validate( - numberPayload, new MockAspectRetriever(numberPropertyDef)); - Assert.fail("Should have raised exception for disallowed value 0.0"); - } catch (AspectValidationException e) { - Assert.assertTrue(e.getMessage().contains("{double=0.0} should be one of [{")); - } + StructuredPropertyValueAssignment assignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(propertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + 
StructuredProperties numberPayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); + + boolean isValid = + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(numberPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, numberPropertyDef)) + .count() + == 0; + Assert.assertTrue(isValid); + + assignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn( + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(0.0))); + numberPayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); + + assertEquals( + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(numberPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, numberPropertyDef)) + .count(), + 1, + "Should have raised exception for disallowed value 0.0"); // Assign string value to number property StructuredPropertyValueAssignment stringAssignment = @@ -105,17 +83,21 @@ public void testValidateAspectNumberUpsert() throws URISyntaxException { StructuredProperties stringPayload = new StructuredProperties() .setProperties(new StructuredPropertyValueAssignmentArray(stringAssignment)); - try { - StructuredPropertiesValidator.validate( - stringPayload, new MockAspectRetriever(numberPropertyDef)); - Assert.fail("Should have raised exception for mis-matched types"); - } catch (AspectValidationException e) { - Assert.assertTrue(e.getMessage().contains("should be a number")); - } + + assertEquals( + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(stringPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, numberPropertyDef)) + .count(), + 2, + "Should have raised exception for mis-matched types `string` vs `number` && `hello` is not a valid value of [90.0, 
30.0, 60.0]"); } @Test public void testValidateAspectDateUpsert() throws URISyntaxException { + Urn propertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + // Assign string value StructuredPropertyValueAssignment stringAssignment = new StructuredPropertyValueAssignment() @@ -130,41 +112,43 @@ public void testValidateAspectDateUpsert() throws URISyntaxException { StructuredPropertyDefinition datePropertyDef = new StructuredPropertyDefinition() .setValueType(Urn.createFromString("urn:li:type:datahub.date")); - try { - StructuredPropertiesValidator.validate( - stringPayload, new MockAspectRetriever(datePropertyDef)); - Assert.fail("Should have raised exception for mis-matched types"); - } catch (AspectValidationException e) { - Assert.assertTrue(e.getMessage().contains("should be a date with format")); - } + + assertEquals( + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(stringPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, datePropertyDef)) + .count(), + 1, + "Should have raised exception for mis-matched types"); // Assign valid date StructuredPropertyValueAssignment dateAssignment = new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) + .setPropertyUrn(propertyUrn) .setValues( new PrimitivePropertyValueArray(PrimitivePropertyValue.create("2023-10-24"))); StructuredProperties datePayload = new StructuredProperties() .setProperties(new StructuredPropertyValueAssignmentArray(dateAssignment)); - try { - boolean isValid = - StructuredPropertiesValidator.validate( - datePayload, new MockAspectRetriever(datePropertyDef)); - Assert.assertTrue(isValid); - } catch (AspectValidationException e) { - throw new RuntimeException(e); - } + + boolean isValid = + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(datePayload, TEST_REGISTRY), + new 
MockAspectRetriever(propertyUrn, datePropertyDef)) + .count() + == 0; + Assert.assertTrue(isValid); } @Test public void testValidateAspectStringUpsert() throws URISyntaxException { + Urn propertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + // Assign string value StructuredPropertyValueAssignment stringAssignment = new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) + .setPropertyUrn(propertyUrn) .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create("hello"))); StructuredProperties stringPayload = new StructuredProperties() @@ -173,8 +157,7 @@ public void testValidateAspectStringUpsert() throws URISyntaxException { // Assign date StructuredPropertyValueAssignment dateAssignment = new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) + .setPropertyUrn(propertyUrn) .setValues( new PrimitivePropertyValueArray(PrimitivePropertyValue.create("2023-10-24"))); StructuredProperties datePayload = @@ -184,8 +167,7 @@ public void testValidateAspectStringUpsert() throws URISyntaxException { // Assign number StructuredPropertyValueAssignment assignment = new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) + .setPropertyUrn(propertyUrn) .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); StructuredProperties numberPayload = new StructuredProperties() @@ -202,45 +184,88 @@ public void testValidateAspectStringUpsert() throws URISyntaxException { .setValue(PrimitivePropertyValue.create("2023-10-24"))))); // Valid strings (both the date value and "hello" are valid) - try { - boolean isValid = - StructuredPropertiesValidator.validate( - stringPayload, new MockAspectRetriever(stringPropertyDef)); - Assert.assertTrue(isValid); 
- isValid = - StructuredPropertiesValidator.validate( - datePayload, new MockAspectRetriever(stringPropertyDef)); - Assert.assertTrue(isValid); - } catch (AspectValidationException e) { - throw new RuntimeException(e); - } + + boolean isValid = + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(stringPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, stringPropertyDef)) + .count() + == 0; + Assert.assertTrue(isValid); + isValid = + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(datePayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, stringPropertyDef)) + .count() + == 0; + Assert.assertTrue(isValid); // Invalid: assign a number to the string property - try { - StructuredPropertiesValidator.validate( - numberPayload, new MockAspectRetriever(stringPropertyDef)); - Assert.fail("Should have raised exception for mis-matched types"); - } catch (AspectValidationException e) { - Assert.assertTrue(e.getMessage().contains("should be a string")); - } + assertEquals( + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(numberPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, stringPropertyDef)) + .count(), + 2, + "Should have raised exception for mis-matched types. 
The double 30.0 is not a `string` && not one of the allowed types `2023-10-24` or `hello`"); // Invalid allowedValue - try { - assignment = - new StructuredPropertyValueAssignment() - .setPropertyUrn( - Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime")) - .setValues( - new PrimitivePropertyValueArray(PrimitivePropertyValue.create("not hello"))); - stringPayload = - new StructuredProperties() - .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); - - StructuredPropertiesValidator.validate( - stringPayload, new MockAspectRetriever(stringPropertyDef)); - Assert.fail("Should have raised exception for disallowed value `not hello`"); - } catch (AspectValidationException e) { - Assert.assertTrue(e.getMessage().contains("{string=not hello} should be one of [{")); - } + + assignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(propertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create("not hello"))); + stringPayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); + + assertEquals( + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(stringPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, stringPropertyDef)) + .count(), + 1, + "Should have raised exception for disallowed value `not hello`"); + } + + @Test + public void testValidateSoftDeletedUpsert() throws URISyntaxException { + Urn propertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + + StructuredPropertyDefinition numberPropertyDef = + new StructuredPropertyDefinition() + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new 
PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + + StructuredPropertyValueAssignment assignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(propertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties numberPayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(assignment)); + + boolean isValid = + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(numberPayload, TEST_REGISTRY), + new MockAspectRetriever(propertyUrn, numberPropertyDef)) + .count() + == 0; + Assert.assertTrue(isValid); + + assertEquals( + StructuredPropertiesValidator.validateProposedUpserts( + TestMCP.ofOneUpsertItemDatasetUrn(numberPayload, TEST_REGISTRY), + new MockAspectRetriever( + propertyUrn, numberPropertyDef, new Status().setRemoved(true))) + .count(), + 1, + "Should have raised exception for soft deleted definition"); } } diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java new file mode 100644 index 00000000000000..b98d78bf6ff933 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -0,0 +1,71 @@ +package com.linkedin.test.metadata.aspect; + +import static org.mockito.Mockito.mock; + +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.List; 
+import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class MockAspectRetriever implements AspectRetriever { + private final Map<Urn, Map<String, Aspect>> data; + + public MockAspectRetriever(@Nonnull Map<Urn, List<RecordTemplate>> data) { + this.data = + new HashMap<>( + data.entrySet().stream() + .map( + entry -> + Pair.of( + entry.getKey(), + entry.getValue().stream() + .map( + rt -> { + String aspectName = + ((DataMap) rt.schema().getProperties().get("Aspect")) + .get("name") + .toString(); + return Pair.of(aspectName, new Aspect(rt.data())); + }) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue))); + } + + public MockAspectRetriever( + Urn propertyUrn, StructuredPropertyDefinition definition, Status status) { + this(Map.of(propertyUrn, List.of(definition, status))); + } + + public MockAspectRetriever(Urn propertyUrn, StructuredPropertyDefinition definition) { + this(Map.of(propertyUrn, List.of(definition))); + } + + @Nonnull + @Override + public Map<Urn, Map<String, Aspect>> getLatestAspectObjects( + Set<Urn> urns, Set<String> aspectNames) throws RemoteInvocationException, URISyntaxException { + return urns.stream() + .filter(data::containsKey) + .map(urn -> Pair.of(urn, data.get(urn))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + + @Nonnull + @Override + public EntityRegistry getEntityRegistry() { + return mock(EntityRegistry.class); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java new file mode 100644 index 00000000000000..cad3b8c730e4bd --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java @@ -0,0 +1,31 @@ +package com.linkedin.test.metadata.aspect; + +import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import java.util.Map; + +public class TestEntityRegistry extends ConfigEntityRegistry { + + static { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + public TestEntityRegistry() { + super(TestEntityRegistry.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + } + + public static <T extends RecordTemplate> String getAspectName(T aspect) { + Map<String, Object> schemaProps = aspect.schema().getProperties(); + if (schemaProps != null && schemaProps.containsKey("Aspect")) { + Object aspectProps = schemaProps.get("Aspect"); + if (aspectProps instanceof Map aspectMap) { + return (String) aspectMap.get("name"); + } + } + + throw new IllegalStateException("Cannot determine aspect name"); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java new file mode 100644 index 00000000000000..20d01dc55934a6 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java @@ -0,0 +1,128 @@ +package com.linkedin.test.metadata.aspect.batch; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.ReadItem; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.models.AspectSpec; 
+import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import java.net.URISyntaxException; +import java.util.Collection; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; + +@Builder +@Getter +public class TestMCP implements ChangeMCP { + private static final String TEST_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:datahub,Test,PROD)"; + + public static <T extends RecordTemplate> Collection<ReadItem> ofOneBatchItem( + Urn urn, T aspect, EntityRegistry entityRegistry) { + return Set.of( + TestMCP.builder() + .urn(urn) + .entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())) + .aspectSpec( + entityRegistry.getAspectSpecs().get(TestEntityRegistry.getAspectName(aspect))) + .recordTemplate(aspect) + .build()); + } + + public static <T extends RecordTemplate> Collection<ReadItem> ofOneBatchItemDatasetUrn( + T aspect, EntityRegistry entityRegistry) { + try { + return ofOneBatchItem(Urn.createFromString(TEST_DATASET_URN), aspect, entityRegistry); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + public static <T extends RecordTemplate> Set<BatchItem> ofOneUpsertItem( + Urn urn, T aspect, EntityRegistry entityRegistry) { + return Set.of( + TestMCP.builder() + .urn(urn) + .entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())) + .aspectSpec( + entityRegistry.getAspectSpecs().get(TestEntityRegistry.getAspectName(aspect))) + .recordTemplate(aspect) + .build()); + } + + public static <T extends RecordTemplate> Set<BatchItem> ofOneUpsertItemDatasetUrn( + T aspect, EntityRegistry entityRegistry) { + try { + return ofOneUpsertItem(Urn.createFromString(TEST_DATASET_URN), aspect, entityRegistry); + } catch 
(URISyntaxException e) { + throw new RuntimeException(e); + } + } + + public static <T extends RecordTemplate> Set<ChangeMCP> ofOneMCP( + Urn urn, T newAspect, EntityRegistry entityRegistry) { + return ofOneMCP(urn, null, newAspect, entityRegistry); + } + + public static <T extends RecordTemplate> Set<ChangeMCP> ofOneMCP( + Urn urn, @Nullable T oldAspect, T newAspect, EntityRegistry entityRegistry) { + + SystemAspect mockNewSystemAspect = mock(SystemAspect.class); + when(mockNewSystemAspect.getRecordTemplate()).thenReturn(newAspect); + when(mockNewSystemAspect.getAspect(any(Class.class))) + .thenAnswer(args -> ReadItem.getAspect(args.getArgument(0), newAspect)); + + SystemAspect mockOldSystemAspect = null; + if (oldAspect != null) { + mockOldSystemAspect = mock(SystemAspect.class); + when(mockOldSystemAspect.getRecordTemplate()).thenReturn(oldAspect); + when(mockOldSystemAspect.getAspect(any(Class.class))) + .thenAnswer(args -> ReadItem.getAspect(args.getArgument(0), oldAspect)); + } + + return Set.of( + TestMCP.builder() + .urn(urn) + .entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())) + .aspectSpec( + entityRegistry.getAspectSpecs().get(TestEntityRegistry.getAspectName(newAspect))) + .recordTemplate(newAspect) + .systemAspect(mockNewSystemAspect) + .previousSystemAspect(mockOldSystemAspect) + .build()); + } + + private Urn urn; + private RecordTemplate recordTemplate; + private SystemMetadata systemMetadata; + private AuditStamp auditStamp; + private ChangeType changeType; + @Nonnull private final EntitySpec entitySpec; + @Nonnull private final AspectSpec aspectSpec; + private SystemAspect systemAspect; + private MetadataChangeProposal metadataChangeProposal; + @Setter private SystemAspect previousSystemAspect; + @Setter private long nextAspectVersion; + + @Nonnull + @Override + public SystemAspect getSystemAspect(@Nullable Long nextAspectVersion) { + return null; + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java 
b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 47db240d51a5ba..94be2f288521cf 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -16,6 +16,8 @@ public class Constants { public static final String ENTITY_TYPE_URN_PREFIX = "urn:li:entityType:"; public static final String DATA_TYPE_URN_PREFIX = "urn:li:dataType:"; public static final String STRUCTURED_PROPERTY_MAPPING_FIELD = "structuredProperties"; + public static final String STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX = + STRUCTURED_PROPERTY_MAPPING_FIELD + "."; // !!!!!!! IMPORTANT !!!!!!! // This effectively sets the max aspect size to 16 MB. Used in deserialization of messages. diff --git a/metadata-ingestion/docs/sources/trino/trino_recipe.yml b/metadata-ingestion/docs/sources/trino/trino_recipe.yml index 70e2afa81b972d..06158669143a9c 100644 --- a/metadata-ingestion/docs/sources/trino/trino_recipe.yml +++ b/metadata-ingestion/docs/sources/trino/trino_recipe.yml @@ -13,6 +13,15 @@ source: # options: # connect_args: # http_scheme: http + + # Optional -- A mapping of trino catalog to its connector details like connector database, env and platform instance. + # This configuration is used to ingest lineage of datasets to connectors. Use catalog name as key. 
+ # catalog_to_connector_details: + # catalog_name: + # connector_database: db_name + # connector_platform: connector_platform_name + # platform_instance: cloud_instance + # env: DEV sink: # sink configs diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index 610c6d3107916e..ef6ce765c23edd 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -503,8 +503,8 @@ class EntityAspectName: class AspectPluginConfig: className: str enabled: bool - supportedOperations: List[str] supportedEntityAspectNames: List[EntityAspectName] + supportedOperations: Optional[List[str]] = None @dataclass diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index db7b071382be53..6f0e4d8fb6f37a 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -94,7 +94,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==20.4.1.dev14", + "acryl-sqlglot==21.1.2.dev10", } sql_common = ( @@ -245,9 +245,7 @@ powerbi_report_server = {"requests", "requests_ntlm"} -slack = { - "slack-sdk==3.18.1" -} +slack = {"slack-sdk==3.18.1"} databricks = { # 0.1.11 appears to have authentication issues with azure databricks @@ -421,7 +419,6 @@ "types-toml", "types-PyMySQL", "types-PyYAML", - "types-freezegun", "types-cachetools", # versions 0.1.13 and 0.1.14 seem to have issues "types-click==0.1.12", diff --git a/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py index 1c55651f4ff941..5601d7e716c797 100644 --- a/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py @@ -93,13 +93,21 @@ def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None: def _emit_sibling( graph: DataHubGraph, primary_urn: str, urn: str, all_urns: Set[str] ) -> None: - 
siblings = [] + siblings = _get_existing_siblings(graph, urn) for sibling_urn in all_urns: if sibling_urn != urn: - siblings.append(sibling_urn) + siblings.add(sibling_urn) graph.emit( MetadataChangeProposalWrapper( entityUrn=urn, aspect=Siblings(primary=primary_urn == urn, siblings=sorted(siblings)), ) ) + + +def _get_existing_siblings(graph: DataHubGraph, urn: str) -> Set[str]: + existing = graph.get_aspect(urn, Siblings) + if existing: + return set(existing.siblings) + else: + return set() diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 0030332bcfd541..60e6bd9090be1e 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -309,8 +309,3 @@ class LineageConfig(ConfigModel): default=False, description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", ) - - sql_parser_use_external_process: bool = Field( - default=False, - description="When enabled, sql parser will run in isolated in a separate process. This can affect processing time but can protect from sql parser's mem leak.", - ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index e30e16e774cd5a..f4afd441ba3ef0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -419,11 +419,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: key=dataset_urn, reason=f"Downsampling the collection schema because it has {collection_schema_size} fields. 
Threshold is {max_schema_size}", ) - collection_fields = sorted( - collection_schema.values(), - key=lambda x: (x["count"], x["delimited_name"]), - reverse=True, - )[0:max_schema_size] # Add this information to the custom properties so user can know they are looking at downsampled schema dataset_properties.customProperties[ "schema.downsampled" @@ -437,8 +432,12 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) # append each schema field (sort so output is consistent) for schema_field in sorted( - collection_fields, key=lambda x: x["delimited_name"] - ): + collection_fields, + key=lambda x: ( + -x["count"], + x["delimited_name"], + ), # Negate `count` for descending order, `delimited_name` stays the same for ascending + )[0:max_schema_size]: field = SchemaField( fieldPath=schema_field["delimited_name"], nativeDataType=self.get_pymongo_type_string( diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index 5b196782cbad22..f7b8bb09724a16 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -39,6 +39,7 @@ ChartTypeClass, DashboardInfoClass, ) +from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.sql_parser import SQLParser @@ -282,7 +283,7 @@ class RedashConfig(ConfigModel): @dataclass class RedashSourceReport(SourceReport): items_scanned: int = 0 - filtered: List[str] = field(default_factory=list) + filtered: LossyList[str] = field(default_factory=LossyList) queries_problem_parsing: Set[str] = field(default_factory=set) queries_no_dataset: Set[str] = field(default_factory=set) charts_no_input: Set[str] = field(default_factory=set) @@ -295,7 +296,7 @@ class RedashSourceReport(SourceReport): ) max_page_dashboards: Optional[int] = field(default=None) api_page_limit: Optional[float] = field(default=None) 
- timing: Dict[str, int] = field(default_factory=dict) + timing: LossyDict[str, int] = field(default_factory=LossyDict) def report_item_scanned(self) -> None: self.items_scanned += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index fe66ef006ec692..5220ee32595bb8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -84,7 +84,7 @@ class RedshiftConfig( scheme: str = Field( default="redshift+redshift_connector", description="", - hidden_from_schema=True, + hidden_from_docs=True, ) _database_alias_removed = pydantic_removed_field("database_alias") @@ -94,6 +94,11 @@ class RedshiftConfig( description="The default schema to use if the sql parser fails to parse the schema with `sql_based` lineage collector", ) + use_lineage_v2: bool = Field( + default=False, + description="Whether to use the new SQL-based lineage and usage collector.", + ) + include_table_lineage: bool = Field( default=True, description="Whether table lineage should be ingested." ) @@ -113,11 +118,11 @@ class RedshiftConfig( ) include_table_rename_lineage: bool = Field( - default=False, + default=True, description="Whether we should follow `alter table ... rename to` statements when computing lineage. ", ) - table_lineage_mode: Optional[LineageMode] = Field( - default=LineageMode.STL_SCAN_BASED, + table_lineage_mode: LineageMode = Field( + default=LineageMode.MIXED, description="Which table lineage collector mode to use. 
Available modes are: [stl_scan_based, sql_based, mixed]", ) extra_client_options: Dict[str, Any] = {} @@ -138,7 +143,7 @@ class RedshiftConfig( ) resolve_temp_table_in_lineage: bool = Field( - default=False, + default=True, description="Whether to resolve temp table appear in lineage to upstream permanent tables.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index 7769bac30d599c..b73d6f030d3233 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -766,7 +766,7 @@ def get_lineage( table: Union[RedshiftTable, RedshiftView], dataset_urn: str, schema: RedshiftSchema, - ) -> Optional[Tuple[UpstreamLineageClass, Dict[str, str]]]: + ) -> Optional[UpstreamLineageClass]: upstream_lineage: List[UpstreamClass] = [] cll_lineage: List[FineGrainedLineage] = [] @@ -811,11 +811,9 @@ def get_lineage( else: return None - return ( - UpstreamLineage( - upstreams=upstream_lineage, fineGrainedLineages=cll_lineage or None - ), - {}, + return UpstreamLineage( + upstreams=upstream_lineage, + fineGrainedLineages=cll_lineage or None, ) def report_status(self, step: str, status: bool) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py new file mode 100644 index 00000000000000..3fbba909b25e6c --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py @@ -0,0 +1,387 @@ +import logging +import traceback +from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union + +import redshift_connector + +from datahub.emitter import mce_builder +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.redshift.config import LineageMode, 
RedshiftConfig +from datahub.ingestion.source.redshift.lineage import ( + LineageCollectorType, + RedshiftLineageExtractor, +) +from datahub.ingestion.source.redshift.query import RedshiftQuery +from datahub.ingestion.source.redshift.redshift_schema import ( + LineageRow, + RedshiftDataDictionary, + RedshiftSchema, + RedshiftTable, + RedshiftView, +) +from datahub.ingestion.source.redshift.report import RedshiftReport +from datahub.ingestion.source.state.redundant_run_skip_handler import ( + RedundantLineageRunSkipHandler, +) +from datahub.metadata.urns import DatasetUrn +from datahub.sql_parsing.sql_parsing_aggregator import ( + KnownQueryLineageInfo, + SqlParsingAggregator, +) + +logger = logging.getLogger(__name__) + + +class RedshiftSqlLineageV2: + # does lineage and usage based on SQL parsing. + + def __init__( + self, + config: RedshiftConfig, + report: RedshiftReport, + context: PipelineContext, + database: str, + redundant_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = None, + ): + self.platform = "redshift" + self.config = config + self.report = report + self.context = context + + self.database = database + self.aggregator = SqlParsingAggregator( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + generate_lineage=True, + generate_queries=True, + generate_usage_statistics=True, + generate_operations=True, + usage_config=self.config, + graph=self.context.graph, + ) + self.report.sql_aggregator = self.aggregator.report + + self._lineage_v1 = RedshiftLineageExtractor( + config=config, + report=report, + context=context, + redundant_run_skip_handler=redundant_run_skip_handler, + ) + + self.start_time, self.end_time = ( + self.report.lineage_start_time, + self.report.lineage_end_time, + ) = self._lineage_v1.get_time_window() + + self.known_urns: Set[str] = set() # will be set later + + def build( + self, + connection: redshift_connector.Connection, + all_tables: Dict[str, Dict[str, 
List[Union[RedshiftView, RedshiftTable]]]], + db_schemas: Dict[str, Dict[str, RedshiftSchema]], + ) -> None: + # Assume things not in `all_tables` as temp tables. + self.known_urns = set( + DatasetUrn.create_from_ids( + self.platform, + f"{db}.{schema}.{table.name}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ).urn() + for db, schemas in all_tables.items() + for schema, tables in schemas.items() + for table in tables + ) + self.aggregator.is_temp_table = lambda urn: urn not in self.known_urns + + # Handle all the temp tables up front. + if self.config.resolve_temp_table_in_lineage: + for temp_row in self._lineage_v1.get_temp_tables(connection=connection): + self.aggregator.add_observed_query( + query=temp_row.query_text, + default_db=self.database, + default_schema=self.config.default_schema, + session_id=temp_row.session_id, + query_timestamp=temp_row.start_time, + is_known_temp_table=True, + ) + + populate_calls: List[Tuple[LineageCollectorType, str, Callable]] = [] + + if self.config.include_table_rename_lineage: + # Process all the ALTER TABLE RENAME statements + table_renames, _ = self._lineage_v1._process_table_renames( + database=self.database, + connection=connection, + all_tables={}, + ) + for new_urn, original_urn in table_renames.items(): + self.aggregator.add_table_rename( + original_urn=original_urn, new_urn=new_urn + ) + + if self.config.table_lineage_mode in { + LineageMode.SQL_BASED, + LineageMode.MIXED, + }: + # Populate lineage by parsing table creating sqls + query = RedshiftQuery.list_insert_create_queries_sql( + db_name=self.database, + start_time=self.start_time, + end_time=self.end_time, + ) + populate_calls.append( + ( + LineageCollectorType.QUERY_SQL_PARSER, + query, + self._process_sql_parser_lineage, + ) + ) + if self.config.table_lineage_mode in { + LineageMode.STL_SCAN_BASED, + LineageMode.MIXED, + }: + # Populate lineage by getting upstream tables from stl_scan redshift table + query = 
RedshiftQuery.stl_scan_based_lineage_query( + self.database, + self.start_time, + self.end_time, + ) + populate_calls.append( + (LineageCollectorType.QUERY_SCAN, query, self._process_stl_scan_lineage) + ) + + if self.config.include_views and self.config.include_view_lineage: + # Populate lineage for views + query = RedshiftQuery.view_lineage_query() + populate_calls.append( + (LineageCollectorType.VIEW, query, self._process_view_lineage) + ) + + # Populate lineage for late binding views + query = RedshiftQuery.list_late_view_ddls_query() + populate_calls.append( + ( + LineageCollectorType.VIEW_DDL_SQL_PARSING, + query, + self._process_view_lineage, + ) + ) + + if self.config.include_copy_lineage: + # Populate lineage for copy commands. + query = RedshiftQuery.list_copy_commands_sql( + db_name=self.database, + start_time=self.start_time, + end_time=self.end_time, + ) + populate_calls.append( + (LineageCollectorType.COPY, query, self._process_copy_command) + ) + + if self.config.include_unload_lineage: + # Populate lineage for unload commands. + query = RedshiftQuery.list_unload_commands_sql( + db_name=self.database, + start_time=self.start_time, + end_time=self.end_time, + ) + populate_calls.append( + (LineageCollectorType.UNLOAD, query, self._process_unload_command) + ) + + for lineage_type, query, processor in populate_calls: + self._populate_lineage_agg( + query=query, + lineage_type=lineage_type, + processor=processor, + connection=connection, + ) + + # Populate lineage for external tables. 
+ self._process_external_tables(all_tables=all_tables, db_schemas=db_schemas) + + def _populate_lineage_agg( + self, + query: str, + lineage_type: LineageCollectorType, + processor: Callable[[LineageRow], None], + connection: redshift_connector.Connection, + ) -> None: + logger.info(f"Extracting {lineage_type.name} lineage for db {self.database}") + try: + logger.debug(f"Processing {lineage_type.name} lineage query: {query}") + + for lineage_row in RedshiftDataDictionary.get_lineage_rows( + conn=connection, query=query + ): + processor(lineage_row) + except Exception as e: + self.report.warning( + f"extract-{lineage_type.name}", + f"Error was {e}, {traceback.format_exc()}", + ) + self._lineage_v1.report_status(f"extract-{lineage_type.name}", False) + + def _process_sql_parser_lineage(self, lineage_row: LineageRow) -> None: + ddl = lineage_row.ddl + if ddl is None: + return + + # TODO actor + + self.aggregator.add_observed_query( + query=ddl, + default_db=self.database, + default_schema=self.config.default_schema, + query_timestamp=lineage_row.timestamp, + session_id=lineage_row.session_id, + ) + + def _make_filtered_target(self, lineage_row: LineageRow) -> Optional[DatasetUrn]: + target = DatasetUrn.create_from_ids( + self.platform, + f"{self.database}.{lineage_row.target_schema}.{lineage_row.target_table}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + if target.urn() not in self.known_urns: + logger.debug( + f"Skipping lineage for {target.urn()} as it is not in known_urns" + ) + return None + + return target + + def _process_stl_scan_lineage(self, lineage_row: LineageRow) -> None: + target = self._make_filtered_target(lineage_row) + if not target: + return + + source = DatasetUrn.create_from_ids( + self.platform, + f"{self.database}.{lineage_row.source_schema}.{lineage_row.source_table}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + + assert lineage_row.ddl, "stl scan entry is missing query 
text" + self.aggregator.add_known_query_lineage( + KnownQueryLineageInfo( + query_text=lineage_row.ddl, + downstream=target.urn(), + upstreams=[source.urn()], + timestamp=lineage_row.timestamp, + ), + merge_lineage=True, + ) + + def _process_view_lineage(self, lineage_row: LineageRow) -> None: + ddl = lineage_row.ddl + if ddl is None: + return + + target = self._make_filtered_target(lineage_row) + if not target: + return + + self.aggregator.add_view_definition( + view_urn=target, + view_definition=ddl, + default_db=self.database, + default_schema=self.config.default_schema, + ) + + def _process_copy_command(self, lineage_row: LineageRow) -> None: + source = self._lineage_v1._get_sources( + lineage_type=LineageCollectorType.COPY, + db_name=self.database, + source_schema=None, + source_table=None, + ddl=None, + filename=lineage_row.filename, + )[0] + if not source: + return + s3_urn = source[0].urn + + if not lineage_row.target_schema or not lineage_row.target_table: + return + target = self._make_filtered_target(lineage_row) + if not target: + return + + self.aggregator.add_known_lineage_mapping( + upstream_urn=s3_urn, downstream_urn=target.urn() + ) + + def _process_unload_command(self, lineage_row: LineageRow) -> None: + lineage_entry = self._lineage_v1._get_target_lineage( + alias_db_name=self.database, + lineage_row=lineage_row, + lineage_type=LineageCollectorType.UNLOAD, + all_tables_set={}, + ) + if not lineage_entry: + return + output_urn = lineage_entry.dataset.urn + + if not lineage_row.source_schema or not lineage_row.source_table: + return + source = DatasetUrn.create_from_ids( + self.platform, + f"{self.database}.{lineage_row.source_schema}.{lineage_row.source_table}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + if source.urn() not in self.known_urns: + logger.debug( + f"Skipping unload lineage for {source.urn()} as it is not in known_urns" + ) + return + + self.aggregator.add_known_lineage_mapping( + 
upstream_urn=source.urn(), downstream_urn=output_urn + ) + + def _process_external_tables( + self, + all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]], + db_schemas: Dict[str, Dict[str, RedshiftSchema]], + ) -> None: + for schema_name, tables in all_tables[self.database].items(): + for table in tables: + if table.type == "EXTERNAL_TABLE": + schema = db_schemas[self.database][schema_name] + + # external_db_params = schema.option + upstream_platform = schema.type.lower() + + table_urn = mce_builder.make_dataset_urn_with_platform_instance( + self.platform, + f"{self.database}.{schema_name}.{table.name}", + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + upstream_urn = mce_builder.make_dataset_urn_with_platform_instance( + upstream_platform, + f"{schema.external_database}.{table.name}", + platform_instance=( + self.config.platform_instance_map.get(upstream_platform) + if self.config.platform_instance_map + else None + ), + env=self.config.env, + ) + + self.aggregator.add_known_lineage_mapping( + upstream_urn=upstream_urn, + downstream_urn=table_urn, + ) + + def generate(self) -> Iterable[MetadataWorkUnit]: + for mcp in self.aggregator.gen_metadata(): + yield mcp.as_workunit() diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index 93beb5980ea624..fd664f96471fbe 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -225,7 +225,9 @@ def stl_scan_based_lineage_query( target_table, username as username, source_schema, - source_table + source_table, + querytxt as query, -- TODO: this querytxt is truncated to 4000 characters + starttime as timestamp from ( select @@ -250,7 +252,8 @@ def stl_scan_based_lineage_query( sti.schema as source_schema, sti.table as source_table, scan_type, - sq.query as query + sq.query as query, + 
sq.querytxt as querytxt from ( select @@ -358,12 +361,12 @@ def list_unload_commands_sql( join stl_scan sc on sc.query = unl.query and sc.starttime >= '{start_time}' and - sc.endtime < '{end_time}' + sc.starttime < '{end_time}' join SVV_TABLE_INFO sti on sti.table_id = sc.tbl where unl.start_time >= '{start_time}' and - unl.end_time < '{end_time}' and + unl.start_time < '{end_time}' and sti.database = '{db_name}' and sc.type in (1, 2, 3) order by cluster, source_schema, source_table, filename, unl.start_time asc @@ -385,7 +388,9 @@ def list_insert_create_queries_sql( target_table, username, query as query_id, - LISTAGG(CASE WHEN LEN(RTRIM(querytxt)) = 0 THEN querytxt ELSE RTRIM(querytxt) END) WITHIN GROUP (ORDER BY sequence) as ddl + LISTAGG(CASE WHEN LEN(RTRIM(querytxt)) = 0 THEN querytxt ELSE RTRIM(querytxt) END) WITHIN GROUP (ORDER BY sequence) as ddl, + ANY_VALUE(pid) as session_id, + starttime as timestamp from ( select @@ -397,7 +402,8 @@ def list_insert_create_queries_sql( text as querytxt, sq.query, sequence, - si.starttime as starttime + si.starttime as starttime, + pid from stl_insert as si join SVV_TABLE_INFO sti on diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index d3b759c985233e..43696da7901e9c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -1,3 +1,4 @@ +import itertools import logging from collections import defaultdict from functools import partial @@ -39,6 +40,7 @@ ) from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor +from datahub.ingestion.source.redshift.lineage_v2 import RedshiftSqlLineageV2 from datahub.ingestion.source.redshift.profile import RedshiftProfiler from datahub.ingestion.source.redshift.redshift_schema import ( 
RedshiftColumn, @@ -98,6 +100,7 @@ ) from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass from datahub.utilities import memory_footprint +from datahub.utilities.dedup_list import deduplicate_list from datahub.utilities.mapping import Constants from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.registries.domain_registry import DomainRegistry @@ -305,7 +308,6 @@ def get_report(self) -> RedshiftReport: def __init__(self, config: RedshiftConfig, ctx: PipelineContext): super().__init__(config, ctx) - self.lineage_extractor: Optional[RedshiftLineageExtractor] = None self.catalog_metadata: Dict = {} self.config: RedshiftConfig = config self.report: RedshiftReport = RedshiftReport() @@ -413,20 +415,45 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit memory_footprint.total_size(self.db_views) ) - yield from self.process_schemas(connection, database) + if self.config.use_lineage_v2: + lineage_extractor = RedshiftSqlLineageV2( + config=self.config, + report=self.report, + context=self.ctx, + database=database, + redundant_run_skip_handler=self.redundant_lineage_run_skip_handler, + ) - all_tables = self.get_all_tables() + yield from lineage_extractor.aggregator.register_schemas_from_stream( + self.process_schemas(connection, database) + ) - if self.config.include_table_lineage or self.config.include_copy_lineage: self.report.report_ingestion_stage_start(LINEAGE_EXTRACTION) - yield from self.extract_lineage( - connection=connection, all_tables=all_tables, database=database + yield from self.extract_lineage_usage_v2( + connection=connection, + database=database, + lineage_extractor=lineage_extractor, ) - if self.config.include_usage_statistics: - yield from self.extract_usage( - connection=connection, all_tables=all_tables, database=database - ) + else: + yield from self.process_schemas(connection, database) + + all_tables = self.get_all_tables() + + if ( + self.config.include_table_lineage + 
or self.config.include_view_lineage + or self.config.include_copy_lineage + ): + self.report.report_ingestion_stage_start(LINEAGE_EXTRACTION) + yield from self.extract_lineage( + connection=connection, all_tables=all_tables, database=database + ) + + if self.config.include_usage_statistics: + yield from self.extract_usage( + connection=connection, all_tables=all_tables, database=database + ) if self.config.is_profiling_enabled(): self.report.report_ingestion_stage_start(PROFILING) @@ -899,7 +926,7 @@ def extract_lineage( if not self._should_ingest_lineage(): return - self.lineage_extractor = RedshiftLineageExtractor( + lineage_extractor = RedshiftLineageExtractor( config=self.config, report=self.report, context=self.ctx, @@ -907,14 +934,16 @@ def extract_lineage( ) with PerfTimer() as timer: - self.lineage_extractor.populate_lineage( + lineage_extractor.populate_lineage( database=database, connection=connection, all_tables=all_tables ) self.report.lineage_extraction_sec[f"{database}"] = round( timer.elapsed_seconds(), 2 ) - yield from self.generate_lineage(database) + yield from self.generate_lineage( + database, lineage_extractor=lineage_extractor + ) if self.redundant_lineage_run_skip_handler: # Update the checkpoint state for this run. @@ -922,6 +951,34 @@ def extract_lineage( self.config.start_time, self.config.end_time ) + def extract_lineage_usage_v2( + self, + connection: redshift_connector.Connection, + database: str, + lineage_extractor: RedshiftSqlLineageV2, + ) -> Iterable[MetadataWorkUnit]: + if not self._should_ingest_lineage(): + return + + with PerfTimer() as timer: + all_tables = self.get_all_tables() + + lineage_extractor.build( + connection=connection, all_tables=all_tables, db_schemas=self.db_schemas + ) + + yield from lineage_extractor.generate() + + self.report.lineage_extraction_sec[f"{database}"] = round( + timer.elapsed_seconds(), 2 + ) + + if self.redundant_lineage_run_skip_handler: + # Update the checkpoint state for this run. 
+ self.redundant_lineage_run_skip_handler.update_state( + lineage_extractor.start_time, lineage_extractor.end_time + ) + def _should_ingest_lineage(self) -> bool: if ( self.redundant_lineage_run_skip_handler @@ -939,48 +996,40 @@ def _should_ingest_lineage(self) -> bool: return True - def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: - assert self.lineage_extractor - + def generate_lineage( + self, database: str, lineage_extractor: RedshiftLineageExtractor + ) -> Iterable[MetadataWorkUnit]: logger.info(f"Generate lineage for {database}") - for schema in self.db_tables[database]: - for table in self.db_tables[database][schema]: - if ( - database not in self.db_schemas - or schema not in self.db_schemas[database] - ): - logger.warning( - f"Either database {database} or {schema} exists in the lineage but was not discovered earlier. Something went wrong." - ) - continue - datahub_dataset_name = f"{database}.{schema}.{table.name}" - dataset_urn = self.gen_dataset_urn(datahub_dataset_name) - - lineage_info = self.lineage_extractor.get_lineage( - table, - dataset_urn, - self.db_schemas[database][schema], + for schema in deduplicate_list( + itertools.chain(self.db_tables[database], self.db_views[database]) + ): + if ( + database not in self.db_schemas + or schema not in self.db_schemas[database] + ): + logger.warning( + f"Either database {database} or {schema} exists in the lineage but was not discovered earlier. Something went wrong." 
) - if lineage_info: - yield from gen_lineage( - dataset_urn, - lineage_info, - incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage - ) + continue - for schema in self.db_views[database]: - for view in self.db_views[database][schema]: - datahub_dataset_name = f"{database}.{schema}.{view.name}" + table_or_view: Union[RedshiftTable, RedshiftView] + for table_or_view in ( + [] + + self.db_tables[database].get(schema, []) + + self.db_views[database].get(schema, []) + ): + datahub_dataset_name = f"{database}.{schema}.{table_or_view.name}" dataset_urn = self.gen_dataset_urn(datahub_dataset_name) - lineage_info = self.lineage_extractor.get_lineage( - view, + + lineage_info = lineage_extractor.get_lineage( + table_or_view, dataset_urn, self.db_schemas[database][schema], ) if lineage_info: yield from gen_lineage( dataset_urn, - lineage_info, + (lineage_info, {}), incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py index db8ff4dda8665b..cdc745a39a4835 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py @@ -79,6 +79,8 @@ class LineageRow: target_table: Optional[str] ddl: Optional[str] filename: Optional[str] + timestamp: Optional[datetime] + session_id: Optional[str] @dataclass @@ -400,6 +402,17 @@ def get_lineage_rows( if "filename" in field_names else None ), + timestamp=( + row[field_names.index("timestamp")] + if "timestamp" in field_names + else None + ), + session_id=( + str(row[field_names.index("session_id")]) + if "session_id" in field_names + and row[field_names.index("session_id")] + else None + ), ) rows = cursor.fetchmany() diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index 36ac7955f15d51..6c2a12498f2c0d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -5,6 +5,7 @@ from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport from datahub.utilities.lossy_collections import LossyDict from datahub.utilities.stats_collections import TopKDict @@ -46,5 +47,8 @@ class RedshiftReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowRep stateful_usage_ingestion_enabled: bool = False num_unresolved_temp_columns: int = 0 + # lineage/usage v2 + sql_aggregator: Optional[SqlAggregatorReport] = None + def report_dropped(self, key: str) -> None: self.filtered.append(key) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 724e4392f1d612..93e29f3fb99bb3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -588,12 +588,15 @@ def usage_per_object_per_time_bucket_for_time_window( query_id, query_start_time, user_name, - NVL(USERS.email, CONCAT(user_name, '{email_domain}')) AS user_email, + -- Construct the email in the query, should match the Python behavior. + -- The user_email is only used by the email_filter_query. 
+ NVL(USERS.email, CONCAT(LOWER(user_name), '{email_domain}')) AS user_email, {objects_column} from snowflake.account_usage.access_history LEFT JOIN snowflake.account_usage.users USERS + ON user_name = users.name WHERE query_start_time >= to_timestamp_ltz({start_time_millis}, 3) AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index cb2e05765bfff6..cf199237e3041c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -1,7 +1,9 @@ +import functools import json +import logging import uuid from textwrap import dedent -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Union import sqlalchemy import trino @@ -9,13 +11,19 @@ from pydantic.fields import Field from sqlalchemy import exc, sql from sqlalchemy.engine import reflection +from sqlalchemy.engine.base import Engine from sqlalchemy.engine.reflection import Inspector from sqlalchemy.sql import sqltypes from sqlalchemy.types import TypeEngine -from trino.exceptions import TrinoQueryError from trino.sqlalchemy import datatype from trino.sqlalchemy.dialect import TrinoDialect +from datahub.configuration.source_common import ( + EnvConfigMixin, + PlatformInstanceConfigMixin, +) +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -25,12 +33,23 @@ platform_name, support_status, ) +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.extractor import schema_util from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, + SqlWorkUnit, register_custom_type, ) -from 
datahub.ingestion.source.sql.sql_config import BasicSQLAlchemyConfig +from datahub.ingestion.source.sql.sql_config import ( + BasicSQLAlchemyConfig, + SQLCommonConfig, +) +from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + DatasetLineageType, + Upstream, + UpstreamLineage, +) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( MapTypeClass, NumberTypeClass, @@ -42,11 +61,46 @@ register_custom_type(datatype.MAP, MapTypeClass) register_custom_type(datatype.DOUBLE, NumberTypeClass) + +KNOWN_CONNECTOR_PLATFORM_MAPPING = { + "clickhouse": "clickhouse", + "hive": "hive", + "glue": "glue", + "iceberg": "iceberg", + "mysql": "mysql", + "postgresql": "postgres", + "redshift": "redshift", + "bigquery": "bigquery", + "snowflake_distributed": "snowflake", + "snowflake_parallel": "snowflake", + "snowflake_jdbc": "snowflake", +} + +TWO_TIER_CONNECTORS = ["clickhouse", "hive", "glue", "mysql", "iceberg"] + +PROPERTIES_TABLE_SUPPORTED_CONNECTORS = ["hive", "iceberg"] + # Type JSON was introduced in trino sqlalchemy dialect in version 0.317.0 if version.parse(trino.__version__) >= version.parse("0.317.0"): register_custom_type(datatype.JSON, RecordTypeClass) +@functools.lru_cache() +def gen_catalog_connector_dict(engine: Engine) -> Dict[str, str]: + query = dedent( + """ + SELECT * + FROM "system"."metadata"."catalogs" + """ + ).strip() + res = engine.execute(sql.text(query)) + return {row.catalog_name: row.connector_name for row in res} + + +def get_catalog_connector_name(engine: Engine, catalog_name: str) -> Optional[str]: + return gen_catalog_connector_dict(engine).get(catalog_name) + + # Read only table names and skip view names, as view names will also be returned # from get_view_names @reflection.cache # type: ignore @@ -69,26 +123,27 @@ def get_table_names(self, connection, schema: str = None, **kw): # type: ignore @reflection.cache # type: ignore def get_table_comment(self, 
connection, table_name: str, schema: str = None, **kw): # type: ignore try: - properties_table = self._get_full_table(f"{table_name}$properties", schema) - query = f"SELECT * FROM {properties_table}" - row = connection.execute(sql.text(query)).fetchone() - - # Generate properties dictionary. - properties = {} - if row: - for col_name, col_value in row.items(): - if col_value is not None: - properties[col_name] = col_value - - return {"text": properties.get("comment", None), "properties": properties} - # Fallback to default trino-sqlalchemy behaviour if `$properties` table doesn't exist - except TrinoQueryError: - return self.get_table_comment_default(connection, table_name, schema) - # Exception raised when using Starburst Delta Connector that falls back to a Hive Catalog - except exc.ProgrammingError as e: - if isinstance(e.orig, TrinoQueryError): + catalog_name = self._get_default_catalog_name(connection) + if catalog_name is None: + raise exc.NoSuchTableError("catalog is required in connection") + connector_name = get_catalog_connector_name(connection.engine, catalog_name) + if connector_name is None: + return {} + if connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS: + properties_table = self._get_full_table(f"{table_name}$properties", schema) + query = f"SELECT * FROM {properties_table}" + row = connection.execute(sql.text(query)).fetchone() + + # Generate properties dictionary. 
+ properties = {} + if row: + for col_name, col_value in row.items(): + if col_value is not None: + properties[col_name] = col_value + + return {"text": properties.get("comment", None), "properties": properties} + else: return self.get_table_comment_default(connection, table_name, schema) - raise except Exception: return {} @@ -131,19 +186,38 @@ def _get_columns(self, connection, table_name, schema: str = None, **kw): # typ TrinoDialect._get_columns = _get_columns +class ConnectorDetail(PlatformInstanceConfigMixin, EnvConfigMixin): + connector_database: Optional[str] = Field(default=None, description="") + connector_platform: Optional[str] = Field( + default=None, + description="A connector's actual platform name. If not provided, will take from metadata tables" + "Eg: hive catalog can have a connector platform as 'hive' or 'glue' or some other metastore.", + ) + + class TrinoConfig(BasicSQLAlchemyConfig): # defaults scheme: str = Field(default="trino", description="", hidden_from_docs=True) + database: str = Field(description="database (catalog)") + + catalog_to_connector_details: Dict[str, ConnectorDetail] = Field( + default={}, + description="A mapping of trino catalog to its connector details like connector database, env and platform instance." + "This configuration is used to build lineage to the underlying connector. Use catalog name as key.", + ) + + ingest_lineage_to_connectors: bool = Field( + default=True, + description="Whether lineage of datasets to connectors should be ingested", + ) + + trino_as_primary: bool = Field( + default=True, + description="Experimental feature. 
Whether trino dataset should be primary entity of the set of siblings", + ) def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: - identifier = f"{schema}.{table}" - if self.database: # TODO: this should be required field - identifier = f"{self.database}.{identifier}" - return ( - f"{self.platform_instance}.{identifier}" - if self.platform_instance - else identifier - ) + return f"{self.database}.{schema}.{table}" @platform_name("Trino", doc_order=1) @@ -175,6 +249,133 @@ def get_db_name(self, inspector: Inspector) -> str: else: return super().get_db_name(inspector) + def _get_source_dataset_urn( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + table: str, + ) -> Optional[str]: + catalog_name = dataset_name.split(".")[0] + connector_name = get_catalog_connector_name(inspector.engine, catalog_name) + if not connector_name: + return None + connector_details = self.config.catalog_to_connector_details.get( + catalog_name, ConnectorDetail() + ) + connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( + connector_details.connector_platform or connector_name + ) + if not connector_platform_name: + logging.debug(f"Platform '{connector_platform_name}' is not yet supported.") + return None + + if connector_platform_name in TWO_TIER_CONNECTORS: # connector is two tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_name, + name=f"{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) + elif connector_details.connector_database: # else connector is three tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_name, + name=f"{connector_details.connector_database}.{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) + else: + logging.warning(f"Connector database missing for Catalog '{catalog_name}'.") + return None + + def gen_siblings_workunit( + self, 
+ dataset_urn: str, + source_dataset_urn: str, + ) -> Iterable[MetadataWorkUnit]: + """ + Generate sibling workunit for both trino dataset and its connector source dataset + """ + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=Siblings( + primary=self.config.trino_as_primary, siblings=[source_dataset_urn] + ), + ).as_workunit() + + yield MetadataChangeProposalWrapper( + entityUrn=source_dataset_urn, + aspect=Siblings( + primary=not self.config.trino_as_primary, siblings=[dataset_urn] + ), + ).as_workunit() + + def gen_lineage_workunit( + self, + dataset_urn: str, + source_dataset_urn: str, + ) -> Iterable[MetadataWorkUnit]: + """ + Generate dataset to source connector lineage workunit + """ + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=UpstreamLineage( + upstreams=[ + Upstream(dataset=source_dataset_urn, type=DatasetLineageType.VIEW) + ] + ), + ).as_workunit() + + def _process_table( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + table: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + yield from super()._process_table( + dataset_name, inspector, schema, table, sql_config + ) + if self.config.ingest_lineage_to_connectors: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + source_dataset_urn = self._get_source_dataset_urn( + dataset_name, inspector, schema, table + ) + if source_dataset_urn: + yield from self.gen_siblings_workunit(dataset_urn, source_dataset_urn) + yield from self.gen_lineage_workunit(dataset_urn, source_dataset_urn) + + def _process_view( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + view: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + yield from super()._process_view( + dataset_name, inspector, schema, view, sql_config + ) + if self.config.ingest_lineage_to_connectors: 
+ dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + source_dataset_urn = self._get_source_dataset_urn( + dataset_name, inspector, schema, view + ) + if source_dataset_urn: + yield from self.gen_siblings_workunit(dataset_urn, source_dataset_urn) + @classmethod def create(cls, config_dict, ctx): config = TrinoConfig.parse_obj(config_dict) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index c2091cacc1c4b7..5fdff458821f16 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -729,7 +729,7 @@ def _authenticate(self) -> None: logger.info("Authenticated to Tableau server") # Note that we're not catching ConfigurationError, since we want that to throw. except ValueError as e: - self.report.report_failure( + self.report.failure( key="tableau-login", reason=str(e), ) @@ -810,7 +810,7 @@ def get_connection_object_page( error and (error.get(c.EXTENSIONS) or {}).get(c.SEVERITY) == c.WARNING for error in errors ): - self.report.report_warning(key=connection_type, reason=f"{errors}") + self.report.warning(key=connection_type, reason=f"{errors}") else: raise RuntimeError(f"Query {connection_type} error: {errors}") @@ -2555,7 +2555,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if self.database_tables: yield from self.emit_upstream_tables() except MetadataQueryException as md_exception: - self.report.report_failure( + self.report.failure( key="tableau-metadata", reason=f"Unable to retrieve metadata from tableau. 
Information: {str(md_exception)}", ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator_v2.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py similarity index 77% rename from metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator_v2.py rename to metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index cb23c9244cd862..cecaef33efcd7b 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator_v2.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -5,15 +5,18 @@ import logging import pathlib import tempfile +import uuid from collections import defaultdict from datetime import datetime, timezone -from typing import Callable, Dict, Iterable, List, Optional, Set, cast +from typing import Callable, Dict, Iterable, List, Optional, Set, Union, cast +import datahub.emitter.mce_builder as builder import datahub.metadata.schema_classes as models from datahub.emitter.mce_builder import get_sys_time, make_ts_millis from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import compute_upstream_fields from datahub.ingestion.api.report import Report +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.usage.usage_common import BaseUsageConfig, UsageAggregator from datahub.metadata.urns import ( @@ -32,7 +35,8 @@ infer_output_schema, sqlglot_lineage, ) -from datahub.sql_parsing.sqlglot_utils import generate_hash +from datahub.sql_parsing.sqlglot_utils import generate_hash, get_query_fingerprint +from datahub.utilities.cooperative_timeout import CooperativeTimeoutError from datahub.utilities.file_backed_collections import ( ConnectionWrapper, FileBackedDict, @@ -57,8 +61,6 @@ class QueryLogSetting(enum.Enum): @dataclasses.dataclass class ViewDefinition: - # TODO view urn? 
- view_definition: str default_db: Optional[str] = None default_schema: Optional[str] = None @@ -95,6 +97,19 @@ def make_last_modified_audit_stamp(self) -> models.AuditStampClass: ) +@dataclasses.dataclass +class KnownQueryLineageInfo: + query_text: str + + downstream: UrnStr + upstreams: List[UrnStr] + column_lineage: Optional[List[ColumnLineageInfo]] = None + + timestamp: Optional[datetime] = None + session_id: Optional[str] = None + query_type: QueryType = QueryType.UNKNOWN + + @dataclasses.dataclass class SqlAggregatorReport(Report): _aggregator: "SqlParsingAggregator" @@ -102,13 +117,23 @@ class SqlAggregatorReport(Report): num_observed_queries: int = 0 num_observed_queries_failed: int = 0 + num_observed_queries_column_timeout: int = 0 num_observed_queries_column_failed: int = 0 - observed_query_parse_failures = LossyList[str]() + observed_query_parse_failures: LossyList[str] = dataclasses.field( + default_factory=LossyList + ) num_view_definitions: int = 0 num_views_failed: int = 0 + num_views_column_timeout: int = 0 num_views_column_failed: int = 0 - views_parse_failures = LossyDict[UrnStr, str]() + views_parse_failures: LossyDict[UrnStr, str] = dataclasses.field( + default_factory=LossyDict + ) + + num_known_query_lineage: int = 0 + num_known_mapping_lineage: int = 0 + num_table_renames: int = 0 num_queries_with_temp_tables_in_session: int = 0 @@ -142,8 +167,8 @@ def __init__( self, *, platform: str, - platform_instance: Optional[str], - env: str, + platform_instance: Optional[str] = None, + env: str = builder.DEFAULT_ENV, graph: Optional[DataHubGraph] = None, generate_lineage: bool = True, generate_queries: bool = True, @@ -234,6 +259,11 @@ def __init__( shared_connection=self._shared_connection, tablename="inferred_temp_schemas" ) + # Map of table renames, from original UrnStr to new UrnStr. + self._table_renames = FileBackedDict[UrnStr]( + shared_connection=self._shared_connection, tablename="table_renames" + ) + # Usage aggregator. 
This will only be initialized if usage statistics are enabled. # TODO: Replace with FileBackedDict. self._usage_aggregator: Optional[UsageAggregator[UrnStr]] = None @@ -246,7 +276,7 @@ def _need_schemas(self) -> bool: return self.generate_lineage or self.generate_usage_statistics def register_schema( - self, urn: DatasetUrn, schema: models.SchemaMetadataClass + self, urn: Union[str, DatasetUrn], schema: models.SchemaMetadataClass ) -> None: # If lineage or usage is enabled, adds the schema to the schema resolver # by putting the condition in here, we can avoid all the conditional @@ -255,6 +285,16 @@ def register_schema( if self._need_schemas: self._schema_resolver.add_schema_metadata(str(urn), schema) + def register_schemas_from_stream( + self, stream: Iterable[MetadataWorkUnit] + ) -> Iterable[MetadataWorkUnit]: + for wu in stream: + schema_metadata = wu.get_aspect_of_type(models.SchemaMetadataClass) + if schema_metadata: + self.register_schema(wu.get_urn(), schema_metadata) + + yield wu + def _initialize_schema_resolver_from_graph(self, graph: DataHubGraph) -> None: # requires a graph instance # if no schemas are currently registered in the schema resolver @@ -284,6 +324,101 @@ def _initialize_schema_resolver_from_graph(self, graph: DataHubGraph) -> None: env=self.env, ) + def add_known_query_lineage( + self, known_query_lineage: KnownQueryLineageInfo, merge_lineage: bool = False + ) -> None: + """Add a query and it's precomputed lineage to the aggregator. + + This is useful for cases where we have lineage information that was + computed outside of the SQL parsing aggregator, e.g. from a data + warehouse's system tables. + + This will also generate an operation aspect for the query if there is + a timestamp and the query type field is set to a mutation type. + + Args: + known_query_lineage: The known query lineage information. + """ + + self.report.num_known_query_lineage += 1 + + # Generate a fingerprint for the query. 
+ query_fingerprint = get_query_fingerprint( + known_query_lineage.query_text, self.platform.platform_name + ) + # TODO format the query text? + + # Register the query. + self._add_to_query_map( + QueryMetadata( + query_id=query_fingerprint, + formatted_query_string=known_query_lineage.query_text, + session_id=known_query_lineage.session_id or _MISSING_SESSION_ID, + query_type=known_query_lineage.query_type, + lineage_type=models.DatasetLineageTypeClass.TRANSFORMED, + latest_timestamp=known_query_lineage.timestamp, + actor=None, + upstreams=known_query_lineage.upstreams, + column_lineage=known_query_lineage.column_lineage or [], + confidence_score=1.0, + ), + merge_lineage=merge_lineage, + ) + + # Register the lineage. + self._lineage_map.for_mutation( + known_query_lineage.downstream, OrderedSet() + ).add(query_fingerprint) + + def add_known_lineage_mapping( + self, + upstream_urn: UrnStr, + downstream_urn: UrnStr, + lineage_type: str = models.DatasetLineageTypeClass.COPY, + ) -> None: + """Add a known lineage mapping to the aggregator. + + By mapping, we mean that the downstream is effectively a copy or + alias of the upstream. This is useful for things like external tables + (e.g. Redshift Spectrum, Redshift UNLOADs, Snowflake external tables). + + Because this method takes in urns, it does not require that the urns + are part of the platform that the aggregator is configured for. + + TODO: In the future, this method will also generate CLL if we have + schemas for either the upstream or downstream. + + The known lineage mapping does not contribute to usage statistics or operations. + + Args: + upstream_urn: The upstream dataset URN. + downstream_urn: The downstream dataset URN. + """ + + self.report.num_known_mapping_lineage += 1 + + # We generate a fake "query" object to hold the lineage. + query_id = self._known_lineage_query_id() + + # Register the query. 
+ self._add_to_query_map( + QueryMetadata( + query_id=query_id, + formatted_query_string="-skip-", + session_id=_MISSING_SESSION_ID, + query_type=QueryType.UNKNOWN, + lineage_type=lineage_type, + latest_timestamp=None, + actor=None, + upstreams=[upstream_urn], + column_lineage=[], + confidence_score=1.0, + ) + ) + + # Register the lineage. + self._lineage_map.for_mutation(downstream_urn, OrderedSet()).add(query_id) + def add_view_definition( self, view_urn: DatasetUrn, @@ -350,12 +485,14 @@ def add_observed_query( ) if parsed.debug_info.error: self.report.observed_query_parse_failures.append( - str(parsed.debug_info.error) + f"{parsed.debug_info.error} on query: {query[:100]}" ) if parsed.debug_info.table_error: self.report.num_observed_queries_failed += 1 return # we can't do anything with this query - elif parsed.debug_info.error: + elif isinstance(parsed.debug_info.column_error, CooperativeTimeoutError): + self.report.num_observed_queries_column_timeout += 1 + elif parsed.debug_info.column_error: self.report.num_observed_queries_column_failed += 1 # Register the query's usage. @@ -367,6 +504,13 @@ def add_observed_query( # TODO: We need a full list of columns referenced, not just the out tables. upstream_fields = compute_upstream_fields(parsed) for upstream_urn in parsed.in_tables: + # If the upstream table is a temp table, don't log usage for it. + if (self.is_temp_table and self.is_temp_table(upstream_urn)) or ( + require_out_table_schema + and not self._schema_resolver.has_urn(upstream_urn) + ): + continue + self._usage_aggregator.aggregate_event( resource=upstream_urn, start_time=query_timestamp, @@ -382,6 +526,12 @@ def add_observed_query( query_fingerprint = parsed.query_fingerprint assert query_fingerprint is not None + # Handle table renames. + is_renamed_table = False + if out_table in self._table_renames: + out_table = self._table_renames[out_table] + is_renamed_table = True + # Register the query. 
self._add_to_query_map( QueryMetadata( @@ -405,10 +555,15 @@ def add_observed_query( parsed.query_type.is_create() and parsed.query_type_props.get("temporary") ) - or (self.is_temp_table and self.is_temp_table(out_table)) or ( - require_out_table_schema - and not self._schema_resolver.has_urn(out_table) + not is_renamed_table + and ( + (self.is_temp_table and self.is_temp_table(out_table)) + or ( + require_out_table_schema + and not self._schema_resolver.has_urn(out_table) + ) + ) ) ): # Infer the schema of the output table and track it for later. @@ -427,6 +582,29 @@ def add_observed_query( query_fingerprint ) + def add_table_rename( + self, + original_urn: UrnStr, + new_urn: UrnStr, + ) -> None: + """Add a table rename to the aggregator. + + This will so that all _future_ observed queries that reference the original urn + will instead generate usage and lineage for the new urn. + + Currently, this does not affect any queries that have already been observed. + TODO: Add a mechanism to update the lineage for queries that have already been observed. + + Args: + original_urn: The original dataset URN. + new_urn: The new dataset URN. + """ + + self.report.num_table_renames += 1 + + # This will not work if the table is renamed multiple times. + self._table_renames[original_urn] = new_urn + def _make_schema_resolver_for_session( self, session_id: str ) -> SchemaResolverInterface: @@ -449,6 +627,10 @@ def _make_schema_resolver_for_session( def _process_view_definition( self, view_urn: UrnStr, view_definition: ViewDefinition ) -> None: + # Note that in some cases, the view definition will be a SELECT statement + # instead of a CREATE VIEW ... AS SELECT statement. In those cases, we can't + # trust the parsed query type or downstream urn. + # Run the SQL parser. 
parsed = self._run_sql_parser( view_definition.view_definition, @@ -457,17 +639,17 @@ def _process_view_definition( schema_resolver=self._schema_resolver, ) if parsed.debug_info.error: - self.report.views_parse_failures[view_urn] = str(parsed.debug_info.error) + self.report.views_parse_failures[ + view_urn + ] = f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}" if parsed.debug_info.table_error: self.report.num_views_failed += 1 return # we can't do anything with this query - elif parsed.debug_info.error: + elif isinstance(parsed.debug_info.column_error, CooperativeTimeoutError): + self.report.num_views_column_timeout += 1 + elif parsed.debug_info.column_error: self.report.num_views_column_failed += 1 - # Note that in some cases, the view definition will be a SELECT statement - # instead of a CREATE VIEW ... AS SELECT statement. In those cases, we can't - # trust the parsed query type or downstream urn. - query_fingerprint = self._view_query_id(view_urn) # Register the query. @@ -518,7 +700,9 @@ def _run_sql_parser( return parsed - def _add_to_query_map(self, new: QueryMetadata) -> None: + def _add_to_query_map( + self, new: QueryMetadata, merge_lineage: bool = False + ) -> None: query_fingerprint = new.query_id if query_fingerprint in self._query_map: @@ -531,24 +715,25 @@ def _add_to_query_map(self, new: QueryMetadata) -> None: current.latest_timestamp = new.latest_timestamp or current.latest_timestamp current.actor = new.actor or current.actor - # An invariant of the fingerprinting is that if two queries have the - # same fingerprint, they must also have the same lineage. We overwrite - # here just in case more schemas got registered in the interim. - current.upstreams = new.upstreams - current.column_lineage = new.column_lineage - current.confidence_score = new.confidence_score + if not merge_lineage: + # An invariant of the fingerprinting is that if two queries have the + # same fingerprint, they must also have the same lineage. 
We overwrite + # here just in case more schemas got registered in the interim. + current.upstreams = new.upstreams + current.column_lineage = new.column_lineage + current.confidence_score = new.confidence_score + else: + # In the case of known query lineage, we might get things one at a time. + # TODO: We don't yet support merging CLL for a single query. + current.upstreams = list( + OrderedSet(current.upstreams) | OrderedSet(new.upstreams) + ) + current.confidence_score = min( + current.confidence_score, new.confidence_score + ) else: self._query_map[query_fingerprint] = new - """ - def add_lineage(self) -> None: - # A secondary mechanism for adding non-SQL-based lineage - # e.g. redshift external tables might use this when pointing at s3 - - # TODO Add this once we have a use case for it - pass - """ - def gen_metadata(self) -> Iterable[MetadataChangeProposalWrapper]: # diff from v1 - we generate operations here, and it also # generates MCPWs instead of workunits @@ -569,7 +754,7 @@ def _gen_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: # Generate lineage and queries. 
queries_generated: Set[QueryId] = set() - for downstream_urn in self._lineage_map: + for downstream_urn in sorted(self._lineage_map): yield from self._gen_lineage_for_downstream( downstream_urn, queries_generated=queries_generated ) @@ -640,7 +825,9 @@ def _gen_lineage_for_downstream( dataset=upstream_urn, type=queries_map[query_id].lineage_type, query=( - self._query_urn(query_id) if self.generate_queries else None + self._query_urn(query_id) + if self.can_generate_query(query_id) + else None ), created=query.make_created_audit_stamp(), auditStamp=models.AuditStampClass( @@ -671,7 +858,9 @@ def _gen_lineage_for_downstream( SchemaFieldUrn(downstream_urn, downstream_column).urn() ], query=( - self._query_urn(query_id) if self.generate_queries else None + self._query_urn(query_id) + if self.can_generate_query(query_id) + else None ), confidenceScore=queries_map[query_id].confidence_score, ) @@ -682,9 +871,10 @@ def _gen_lineage_for_downstream( aspect=upstream_aspect, ) - if not self.generate_queries: - return for query_id in required_queries: + if not self.can_generate_query(query_id): + continue + # Avoid generating the same query twice. if query_id in queries_generated: continue @@ -729,6 +919,19 @@ def _composite_query_id(cls, composed_of_queries: Iterable[QueryId]) -> str: def _view_query_id(cls, view_urn: UrnStr) -> str: return f"view_{DatasetUrn.url_encode(view_urn)}" + @classmethod + def _known_lineage_query_id(cls) -> str: + return f"known_{uuid.uuid4()}" + + @classmethod + def _is_known_lineage_query_id(cls, query_id: QueryId) -> bool: + # Our query fingerprints are hex and won't have underscores, so this will + # never conflict with a real query fingerprint. 
+ return query_id.startswith("known_") + + def can_generate_query(self, query_id: QueryId) -> bool: + return self.generate_queries and not self._is_known_lineage_query_id(query_id) + def _resolve_query_with_temp_tables( self, base_query: QueryMetadata, @@ -895,8 +1098,10 @@ def _gen_operation_for_downstream( operationType=operation_type, lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp), actor=query.actor.urn() if query.actor else None, - customProperties={ - "query_urn": self._query_urn(query_id), - }, + customProperties=( + {"query_urn": self._query_urn(query_id)} + if self.can_generate_query(query_id) + else None + ), ) yield MetadataChangeProposalWrapper(entityUrn=downstream_urn, aspect=aspect) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index a68a98ecb5cbe9..31b3a756f8d703 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -1,6 +1,7 @@ import functools import itertools import logging +import traceback from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, Union @@ -12,6 +13,7 @@ import sqlglot.optimizer.optimizer import sqlglot.optimizer.qualify +from datahub.cli.env_utils import get_boolean_env_variable from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( ArrayTypeClass, @@ -39,16 +41,24 @@ from datahub.sql_parsing.sqlglot_utils import ( DialectOrStr, get_dialect, - get_query_fingerprint, + get_query_fingerprint_debug, is_dialect_instance, parse_statement, ) +from datahub.utilities.cooperative_timeout import ( + CooperativeTimeoutError, + cooperative_timeout, +) logger = logging.getLogger(__name__) Urn = str SQL_PARSE_RESULT_CACHE_SIZE = 1000 +SQL_LINEAGE_TIMEOUT_ENABLED = get_boolean_env_variable( + "SQL_LINEAGE_TIMEOUT_ENABLED", True +) +SQL_LINEAGE_TIMEOUT_SECONDS = 10 
RULES_BEFORE_TYPE_ANNOTATION: tuple = tuple( @@ -181,16 +191,28 @@ class ColumnLineageInfo(_ParserBaseModel): class SqlParsingDebugInfo(_ParserBaseModel): confidence: float = 0.0 - tables_discovered: int = 0 - table_schemas_resolved: int = 0 + tables_discovered: int = pydantic.Field(0, exclude=True) + table_schemas_resolved: int = pydantic.Field(0, exclude=True) - table_error: Optional[Exception] = None - column_error: Optional[Exception] = None + generalized_statement: Optional[str] = None + + table_error: Optional[Exception] = pydantic.Field(default=None, exclude=True) + column_error: Optional[Exception] = pydantic.Field(default=None, exclude=True) @property def error(self) -> Optional[Exception]: return self.table_error or self.column_error + @pydantic.validator("table_error", "column_error") + def remove_variables_from_error(cls, v: Optional[Exception]) -> Optional[Exception]: + if v and v.__traceback__: + # Remove local variables from the traceback to avoid memory leaks. + # See https://docs.python.org/3/library/traceback.html#traceback.clear_frames + # and https://docs.python.org/3/reference/datamodel.html#frame.clear + traceback.clear_frames(v.__traceback__) + + return v + class SqlParsingResult(_ParserBaseModel): query_type: QueryType = QueryType.UNKNOWN @@ -206,8 +228,7 @@ class SqlParsingResult(_ParserBaseModel): # TODO include list of referenced columns debug_info: SqlParsingDebugInfo = pydantic.Field( - default_factory=lambda: SqlParsingDebugInfo(), - exclude=True, + default_factory=lambda: SqlParsingDebugInfo() ) @classmethod @@ -500,6 +521,12 @@ def _schema_aware_fuzzy_column_resolve( elif isinstance(node.expression, sqlglot.exp.Table): table_ref = _TableName.from_sqlglot_table(node.expression) + if node.name == "*": + # This will happen if we couldn't expand the * to actual columns e.g. if + # we don't have schema info for the table. In this case, we can't generate + # column-level lineage, so we skip it. 
+ continue + # Parse the column name out of the node name. # Sqlglot calls .sql(), so we have to do the inverse. normalized_col = sqlglot.parse_one(node.name).this.name @@ -553,7 +580,7 @@ def _schema_aware_fuzzy_column_resolve( ) # TODO: Also extract referenced columns (aka auxillary / non-SELECT lineage) - except (sqlglot.errors.OptimizeError, ValueError) as e: + except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e: raise SqlUnderstandingError( f"sqlglot failed to compute some lineage: {e}" ) from e @@ -852,20 +879,33 @@ def _sqlglot_lineage_inner( column_lineage: Optional[List[_ColumnLineageInfo]] = None try: if select_statement is not None: - column_lineage = _column_level_lineage( - select_statement, - dialect=dialect, - table_schemas=table_name_schema_mapping, - output_table=downstream_table, - default_db=default_db, - default_schema=default_schema, - ) + with cooperative_timeout( + timeout=SQL_LINEAGE_TIMEOUT_SECONDS + if SQL_LINEAGE_TIMEOUT_ENABLED + else None + ): + column_lineage = _column_level_lineage( + select_statement, + dialect=dialect, + table_schemas=table_name_schema_mapping, + output_table=downstream_table, + default_db=default_db, + default_schema=default_schema, + ) except UnsupportedStatementTypeError as e: # Inject details about the outer statement type too. 
e.args = (f"{e.args[0]} (outer statement type: {type(statement)})",) debug_info.column_error = e logger.debug(debug_info.column_error) - except SqlUnderstandingError as e: + except CooperativeTimeoutError as e: + logger.debug(f"Timed out while generating column-level lineage: {e}") + debug_info.column_error = e + except ( + SqlUnderstandingError, + ValueError, + IndexError, + sqlglot.errors.SqlglotError, + ) as e: logger.debug(f"Failed to generate column-level lineage: {e}", exc_info=True) debug_info.column_error = e @@ -887,7 +927,9 @@ def _sqlglot_lineage_inner( query_type, query_type_props = get_query_type_of_sql( original_statement, dialect=dialect ) - query_fingerprint = get_query_fingerprint(original_statement, dialect=dialect) + query_fingerprint, debug_info.generalized_statement = get_query_fingerprint_debug( + original_statement, dialect=dialect + ) return SqlParsingResult( query_type=query_type, query_type_props=query_type_props, diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index 587394cc14646a..710fffc4afbd30 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -1,8 +1,11 @@ import hashlib -from typing import Dict, Iterable, Optional, Union +import logging +from typing import Dict, Iterable, Optional, Tuple, Union import sqlglot +import sqlglot.errors +logger = logging.getLogger(__name__) DialectOrStr = Union[sqlglot.Dialect, str] @@ -116,6 +119,23 @@ def generate_hash(text: str) -> str: return hashlib.sha256(text.encode("utf-8")).hexdigest() +def get_query_fingerprint_debug( + expression: sqlglot.exp.ExpOrStr, dialect: DialectOrStr +) -> Tuple[str, str]: + try: + dialect = get_dialect(dialect) + expression_sql = generalize_query(expression, dialect=dialect) + except (ValueError, sqlglot.errors.SqlglotError) as e: + if not isinstance(expression, str): + raise + + 
logger.debug("Failed to generalize query for fingerprinting: %s", e) + expression_sql = expression + + fingerprint = generate_hash(expression_sql) + return fingerprint, expression_sql + + def get_query_fingerprint( expression: sqlglot.exp.ExpOrStr, dialect: DialectOrStr ) -> str: @@ -139,11 +159,7 @@ def get_query_fingerprint( The fingerprint for the SQL query. """ - dialect = get_dialect(dialect) - expression_sql = generalize_query(expression, dialect=dialect) - fingerprint = generate_hash(expression_sql) - - return fingerprint + return get_query_fingerprint_debug(expression, dialect)[0] def detach_ctes( diff --git a/metadata-ingestion/src/datahub/utilities/cooperative_timeout.py b/metadata-ingestion/src/datahub/utilities/cooperative_timeout.py new file mode 100644 index 00000000000000..748719f4430c68 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/cooperative_timeout.py @@ -0,0 +1,65 @@ +import contextlib +import threading +import time +from typing import Iterator, Optional + +_cooperation = threading.local() + + +class CooperativeTimeoutError(TimeoutError): + """An exception raised when a cooperative timeout is exceeded.""" + + +def cooperate() -> None: + """Method to be called periodically to cooperate with the timeout mechanism.""" + + deadline = getattr(_cooperation, "deadline", None) + if deadline is not None and deadline < time.perf_counter_ns(): + raise CooperativeTimeoutError("CooperativeTimeout deadline exceeded") + + +@contextlib.contextmanager +def cooperative_timeout(timeout: Optional[float] = None) -> Iterator[None]: + """A cooperative timeout mechanism. + + Useful in cases where (1) we have control over the code that we want to time out + and (2) we can modify it to regularly and reliably call a specific function. + + This is not reentrant and cannot be used in nested contexts. It can be used + in multi-threaded contexts, so long as the cooperative function is called + from the same thread that created the timeout. 
+ + Args: + timeout: The timeout in seconds. If None, the timeout is disabled. + + Raises: + RuntimeError: If a cooperative timeout is already active. + CooperativeTimeoutError: If the cooperative timeout is exceeded. + """ + + # Getting code to time out in Python is actually rather tricky, and so this felt + # like the most straightforward approach. Other approaches include: + # - Using the signal module to set a signal handler that raises an exception + # after a certain time. Unfortunately, this approach only works on the main + # thread, and is not available on Windows. + # - Creating a separate process to run the code and then killing it if it hasn't + # finished by the deadline. This usually requires that all arguments/return + # types are pickleable so that they can be passed between processes. Overall, + # this approach is heavy-handed and can be tricky to implement correctly. + # - Using `threading` is not an option, since Python threads are not interruptible + # (unless you're willing to use some hacks https://stackoverflow.com/a/61528202). + # Attempting to forcibly terminate a thread can deadlock on the GIL. + + if hasattr(_cooperation, "deadline"): + raise RuntimeError("cooperative timeout already active") + + if timeout is not None: + _cooperation.deadline = time.perf_counter_ns() + timeout * 1_000_000_000 + try: + yield + finally: + del _cooperation.deadline + + else: + # No-op. 
+ yield diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index 05c2041519858f..821b69c968ee46 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -6,6 +6,7 @@ import shutil import sqlite3 import tempfile +import threading from dataclasses import dataclass, field from datetime import datetime from enum import Enum @@ -81,10 +82,16 @@ def __init__(self, filename: Optional[pathlib.Path] = None): if not filename: self._temp_directory = tempfile.mkdtemp() filename = pathlib.Path(self._temp_directory) / _DEFAULT_FILE_NAME + self.filename = filename - self.conn = sqlite3.connect(filename, isolation_level=None) + # SQLite connections are technically not supposed to be used from multiple threads. + # We bypass this restriction by setting `check_same_thread=False`. However, we + # still need to be careful to avoid concurrent access. + self.conn_lock = threading.Lock() + self.conn = sqlite3.connect( + filename, isolation_level=None, check_same_thread=False + ) self.conn.row_factory = sqlite3.Row - self.filename = filename # These settings are optimized for performance. # See https://www.sqlite.org/pragma.html for more information. 
@@ -107,17 +114,20 @@ def allow_table_name_reuse(self) -> bool: def execute( self, sql: str, parameters: Union[Dict[str, Any], Sequence[Any]] = () ) -> sqlite3.Cursor: - return self.conn.execute(sql, parameters) + with self.conn_lock: + return self.conn.execute(sql, parameters) def executemany( self, sql: str, parameters: Union[Dict[str, Any], Sequence[Any]] = () ) -> sqlite3.Cursor: - return self.conn.executemany(sql, parameters) + with self.conn_lock: + return self.conn.executemany(sql, parameters) def close(self) -> None: for obj in self._dependent_objects: obj.close() - self.conn.close() + with self.conn_lock: + self.conn.close() if self._temp_directory: shutil.rmtree(self._temp_directory) self._temp_directory = None @@ -165,11 +175,9 @@ def _default_deserializer(value: Any) -> Any: @dataclass(eq=False) class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]): - """ - A dict-like object that stores its data in a temporary SQLite database. - This is useful for storing large amounts of data that don't fit in memory. + """A dict-like object that stores its data in a temporary SQLite database. - This class is not thread-safe. + This is useful for storing large amounts of data that don't fit in memory. """ # Use a predefined connection, able to be shared across multiple FileBacked* objects @@ -433,11 +441,7 @@ def __del__(self) -> None: class FileBackedList(Generic[_VT]): - """ - An append-only, list-like object that stores its contents in a SQLite database. - - This class is not thread-safe. 
- """ + """An append-only, list-like object that stores its contents in a SQLite database.""" _len: int = field(default=0) _dict: FileBackedDict[_VT] = field(init=False) diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py index dc96ef384234c8..52e6cd697c7c09 100644 --- a/metadata-ingestion/src/datahub/utilities/logging_manager.py +++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py @@ -15,6 +15,7 @@ import contextlib import logging import os +import pathlib import sys from typing import Deque, Iterator, Optional @@ -23,7 +24,7 @@ from datahub.utilities.tee_io import TeeIO BASE_LOGGING_FORMAT = ( - "[%(asctime)s] %(levelname)-8s {%(filename)s:%(lineno)d} - %(message)s" + "[%(asctime)s] %(levelname)-8s {%(name)s:%(lineno)d} - %(message)s" ) DATAHUB_PACKAGES = [ "datahub", @@ -36,6 +37,56 @@ NO_COLOR = os.environ.get("NO_COLOR", False) +def extract_name_from_filename(filename: str, fallback_name: str) -> str: + """Guess the module path from the filename. + + Because the logger name may not be the same as the package path (e.g. when using stacklevel), + we do a best-effort attempt to extract the module name from the filename. + + >>> extract_name_from_filename("/datahub-ingestion/.local/lib/python3.10/site-packages/datahub/configuration/common.py", "bad") + 'datahub.configuration.common' + + >>> extract_name_from_filename("/home/user/datahub/metadata-ingestion/src/datahub/telemetry/telemetry.py", "bad") + 'datahub.telemetry.telemetry' + + >>> extract_name_from_filename("/this/is/not/a/normal/path.py", "fallback.package") + 'fallback.package' + + Args: + filename: The filename of the module. + fallback_name: The name to use if we can't guess the module. + + Returns: + The guessed module name. 
+ """ + + with contextlib.suppress(Exception): + # Split the path into components + path_parts = list(pathlib.Path(filename).parts) + + # Remove the .py extension from the last part + if path_parts[-1].endswith(".py"): + path_parts[-1] = path_parts[-1][:-3] + + # If we're in a site-packages directory, we want to use the package name as the top-level module. + if "site-packages" in path_parts: + # Find the index of 'site-packages' in the path + site_packages_index = path_parts.index("site-packages") + # Join the parts from 'site-packages' onwards with '.' + return ".".join(path_parts[site_packages_index + 1 :]) + + # We're probably in a development environment, so take everything after 'metadata-ingestion' + metadata_ingestion_index = next( + (i for i, part in enumerate(path_parts) if "metadata-ingestion" in part), + None, + ) + if metadata_ingestion_index is not None: + # Join the parts from 'metadata-ingestion/src' onwards with '.' + return ".".join(path_parts[metadata_ingestion_index + 2 :]) + + return fallback_name + + class _ColorLogFormatter(logging.Formatter): # Adapted from https://stackoverflow.com/a/56944256/3638629. @@ -51,6 +102,7 @@ def __init__(self) -> None: super().__init__(BASE_LOGGING_FORMAT) def formatMessage(self, record: logging.LogRecord) -> str: + record.name = extract_name_from_filename(record.pathname, record.name) if not NO_COLOR and sys.stderr.isatty(): return self._formatMessageColor(record) else: diff --git a/metadata-ingestion/tests/conftest.py b/metadata-ingestion/tests/conftest.py index 0f278ab1e13115..eb58ad40abb305 100644 --- a/metadata-ingestion/tests/conftest.py +++ b/metadata-ingestion/tests/conftest.py @@ -33,6 +33,12 @@ except ImportError: pass +import freezegun # noqa: F401,E402 + +# The freezegun library has incomplete type annotations. 
+# See https://github.com/spulec/freezegun/issues/469 +freezegun.configure(extend_ignore_list=["datahub.utilities.cooperative_timeout"]) # type: ignore[attr-defined] + @pytest.fixture def mock_time(monkeypatch): diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json new file mode 100644 index 00000000000000..72b5fee49a0dbd --- /dev/null +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_small_schema_size_golden.json @@ -0,0 +1,683 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "emptyCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "firstCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BytesType": {} + } + }, + "nativeDataType": "oid", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "age", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "canSwim", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "OBJECT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "ARRAY", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients.from", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + 
"isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients.name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "legs", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.UnionType": {} + } + }, + "nativeDataType": "mixed", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema.downsampled": "True", + "schema.totalFields": "22" + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, 
+{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "largeCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BytesType": {} + } + }, + "nativeDataType": "oid", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_205", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_214", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_220", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_224", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_233", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_239", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"field_241", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_242", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_247", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema.downsampled": "True", + "schema.totalFields": "502" + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "secondCollection", + "platform": 
"urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BytesType": {} + } + }, + "nativeDataType": "oid", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.UnionType": {} + } + }, + "nativeDataType": "mixed", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType.fieldA", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType.fieldTwo", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "nullableMixedType", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.UnionType": {} + } + }, + "nativeDataType": "mixed", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rating", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "tasty", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "varieties", + "nullable": true, + "type": { + "type": { 
+ "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "ARRAY", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test-small-schema-size", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py index 0a0ba55ff5b802..6dc8bb295ed455 100644 --- a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py +++ b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py @@ -46,3 +46,36 @@ def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time output_path=tmp_path / "mongodb_mces.json", golden_path=test_resources_dir / "mongodb_mces_golden.json", ) + + # Run the metadata ingestion pipeline. + pipeline = Pipeline.create( + { + "run_id": "mongodb-test-small-schema-size", + "source": { + "type": "mongodb", + "config": { + "connect_uri": "mongodb://localhost:57017", + "username": "mongoadmin", + "password": "examplepass", + "maxSchemaSize": 10, + "platform_instance": "instance", + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/mongodb_mces_small_schema_size.json", + }, + }, + } + ) + pipeline.run() + pipeline.raise_from_status() + + # Verify the output. 
+ mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "mongodb_mces_small_schema_size.json", + golden_path=test_resources_dir + / "mongodb_mces_small_schema_size_golden.json", + ) diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 8ab3ed8056e90f..4e24b5c8871943 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -1,4 +1,3 @@ -import re import subprocess import pytest @@ -9,7 +8,7 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.sink.file import FileSinkConfig from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig -from datahub.ingestion.source.sql.trino import TrinoConfig +from datahub.ingestion.source.sql.trino import ConnectorDetail, TrinoConfig from tests.test_helpers import fs_helpers, mce_helpers from tests.test_helpers.docker_helpers import wait_for_port @@ -53,7 +52,6 @@ def loaded_trino(trino_runner): @freeze_time(FROZEN_TIME) -@pytest.mark.xfail def test_trino_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): @@ -73,7 +71,7 @@ def test_trino_ingest( username="foo", schema_pattern=AllowDenyPattern(allow=["^librarydb"]), profile_pattern=AllowDenyPattern( - allow=["library_catalog.librarydb.*"] + allow=["postgresqldb.librarydb.*"] ), profiling=GEProfilingConfig( enabled=True, @@ -89,6 +87,12 @@ def test_trino_ingest( include_field_histogram=True, include_field_sample_values=True, ), + catalog_to_connector_details={ + "postgresqldb": ConnectorDetail( + connector_database="postgres", + platform_instance="local_server", + ) + }, ).dict(), }, "sink": { @@ -169,8 +173,6 @@ def test_trino_hive_ingest( def test_trino_instance_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): - instance = "production_warehouse" - platform = "trino" mce_out_file = "trino_instance_mces.json" 
events_file = tmp_path / mce_out_file pipeline_config = { @@ -183,6 +185,12 @@ def test_trino_instance_ingest( username="foo", platform_instance="production_warehouse", schema_pattern=AllowDenyPattern(allow=["^db1"]), + catalog_to_connector_details={ + "hivedb": ConnectorDetail( + connector_platform="glue", + platform_instance="local_server", + ) + }, ).dict(), }, "sink": { @@ -197,40 +205,12 @@ def test_trino_instance_ingest( pipeline.pretty_print_summary() pipeline.raise_from_status(raise_warnings=True) - # Assert that all events generated have instance specific urns - urn_pattern = "^" + re.escape( - f"urn:li:dataset:(urn:li:dataPlatform:{platform},{instance}." - ) - assert ( - mce_helpers.assert_mce_entity_urn( - "ALL", - entity_type="dataset", - regex_pattern=urn_pattern, - file=events_file, - ) - >= 0 - ), "There should be at least one match" - - assert ( - mce_helpers.assert_mcp_entity_urn( - "ALL", - entity_type="dataset", - regex_pattern=urn_pattern, - file=events_file, - ) - >= 0 - ), "There should be at least one MCP" - - # all dataset entities emitted must have a dataPlatformInstance aspect emitted - # there must be at least one entity emitted - assert ( - mce_helpers.assert_for_each_entity( - entity_type="dataset", - aspect_name="dataPlatformInstance", - aspect_field_matcher={ - "instance": f"urn:li:dataPlatformInstance:(urn:li:dataPlatform:{platform},{instance})" - }, - file=events_file, - ) - >= 1 + # Verify the output. 
+ mce_helpers.check_golden_file( + pytestconfig, + output_path=events_file, + golden_path=test_resources_dir / "trino_hive_instance_mces_golden.json", + ignore_paths=[ + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['transient_lastddltime'\]", + ], ) diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json new file mode 100644 index 00000000000000..d63995506cb9c3 --- /dev/null +++ b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json @@ -0,0 +1,2990 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "trino", + "instance": "production_warehouse", + "env": "PROD", + "database": "hivedb" + }, + "name": "hivedb" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": 
"trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "trino", + "instance": "production_warehouse", + "env": "PROD", + "database": "hivedb", + "schema": "db1" + }, + "name": "db1" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + 
"json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "another.comment": "This table has no partitions", + "comment": "This table has array of structs", + "numfiles": "1", + "numrows": "1", + "rawdatasize": "32", + "totalsize": "33", + "transient_lastddltime": "1708925463" + }, + "name": "array_struct_test", + "description": "This table has array of structs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.array_struct_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "description": "id of property", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "description": "service types and providers", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), 
('provider', ARRAY(INTEGER()))]))", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": 
"trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708925466" + }, + "name": "map_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.map_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + 
}, + "fields": [ + { + "fieldPath": "keyvalue", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=string].recordid", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "string" + } + } + }, + "nativeDataType": "MAP(INTEGER(), VARCHAR())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"MAP(INTEGER(), VARCHAR())\", \"key_type\": {\"type\": \"int\", \"native_data_type\": \"INTEGER()\", \"_nullable\": true}, \"key_native_data_type\": \"INTEGER()\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": 
"UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708925466" + }, + "name": "nested_struct_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.nested_struct_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR(length=50)\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLINT()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": 
\"SMALLINT()\", \"_nullable\": true}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "transient_lastddltime": "1708925457" + }, + "name": "pokes", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.pokes", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "bar", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + 
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708925459" + }, + 
"name": "struct_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.struct_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": 
"urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numfiles": "0", + "totalsize": "0", + "transient_lastddltime": "1708925466" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.struct_test_view_materialized", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + 
"nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708925459" + }, + "name": "_test_table_underscore", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1._test_table_underscore", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "bar", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": 
"trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": 
"urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708925466" + }, + "name": "union_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.union_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].tag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLINT()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"SMALLINT()\", \"_nullable\": true}" + }, + { + "fieldPath": 
"[version=2.0].[type=struct].[type=struct].foo.[type=int].field0", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=double].field1", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "DOUBLE()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"DOUBLE()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=array].[type=string].field2", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "string" + ] + } + } + }, + "nativeDataType": "ARRAY(VARCHAR())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(VARCHAR())\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('a', INTEGER()), ('b', VARCHAR())])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('a', INTEGER()), ('b', VARCHAR())])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=int].a", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=string].b", + "nullable": true, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { 
+ "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + 
"systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "transient_lastddltime": "1708925466", + "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", + "is_view": "True" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + 
"nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + 
"json": { + "materialized": false, + "viewLogic": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)" + ], + 
"primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),property_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + 
"id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": 
"urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json 
b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index c43223c68a6b64..3e79c8721486e2 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -231,7 +231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1698223433" + "transient_lastddltime": "1708925463" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -349,6 +349,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", @@ -374,6 +393,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", @@ -408,7 +471,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223435" + "transient_lastddltime": "1708925466" }, "name": "map_test", "tags": [] @@ -492,6 +555,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", @@ -517,6 +599,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", @@ -551,7 +677,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223435" + "transient_lastddltime": "1708925466" }, "name": "nested_struct_test", "tags": [] @@ -684,6 +810,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", @@ -709,6 +854,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", @@ -738,7 +927,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1698223429" + "transient_lastddltime": "1708925457" }, "name": "pokes", "tags": [] @@ -830,6 +1019,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", @@ -855,6 +1063,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", @@ -889,7 +1141,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223431" + "transient_lastddltime": "1708925459" }, "name": "struct_test", "tags": [] @@ -1000,6 +1252,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", @@ -1027,12 +1298,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "siblings", "aspect": { "json": { - "container": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)" + ], + "primary": false } }, "systemMetadata": { @@ -1042,26 +1316,67 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - 
"removed": false - } - }, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numfiles": "0", - "totalsize": "0", - "transient_lastddltime": "1698223435" - }, - "name": "struct_test_view_materialized", - "tags": [] - } - }, + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numfiles": "0", + "totalsize": "0", + "transient_lastddltime": "1708925466" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "hivedb.db1.struct_test_view_materialized", @@ -1167,6 +1482,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", @@ -1192,6 +1526,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", @@ -1226,7 +1604,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223431" + "transient_lastddltime": "1708925459" }, "name": 
"_test_table_underscore", "tags": [] @@ -1306,6 +1684,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", @@ -1331,6 +1728,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", @@ -1365,7 +1806,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": 
"1698223435" + "transient_lastddltime": "1708925466" }, "name": "union_test", "tags": [] @@ -1529,6 +1970,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", @@ -1554,6 +2014,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", @@ -1583,7 +2087,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - 
"transient_lastddltime": "1698223435", + "transient_lastddltime": "1708925466", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -1722,41 +2226,485 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "siblings", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", - "type": "VIEW" - } + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" ], - "fineGrainedLineages": [ + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),property_id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)" - ], - "confidenceScore": 1.0 + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" }, { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),service)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)" - ], + "id": 
"urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)" + ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": 
"trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + 
"json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": 
"browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" } ] } diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json index dce10ce377be3a..1f03f02fa9408f 100644 --- a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "trino", "env": "PROD", - "database": "library_catalog" + "database": "postgresqldb" }, - "name": "library_catalog" + "name": "postgresqldb" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": 
"urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { "platform": "trino", "env": "PROD", - "database": "library_catalog", + "database": "postgresqldb", "schema": "librarydb" }, "name": "librarydb" @@ -99,12 +104,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": 
"trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,63 +154,67 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:57aba13b10c1691508600999cd411c25" + "container": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { 
"json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -218,7 +230,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.book", + "schemaName": "postgresqldb.librarydb.book", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -321,12 +333,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", 
"aspect": { @@ -338,52 +351,118 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": 
"trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)", + "type": "VIEW" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -399,7 +478,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.issue_history", + "schemaName": "postgresqldb.librarydb.issue_history", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -473,12 +552,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -490,52 +570,118 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -551,7 +697,7 @@ }, { 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.member", + "schemaName": "postgresqldb.librarydb.member", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -601,12 +747,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -618,52 +765,118 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + 
"id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -679,7 +892,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.book_in_circulation", + "schemaName": "postgresqldb.librarydb.book_in_circulation", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -777,12 +990,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -794,36 +1008,101 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", 
"changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -902,12 +1181,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -988,12 +1268,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1038,12 +1319,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1130,7 +1412,209 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + 
"lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json new file mode 100644 index 00000000000000..49015efc24a623 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json @@ -0,0 +1,126 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 20000, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" + ], + "confidenceScore": 1.0, + "query": 
"urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" + ], + "confidenceScore": 1.0, + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),c)" + ], + "confidenceScore": 1.0, + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "insert into foo (a, b, c) select a, b, c from bar", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 20000, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 20000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "changeType": 
"UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1707182625000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "operationType": "INSERT", + "customProperties": { + "query_urn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" + }, + "lastUpdatedTimestamp": 20000 + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_aggregate_operations.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_aggregate_operations.json index 551760b42394cb..25e75317096df6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_aggregate_operations.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_aggregate_operations.json @@ -13,9 +13,6 @@ }, "actor": "urn:li:corpuser:user2", "operationType": "CREATE", - "customProperties": { - "query_urn": "urn:li:query:cbdb3e148ea7fdae81815da4dd64f57873fb9c3d7d4bfad4e83b3d1ebd3c45c2" - }, "lastUpdatedTimestamp": 25000 } } @@ -34,9 +31,6 @@ }, "actor": "urn:li:corpuser:user3", "operationType": "CREATE", - "customProperties": { - "query_urn": "urn:li:query:7fd78ed5f3d60f7f91206f5e0fea6851a2afe940944455fd292267613b7ee1e6" - }, "lastUpdatedTimestamp": 26000 } } diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage.json index 5eaeb4e9839254..036e5e5fa4ff28 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage.json @@ -67,7 +67,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 0, + "time": 1707182625000, "actor": "urn:li:corpuser:_ingestion" } } diff --git 
a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json new file mode 100644 index 00000000000000..183c4c8c929eff --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json @@ -0,0 +1,147 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:fc48287a96588c73bcbdc1400f0c036b8d81196135618fb09a097459d54bd970" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:fc48287a96588c73bcbdc1400f0c036b8d81196135618fb09a097459d54bd970" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:fc48287a96588c73bcbdc1400f0c036b8d81196135618fb09a097459d54bd970" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),c)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:fc48287a96588c73bcbdc1400f0c036b8d81196135618fb09a097459d54bd970", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "/* query 2 */ insert into foo (a, b) select a, b from bar", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:fc48287a96588c73bcbdc1400f0c036b8d81196135618fb09a097459d54bd970", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "/* query 1 */ insert into foo (a, b, c) select a, b, c from bar", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", + "changeType": "UPSERT", + 
"aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_known_lineage_mapping.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_known_lineage_mapping.json new file mode 100644 index 00000000000000..ab210c6f701b3f --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_known_lineage_mapping.json @@ -0,0 +1,77 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,bucket1/key1,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,bucket2/key2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, 
+ "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts.json index 27bd757c267b73..7759d71fe4a773 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts.json @@ -89,22 +89,22 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:377a73bbf094c8b176b15157c24242cdfc7a0f407d78e52e63ded08c913468f1", + "entityUrn": "urn:li:query:c4b3a21ef8c262ebbe99a5bdb6c29cb0be646392bb4af10b6f4a758af881470e", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { "json": { "statement": { - "value": "insert into downstream (a, b) select a, b from upstream1", + "value": "insert into downstream (a, c) select a, c from upstream2", "language": "SQL" }, "source": "SYSTEM", "created": { - "time": 0, + "time": 25000, "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 0, + "time": 25000, "actor": "urn:li:corpuser:_ingestion" } } @@ -112,7 +112,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:377a73bbf094c8b176b15157c24242cdfc7a0f407d78e52e63ded08c913468f1", + "entityUrn": "urn:li:query:c4b3a21ef8c262ebbe99a5bdb6c29cb0be646392bb4af10b6f4a758af881470e", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -122,7 +122,7 @@ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.downstream,PROD)" }, { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream1,PROD)" + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream2,PROD)" } ] } @@ -130,22 +130,22 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:c4b3a21ef8c262ebbe99a5bdb6c29cb0be646392bb4af10b6f4a758af881470e", + "entityUrn": "urn:li:query:377a73bbf094c8b176b15157c24242cdfc7a0f407d78e52e63ded08c913468f1", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { "json": { "statement": { - "value": "insert into downstream (a, c) select a, c from upstream2", + "value": "insert into downstream (a, b) select a, b from upstream1", "language": "SQL" }, "source": "SYSTEM", "created": { - "time": 0, + "time": 20000, "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 0, + "time": 20000, "actor": "urn:li:corpuser:_ingestion" } } @@ -153,7 +153,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:c4b3a21ef8c262ebbe99a5bdb6c29cb0be646392bb4af10b6f4a758af881470e", + "entityUrn": "urn:li:query:377a73bbf094c8b176b15157c24242cdfc7a0f407d78e52e63ded08c913468f1", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -163,7 +163,7 @@ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.downstream,PROD)" }, { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream2,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream1,PROD)" } ] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json new file mode 100644 index 00000000000000..70eb9cc2b14d26 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json @@ -0,0 +1,186 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, 
+ "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:e2629e6fd3a70a223cb3e2c9e5bd3416763782de3ec32124bc56cb835b60978a" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD),a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),a)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:e2629e6fd3a70a223cb3e2c9e5bd3416763782de3ec32124bc56cb835b60978a" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD),b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:e2629e6fd3a70a223cb3e2c9e5bd3416763782de3ec32124bc56cb835b60978a" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e2629e6fd3a70a223cb3e2c9e5bd3416763782de3ec32124bc56cb835b60978a", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create table bar as select a, b from baz", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:e2629e6fd3a70a223cb3e2c9e5bd3416763782de3ec32124bc56cb835b60978a", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD)" + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD),a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD),b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create table foo_staging as select a, b from foo_dep", + "language": "SQL" + }, + "source": "SYSTEM", 
+ "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD)" + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json index 31a37d6237e7b9..b93e7e0f5260fe 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json @@ -1,7 +1,7 @@ [ { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -16,24 +16,49 @@ "time": 0, "actor": "urn:li:corpuser:_ingestion" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:3e85e6f353c7fa33d6514cb090482852064d23df6491c9a8ae28be0d990a3c71" + "query": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1" } ], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),a)" + ], + "downstreamType": "FIELD", + 
"downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1" + } + ] } } }, { "entityType": "query", - "entityUrn": "urn:li:query:3e85e6f353c7fa33d6514cb090482852064d23df6491c9a8ae28be0d990a3c71", + "entityUrn": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { "json": { "statement": { - "value": "create table foo_session3 as select * from foo", + "value": "create table foo as select a, 2*b as b from bar", "language": "SQL" }, "source": "SYSTEM", @@ -42,7 +67,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 0, + "time": 1707182625000, "actor": "urn:li:corpuser:_ingestion" } } @@ -50,17 +75,17 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3e85e6f353c7fa33d6514cb090482852064d23df6491c9a8ae28be0d990a3c71", + "entityUrn": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { "json": { "subjects": [ { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session3,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" }, { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" } ] } @@ -135,15 +160,33 @@ 
"actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1707251710392, + "time": 1707182625000, "actor": "urn:li:corpuser:_ingestion" } } } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_66ddf44283e4543440529f1d13b82221b5d60635b6a8c39751718049ce4f47ec", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session2,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + } + ] + } + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session3,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -158,67 +201,24 @@ "time": 0, "actor": "urn:li:corpuser:_ingestion" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1" + "query": "urn:li:query:3e85e6f353c7fa33d6514cb090482852064d23df6491c9a8ae28be0d990a3c71" } ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),a)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" - ], - "confidenceScore": 0.35, - "query": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1" - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)" - ], - "downstreamType": "FIELD", - "downstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" - ], - "confidenceScore": 0.35, - "query": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1" - } - ] - } - } -}, -{ - "entityType": "query", - "entityUrn": "urn:li:query:composite_66ddf44283e4543440529f1d13b82221b5d60635b6a8c39751718049ce4f47ec", - "changeType": "UPSERT", - "aspectName": "querySubjects", - "aspect": { - "json": { - "subjects": [ - { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session2,PROD)" - }, - { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" - } - ] + "fineGrainedLineages": [] } } }, { "entityType": "query", - "entityUrn": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1", + "entityUrn": "urn:li:query:3e85e6f353c7fa33d6514cb090482852064d23df6491c9a8ae28be0d990a3c71", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { "json": { "statement": { - "value": "create table foo as select a, 2*b as b from bar", + "value": "create table foo_session3 as select * from foo", "language": "SQL" }, "source": "SYSTEM", @@ -227,7 +227,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 0, + "time": 1707182625000, "actor": "urn:li:corpuser:_ingestion" } } @@ -235,17 +235,17 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:8b3a079997d562bdb1b14eb500e6123c4b00bb0263565dcaa0b66170e72602a1", + "entityUrn": "urn:li:query:3e85e6f353c7fa33d6514cb090482852064d23df6491c9a8ae28be0d990a3c71", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { "json": { "subjects": [ { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session3,PROD)" }, { - "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)" } 
] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json index c237e3bff2a9b2..229e047010ffe6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json @@ -85,5 +85,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.425, + "generalized_statement": "CREATE VIEW `my-proj-2`.dataset.my_view AS WITH cte1 AS (SELECT * FROM dataset.table1 WHERE col1 = ?), cte2 AS (SELECT col3, col4 AS join_key FROM dataset.table2 WHERE col3 = ?) SELECT col5, cte1.*, col3 FROM dataset.table3 JOIN cte1 ON table3.col5 = cte1.col2 JOIN cte2 USING (join_key)" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index 1f4b3563f01148..5e926fca87a7e4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -43,5 +43,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT * FROM `bq-proj`.dataset.`table_2023*`" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json index 63df823b31ccbb..047827adbab856 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json @@ -43,5 +43,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + 
"generalized_statement": "SELECT * FROM (SELECT * FROM (SELECT * FROM `bq-proj`.dataset.table1))" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index edc70335244c6d..ad2cda34a73be5 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -43,5 +43,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT * FROM `bq-proj`.dataset.table_20230101" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index 56b592112df88d..ba3075f43851c7 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -65,5 +65,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.35, + "generalized_statement": "CREATE VIEW `my-project`.`my-dataset`.test_table AS SELECT * REPLACE (LOWER(something) AS something) FROM `my-project2`.`my-dataset2`.test_physical_table" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json index 7153546632d644..6d41e1285a387b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json @@ -56,5 +56,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.4, + "generalized_statement": "CREATE 
VIEW my_view AS SELECT * FROM my_project_2.my_dataset_2.sometable UNION DISTINCT SELECT * FROM my_project_2.my_dataset_2.sometable2 AS a" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json index a0ae968bfefb7a..5fc9fdef710028 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json @@ -59,5 +59,9 @@ }, "upstreams": [] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "CREATE TABLE IF NOT EXISTS costs (id INTEGER PRIMARY KEY, month TEXT NOT NULL, total_cost REAL NOT NULL, area REAL NOT NULL)" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index 24c85e7d0871b5..ecc104e36c89ba 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -52,5 +52,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "CREATE VIEW vsal AS SELECT a.deptno AS \"Department\", a.num_emp / b.total_count AS \"Employees\", a.sal_sum / b.total_sal AS \"Salary\" FROM (SELECT deptno, COUNT() AS num_emp, SUM(sal) AS sal_sum FROM scott.emp WHERE city = ? GROUP BY deptno) a, (SELECT COUNT() AS total_count, SUM(sal) AS total_sal FROM scott.emp WHERE city = ?) 
b" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json index d72471884e97d1..46675d4ff5b841 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json @@ -187,5 +187,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT SUM(totalprice) AS total_agg, * FROM snowflake_sample_data.tpch_sf1.orders WHERE orderdate = ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json index dbc844490ab334..98928adfe31e2c 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json @@ -98,5 +98,9 @@ }, "upstreams": [] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "INSERT INTO query72 SELECT i_item_desc, w_warehouse_name, d1.d_week_seq, SUM(CASE WHEN promotion.p_promo_sk IS NULL THEN ? ELSE ? END) AS no_promo, SUM(CASE WHEN NOT promotion.p_promo_sk IS NULL THEN ? ELSE ? 
END) AS promo, COUNT(*) AS total_cnt FROM catalog_sales JOIN inventory ON (cs_item_sk = inv_item_sk) JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk) JOIN item ON (i_item_sk = cs_item_sk) JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk) JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) JOIN date_dim AS d1 ON (cs_sold_date_sk = d1.d_date_sk) JOIN date_dim AS d2 ON (inv_date_sk = d2.d_date_sk) JOIN date_dim AS d3 ON (cs_ship_date_sk = d3.d_date_sk) LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) WHERE d1.d_week_seq = d2.d_week_seq AND inv_quantity_on_hand < cs_quantity AND hd_buy_potential = ? AND cd_marital_status = ? GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq LIMIT ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_with_column_list.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_with_column_list.json index 8dd3e352bc535c..6c6c99ff239b79 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_with_column_list.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_with_column_list.json @@ -37,5 +37,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "INSERT INTO downstream (a, c) SELECT a, c FROM upstream2" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_invalid_sql.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_invalid_sql.json new file mode 100644 index 00000000000000..bcf31f6be803a2 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_invalid_sql.json @@ -0,0 +1,12 @@ +{ + "query_type": "UNKNOWN", + "query_type_props": {}, + "query_fingerprint": null, + "in_tables": [], + "out_tables": [], + "column_lineage": 
null, + "debug_info": { + "confidence": 0.0, + "generalized_statement": null + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json index ddbb559e8f1b1a..4ba44d9e54c9db 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json @@ -1,7 +1,7 @@ { "query_type": "MERGE", "query_type_props": {}, - "query_fingerprint": "d6631edda99334cff011305d30c423f01d996185372c2aec6bb3e82e185fcad0", + "query_fingerprint": "38a78af8cc48333df0e4de7d6af5b9507a87dd8a2f129ef97c9b06dce2ca7b9f", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_ios,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_web,PROD)" @@ -9,5 +9,9 @@ "out_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.base_union,PROD)" ], - "column_lineage": null + "column_lineage": null, + "debug_info": { + "confidence": 0.2, + "generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) 
AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (TIMESTAMP(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json index 660da7d53a1052..155a6bb786a7a3 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json @@ -62,5 +62,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT a, b, (SELECT c FROM table2 WHERE table2.id = table1.id) AS c FROM table1" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_materialized_view_auto_refresh.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_materialized_view_auto_refresh.json index de55394173329e..5096c382889c1b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_materialized_view_auto_refresh.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_materialized_view_auto_refresh.json @@ -54,5 +54,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "CREATE MATERIALIZED VIEW mv_total_orders AUTO REFRESH YES AS SELECT c.cust_id, c.first_name, SUM(o.amount) AS total_amount FROM orders AS o JOIN customer AS c ON c.cust_id = o.customer_id GROUP BY c.cust_id, c.first_name" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_system_automove.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_system_automove.json new 
file mode 100644 index 00000000000000..9ba9fdb0a9fab1 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_system_automove.json @@ -0,0 +1,45 @@ +{ + "query_type": "CREATE_TABLE_AS_SELECT", + "query_type_props": { + "kind": "TABLE" + }, + "query_fingerprint": "93a463cd97acfcf1bf0a57a087596771f72d65e013f856a9d8c17c81b6439a2f", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.permanent_1,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.pg_automv.mv_tbl__auto_mv_12708107__0_recomputed,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.pg_automv.mv_tbl__auto_mv_12708107__0_recomputed,PROD)", + "column": "aggvar_3", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.pg_automv.mv_tbl__auto_mv_12708107__0_recomputed,PROD)", + "column": "num_rec", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" + }, + "upstreams": [] + } + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "CREATE TABLE \"pg_automv\".\"mv_tbl__auto_mv_12708107__0_recomputed\" AS (SELECT COUNT(CAST(? AS INTEGER)) AS \"aggvar_3\", COUNT(CAST(? AS INTEGER)) AS \"num_rec\" FROM \"public\".\"permanent_1\" AS \"permanent_1\" WHERE ((CAST(\"permanent_1\".\"insertxid\" AS SMALLINT) <= ?) AND (CAST(\"permanent_1\".\"deletexid\" AS SMALLINT) > ?)) OR (CAST(FALSE AS BOOLEAN) AND (CAST(\"permanent_1\".\"insertxid\" AS SMALLINT) = ?) 
AND (CAST(\"permanent_1\".\"deletexid\" AS SMALLINT) <> ?)))" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json index 9a4069d0304e1e..a56480f41c6f3f 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json @@ -48,5 +48,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.35, + "generalized_statement": "CREATE TABLE #my_custom_name DISTKEY(1) SORTKEY(\"1\", \"2\") AS WITH cte AS (SELECT * FROM other_schema.table1) SELECT * FROM cte" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_union_view.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_union_view.json new file mode 100644 index 00000000000000..a7dad433a713a9 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_union_view.json @@ -0,0 +1,19 @@ +{ + "query_type": "CREATE_VIEW", + "query_type_props": { + "kind": "VIEW" + }, + "query_fingerprint": "c39068cf687524197dc7039afa9512f39cb35cdad6813037d4d5c348980e71cf", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.sales,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.spectrum.sales,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.sales_vw,PROD)" + ], + "column_lineage": null, + "debug_info": { + "confidence": 0.3, + "generalized_statement": "CREATE VIEW sales_vw AS SELECT * FROM public.sales UNION ALL SELECT * FROM spectrum.sales WITH NO SCHEMA BINDING" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json index 4763a736b08c37..26cfce8cff2e58 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json @@ -35,5 +35,9 @@ }, "upstreams": [] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "SELECT A, B, C FROM t1 INNER JOIN t2 ON t1.id = t2.id" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json index bdff51521b2a64..6ed783ea7e45b5 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json @@ -25,5 +25,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "SELECT COUNT(etl_data_dt_id) FROM something_prd.fact_complaint_snapshot WHERE etl_data_dt_id = ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json index 438accca34649d..9ce3e80e1a4384 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json @@ -57,5 +57,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT DISTINCT post_id, widget.asset.id, LEAST(widget.metric.metricA, widget.metric.metric_b) AS min_metric FROM data_reporting.abcde_transformed WHERE post_id LIKE ?" 
+ } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json index c6a24638a86c88..3523eae9ee4ffa 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json @@ -43,5 +43,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT ? AS label, SUM(totalprice) AS total_agg FROM snowflake_sample_data.tpch_sf10.orders UNION ALL SELECT ? AS label, SUM(totalprice) AS total_agg FROM snowflake_sample_data.tpch_sf100.orders" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json index 46edda4f0909be..e9518d5e54fdc7 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json @@ -25,5 +25,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "SELECT GREATEST(col1, COL2) AS max_col FROM mytable" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max_with_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max_with_schema.json index a914447bdd4c03..974bb510e8d886 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max_with_schema.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max_with_schema.json @@ -25,5 +25,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT GREATEST(`col1`, COL2, `this_will_not_resolve`) AS max_col FROM mytable" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json index baa5ade05bb3c2..7c2d48a455c031 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json @@ -36,5 +36,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "WITH cte1 AS (SELECT col1, col2 FROM table1 WHERE col1 = ?), cte2 AS (SELECT col3, col4 FROM table2 WHERE col2 = ?) SELECT cte1.col1, cte2.col3 FROM cte1 JOIN cte2 ON cte1.col2 = cte2.col4" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json index a118fa7acb7113..f4c5e6bd7a0140 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json @@ -39,5 +39,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT DISTINCT post_id, widget.asset.id FROM data_reporting.abcde_transformed WHERE post_id LIKE ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json index 25fb499cfd084c..cef52215b38b81 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json @@ -47,5 +47,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT CASE WHEN o.\"totalprice\" > ? THEN ? WHEN o.\"totalprice\" > ? THEN ? ELSE ? END AS total_price_category, CASE WHEN o.\"is_payment_successful\" THEN o.\"totalprice\" ELSE ? 
END AS total_price_success FROM snowflake_sample_data.tpch_sf1.orders AS o" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json index 0d3fda383870ea..34f29692ea5010 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json @@ -61,5 +61,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT CAST(o.o_orderkey AS DECIMAL(?, ?)) AS orderkey, CAST(o.o_totalprice AS INT) AS total_cast_int, CAST(o.o_totalprice AS DECIMAL(?, ?)) AS total_cast_float FROM snowflake_sample_data.tpch_sf1.orders AS o LIMIT ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json index 3f4a8ece99a8c4..e63d393cd0fff4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json @@ -79,5 +79,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "SELECT SUM(o.\"totalprice\") AS total_agg, AVG(\"TotalPrice\") AS total_avg, MIN(\"TOTALPRICE\") AS total_min, MAX(TotalPrice) AS total_max FROM snowflake_sample_data.tpch_sf1.orders AS o" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json index 5ccf44a333db08..e97ad8837ca23d 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json +++ 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json @@ -83,5 +83,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "CREATE TABLE snowflake_sample_data.tpch_sf1.orders_normalized AS SELECT SUM(o.\"totalprice\") AS Total_Agg, AVG(\"TotalPrice\") AS TOTAL_AVG, MIN(\"TOTALPRICE\") AS TOTAL_MIN, MAX(TotalPrice) AS Total_Max FROM snowflake_sample_data.tpch_sf1.orders AS o" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json index ca0fdac55d5552..0a832165aa1087 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json @@ -45,5 +45,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "WITH cte_alias AS (SELECT col1, col2 FROM table1) SELECT table2.col2, cte_alias.col1 FROM table2 JOIN table3 AS cte_alias ON cte_alias.col2 = cte_alias.col2" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json index 53fe500950507f..6571d894c76786 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json @@ -3,7 +3,7 @@ "query_type_props": { "kind": "TABLE" }, - "query_fingerprint": "7ec7291bc95ab8e4978f86f3854336100aadca5143b79cfd92edbd5f245af916", + "query_fingerprint": "2aa655ab211e061dc8c1161e0b2a7073b38636f9ffcc4719d4e70743e3321cb2", "in_tables": [ 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.customer_last_purchase_date,PROD)", "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.ecommerce.purchases,PROD)" @@ -124,5 +124,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.4, + "generalized_statement": "CREATE TABLE active_customer_ltv AS (WITH active_customers AS (SELECT * FROM customer_last_purchase_date WHERE last_purchase_date >= CURRENT_DATE - INTERVAL DAYS), purchases AS (SELECT * FROM ecommerce.purchases) SELECT active_customers.user_fk, active_customers.email, active_customers.last_purchase_date, SUM(purchases.purchase_amount) AS lifetime_purchase_amount, COUNT(DISTINCT (purchases.pk)) AS lifetime_purchase_count, SUM(purchases.purchase_amount) / COUNT(DISTINCT (purchases.pk)) AS average_purchase_amount FROM active_customers JOIN purchases ON active_customers.user_fk = purchases.user_fk GROUP BY ?, ?, ?)" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json index 1e88e638f0fae6..020fe45bcb9704 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json @@ -53,5 +53,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "SELECT my_db.my_schema.my_table.id, CASE WHEN my_db.my_schema.my_table.id > ? THEN ? ELSE ? 
END AS id_gt_100, my_db.my_schema.my_table.struct_field.field1 AS struct_field1 FROM my_db.my_schema.my_table" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json index d629d16848cdb7..fa1389768d2dc9 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json @@ -37,5 +37,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "WITH cte1 AS (SELECT col1, col2 FROM table1 WHERE col1 = ?), cte2 AS (SELECT col3, col4 FROM table2 WHERE col2 = ?) SELECT cte1.col1, table3.col6 FROM cte1 JOIN table3 ON table3.col5 = cte1.col2" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json index a4b2e636a6867e..e70267da3ec5be 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json @@ -55,5 +55,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "UPDATE my_table SET col1 = t1.col1 || t1.col2, col2 = t1.col1 || t2.col2 FROM table1 AS t1 JOIN table2 AS t2 ON t1.id = t2.id WHERE my_table.id = t1.id" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json index a6ac74f36006b3..e4c98b3010a6cd 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json 
@@ -35,5 +35,9 @@ }, "upstreams": [] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "UPDATE snowflake_sample_data.tpch_sf1.orders SET orderkey = ?, totalprice = ? WHERE orderkey = ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json index 6ffc9cfc2c714a..ef47ca574bad9d 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json @@ -27,5 +27,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "UPDATE snowflake_sample_data.tpch_sf1.orders SET orderkey = orderkey + ?" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json index 40cf06865ac21d..69bde16494b6d8 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json @@ -40,5 +40,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.9, + "generalized_statement": "CREATE TABLE demo_user.test_lineage2 AS (SELECT ppd.PatientId, ppf.bmi FROM demo_user.pima_patient_features AS ppf JOIN demo_user.pima_patient_diagnoses AS ppd ON ppd.PatientId = ppf.PatientId) WITH DATA" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json index d9285024879848..23a4e84a9b6d7e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json +++ 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json @@ -44,5 +44,9 @@ } ] } - ] + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "SELECT col1, col2 FROM dbc.table1 WHERE col1 = ? EXCEPT SELECT col1, col2 FROM dbc.table2" + } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py index 8b0318664ea05f..ab2fc0f005e76e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py @@ -4,12 +4,14 @@ import pytest from freezegun import freeze_time -import datahub.emitter.mce_builder as builder from datahub.metadata.urns import CorpUserUrn, DatasetUrn -from datahub.sql_parsing.sql_parsing_aggregator_v2 import ( +from datahub.sql_parsing.sql_parsing_aggregator import ( + KnownQueryLineageInfo, QueryLogSetting, SqlParsingAggregator, ) +from datahub.sql_parsing.sql_parsing_common import QueryType +from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, ColumnRef from tests.test_helpers import mce_helpers RESOURCE_DIR = pathlib.Path(__file__).parent / "aggregator_goldens" @@ -24,8 +26,6 @@ def _ts(ts: int) -> datetime: def test_basic_lineage(pytestconfig: pytest.Config) -> None: aggregator = SqlParsingAggregator( platform="redshift", - platform_instance=None, - env=builder.DEFAULT_ENV, generate_lineage=True, generate_usage_statistics=False, generate_operations=False, @@ -50,8 +50,6 @@ def test_basic_lineage(pytestconfig: pytest.Config) -> None: def test_overlapping_inserts(pytestconfig: pytest.Config) -> None: aggregator = SqlParsingAggregator( platform="redshift", - platform_instance=None, - env=builder.DEFAULT_ENV, generate_lineage=True, generate_usage_statistics=False, generate_operations=False, @@ -83,8 +81,6 @@ def test_overlapping_inserts(pytestconfig: pytest.Config) -> None: def 
test_temp_table(pytestconfig: pytest.Config) -> None: aggregator = SqlParsingAggregator( platform="redshift", - platform_instance=None, - env=builder.DEFAULT_ENV, generate_lineage=True, generate_usage_statistics=False, generate_operations=False, @@ -136,8 +132,6 @@ def test_temp_table(pytestconfig: pytest.Config) -> None: def test_aggregate_operations(pytestconfig: pytest.Config) -> None: aggregator = SqlParsingAggregator( platform="redshift", - platform_instance=None, - env=builder.DEFAULT_ENV, generate_lineage=False, generate_queries=False, generate_usage_statistics=False, @@ -181,8 +175,6 @@ def test_aggregate_operations(pytestconfig: pytest.Config) -> None: def test_view_lineage(pytestconfig: pytest.Config) -> None: aggregator = SqlParsingAggregator( platform="redshift", - platform_instance=None, - env=builder.DEFAULT_ENV, generate_lineage=True, generate_usage_statistics=False, generate_operations=False, @@ -215,3 +207,151 @@ def test_view_lineage(pytestconfig: pytest.Config) -> None: outputs=mcps, golden_path=RESOURCE_DIR / "test_view_lineage.json", ) + + +@freeze_time(FROZEN_TIME) +def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + + aggregator.add_known_lineage_mapping( + upstream_urn=DatasetUrn("redshift", "dev.public.bar").urn(), + downstream_urn=DatasetUrn("redshift", "dev.public.foo").urn(), + ) + aggregator.add_known_lineage_mapping( + upstream_urn=DatasetUrn("s3", "bucket1/key1").urn(), + downstream_urn=DatasetUrn("redshift", "dev.public.bar").urn(), + ) + aggregator.add_known_lineage_mapping( + upstream_urn=DatasetUrn("redshift", "dev.public.foo").urn(), + downstream_urn=DatasetUrn("s3", "bucket2/key2").urn(), + ) + + mcps = list(aggregator.gen_metadata()) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / 
"test_known_lineage_mapping.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + + aggregator.add_observed_query( + query="/* query 1 */ insert into foo (a, b, c) select a, b, c from bar", + default_db="dev", + default_schema="public", + ) + aggregator.add_observed_query( + query="/* query 2 */ insert into foo (a, b) select a, b from bar", + default_db="dev", + default_schema="public", + ) + + mcps = list(aggregator.gen_metadata()) + + # In this case, the lineage for a and b is attributed to query 2, and + # the lineage for c is attributed to query 1. Note that query 1 does + # not get any credit for a and b, as they are already covered by query 2, + # which came later and hence has higher precedence. + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_column_lineage_deduplication.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=True, + ) + + downstream_urn = DatasetUrn("redshift", "dev.public.foo").urn() + upstream_urn = DatasetUrn("redshift", "dev.public.bar").urn() + + known_query_lineage = KnownQueryLineageInfo( + query_text="insert into foo (a, b, c) select a, b, c from bar", + downstream=downstream_urn, + upstreams=[upstream_urn], + column_lineage=[ + ColumnLineageInfo( + downstream=ColumnRef(table=downstream_urn, column="a"), + upstreams=[ColumnRef(table=upstream_urn, column="a")], + ), + ColumnLineageInfo( + downstream=ColumnRef(table=downstream_urn, column="b"), + upstreams=[ColumnRef(table=upstream_urn, column="b")], + ), + ColumnLineageInfo( + 
downstream=ColumnRef(table=downstream_urn, column="c"), + upstreams=[ColumnRef(table=upstream_urn, column="c")], + ), + ], + timestamp=_ts(20), + query_type=QueryType.INSERT, + ) + + aggregator.add_known_query_lineage(known_query_lineage) + + mcps = list(aggregator.gen_metadata()) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_add_known_query_lineage.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_table_rename(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + + # Register that foo_staging is renamed to foo. + aggregator.add_table_rename( + original_urn=DatasetUrn("redshift", "dev.public.foo_staging").urn(), + new_urn=DatasetUrn("redshift", "dev.public.foo").urn(), + ) + + # Add an unrelated query. + aggregator.add_observed_query( + query="create table bar as select a, b from baz", + default_db="dev", + default_schema="public", + ) + + # Add the query that created the staging table. 
+ aggregator.add_observed_query( + query="create table foo_staging as select a, b from foo_dep", + default_db="dev", + default_schema="public", + ) + + mcps = list(aggregator.gen_metadata()) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_table_rename.json", + ) diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index e3396e74fccddc..de329c6cfe2396 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -7,6 +7,18 @@ RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens" +def test_invalid_sql(): + assert_sql_result( + """ +SELECT as ' +FROM snowflake_sample_data.tpch_sf1.orders o +""", + dialect="snowflake", + expected_file=RESOURCE_DIR / "test_invalid_sql.json", + allow_table_error=True, + ) + + def test_select_max(): # The COL2 should get normalized to col2. assert_sql_result( @@ -1028,3 +1040,54 @@ def test_redshift_temp_table_shortcut(): }, expected_file=RESOURCE_DIR / "test_redshift_temp_table_shortcut.json", ) + + +def test_redshift_union_view(): + # TODO: This currently fails to generate CLL. Need to debug further. + assert_sql_result( + """ +CREATE VIEW sales_vw AS SELECT * FROM public.sales UNION ALL SELECT * FROM spectrum.sales WITH NO SCHEMA BINDING +""", + dialect="redshift", + default_db="my_db", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.sales,PROD)": { + "col1": "INTEGER", + "col2": "INTEGER", + }, + # Testing a case where we only have one schema available. + }, + expected_file=RESOURCE_DIR / "test_redshift_union_view.json", + ) + + +@pytest.mark.skip(reason="sqlglot doesn't recognize the BACKUP directive right now") +def test_redshift_system_automove() -> None: + # Came across this in the Redshift query log, but it seems to be a system-generated query. 
+ assert_sql_result( + """ +CREATE TABLE "pg_automv"."mv_tbl__auto_mv_12708107__0_recomputed" +BACKUP YES +DISTSTYLE KEY +DISTKEY(2) +AS ( + SELECT + COUNT(CAST(1 AS INT4)) AS "aggvar_3", + COUNT(CAST(1 AS INT4)) AS "num_rec" + FROM + "public"."permanent_1" AS "permanent_1" + WHERE ( + (CAST("permanent_1"."insertxid" AS INT8) <= 41990135) + AND (CAST("permanent_1"."deletexid" AS INT8) > 41990135) + ) + OR ( + CAST(FALSE AS BOOL) + AND (CAST("permanent_1"."insertxid" AS INT8) = 0) + AND (CAST("permanent_1"."deletexid" AS INT8) <> 0) + ) +) +""", + dialect="redshift", + default_db="my_db", + expected_file=RESOURCE_DIR / "test_redshift_system_automove.json", + ) diff --git a/metadata-ingestion/tests/unit/test_utilities.py b/metadata-ingestion/tests/unit/test_utilities.py index 368cedfe480401..fc2aa27f70b431 100644 --- a/metadata-ingestion/tests/unit/test_utilities.py +++ b/metadata-ingestion/tests/unit/test_utilities.py @@ -1,3 +1,5 @@ +import doctest + from datahub.utilities.delayed_iter import delayed_iter from datahub.utilities.sql_parser import SqlLineageSQLParser @@ -282,3 +284,14 @@ def test_sqllineage_sql_parser_tables_with_special_names(): ] assert sorted(SqlLineageSQLParser(sql_query).get_tables()) == expected_tables assert sorted(SqlLineageSQLParser(sql_query).get_columns()) == expected_columns + + +def test_logging_name_extraction(): + import datahub.utilities.logging_manager + + assert ( + doctest.testmod( + datahub.utilities.logging_manager, raise_on_error=True + ).attempted + > 0 + ) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index a3711afb753dc8..544afc32a52e78 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -19,10 +19,10 @@ import com.linkedin.metadata.Constants; import 
com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; -import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -49,10 +49,10 @@ private DefaultAspectsUtil() {} public static final Set<ChangeType> SUPPORTED_TYPES = Set.of(ChangeType.UPSERT, ChangeType.CREATE, ChangeType.PATCH); - public static List<MCPBatchItem> getAdditionalChanges( + public static List<MCPItem> getAdditionalChanges( @Nonnull AspectsBatch batch, @Nonnull EntityService<?> entityService, boolean browsePathV2) { - Map<Urn, List<MCPBatchItem>> itemsByUrn = + Map<Urn, List<MCPItem>> itemsByUrn = batch.getMCPItems().stream() .filter(item -> SUPPORTED_TYPES.contains(item.getChangeType())) .collect(Collectors.groupingBy(BatchItem::getUrn)); @@ -79,13 +79,13 @@ public static List<MCPBatchItem> getAdditionalChanges( RecordTemplate entityKeyAspect = defaultAspects.get(0).getSecond(); // pick the first item as a template (use entity information) - MCPBatchItem templateItem = aspectsEntry.getValue().get(0); + MCPItem templateItem = aspectsEntry.getValue().get(0); // generate default aspects (including key aspect, always upserts) return defaultAspects.stream() .map( entry -> - MCPUpsertBatchItem.MCPUpsertBatchItemBuilder.build( + ChangeItemImpl.ChangeItemImplBuilder.build( getProposalFromAspect( entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), templateItem.getAuditStamp(), @@ -280,7 +280,7 @@ private static MetadataChangeProposal getProposalFromAspect( String aspectName, RecordTemplate 
aspect, RecordTemplate entityKeyAspect, - MCPBatchItem templateItem) { + MCPItem templateItem) { MetadataChangeProposal proposal = new MetadataChangeProposal(); GenericAspect genericAspect = GenericRecordUtils.serializeAspect(aspect); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index 974406c0be0df1..0fcb765b340cf2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -3,7 +3,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; import com.linkedin.entity.client.SystemEntityClient; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.r2.RemoteInvocationException; import java.net.URISyntaxException; @@ -11,14 +11,29 @@ import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import javax.annotation.PostConstruct; import lombok.Builder; import lombok.Getter; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Component; @Builder -public class EntityClientAspectRetriever implements AspectRetriever { +@Component +@RequiredArgsConstructor +public class EntityClientAspectRetriever implements CachingAspectRetriever { @Getter private final EntityRegistry entityRegistry; private final SystemEntityClient entityClient; + /** + * Preventing a circular dependency. Once constructed the AspectRetriever is injected into a few + * of the services which rely on the AspectRetriever when using the Java EntityClient. 
The Java + * EntityClient depends on services which in turn depend on the AspectRetriever + */ + @PostConstruct + public void postConstruct() { + entityClient.postConstruct(this); + } + @Nullable @Override public Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName) @@ -30,6 +45,10 @@ public Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName @Override public Map<Urn, Map<String, Aspect>> getLatestAspectObjects( Set<Urn> urns, Set<String> aspectNames) throws RemoteInvocationException, URISyntaxException { - return entityClient.getLatestAspects(urns, aspectNames); + if (urns.isEmpty() || aspectNames.isEmpty()) { + return Map.of(); + } else { + return entityClient.getLatestAspects(urns, aspectNames); + } } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 15de029340a3c7..fed6379f921045 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -20,6 +20,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.aspect.EnvelopedAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; @@ -83,15 +84,26 @@ public class JavaEntityClient implements EntityClient { private final Clock _clock = Clock.systemUTC(); - private final EntityService<?> _entityService; - private final DeleteEntityService _deleteEntityService; - private final EntitySearchService _entitySearchService; - private final CachingEntitySearchService _cachingEntitySearchService; - private final SearchService _searchService; - private final LineageSearchService _lineageSearchService; - private final 
TimeseriesAspectService _timeseriesAspectService; + private final EntityService<?> entityService; + private final DeleteEntityService deleteEntityService; + private final EntitySearchService entitySearchService; + private final CachingEntitySearchService cachingEntitySearchService; + private final SearchService searchService; + private final LineageSearchService lineageSearchService; + private final TimeseriesAspectService timeseriesAspectService; private final RollbackService rollbackService; - private final EventProducer _eventProducer; + private final EventProducer eventProducer; + + /** + * Preventing a circular dependency. Once constructed the AspectRetriever is injected into a few + * of the services which rely on the AspectRetriever when using the Java EntityClient. The Java + * EntityClient depends on services which in turn depend on the AspectRetriever + */ + @Override + public void postConstruct(AspectRetriever aspectRetriever) { + entitySearchService.postConstruct(aspectRetriever); + timeseriesAspectService.postConstruct(aspectRetriever); + } @Nullable public EntityResponse getV2( @@ -101,13 +113,13 @@ public EntityResponse getV2( @Nonnull final Authentication authentication) throws RemoteInvocationException, URISyntaxException { final Set<String> projectedAspects = - aspectNames == null ? _entityService.getEntityAspectNames(entityName) : aspectNames; - return _entityService.getEntityV2(entityName, urn, projectedAspects); + aspectNames == null ? 
entityService.getEntityAspectNames(entityName) : aspectNames; + return entityService.getEntityV2(entityName, urn, projectedAspects); } @Nonnull public Entity get(@Nonnull final Urn urn, @Nonnull final Authentication authentication) { - return _entityService.getEntity(urn, ImmutableSet.of()); + return entityService.getEntity(urn, ImmutableSet.of()); } @Nonnull @@ -119,8 +131,8 @@ public Map<Urn, EntityResponse> batchGetV2( @Nonnull Authentication authentication) throws RemoteInvocationException, URISyntaxException { final Set<String> projectedAspects = - aspectNames == null ? _entityService.getEntityAspectNames(entityName) : aspectNames; - return _entityService.getEntitiesV2(entityName, urns, projectedAspects); + aspectNames == null ? entityService.getEntityAspectNames(entityName) : aspectNames; + return entityService.getEntitiesV2(entityName, urns, projectedAspects); } @Nonnull @@ -131,14 +143,14 @@ public Map<Urn, EntityResponse> batchGetVersionedV2( @Nonnull final Authentication authentication) throws RemoteInvocationException, URISyntaxException { final Set<String> projectedAspects = - aspectNames == null ? _entityService.getEntityAspectNames(entityName) : aspectNames; - return _entityService.getEntitiesVersionedV2(versionedUrns, projectedAspects); + aspectNames == null ? 
entityService.getEntityAspectNames(entityName) : aspectNames; + return entityService.getEntitiesVersionedV2(versionedUrns, projectedAspects); } @Nonnull public Map<Urn, Entity> batchGet( @Nonnull final Set<Urn> urns, @Nonnull final Authentication authentication) { - return _entityService.getEntities(urns, ImmutableSet.of()); + return entityService.getEntities(urns, ImmutableSet.of()); } /** @@ -160,7 +172,7 @@ public AutoCompleteResult autoComplete( @Nullable String field, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _cachingEntitySearchService.autoComplete( + return cachingEntitySearchService.autoComplete( entityType, query, field, filterOrDefaultEmptyFilter(requestFilters), limit, null); } @@ -181,7 +193,7 @@ public AutoCompleteResult autoComplete( @Nonnull int limit, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _cachingEntitySearchService.autoComplete( + return cachingEntitySearchService.autoComplete( entityType, query, "", filterOrDefaultEmptyFilter(requestFilters), limit, null); } @@ -205,9 +217,9 @@ public BrowseResult browse( @Nonnull final Authentication authentication) throws RemoteInvocationException { return ValidationUtils.validateBrowseResult( - _cachingEntitySearchService.browse( + cachingEntitySearchService.browse( entityType, path, newFilter(requestFilters), start, limit, null), - _entityService); + entityService); } /** @@ -232,8 +244,7 @@ public BrowseResultV2 browseV2( @Nonnull Authentication authentication, @Nullable SearchFlags searchFlags) { // TODO: cache browseV2 results - return _entitySearchService.browseV2( - entityName, path, filter, input, start, count, searchFlags); + return entitySearchService.browseV2(entityName, path, filter, input, start, count, searchFlags); } /** @@ -258,7 +269,7 @@ public BrowseResultV2 browseV2( @Nonnull Authentication authentication, @Nullable SearchFlags searchFlags) { // TODO: cache browseV2 results - return 
_entitySearchService.browseV2( + return entitySearchService.browseV2( entityNames, path, filter, input, start, count, searchFlags); } @@ -270,7 +281,7 @@ public void update(@Nonnull final Entity entity, @Nonnull final Authentication a AuditStamp auditStamp = new AuditStamp(); auditStamp.setActor(Urn.createFromString(authentication.getActor().toUrnStr())); auditStamp.setTime(Clock.systemUTC().millis()); - _entityService.ingestEntity(entity, auditStamp); + entityService.ingestEntity(entity, auditStamp); } @SneakyThrows @@ -289,7 +300,7 @@ public void updateWithSystemMetadata( auditStamp.setActor(Urn.createFromString(authentication.getActor().toUrnStr())); auditStamp.setTime(Clock.systemUTC().millis()); - _entityService.ingestEntity(entity, auditStamp, systemMetadata); + entityService.ingestEntity(entity, auditStamp, systemMetadata); tryIndexRunId( com.datahub.util.ModelUtils.getUrnFromSnapshotUnion(entity.getValue()), systemMetadata); } @@ -302,7 +313,7 @@ public void batchUpdate( AuditStamp auditStamp = new AuditStamp(); auditStamp.setActor(Urn.createFromString(authentication.getActor().toUrnStr())); auditStamp.setTime(Clock.systemUTC().millis()); - _entityService.ingestEntities( + entityService.ingestEntities( entities.stream().collect(Collectors.toList()), auditStamp, ImmutableList.of()); } @@ -331,9 +342,9 @@ public SearchResult search( throws RemoteInvocationException { return ValidationUtils.validateSearchResult( - _entitySearchService.search( + entitySearchService.search( List.of(entity), input, newFilter(requestFilters), null, start, count, searchFlags), - _entityService); + entityService); } /** @@ -358,8 +369,8 @@ public ListResult list( throws RemoteInvocationException { return ValidationUtils.validateListResult( toListResult( - _entitySearchService.filter(entity, newFilter(requestFilters), null, start, count)), - _entityService); + entitySearchService.filter(entity, newFilter(requestFilters), null, start, count)), + entityService); } /** @@ -386,9 +397,9 
@@ public SearchResult search( @Nullable SearchFlags searchFlags) throws RemoteInvocationException { return ValidationUtils.validateSearchResult( - _entitySearchService.search( + entitySearchService.search( List.of(entity), input, filter, sortCriterion, start, count, searchFlags), - _entityService); + entityService); } @Nonnull @@ -434,9 +445,9 @@ public SearchResult searchAcrossEntities( final SearchFlags finalFlags = searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true); return ValidationUtils.validateSearchResult( - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( entities, input, filter, sortCriterion, start, count, finalFlags, facets), - _entityService); + entityService); } @Nonnull @@ -454,9 +465,9 @@ public ScrollResult scrollAcrossEntities( final SearchFlags finalFlags = searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true); return ValidationUtils.validateScrollResult( - _searchService.scrollAcrossEntities( + searchService.scrollAcrossEntities( entities, input, filter, null, scrollId, keepAlive, count, finalFlags), - _entityService); + entityService); } @Nonnull @@ -475,7 +486,7 @@ public LineageSearchResult searchAcrossLineage( @Nonnull final Authentication authentication) throws RemoteInvocationException { return ValidationUtils.validateLineageSearchResult( - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( sourceUrn, direction, entities, @@ -488,7 +499,7 @@ public LineageSearchResult searchAcrossLineage( null, null, searchFlags), - _entityService); + entityService); } @Nonnull @@ -509,7 +520,7 @@ public LineageSearchResult searchAcrossLineage( @Nonnull final Authentication authentication) throws RemoteInvocationException { return ValidationUtils.validateLineageSearchResult( - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( sourceUrn, direction, entities, @@ -522,7 +533,7 @@ public LineageSearchResult 
searchAcrossLineage( startTimeMillis, endTimeMillis, searchFlags), - _entityService); + entityService); } @Nonnull @@ -546,7 +557,7 @@ public LineageScrollResult scrollAcrossLineage( final SearchFlags finalFlags = searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true).setSkipCache(true); return ValidationUtils.validateLineageScrollResult( - _lineageSearchService.scrollAcrossLineage( + lineageSearchService.scrollAcrossLineage( sourceUrn, direction, entities, @@ -560,7 +571,7 @@ public LineageScrollResult scrollAcrossLineage( startTimeMillis, endTimeMillis, finalFlags), - _entityService); + entityService); } /** @@ -573,19 +584,19 @@ public LineageScrollResult scrollAcrossLineage( @Nonnull public StringArray getBrowsePaths(@Nonnull Urn urn, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return new StringArray(_entitySearchService.getBrowsePaths(urn.getEntityType(), urn)); + return new StringArray(entitySearchService.getBrowsePaths(urn.getEntityType(), urn)); } public void setWritable(boolean canWrite, @Nonnull final Authentication authentication) throws RemoteInvocationException { - _entityService.setWritable(canWrite); + entityService.setWritable(canWrite); } @Nonnull public Map<String, Long> batchGetTotalEntityCount( @Nonnull List<String> entityNames, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _searchService.docCountPerEntity(entityNames); + return searchService.docCountPerEntity(entityNames); } /** List all urns existing for a particular Entity type. */ @@ -595,19 +606,19 @@ public ListUrnsResult listUrns( final int count, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _entityService.listUrns(entityName, start, count); + return entityService.listUrns(entityName, start, count); } /** Hard delete an entity with a particular urn. 
*/ public void deleteEntity(@Nonnull final Urn urn, @Nonnull final Authentication authentication) throws RemoteInvocationException { - _entityService.deleteUrn(urn); + entityService.deleteUrn(urn); } @Override public void deleteEntityReferences(@Nonnull Urn urn, @Nonnull Authentication authentication) throws RemoteInvocationException { - withRetry(() -> _deleteEntityService.deleteReferencesTo(urn, false), "deleteEntityReferences"); + withRetry(() -> deleteEntityService.deleteReferencesTo(urn, false), "deleteEntityReferences"); } @Nonnull @@ -621,13 +632,13 @@ public SearchResult filter( @Nonnull final Authentication authentication) throws RemoteInvocationException { return ValidationUtils.validateSearchResult( - _entitySearchService.filter(entity, filter, sortCriterion, start, count), _entityService); + entitySearchService.filter(entity, filter, sortCriterion, start, count), entityService); } @Override public boolean exists(@Nonnull Urn urn, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _entityService.exists(urn, true); + return entityService.exists(urn, true); } @SneakyThrows @@ -638,7 +649,7 @@ public VersionedAspect getAspect( @Nonnull Long version, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); + return entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); } @SneakyThrows @@ -649,7 +660,7 @@ public VersionedAspect getAspectOrNull( @Nonnull Long version, @Nonnull final Authentication authentication) throws RemoteInvocationException { - return _entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); + return entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); } @SneakyThrows @@ -682,7 +693,7 @@ public List<EnvelopedAspect> getTimeseriesAspectValues( } response.setValues( new EnvelopedAspectArray( - 
_timeseriesAspectService.getAspectValues( + timeseriesAspectService.getAspectValues( Urn.createFromString(urn), entity, aspect, @@ -711,10 +722,10 @@ public String ingestProposal( AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(List.of(metadataChangeProposal), auditStamp, _entityService) + .mcps(List.of(metadataChangeProposal), auditStamp, entityService) .build(); - IngestResult one = _entityService.ingestProposal(batch, async).stream().findFirst().get(); + IngestResult one = entityService.ingestProposal(batch, async).stream().findFirst().get(); Urn urn = one.getUrn(); tryIndexRunId(urn, metadataChangeProposal.getSystemMetadata()); @@ -731,7 +742,7 @@ public <T extends RecordTemplate> Optional<T> getVersionedAspect( @Nonnull final Authentication authentication) throws RemoteInvocationException { VersionedAspect entity = - _entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); + entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); if (entity != null && entity.hasAspect()) { DataMap rawAspect = ((DataMap) entity.data().get("aspect")); if (rawAspect.containsKey(aspectClass.getCanonicalName())) { @@ -750,7 +761,7 @@ public DataMap getRawAspect( @Nonnull Authentication authentication) throws RemoteInvocationException { VersionedAspect entity = - _entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); + entityService.getVersionedAspect(Urn.createFromString(urn), aspect, version); if (entity == null) { return null; } @@ -770,7 +781,7 @@ public void producePlatformEvent( @Nonnull PlatformEvent event, @Nonnull Authentication authentication) throws Exception { - _eventProducer.producePlatformEvent(name, key, event); + eventProducer.producePlatformEvent(name, key, event); } @Override @@ -782,7 +793,7 @@ public void rollbackIngestion( private void tryIndexRunId(Urn entityUrn, @Nullable SystemMetadata systemMetadata) { if (systemMetadata != null && systemMetadata.hasRunId()) { - 
_entitySearchService.appendRunId( + entitySearchService.appendRunId( entityUrn.getEntityType(), entityUrn, systemMetadata.getRunId()); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java index d72586e289ea78..ae1b3007ed647e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java @@ -1,19 +1,27 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.entity.EntityUtils.parseSystemMetadata; + +import com.datahub.util.RecordUtils; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.aspect.batch.SystemAspect; -import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.entity.AspectType; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.mxe.SystemMetadata; -import java.net.URISyntaxException; import java.sql.Timestamp; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; import lombok.Setter; +import lombok.extern.slf4j.Slf4j; /** * This is an internal representation of an entity aspect record {@link EntityServiceImpl} and @@ -21,6 +29,7 @@ * own aspect record implementations, they cary implementation details that should not leak outside. * Therefore, this is the type to use in public {@link AspectDao} methods. 
*/ +@Slf4j @Getter @Setter @NoArgsConstructor @@ -44,60 +53,52 @@ public class EntityAspect { private String createdFor; - public EntityAspectIdentifier toAspectIdentifier() { + public EntityAspectIdentifier getAspectIdentifier() { return new EntityAspectIdentifier(getUrn(), getAspect(), getVersion()); } - @Nonnull - public SystemAspect asSystemAspect() { - return EntitySystemAspect.from(this); - } - /** * Provide a typed EntityAspect without breaking the existing public contract with generic types. */ + @Builder @Getter - @AllArgsConstructor @EqualsAndHashCode public static class EntitySystemAspect implements SystemAspect { - - @Nullable - public static EntitySystemAspect from(EntityAspect entityAspect) { - return entityAspect != null ? new EntitySystemAspect(entityAspect) : null; - } - @Nonnull private final EntityAspect entityAspect; + @Nonnull private final Urn urn; - @Nonnull - public Urn getUrn() { - try { - return Urn.createFromString(entityAspect.getUrn()); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } - } + /** Note that read mutations depend on the mutability of recordTemplate */ + @Nullable private final RecordTemplate recordTemplate; + + @Nonnull private final EntitySpec entitySpec; + @Nonnull private final AspectSpec aspectSpec; @Nonnull public String getUrnRaw() { return entityAspect.getUrn(); } - @Override - public SystemMetadata getSystemMetadata() { - return EntityUtils.parseSystemMetadata(entityAspect.getSystemMetadata()); - } - @Nullable public String getSystemMetadataRaw() { return entityAspect.getSystemMetadata(); } + public String getMetadataRaw() { + return entityAspect.getMetadata(); + } + @Override public Timestamp getCreatedOn() { return entityAspect.getCreatedOn(); } @Override + public String getCreatedBy() { + return entityAspect.getCreatedBy(); + } + + @Override + @Nonnull public String getAspectName() { return entityAspect.aspect; } @@ -107,14 +108,72 @@ public long getVersion() { return 
entityAspect.getVersion(); } - @Override - public RecordTemplate getRecordTemplate(EntityRegistry entityRegistry) { - return EntityUtils.toAspectRecord( - getUrn().getEntityType(), getAspectName(), entityAspect.getMetadata(), entityRegistry); + @Nullable + public SystemMetadata getSystemMetadata() { + return parseSystemMetadata(getSystemMetadataRaw()); + } + + public EntityAspectIdentifier getAspectIdentifier() { + return entityAspect.getAspectIdentifier(); } - public EntityAspect asRaw() { - return entityAspect; + /** + * Convert to enveloped aspect + * + * @return enveloped aspect + */ + public EnvelopedAspect toEnvelopedAspects() { + // Now turn it into an EnvelopedAspect + final com.linkedin.entity.Aspect aspect = + new com.linkedin.entity.Aspect(getRecordTemplate().data()); + + final EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setName(getAspectName()); + envelopedAspect.setVersion(getVersion()); + + // TODO: I think we can assume this here, adding as it's a required field so object mapping + // barfs when trying to access it, + // since nowhere else is using it should be safe for now at least + envelopedAspect.setType(AspectType.VERSIONED); + envelopedAspect.setValue(aspect); + + try { + if (getSystemMetadata() != null) { + envelopedAspect.setSystemMetadata(getSystemMetadata()); + } + } catch (Exception e) { + log.warn( + "Exception encountered when setting system metadata on enveloped aspect {}. 
Error: {}", + envelopedAspect.getName(), + e.toString()); + } + + envelopedAspect.setCreated(getAuditStamp()); + + return envelopedAspect; + } + + public static class EntitySystemAspectBuilder { + + private EntityAspect.EntitySystemAspect build() { + return null; + } + + public EntityAspect.EntitySystemAspect build( + @Nonnull EntitySpec entitySpec, + @Nonnull AspectSpec aspectSpec, + @Nonnull EntityAspect entityAspect) { + this.entityAspect = entityAspect; + this.urn = UrnUtils.getUrn(entityAspect.getUrn()); + this.aspectSpec = aspectSpec; + if (entityAspect.getMetadata() != null) { + this.recordTemplate = + RecordUtils.toRecordTemplate( + aspectSpec.getDataTemplateClass(), entityAspect.getMetadata()); + } + + return new EntitySystemAspect(entityAspect, urn, recordTemplate, entitySpec, aspectSpec); + } } } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index eec5c6120886dd..ed3a78ceddba4a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -14,7 +14,6 @@ import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; -import com.datahub.util.exception.ModelConversionException; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -34,30 +33,30 @@ import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.data.template.UnionTemplate; -import com.linkedin.entity.AspectType; import com.linkedin.entity.Entity; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import 
com.linkedin.metadata.aspect.Aspect; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; -import com.linkedin.metadata.aspect.batch.MCPBatchItem; -import com.linkedin.metadata.aspect.batch.SystemAspect; -import com.linkedin.metadata.aspect.batch.UpsertItem; -import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.entity.ebean.batch.DeleteItemImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; +import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -65,9 +64,7 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListUrnsResult; import com.linkedin.metadata.run.AspectRowSummary; -import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.snapshot.Snapshot; -import com.linkedin.metadata.utils.EntityKeyUtils; import 
com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -138,7 +135,7 @@ * class. */ @Slf4j -public class EntityServiceImpl implements EntityService<MCPUpsertBatchItem> { +public class EntityServiceImpl implements EntityService<ChangeItemImpl> { /** * As described above, the latest version of an aspect should <b>always</b> take the value 0, with @@ -146,15 +143,15 @@ public class EntityServiceImpl implements EntityService<MCPUpsertBatchItem> { */ private static final int DEFAULT_MAX_TRANSACTION_RETRY = 3; - protected final AspectDao _aspectDao; + protected final AspectDao aspectDao; - @VisibleForTesting @Getter private final EventProducer _producer; - private final EntityRegistry _entityRegistry; - private final Map<String, Set<String>> _entityToValidAspects; - private RetentionService<MCPUpsertBatchItem> _retentionService; - private final Boolean _alwaysEmitChangeLog; - @Getter private final UpdateIndicesService _updateIndicesService; - private final PreProcessHooks _preProcessHooks; + @VisibleForTesting @Getter private final EventProducer producer; + private final EntityRegistry entityRegistry; + private final Map<String, Set<String>> entityToValidAspects; + private RetentionService<ChangeItemImpl> retentionService; + private final Boolean alwaysEmitChangeLog; + @Nullable @Getter private SearchIndicesService updateIndicesService; + private final PreProcessHooks preProcessHooks; protected static final int MAX_KEYS_PER_QUERY = 500; private final Integer ebeanMaxTransactionRetry; @@ -165,7 +162,6 @@ public EntityServiceImpl( @Nonnull final EventProducer producer, @Nonnull final EntityRegistry entityRegistry, final boolean alwaysEmitChangeLog, - @Nullable final UpdateIndicesService updateIndicesService, final PreProcessHooks preProcessHooks, final boolean enableBrowsePathV2) { this( @@ -173,7 +169,6 @@ public EntityServiceImpl( producer, entityRegistry, 
alwaysEmitChangeLog, - updateIndicesService, preProcessHooks, DEFAULT_MAX_TRANSACTION_RETRY, enableBrowsePathV2); @@ -184,25 +179,27 @@ public EntityServiceImpl( @Nonnull final EventProducer producer, @Nonnull final EntityRegistry entityRegistry, final boolean alwaysEmitChangeLog, - @Nullable final UpdateIndicesService updateIndicesService, final PreProcessHooks preProcessHooks, @Nullable final Integer retry, final boolean enableBrowseV2) { - _aspectDao = aspectDao; - _producer = producer; - _entityRegistry = entityRegistry; - _entityToValidAspects = buildEntityToValidAspects(entityRegistry); - _alwaysEmitChangeLog = alwaysEmitChangeLog; - _updateIndicesService = updateIndicesService; - if (_updateIndicesService != null) { - _updateIndicesService.initializeAspectRetriever(this); - } - _preProcessHooks = preProcessHooks; + this.aspectDao = aspectDao; + this.producer = producer; + this.entityRegistry = entityRegistry; + entityToValidAspects = EntityUtils.buildEntityToValidAspects(entityRegistry); + this.alwaysEmitChangeLog = alwaysEmitChangeLog; + this.preProcessHooks = preProcessHooks; ebeanMaxTransactionRetry = retry != null ? 
retry : DEFAULT_MAX_TRANSACTION_RETRY; this.enableBrowseV2 = enableBrowseV2; } + public void setUpdateIndicesService(@Nullable SearchIndicesService updateIndicesService) { + this.updateIndicesService = updateIndicesService; + if (this.updateIndicesService != null) { + this.updateIndicesService.initializeAspectRetriever(this); + } + } + @Override public RecordTemplate getLatestAspect(@Nonnull Urn urn, @Nonnull String aspectName) { log.debug("Invoked getLatestAspect with urn {}, aspect {}", urn, aspectName); @@ -236,24 +233,22 @@ public Map<Urn, List<RecordTemplate>> getLatestAspects( .keySet() .forEach( key -> { - final RecordTemplate keyAspect = EntityUtils.buildKeyAspect(_entityRegistry, key); + final RecordTemplate keyAspect = EntityUtils.buildKeyAspect(entityRegistry, key); urnToAspects.get(key).add(keyAspect); }); - batchGetResults.forEach( - (key, aspectEntry) -> { - final Urn urn = toUrn(key.getUrn()); - final String aspectName = key.getAspect(); - // for now, don't add the key aspect here- we have already added it above - if (aspectName.equals(getKeyAspectName(urn))) { - return; - } + List<SystemAspect> systemAspects = EntityUtils.toSystemAspects(batchGetResults.values(), this); - final RecordTemplate aspectRecord = - aspectEntry.asSystemAspect().getRecordTemplate(getEntityRegistry()); - urnToAspects.putIfAbsent(urn, new ArrayList<>()); - urnToAspects.get(urn).add(aspectRecord); - }); + systemAspects.stream() + // for now, don't add the key aspect here we have already added it above + .filter( + systemAspect -> + !getKeyAspectName(systemAspect.getUrn()).equals(systemAspect.getAspectName())) + .forEach( + systemAspect -> + urnToAspects + .computeIfAbsent(systemAspect.getUrn(), u -> new ArrayList<>()) + .add(systemAspect.getRecordTemplate())); return urnToAspects; } @@ -265,15 +260,10 @@ public Map<String, RecordTemplate> getLatestAspectsForUrn( Map<EntityAspectIdentifier, EntityAspect> batchGetResults = getLatestAspect(new HashSet<>(Arrays.asList(urn)), 
aspectNames); - final Map<String, RecordTemplate> result = new HashMap<>(); - batchGetResults.forEach( - (key, aspectEntry) -> { - final String aspectName = key.getAspect(); - final RecordTemplate aspectRecord = - aspectEntry.asSystemAspect().getRecordTemplate(getEntityRegistry()); - result.put(aspectName, aspectRecord); - }); - return result; + return EntityUtils.toSystemAspects(batchGetResults.values(), this).stream() + .map( + systemAspect -> Pair.of(systemAspect.getAspectName(), systemAspect.getRecordTemplate())) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } /** @@ -291,7 +281,12 @@ public Map<String, RecordTemplate> getLatestAspectsForUrn( @Nullable @Override public RecordTemplate getAspect( - @Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version) { + @Nonnull final Urn urn, @Nonnull final String aspectName, long version) { + return getAspectVersionPair(urn, aspectName, version).getFirst(); + } + + public Pair<RecordTemplate, Long> getAspectVersionPair( + @Nonnull final Urn urn, @Nonnull final String aspectName, long version) { log.debug( "Invoked getAspect with urn: {}, aspectName: {}, version: {}", urn, aspectName, version); @@ -299,14 +294,13 @@ public RecordTemplate getAspect( version = calculateVersionNumber(urn, aspectName, version); final EntityAspectIdentifier primaryKey = new EntityAspectIdentifier(urn.toString(), aspectName, version); - final Optional<EntityAspect> maybeAspect = - Optional.ofNullable(_aspectDao.getAspect(primaryKey)); - return maybeAspect - .map( - aspect -> - EntityUtils.toAspectRecord( - urn, aspectName, aspect.getMetadata(), getEntityRegistry())) - .orElse(null); + final Optional<EntityAspect> maybeAspect = Optional.ofNullable(aspectDao.getAspect(primaryKey)); + + return Pair.of( + EntityUtils.toSystemAspect(maybeAspect.orElse(null), this) + .map(SystemAspect::getRecordTemplate) + .orElse(null), + version); } /** @@ -347,7 +341,8 @@ public Map<Urn, EntityResponse> getEntitiesV2( return 
getLatestEnvelopedAspects(urns, aspectNames).entrySet().stream() .collect( Collectors.toMap( - Map.Entry::getKey, entry -> toEntityResponse(entry.getKey(), entry.getValue()))); + Map.Entry::getKey, + entry -> EntityUtils.toEntityResponse(entry.getKey(), entry.getValue()))); } /** @@ -366,7 +361,8 @@ public Map<Urn, EntityResponse> getEntitiesVersionedV2( return getVersionedEnvelopedAspects(versionedUrns, aspectNames).entrySet().stream() .collect( Collectors.toMap( - Map.Entry::getKey, entry -> toEntityResponse(entry.getKey(), entry.getValue()))); + Map.Entry::getKey, + entry -> EntityUtils.toEntityResponse(entry.getKey(), entry.getValue()))); } /** @@ -473,7 +469,7 @@ private Map<Urn, List<EnvelopedAspect>> getCorrespondingAspects( for (Urn urn : urns) { List<EnvelopedAspect> aspects = urnToAspects.getOrDefault(urn.toString(), Collections.emptyList()); - EnvelopedAspect keyAspect = getKeyEnvelopedAspect(urn); + EnvelopedAspect keyAspect = EntityUtils.getKeyEnvelopedAspect(urn, entityRegistry); // Add key aspect if it does not exist in the returned aspects if (aspects.isEmpty() || aspects.stream().noneMatch(aspect -> keyAspect.getName().equals(aspect.getName()))) { @@ -520,29 +516,16 @@ public VersionedAspect getVersionedAspect( VersionedAspect result = new VersionedAspect(); - version = calculateVersionNumber(urn, aspectName, version); - - final EntityAspectIdentifier primaryKey = - new EntityAspectIdentifier(urn.toString(), aspectName, version); - final Optional<EntityAspect> maybeAspect = - Optional.ofNullable(_aspectDao.getAspect(primaryKey)); - RecordTemplate aspectRecord = - maybeAspect - .map( - aspect -> - EntityUtils.toAspectRecord( - urn, aspectName, aspect.getMetadata(), getEntityRegistry())) - .orElse(null); - - if (aspectRecord == null) { + Pair<RecordTemplate, Long> aspectRecord = getAspectVersionPair(urn, aspectName, version); + if (aspectRecord.getFirst() == null) { return null; } Aspect resultAspect = new Aspect(); - 
RecordUtils.setSelectedRecordTemplateInUnion(resultAspect, aspectRecord); + RecordUtils.setSelectedRecordTemplateInUnion(resultAspect, aspectRecord.getFirst()); result.setAspect(resultAspect); - result.setVersion(version); + result.setVersion(aspectRecord.getSecond()); return result; } @@ -575,20 +558,22 @@ public ListResult<RecordTemplate> listLatestAspects( count); final ListResult<String> aspectMetadataList = - _aspectDao.listLatestAspectMetadata(entityName, aspectName, start, count); + aspectDao.listLatestAspectMetadata(entityName, aspectName, start, count); - final List<RecordTemplate> aspects = new ArrayList<>(); + List<EntityAspect> entityAspects = new ArrayList<>(); for (int i = 0; i < aspectMetadataList.getValues().size(); i++) { - aspects.add( - EntityUtils.toAspectRecord( - aspectMetadataList.getMetadata().getExtraInfos().get(i).getUrn(), - aspectName, - aspectMetadataList.getValues().get(i), - getEntityRegistry())); + EntityAspect entityAspect = new EntityAspect(); + entityAspect.setUrn( + aspectMetadataList.getMetadata().getExtraInfos().get(i).getUrn().toString()); + entityAspect.setAspect(aspectName); + entityAspect.setMetadata(aspectMetadataList.getValues().get(i)); + entityAspects.add(entityAspect); } return new ListResult<>( - aspects, + EntityUtils.toSystemAspects(entityAspects, this).stream() + .map(SystemAspect::getRecordTemplate) + .collect(Collectors.toList()), aspectMetadataList.getMetadata(), aspectMetadataList.getNextStart(), aspectMetadataList.isHasNext(), @@ -612,11 +597,11 @@ public List<UpdateAspectResult> ingestAspects( List<Pair<String, RecordTemplate>> pairList, @Nonnull final AuditStamp auditStamp, SystemMetadata systemMetadata) { - List<? extends MCPBatchItem> items = + List<? 
extends MCPItem> items = pairList.stream() .map( pair -> - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(pair.getKey()) .recordTemplate(pair.getValue()) @@ -624,7 +609,8 @@ public List<UpdateAspectResult> ingestAspects( .auditStamp(auditStamp) .build(this)) .collect(Collectors.toList()); - return ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + return ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(this).items(items).build(), true, true); } /** @@ -643,7 +629,8 @@ public List<UpdateAspectResult> ingestAspects( // Generate additional items as needed items.addAll(DefaultAspectsUtil.getAdditionalChanges(aspectsBatch, this, enableBrowseV2)); - AspectsBatch withDefaults = AspectsBatchImpl.builder().items(items).build(); + AspectsBatch withDefaults = + AspectsBatchImpl.builder().aspectRetriever(this).items(items).build(); Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); @@ -672,73 +659,84 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB( log.warn(String.format("Batch contains duplicates: %s", aspectsBatch)); } - return _aspectDao + return aspectDao .runInTransactionWithRetry( (tx) -> { // Read before write is unfortunate, however batch it final Map<String, Set<String>> urnAspects = aspectsBatch.getUrnAspectsMap(); // read #1 final Map<String, Map<String, SystemAspect>> latestAspects = - toSystemEntityAspects(_aspectDao.getLatestAspects(urnAspects)); + EntityUtils.toSystemAspects(aspectDao.getLatestAspects(urnAspects), this); // read #2 final Map<String, Map<String, Long>> nextVersions = - _aspectDao.getNextVersions(urnAspects); + aspectDao.getNextVersions(urnAspects); // 1. Convert patches to full upserts // 2. 
Run any entity/aspect level hooks - Pair<Map<String, Set<String>>, List<UpsertItem>> updatedItems = - aspectsBatch.toUpsertBatchItems(latestAspects, this); + Pair<Map<String, Set<String>>, List<ChangeMCP>> updatedItems = + aspectsBatch.toUpsertBatchItems(latestAspects); // Fetch additional information if needed final Map<String, Map<String, SystemAspect>> updatedLatestAspects; final Map<String, Map<String, Long>> updatedNextVersions; if (!updatedItems.getFirst().isEmpty()) { Map<String, Map<String, SystemAspect>> newLatestAspects = - toSystemEntityAspects(_aspectDao.getLatestAspects(updatedItems.getFirst())); + EntityUtils.toSystemAspects( + aspectDao.getLatestAspects(updatedItems.getFirst()), this); Map<String, Map<String, Long>> newNextVersions = - _aspectDao.getNextVersions(updatedItems.getFirst()); + aspectDao.getNextVersions(updatedItems.getFirst()); // merge - updatedLatestAspects = aspectsBatch.merge(latestAspects, newLatestAspects); - updatedNextVersions = aspectsBatch.merge(nextVersions, newNextVersions); + updatedLatestAspects = AspectsBatch.merge(latestAspects, newLatestAspects); + updatedNextVersions = AspectsBatch.merge(nextVersions, newNextVersions); } else { updatedLatestAspects = latestAspects; updatedNextVersions = nextVersions; } - // do final pre-commit checks with previous aspect value - updatedItems - .getSecond() - .forEach( - item -> { - SystemAspect previousAspect = + // Add previous version to each upsert + List<ChangeMCP> changeMCPs = + updatedItems.getSecond().stream() + .peek( + changeMCP -> { + String urnStr = changeMCP.getUrn().toString(); + long nextVersion = + updatedNextVersions + .getOrDefault(urnStr, Map.of()) + .getOrDefault(changeMCP.getAspectName(), 0L); + + changeMCP.setPreviousSystemAspect( + updatedLatestAspects + .getOrDefault(urnStr, Map.of()) + .getOrDefault(changeMCP.getAspectName(), null)); + + changeMCP.setNextAspectVersion(nextVersion); + + // support inner-batch upserts updatedLatestAspects - 
.getOrDefault(item.getUrn().toString(), Map.of()) - .get(item.getAspectSpec().getName()); - try { - item.validatePreCommit( - previousAspect == null - ? null - : previousAspect.getRecordTemplate(_entityRegistry), - this); - } catch (AspectValidationException e) { - throw new RuntimeException(e); - } - }); + .computeIfAbsent(urnStr, key -> new HashMap<>()) + .put( + changeMCP.getAspectName(), + changeMCP.getSystemAspect(nextVersion)); + updatedNextVersions + .computeIfAbsent(urnStr, key -> new HashMap<>()) + .put(changeMCP.getAspectName(), nextVersion + 1); + }) + .collect(Collectors.toList()); + + // do final pre-commit checks with previous aspect value + ValidationExceptionCollection exceptions = + AspectsBatch.validatePreCommit(changeMCPs, this); + if (!exceptions.isEmpty()) { + throw new ValidationException(exceptions.toString()); + } // Database Upsert results List<UpdateAspectResult> upsertResults = - updatedItems.getSecond().stream() + changeMCPs.stream() .map( item -> { - final String urnStr = item.getUrn().toString(); - final SystemAspect latest = - updatedLatestAspects - .getOrDefault(urnStr, Map.of()) - .get(item.getAspectName()); - final long nextVersion = - updatedNextVersions - .getOrDefault(urnStr, Map.of()) - .getOrDefault(item.getAspectName(), 0L); + final EntityAspect.EntitySystemAspect latest = + (EntityAspect.EntitySystemAspect) item.getPreviousSystemAspect(); final UpdateAspectResult result; if (overwrite || latest == null) { @@ -750,26 +748,17 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB( item.getRecordTemplate(), item.getAuditStamp(), item.getSystemMetadata(), - latest == null - ? null - : ((EntityAspect.EntitySystemAspect) latest).asRaw(), - nextVersion) + latest == null ? 
null : latest, + item.getNextAspectVersion()) .toBuilder() .request(item) .build(); - // support inner-batch upserts - latestAspects - .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put(item.getAspectName(), item.toLatestEntityAspect()); - nextVersions - .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put(item.getAspectName(), nextVersion + 1); } else { - RecordTemplate oldValue = latest.getRecordTemplate(_entityRegistry); + RecordTemplate oldValue = latest.getRecordTemplate(); SystemMetadata oldMetadata = latest.getSystemMetadata(); result = - UpdateAspectResult.<MCPUpsertBatchItem>builder() + UpdateAspectResult.<ChangeItemImpl>builder() .urn(item.getUrn()) .request(item) .oldValue(oldValue) @@ -792,7 +781,7 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB( } // Retention optimization and tx - if (_retentionService != null) { + if (retentionService != null) { List<RetentionService.RetentionContext> retentionBatch = upsertResults.stream() // Only consider retention when there was a previous version @@ -810,7 +799,7 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB( // value return oldAspect != newAspect && oldAspect != null - && _retentionService != null; + && retentionService != null; }) .map( result -> @@ -820,7 +809,7 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB( .maxVersion(Optional.of(result.getMaxVersion())) .build()) .collect(Collectors.toList()); - _retentionService.applyRetentionWithPolicyDefaults(retentionBatch); + retentionService.applyRetentionWithPolicyDefaults(retentionBatch); } else { log.warn("Retention service is missing!"); } @@ -834,25 +823,6 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB( .collect(Collectors.toList()); } - /** - * Convert EntityAspect to EntitySystemAspect - * - * @param latestAspects latest aspect map - * @return map with converted values - */ - private static Map<String, Map<String, SystemAspect>> toSystemEntityAspects( - Map<String, Map<String, EntityAspect>> 
latestAspects) { - return latestAspects.entrySet().stream() - .map( - e -> - Map.entry( - e.getKey(), - e.getValue().entrySet().stream() - .map(e2 -> Map.entry(e2.getKey(), e2.getValue().asSystemAspect())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - @Nonnull private List<UpdateAspectResult> emitMCL(List<UpdateAspectResult> sqlResults, boolean emitMCL) { List<UpdateAspectResult> withEmitMCL = @@ -924,13 +894,14 @@ public RecordTemplate ingestAspectIfNotPresent( AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder() .one( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(urn) .aspectName(aspectName) .recordTemplate(newValue) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(this)) + .build(this), + this) .build(); List<UpdateAspectResult> ingested = ingestAspects(aspectsBatch, true, false); @@ -1007,17 +978,20 @@ private Stream<IngestResult> ingestTimeseriesProposal( .filter(item -> item.getAspectSpec().isTimeseries()) .collect(Collectors.toList()); - List<MCPBatchItem> defaultAspects = + List<MCPItem> defaultAspects = DefaultAspectsUtil.getAdditionalChanges( - AspectsBatchImpl.builder().items(timeseriesItems).build(), this, enableBrowseV2); - ingestProposalSync(AspectsBatchImpl.builder().items(defaultAspects).build()); + AspectsBatchImpl.builder().aspectRetriever(this).items(timeseriesItems).build(), + this, + enableBrowseV2); + ingestProposalSync( + AspectsBatchImpl.builder().aspectRetriever(this).items(defaultAspects).build()); } // Emit timeseries MCLs - List<Pair<MCPUpsertBatchItem, Optional<Pair<Future<?>, Boolean>>>> timeseriesResults = + List<Pair<ChangeItemImpl, Optional<Pair<Future<?>, Boolean>>>> timeseriesResults = aspectsBatch.getItems().stream() .filter(item -> item.getAspectSpec().isTimeseries()) - .map(item -> (MCPUpsertBatchItem) item) + .map(item -> (ChangeItemImpl) item) .map( item -> Pair.of( @@ -1047,7 +1021,7 @@ 
private Stream<IngestResult> ingestTimeseriesProposal( } }); - MCPUpsertBatchItem request = result.getFirst(); + ChangeItemImpl request = result.getFirst(); return IngestResult.builder() .urn(request.getUrn()) .request(request) @@ -1065,7 +1039,7 @@ private Stream<IngestResult> ingestTimeseriesProposal( * @return produced items to the MCP topic */ private Stream<IngestResult> ingestProposalAsync(AspectsBatch aspectsBatch) { - List<? extends MCPBatchItem> nonTimeseries = + List<? extends MCPItem> nonTimeseries = aspectsBatch.getMCPItems().stream() .filter(item -> !item.getAspectSpec().isTimeseries()) .collect(Collectors.toList()); @@ -1075,7 +1049,7 @@ private Stream<IngestResult> ingestProposalAsync(AspectsBatch aspectsBatch) { .map( item -> // When async is turned on, we write to proposal log and return without waiting - _producer.produceMetadataChangeProposal( + producer.produceMetadataChangeProposal( item.getUrn(), item.getMetadataChangeProposal())) .filter(Objects::nonNull) .collect(Collectors.toList()); @@ -1084,7 +1058,7 @@ private Stream<IngestResult> ingestProposalAsync(AspectsBatch aspectsBatch) { return nonTimeseries.stream() .map( item -> - IngestResult.<MCPBatchItem>builder() + IngestResult.<MCPItem>builder() .urn(item.getUrn()) .request(item) .publishedMCP(true) @@ -1104,13 +1078,14 @@ private Stream<IngestResult> ingestProposalAsync(AspectsBatch aspectsBatch) { private Stream<IngestResult> ingestProposalSync(AspectsBatch aspectsBatch) { AspectsBatchImpl nonTimeseries = AspectsBatchImpl.builder() + .aspectRetriever(this) .items( aspectsBatch.getItems().stream() .filter(item -> !item.getAspectSpec().isTimeseries()) .collect(Collectors.toList())) .build(); - List<? extends MCPBatchItem> unsupported = + List<? 
extends MCPItem> unsupported = nonTimeseries.getMCPItems().stream() .filter( item -> @@ -1130,7 +1105,7 @@ private Stream<IngestResult> ingestProposalSync(AspectsBatch aspectsBatch) { return upsertResults.stream() .map( result -> { - UpsertItem item = result.getRequest(); + ChangeMCP item = result.getRequest(); return IngestResult.builder() .urn(item.getUrn()) @@ -1160,18 +1135,20 @@ public String batchApplyRetention( args.attemptWithVersion = attemptWithVersion; args.aspectName = aspectName; args.urn = urn; - BulkApplyRetentionResult result = _retentionService.batchApplyRetentionEntities(args); + BulkApplyRetentionResult result = retentionService.batchApplyRetentionEntities(args); return result.toString(); } private boolean preprocessEvent(MetadataChangeLog metadataChangeLog) { - if (_preProcessHooks.isUiEnabled()) { + if (preProcessHooks.isUiEnabled()) { if (metadataChangeLog.getSystemMetadata() != null) { if (metadataChangeLog.getSystemMetadata().getProperties() != null) { if (UI_SOURCE.equals( metadataChangeLog.getSystemMetadata().getProperties().get(APP_SOURCE))) { // Pre-process the update indices hook for UI updates to avoid perceived lag from Kafka - _updateIndicesService.handleChangeEvent(metadataChangeLog); + if (updateIndicesService != null) { + updateIndicesService.handleChangeEvent(metadataChangeLog); + } return true; } } @@ -1182,7 +1159,7 @@ private boolean preprocessEvent(MetadataChangeLog metadataChangeLog) { @Override public Integer getCountAspect(@Nonnull String aspectName, @Nullable String urnLike) { - return _aspectDao.countAspect(aspectName, urnLike); + return aspectDao.countAspect(aspectName, urnLike); } @Nonnull @@ -1198,7 +1175,7 @@ public RestoreIndicesResult restoreIndices( "Reading rows %s through %s from the aspects table started.", args.start, args.start + args.batchSize)); long startTime = System.currentTimeMillis(); - PagedList<EbeanAspectV2> rows = _aspectDao.getPagedAspects(args); + PagedList<EbeanAspectV2> rows = 
aspectDao.getPagedAspects(args); result.timeSqlQueryMs = System.currentTimeMillis() - startTime; startTime = System.currentTimeMillis(); logger.accept( @@ -1208,19 +1185,23 @@ public RestoreIndicesResult restoreIndices( LinkedList<Future<?>> futures = new LinkedList<>(); - for (EbeanAspectV2 aspect : rows != null ? rows.getList() : List.<EbeanAspectV2>of()) { + List<SystemAspect> systemAspects = + EntityUtils.toSystemAspectFromEbeanAspects( + rows != null ? rows.getList() : List.<EbeanAspectV2>of(), this); + + for (SystemAspect aspect : systemAspects) { // 1. Extract an Entity type from the entity Urn result.timeGetRowMs = System.currentTimeMillis() - startTime; startTime = System.currentTimeMillis(); Urn urn; try { - urn = Urn.createFromString(aspect.getKey().getUrn()); + urn = aspect.getUrn(); result.lastUrn = urn.toString(); } catch (Exception e) { logger.accept( String.format( "Failed to bind Urn with value %s into Urn object: %s. Ignoring row.", - aspect.getKey().getUrn(), e)); + aspect.getUrn(), e)); ignored = ignored + 1; continue; } @@ -1231,7 +1212,7 @@ public RestoreIndicesResult restoreIndices( final String entityName = urn.getEntityType(); final EntitySpec entitySpec; try { - entitySpec = _entityRegistry.getEntitySpec(entityName); + entitySpec = entityRegistry.getEntitySpec(entityName); } catch (Exception e) { logger.accept( String.format( @@ -1242,7 +1223,7 @@ public RestoreIndicesResult restoreIndices( } result.timeEntityRegistryCheckMs += System.currentTimeMillis() - startTime; startTime = System.currentTimeMillis(); - final String aspectName = aspect.getKey().getAspect(); + final String aspectName = aspect.getAspectName(); result.lastAspect = aspectName; // 3. Verify that the aspect is a valid aspect associated with the entity @@ -1261,14 +1242,12 @@ public RestoreIndicesResult restoreIndices( // 4. 
Create record from json aspect final RecordTemplate aspectRecord; try { - aspectRecord = - EntityUtils.toAspectRecord( - entityName, aspectName, aspect.getMetadata(), _entityRegistry); + aspectRecord = aspect.getRecordTemplate(); } catch (Exception e) { logger.accept( String.format( - "Failed to deserialize row %s for entity %s, aspect %s: %s. Ignoring row.", - aspect.getMetadata(), entityName, aspectName, e)); + "Failed to deserialize for entity %s, aspect %s: %s. Ignoring row.", + entityName, aspectName, e)); ignored = ignored + 1; continue; } @@ -1276,8 +1255,8 @@ public RestoreIndicesResult restoreIndices( startTime = System.currentTimeMillis(); // Force indexing to skip diff mode and fix error states - SystemMetadata latestSystemMetadata = - EntityUtils.parseSystemMetadata(aspect.getSystemMetadata()); + SystemMetadata latestSystemMetadata = aspect.getSystemMetadata(); + StringMap properties = latestSystemMetadata.getProperties() != null ? latestSystemMetadata.getProperties() @@ -1343,7 +1322,7 @@ public ListUrnsResult listUrns( final String keyAspectName = getEntityRegistry().getEntitySpec(entityName).getKeyAspectSpec().getName(); final ListResult<String> keyAspectList = - _aspectDao.listUrns(entityName, keyAspectName, start, count); + aspectDao.listUrns(entityName, keyAspectName, start, count); final ListUrnsResult result = new ListUrnsResult(); result.setStart(start); @@ -1393,7 +1372,8 @@ public Map<Urn, Entity> getEntities( return Collections.emptyMap(); } return getSnapshotUnions(urns, aspectNames).entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> toEntity(entry.getValue()))); + .collect( + Collectors.toMap(Map.Entry::getKey, entry -> EntityUtils.toEntity(entry.getValue()))); } @Override @@ -1401,7 +1381,7 @@ public Pair<Future<?>, Boolean> alwaysProduceMCLAsync( @Nonnull final Urn urn, @Nonnull final AspectSpec aspectSpec, @Nonnull final MetadataChangeLog metadataChangeLog) { - Future<?> future = 
_producer.produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog); + Future<?> future = producer.produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog); return Pair.of(future, preprocessEvent(metadataChangeLog)); } @@ -1442,7 +1422,7 @@ public Optional<Pair<Future<?>, Boolean>> conditionallyProduceMCLAsync( AuditStamp auditStamp, AspectSpec aspectSpec) { boolean isNoOp = oldAspect == newAspect; - if (!isNoOp || _alwaysEmitChangeLog || shouldAspectEmitChangeLog(aspectSpec)) { + if (!isNoOp || alwaysEmitChangeLog || shouldAspectEmitChangeLog(aspectSpec)) { log.debug( "Producing MetadataChangeLog for ingested aspect {}, urn {}", aspectSpec.getName(), @@ -1475,7 +1455,7 @@ public Optional<Pair<Future<?>, Boolean>> conditionallyProduceMCLAsync( } private UpdateAspectResult conditionallyProduceMCLAsync(UpdateAspectResult result) { - UpsertItem request = result.getRequest(); + ChangeMCP request = result.getRequest(); Optional<Pair<Future<?>, Boolean>> emissionStatus = conditionallyProduceMCLAsync( result.getOldValue(), @@ -1537,7 +1517,9 @@ public void ingestEntity( protected Map<Urn, Snapshot> getSnapshotUnions( @Nonnull final Set<Urn> urns, @Nonnull final Set<String> aspectNames) { return getSnapshotRecords(urns, aspectNames).entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> toSnapshotUnion(entry.getValue()))); + .collect( + Collectors.toMap( + Map.Entry::getKey, entry -> EntityUtils.toSnapshotUnion(entry.getValue()))); } @Nonnull @@ -1582,11 +1564,12 @@ private void ingestSnapshotUnion( AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder() + .aspectRetriever(this) .items( aspectRecordsToIngest.stream() .map( pair -> - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(urn) .aspectName(pair.getKey()) .recordTemplate(pair.getValue()) @@ -1606,38 +1589,28 @@ public AspectSpec getKeyAspectSpec(@Nonnull final Urn urn) { @Override public AspectSpec getKeyAspectSpec(@Nonnull final String entityName) { - final EntitySpec spec 
= _entityRegistry.getEntitySpec(entityName); + final EntitySpec spec = entityRegistry.getEntitySpec(entityName); return spec.getKeyAspectSpec(); } @Override public Optional<AspectSpec> getAspectSpec( @Nonnull final String entityName, @Nonnull final String aspectName) { - final EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName); + final EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); return Optional.ofNullable(entitySpec.getAspectSpec(aspectName)); } @Override public String getKeyAspectName(@Nonnull final Urn urn) { - final EntitySpec spec = _entityRegistry.getEntitySpec(urnToEntityName(urn)); + final EntitySpec spec = entityRegistry.getEntitySpec(urnToEntityName(urn)); final AspectSpec keySpec = spec.getKeyAspectSpec(); return keySpec.getName(); } - protected Entity toEntity(@Nonnull final Snapshot snapshot) { - return new Entity().setValue(snapshot); - } - - protected Snapshot toSnapshotUnion(@Nonnull final RecordTemplate snapshotRecord) { - final Snapshot snapshot = new Snapshot(); - RecordUtils.setSelectedRecordTemplateInUnion(snapshot, snapshotRecord); - return snapshot; - } - protected RecordTemplate toSnapshotRecord( @Nonnull final Urn urn, @Nonnull final List<UnionTemplate> aspectUnionTemplates) { final String entityName = urnToEntityName(urn); - final EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName); + final EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); return com.datahub.util.ModelUtils.newSnapshot( getDataTemplateClassFromSchema(entitySpec.getSnapshotSchema(), RecordTemplate.class), urn, @@ -1646,7 +1619,7 @@ protected RecordTemplate toSnapshotRecord( protected UnionTemplate toAspectUnion( @Nonnull final Urn urn, @Nonnull final RecordTemplate aspectRecord) { - final EntitySpec entitySpec = _entityRegistry.getEntitySpec(urnToEntityName(urn)); + final EntitySpec entitySpec = entityRegistry.getEntitySpec(urnToEntityName(urn)); final TyperefDataSchema aspectSchema = 
entitySpec.getAspectTyperefSchema(); if (aspectSchema == null) { throw new RuntimeException( @@ -1659,49 +1632,15 @@ protected UnionTemplate toAspectUnion( aspectRecord); } - protected Urn toUrn(final String urnStr) { - try { - return Urn.createFromString(urnStr); - } catch (URISyntaxException e) { - log.error("Failed to convert urn string {} into Urn object", urnStr); - throw new ModelConversionException( - String.format("Failed to convert urn string %s into Urn object ", urnStr), e); - } - } - - private EntityResponse toEntityResponse( - final Urn urn, final List<EnvelopedAspect> envelopedAspects) { - final EntityResponse response = new EntityResponse(); - response.setUrn(urn); - response.setEntityName(urnToEntityName(urn)); - response.setAspects( - new EnvelopedAspectMap( - envelopedAspects.stream() - .collect(Collectors.toMap(EnvelopedAspect::getName, aspect -> aspect)))); - return response; - } - - private static Map<String, Set<String>> buildEntityToValidAspects( - final EntityRegistry entityRegistry) { - return entityRegistry.getEntitySpecs().values().stream() - .collect( - Collectors.toMap( - EntitySpec::getName, - entry -> - entry.getAspectSpecs().stream() - .map(AspectSpec::getName) - .collect(Collectors.toSet()))); - } - @Override @Nonnull public EntityRegistry getEntityRegistry() { - return _entityRegistry; + return entityRegistry; } @Override - public void setRetentionService(RetentionService<MCPUpsertBatchItem> retentionService) { - _retentionService = retentionService; + public void setRetentionService(RetentionService<ChangeItemImpl> retentionService) { + this.retentionService = retentionService; } protected Set<String> getEntityAspectNames(final Urn entityUrn) { @@ -1710,13 +1649,13 @@ protected Set<String> getEntityAspectNames(final Urn entityUrn) { @Override public Set<String> getEntityAspectNames(final String entityName) { - return _entityToValidAspects.get(entityName); + return entityToValidAspects.get(entityName); } @Override public void 
setWritable(boolean canWrite) { log.debug("Setting writable to {}", canWrite); - _aspectDao.setWritable(canWrite); + aspectDao.setWritable(canWrite); } @Override @@ -1797,7 +1736,7 @@ public RollbackRunResult deleteUrn(Urn urn) { EntityAspect latestKey = null; try { - latestKey = _aspectDao.getLatestAspect(urn.toString(), keyAspectName); + latestKey = aspectDao.getLatestAspect(urn.toString(), keyAspectName); } catch (EntityNotFoundException e) { log.warn("Entity to delete does not exist. {}", urn.toString()); } @@ -1805,7 +1744,8 @@ public RollbackRunResult deleteUrn(Urn urn) { return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); } - SystemMetadata latestKeySystemMetadata = latestKey.asSystemAspect().getSystemMetadata(); + SystemMetadata latestKeySystemMetadata = + EntityUtils.toSystemAspect(latestKey, this).map(SystemAspect::getSystemMetadata).get(); RollbackResult result = deleteAspect( urn.toString(), @@ -1850,29 +1790,26 @@ public RollbackRunResult deleteUrn(Urn urn) { return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); } - /** - * Returns a set of urns of entities that exist (has materialized aspects). - * - * @param urns the list of urns of the entities to check - * @param includeSoftDeleted whether to consider soft delete - * @return a set of urns of entities that exist. - */ @Override - public Set<Urn> exists(@Nonnull final Collection<Urn> urns, boolean includeSoftDeleted) { - + public Set<Urn> exists( + @Nonnull final Collection<Urn> urns, + @Nullable String aspectName, + boolean includeSoftDeleted) { final Set<EntityAspectIdentifier> dbKeys = urns.stream() .map( urn -> new EntityAspectIdentifier( urn.toString(), - _entityRegistry - .getEntitySpec(urn.getEntityType()) - .getKeyAspectSpec() - .getName(), + aspectName == null + ? 
entityRegistry + .getEntitySpec(urn.getEntityType()) + .getKeyAspectSpec() + .getName() + : aspectName, ASPECT_LATEST_VERSION)) .collect(Collectors.toSet()); - final Map<EntityAspectIdentifier, EntityAspect> aspects = _aspectDao.batchGet(dbKeys); + final Map<EntityAspectIdentifier, EntityAspect> aspects = aspectDao.batchGet(dbKeys); final Set<String> existingUrnStrings = aspects.values().stream() .filter(aspect -> aspect != null) @@ -1901,37 +1838,43 @@ public Set<Urn> exists(@Nonnull final Collection<Urn> urns, boolean includeSoftD } } - @Override - public Boolean exists(Urn urn, String aspectName) { - EntityAspectIdentifier dbKey = - new EntityAspectIdentifier(urn.toString(), aspectName, ASPECT_LATEST_VERSION); - Map<EntityAspectIdentifier, EntityAspect> aspects = _aspectDao.batchGet(Set.of(dbKey)); - return aspects.values().stream().anyMatch(Objects::nonNull); - } - @Nullable @Override public RollbackResult deleteAspect( String urn, String aspectName, @Nonnull Map<String, String> conditions, boolean hardDelete) { + final AuditStamp auditStamp = + new AuditStamp() + .setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()); + // Validate pre-conditions before running queries - Urn entityUrn; - EntitySpec entitySpec; - try { - entityUrn = Urn.createFromString(urn); - String entityName = PegasusUtils.urnToEntityName(entityUrn); - entitySpec = getEntityRegistry().getEntitySpec(entityName); - } catch (URISyntaxException uriSyntaxException) { - // don't expect this to happen, so raising RuntimeException here - throw new RuntimeException(String.format("Failed to extract urn from %s", urn)); + Urn entityUrn = UrnUtils.getUrn(urn); + + // Runs simple validations + MCPItem deleteItem = + DeleteItemImpl.builder() + .urn(entityUrn) + .aspectName(aspectName) + .auditStamp(auditStamp) + .build(this); + + // Delete validation hooks + ValidationExceptionCollection exceptions = + AspectsBatch.validateProposed(List.of(deleteItem), this); + if 
(!exceptions.isEmpty()) { + throw new ValidationException(exceptions.toString()); } final RollbackResult result = - _aspectDao.runInTransactionWithRetry( + aspectDao.runInTransactionWithRetry( (tx) -> { Integer additionalRowsDeleted = 0; // 1. Fetch the latest existing version of the aspect. - final EntityAspect latest = _aspectDao.getLatestAspect(urn, aspectName); + final EntityAspect.EntitySystemAspect latest = + (EntityAspect.EntitySystemAspect) + EntityUtils.toSystemAspect(aspectDao.getLatestAspect(urn, aspectName), this) + .orElse(null); // 1.1 If no latest exists, skip this aspect if (latest == null) { @@ -1939,63 +1882,86 @@ public RollbackResult deleteAspect( } // 2. Compare the match conditions, if they don't match, ignore. - SystemMetadata latestSystemMetadata = latest.asSystemAspect().getSystemMetadata(); + SystemMetadata latestSystemMetadata = latest.getSystemMetadata(); if (!filterMatch(latestSystemMetadata, conditions)) { return null; } - String latestMetadata = latest.getMetadata(); // 3. Check if this is a key aspect Boolean isKeyAspect = getKeyAspectName(entityUrn).equals(aspectName); // 4. Fetch all preceding aspects, that match List<EntityAspect> aspectsToDelete = new ArrayList<>(); - long maxVersion = _aspectDao.getMaxVersion(urn, aspectName); - EntityAspect survivingAspect = null; + long maxVersion = aspectDao.getMaxVersion(urn, aspectName); + EntityAspect.EntitySystemAspect survivingAspect = null; String previousMetadata = null; boolean filterMatch = true; while (maxVersion > 0 && filterMatch) { - EntityAspect candidateAspect = _aspectDao.getAspect(urn, aspectName, maxVersion); + EntityAspect.EntitySystemAspect candidateAspect = + (EntityAspect.EntitySystemAspect) + EntityUtils.toSystemAspect( + aspectDao.getAspect(urn, aspectName, maxVersion), this) + .orElse(null); SystemMetadata previousSysMetadata = - candidateAspect != null - ? candidateAspect.asSystemAspect().getSystemMetadata() - : null; + candidateAspect != null ? 
candidateAspect.getSystemMetadata() : null; filterMatch = previousSysMetadata != null && filterMatch(previousSysMetadata, conditions); if (filterMatch) { - aspectsToDelete.add(candidateAspect); + aspectsToDelete.add(candidateAspect.getEntityAspect()); maxVersion = maxVersion - 1; } else { survivingAspect = candidateAspect; - previousMetadata = survivingAspect.getMetadata(); + previousMetadata = survivingAspect.getMetadataRaw(); } } - // 5. Apply deletes and fix up latest row + // Delete validation hooks + ValidationExceptionCollection preCommitExceptions = + AspectsBatch.validatePreCommit( + aspectsToDelete.stream() + .map( + toDelete -> + DeleteItemImpl.builder() + .urn(UrnUtils.getUrn(toDelete.getUrn())) + .aspectName(toDelete.getAspect()) + .auditStamp(auditStamp) + .build(this)) + .collect(Collectors.toList()), + this); + if (!preCommitExceptions.isEmpty()) { + throw new ValidationException(preCommitExceptions.toString()); + } - aspectsToDelete.forEach(aspect -> _aspectDao.deleteAspect(tx, aspect)); + // 5. 
Apply deletes and fix up latest row + aspectsToDelete.forEach(aspect -> aspectDao.deleteAspect(tx, aspect)); if (survivingAspect != null) { // if there was a surviving aspect, copy its information into the latest row // eBean does not like us updating a pkey column (version) for the surviving aspect // as a result we copy information from survivingAspect to latest and delete // survivingAspect - latest.setMetadata(survivingAspect.getMetadata()); - latest.setSystemMetadata(survivingAspect.getSystemMetadata()); - latest.setCreatedOn(survivingAspect.getCreatedOn()); - latest.setCreatedBy(survivingAspect.getCreatedBy()); - latest.setCreatedFor(survivingAspect.getCreatedFor()); - _aspectDao.saveAspect(tx, latest, false); + latest + .getEntityAspect() + .setMetadata(survivingAspect.getEntityAspect().getMetadata()); + latest + .getEntityAspect() + .setSystemMetadata(survivingAspect.getEntityAspect().getSystemMetadata()); + latest.getEntityAspect().setCreatedOn(survivingAspect.getCreatedOn()); + latest.getEntityAspect().setCreatedBy(survivingAspect.getCreatedBy()); + latest + .getEntityAspect() + .setCreatedFor(survivingAspect.getEntityAspect().getCreatedFor()); + aspectDao.saveAspect(tx, latest.getEntityAspect(), false); // metrics - _aspectDao.incrementWriteMetrics( - aspectName, 1, latest.getAspect().getBytes(StandardCharsets.UTF_8).length); - _aspectDao.deleteAspect(tx, survivingAspect); + aspectDao.incrementWriteMetrics( + aspectName, 1, latest.getMetadataRaw().getBytes(StandardCharsets.UTF_8).length); + aspectDao.deleteAspect(tx, survivingAspect.getEntityAspect()); } else { if (isKeyAspect) { if (hardDelete) { // If this is the key aspect, delete the entity entirely. 
- additionalRowsDeleted = _aspectDao.deleteUrn(tx, urn); - } else if (entitySpec.hasAspect(Constants.STATUS_ASPECT_NAME)) { + additionalRowsDeleted = aspectDao.deleteUrn(tx, urn); + } else if (deleteItem.getEntitySpec().hasAspect(Constants.STATUS_ASPECT_NAME)) { // soft delete by setting status.removed=true (if applicable) final Status statusAspect = new Status(); statusAspect.setRemoved(true); @@ -2006,38 +1972,21 @@ public RollbackResult deleteAspect( gmce.setEntityType(entityUrn.getEntityType()); gmce.setAspectName(Constants.STATUS_ASPECT_NAME); gmce.setAspect(GenericRecordUtils.serializeAspect(statusAspect)); - final AuditStamp auditStamp = - new AuditStamp() - .setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); this.ingestProposal(gmce, auditStamp, false); } } else { // Else, only delete the specific aspect. - _aspectDao.deleteAspect(tx, latest); + aspectDao.deleteAspect(tx, latest.getEntityAspect()); } } // 6. Emit the Update try { final RecordTemplate latestValue = - latest == null - ? null - : EntityUtils.toAspectRecord( - entitySpec.getName(), - latest.getAspect(), - latestMetadata, - getEntityRegistry()); - + latest == null ? null : latest.getRecordTemplate(); final RecordTemplate previousValue = - survivingAspect == null - ? null - : EntityUtils.toAspectRecord( - entitySpec.getName(), - survivingAspect.getAspect(), - previousMetadata, - getEntityRegistry()); + survivingAspect == null ? null : latest.getRecordTemplate(); final Urn urnObj = Urn.createFromString(urn); // We are not deleting key aspect if hardDelete has not been set so do not return a @@ -2048,13 +1997,11 @@ public RollbackResult deleteAspect( return new RollbackResult( urnObj, urnObj.getEntityType(), - latest.getAspect(), + latest.getAspectName(), latestValue, previousValue, latestSystemMetadata, - previousValue == null - ? null - : survivingAspect.asSystemAspect().getSystemMetadata(), + previousValue == null ? 
null : survivingAspect.getSystemMetadata(), survivingAspect == null ? ChangeType.DELETE : ChangeType.UPSERT, isKeyAspect, additionalRowsDeleted); @@ -2128,7 +2075,7 @@ private Map<EntityAspectIdentifier, EntityAspect> getLatestAspect( Map<EntityAspectIdentifier, EntityAspect> batchGetResults = new HashMap<>(); Iterators.partition(dbKeys.iterator(), MAX_KEYS_PER_QUERY) .forEachRemaining( - batch -> batchGetResults.putAll(_aspectDao.batchGet(ImmutableSet.copyOf(batch)))); + batch -> batchGetResults.putAll(aspectDao.batchGet(ImmutableSet.copyOf(batch)))); return batchGetResults; } @@ -2140,83 +2087,24 @@ private Map<EntityAspectIdentifier, EntityAspect> getLatestAspect( private long calculateVersionNumber( @Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version) { if (version < 0) { - return _aspectDao.getMaxVersion(urn.toString(), aspectName) + version + 1; + return aspectDao.getMaxVersion(urn.toString(), aspectName) + version + 1; } return version; } private Map<EntityAspectIdentifier, EnvelopedAspect> getEnvelopedAspects( final Set<EntityAspectIdentifier> dbKeys) { - final Map<EntityAspectIdentifier, EnvelopedAspect> result = new HashMap<>(); - final Map<EntityAspectIdentifier, EntityAspect> dbEntries = _aspectDao.batchGet(dbKeys); - - for (EntityAspectIdentifier currKey : dbKeys) { - - final EntityAspect currAspectEntry = dbEntries.get(currKey); - - if (currAspectEntry == null) { - // No aspect found. - continue; - } + final Map<EntityAspectIdentifier, EntityAspect> dbEntries = aspectDao.batchGet(dbKeys); - result.put(currKey, toEnvelopedAspect(currAspectEntry)); - } - return result; - } - - private static EnvelopedAspect toEnvelopedAspect(EntityAspect entityAspect) { - // Aspect found. 
Now turn it into an EnvelopedAspect - final com.linkedin.entity.Aspect aspect = - RecordUtils.toRecordTemplate(com.linkedin.entity.Aspect.class, entityAspect.getMetadata()); - final EnvelopedAspect envelopedAspect = new EnvelopedAspect(); - envelopedAspect.setName(entityAspect.getAspect()); - envelopedAspect.setVersion(entityAspect.getVersion()); - // TODO: I think we can assume this here, adding as it's a required field so object mapping - // barfs when trying to access it, - // since nowhere else is using it should be safe for now at least - envelopedAspect.setType(AspectType.VERSIONED); - envelopedAspect.setValue(aspect); + List<SystemAspect> envelopedAspects = EntityUtils.toSystemAspects(dbEntries.values(), this); - try { - if (entityAspect.getSystemMetadata() != null) { - final SystemMetadata systemMetadata = entityAspect.asSystemAspect().getSystemMetadata(); - envelopedAspect.setSystemMetadata(systemMetadata); - } - } catch (Exception e) { - log.warn( - "Exception encountered when setting system metadata on enveloped aspect {}. 
Error: {}", - envelopedAspect.getName(), - e.toString()); - } - - envelopedAspect.setCreated( - new AuditStamp() - .setActor(UrnUtils.getUrn(entityAspect.getCreatedBy())) - .setTime(entityAspect.getCreatedOn().getTime())); - - return envelopedAspect; - } - - private EnvelopedAspect getKeyEnvelopedAspect(final Urn urn) { - final EntitySpec spec = getEntityRegistry().getEntitySpec(PegasusUtils.urnToEntityName(urn)); - final AspectSpec keySpec = spec.getKeyAspectSpec(); - final com.linkedin.entity.Aspect aspect = - new com.linkedin.entity.Aspect(EntityKeyUtils.convertUrnToEntityKey(urn, keySpec).data()); - - final EnvelopedAspect envelopedAspect = new EnvelopedAspect(); - envelopedAspect.setName(keySpec.getName()); - envelopedAspect.setVersion(ASPECT_LATEST_VERSION); - envelopedAspect.setValue(aspect); - // TODO: I think we can assume this here, adding as it's a required field so object mapping - // barfs when trying to access it, - // since nowhere else is using it should be safe for now at least - envelopedAspect.setType(AspectType.VERSIONED); - envelopedAspect.setCreated( - new AuditStamp() - .setActor(UrnUtils.getUrn(SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis())); - - return envelopedAspect; + return envelopedAspects.stream() + .collect( + Collectors.toMap( + systemAspect -> + ((EntityAspect.EntitySystemAspect) systemAspect).getAspectIdentifier(), + systemAspect -> + ((EntityAspect.EntitySystemAspect) systemAspect).toEnvelopedAspects())); } @Nonnull @@ -2227,7 +2115,7 @@ private UpdateAspectResult ingestAspectToLocalDB( @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata, - @Nullable final EntityAspect latest, + @Nullable final EntityAspect.EntitySystemAspect latest, @Nonnull final Long nextVersion) { // Set the "last run id" to be the run id provided with the new system metadata. 
This will be @@ -2237,34 +2125,30 @@ private UpdateAspectResult ingestAspectToLocalDB( providedSystemMetadata.getRunId(GetMode.NULL), SetMode.IGNORE_NULL); // 2. Compare the latest existing and new. - final RecordTemplate oldValue = - latest == null - ? null - : EntityUtils.toAspectRecord( - urn, aspectName, latest.getMetadata(), getEntityRegistry()); + final RecordTemplate oldValue = latest == null ? null : latest.getRecordTemplate(); // 3. If there is no difference between existing and new, we just update // the lastObserved in system metadata. RunId should stay as the original runId if (oldValue != null && DataTemplateUtil.areEqual(oldValue, newValue)) { - SystemMetadata latestSystemMetadata = latest.asSystemAspect().getSystemMetadata(); + SystemMetadata latestSystemMetadata = latest.getSystemMetadata(); latestSystemMetadata.setLastObserved(providedSystemMetadata.getLastObserved()); latestSystemMetadata.setLastRunId( providedSystemMetadata.getLastRunId(GetMode.NULL), SetMode.IGNORE_NULL); - latest.setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata)); + latest.getEntityAspect().setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata)); log.info("Ingesting aspect with name {}, urn {}", aspectName, urn); - _aspectDao.saveAspect(tx, latest, false); + aspectDao.saveAspect(tx, latest.getEntityAspect(), false); // metrics - _aspectDao.incrementWriteMetrics( - aspectName, 1, latest.getAspect().getBytes(StandardCharsets.UTF_8).length); + aspectDao.incrementWriteMetrics( + aspectName, 1, latest.getMetadataRaw().getBytes(StandardCharsets.UTF_8).length); return UpdateAspectResult.builder() .urn(urn) .oldValue(oldValue) .newValue(oldValue) - .oldSystemMetadata(latest.asSystemAspect().getSystemMetadata()) + .oldSystemMetadata(latest.getSystemMetadata()) .newSystemMetadata(latestSystemMetadata) .operation(MetadataAuditOperation.UPDATE) .auditStamp(auditStamp) @@ -2276,15 +2160,15 @@ private UpdateAspectResult ingestAspectToLocalDB( log.debug("Ingesting 
aspect with name {}, urn {}", aspectName, urn); String newValueStr = EntityUtils.toJsonAspect(newValue); long versionOfOld = - _aspectDao.saveLatestAspect( + aspectDao.saveLatestAspect( tx, urn.toString(), aspectName, latest == null ? null : EntityUtils.toJsonAspect(oldValue), latest == null ? null : latest.getCreatedBy(), - latest == null ? null : latest.getCreatedFor(), + latest == null ? null : latest.getEntityAspect().getCreatedFor(), latest == null ? null : latest.getCreatedOn(), - latest == null ? null : latest.getSystemMetadata(), + latest == null ? null : latest.getSystemMetadataRaw(), newValueStr, auditStamp.getActor().toString(), auditStamp.hasImpersonator() ? auditStamp.getImpersonator().toString() : null, @@ -2293,14 +2177,14 @@ private UpdateAspectResult ingestAspectToLocalDB( nextVersion); // metrics - _aspectDao.incrementWriteMetrics( + aspectDao.incrementWriteMetrics( aspectName, 1, newValueStr.getBytes(StandardCharsets.UTF_8).length); return UpdateAspectResult.builder() .urn(urn) .oldValue(oldValue) .newValue(newValue) - .oldSystemMetadata(latest == null ? null : latest.asSystemAspect().getSystemMetadata()) + .oldSystemMetadata(latest == null ? 
null : latest.getSystemMetadata()) .newSystemMetadata(providedSystemMetadata) .operation(MetadataAuditOperation.UPDATE) .auditStamp(auditStamp) @@ -2318,7 +2202,11 @@ private static boolean shouldAspectEmitChangeLog(@Nonnull final AspectSpec aspec @Override public Map<Urn, Map<String, com.linkedin.entity.Aspect>> getLatestAspectObjects( Set<Urn> urns, Set<String> aspectNames) throws RemoteInvocationException, URISyntaxException { - String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); - return entityResponseToAspectMap(getEntitiesV2(entityName, urns, aspectNames)); + if (urns.isEmpty() || aspectNames.isEmpty()) { + return Map.of(); + } else { + String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); + return entityResponseToAspectMap(getEntitiesV2(entityName, urns, aspectNames)); + } } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index f353e5142755d1..fecf246b31c02b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -7,21 +7,38 @@ import com.google.common.base.Preconditions; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; -import com.linkedin.data.schema.RecordDataSchema; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.AspectType; +import com.linkedin.entity.Entity; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.ReadItem; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import 
com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.validation.EntityRegistryUrnValidator; import com.linkedin.metadata.entity.validation.RecordTemplateValidator; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; import java.net.URISyntaxException; -import java.net.URLEncoder; +import java.util.Collection; import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -31,9 +48,6 @@ public class EntityUtils { private EntityUtils() {} - public static final int URN_NUM_BYTES_LIMIT = 512; - public static final String URN_DELIMITER_SEPARATOR = "␟"; - @Nonnull public static String toJsonAspect(@Nonnull final RecordTemplate aspectRecord) { return RecordUtils.toJsonString(aspectRecord); @@ -101,45 +115,205 @@ public static RecordTemplate getAspectFromEntity( } } + public static RecordTemplate buildKeyAspect( + @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + final EntitySpec spec = entityRegistry.getEntitySpec(urnToEntityName(urn)); + final AspectSpec keySpec = spec.getKeyAspectSpec(); + return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); + } + + static Entity toEntity(@Nonnull final Snapshot snapshot) { + return new Entity().setValue(snapshot); + } + + static Snapshot toSnapshotUnion(@Nonnull final RecordTemplate snapshotRecord) { + final Snapshot 
snapshot = new Snapshot(); + RecordUtils.setSelectedRecordTemplateInUnion(snapshot, snapshotRecord); + return snapshot; + } + + static EnvelopedAspect getKeyEnvelopedAspect(final Urn urn, final EntityRegistry entityRegistry) { + final EntitySpec spec = entityRegistry.getEntitySpec(PegasusUtils.urnToEntityName(urn)); + final AspectSpec keySpec = spec.getKeyAspectSpec(); + final com.linkedin.entity.Aspect aspect = + new com.linkedin.entity.Aspect(EntityKeyUtils.convertUrnToEntityKey(urn, keySpec).data()); + + final EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setName(keySpec.getName()); + envelopedAspect.setVersion(ASPECT_LATEST_VERSION); + envelopedAspect.setValue(aspect); + // TODO: I think we can assume this here, adding as it's a required field so object mapping + // barfs when trying to access it, + // since nowhere else is using it should be safe for now at least + envelopedAspect.setType(AspectType.VERSIONED); + envelopedAspect.setCreated( + new AuditStamp() + .setActor(UrnUtils.getUrn(SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis())); + + return envelopedAspect; + } + + static EntityResponse toEntityResponse( + final Urn urn, final List<EnvelopedAspect> envelopedAspects) { + final EntityResponse response = new EntityResponse(); + response.setUrn(urn); + response.setEntityName(urnToEntityName(urn)); + response.setAspects( + new EnvelopedAspectMap( + envelopedAspects.stream() + .collect(Collectors.toMap(EnvelopedAspect::getName, aspect -> aspect)))); + return response; + } + + static Map<String, Set<String>> buildEntityToValidAspects(final EntityRegistry entityRegistry) { + return entityRegistry.getEntitySpecs().values().stream() + .collect( + Collectors.toMap( + EntitySpec::getName, + entry -> + entry.getAspectSpecs().stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet()))); + } + + /** + * Prefer batched interfaces + * + * @param entityAspect optional entity aspect + * @param aspectRetriever + * @return + */ + 
public static Optional<SystemAspect> toSystemAspect( + @Nullable EntityAspect entityAspect, @Nonnull AspectRetriever aspectRetriever) { + return Optional.ofNullable(entityAspect) + .map(aspect -> EntityUtils.toSystemAspects(List.of(aspect), aspectRetriever)) + .filter(systemAspects -> !systemAspects.isEmpty()) + .map(systemAspects -> systemAspects.get(0)); + } + + /** + * Given a `Map<EntityUrn, <Map<AspectName, EntityAspect>>` from the database representation, + * translate that into our java classes + * + * @param rawAspects `Map<EntityUrn, <Map<AspectName, EntityAspect>>` + * @param aspectRetriever used for read mutations + * @return the java map for the given database object map + */ + @Nonnull + public static Map<String, Map<String, SystemAspect>> toSystemAspects( + @Nonnull Map<String, Map<String, EntityAspect>> rawAspects, + @Nonnull AspectRetriever aspectRetriever) { + List<SystemAspect> systemAspects = + toSystemAspects( + rawAspects.values().stream() + .flatMap(m -> m.values().stream()) + .collect(Collectors.toList()), + aspectRetriever); + + // map the list into the desired shape + return systemAspects.stream() + .collect(Collectors.groupingBy(SystemAspect::getUrn)) + .entrySet() + .stream() + .map( + entry -> + Pair.of( + entry.getKey(), + entry.getValue().stream() + .collect(Collectors.groupingBy(SystemAspect::getAspectName)))) + .collect( + Collectors.toMap( + p -> p.getFirst().toString(), + p -> + p.getSecond().entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().get(0))))); + } + @Nonnull - public static RecordTemplate toAspectRecord( - @Nonnull final Urn entityUrn, - @Nonnull final String aspectName, - @Nonnull final String jsonAspect, - @Nonnull final EntityRegistry entityRegistry) { - return toAspectRecord( - PegasusUtils.urnToEntityName(entityUrn), aspectName, jsonAspect, entityRegistry); + public static List<SystemAspect> toSystemAspectFromEbeanAspects( + @Nonnull Collection<EbeanAspectV2> rawAspects, @Nonnull 
AspectRetriever aspectRetriever) { + return toSystemAspects( + rawAspects.stream().map(EbeanAspectV2::toEntityAspect).collect(Collectors.toList()), + aspectRetriever); } /** - * @param entityName - * @param aspectName - * @param jsonAspect - * @param entityRegistry - * @return a RecordTemplate which has been validated, validation errors are logged as warnings + * Convert EntityAspect to EntitySystemAspect + * + * <p>This should be the 1 point that all conversions from database representations to java + * objects happens since we need to enforce read mutations happen. + * + * @param rawAspects raw aspects to convert + * @return map converted aspects */ - public static RecordTemplate toAspectRecord( - @Nonnull final String entityName, - @Nonnull final String aspectName, - @Nonnull final String jsonAspect, - @Nonnull final EntityRegistry entityRegistry) { - final EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - final AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); - // TODO: aspectSpec can be null here - Preconditions.checkState( - aspectSpec != null, String.format("Aspect %s could not be found", aspectName)); - final RecordDataSchema aspectSchema = aspectSpec.getPegasusSchema(); - RecordTemplate aspectRecord = - RecordUtils.toRecordTemplate(aspectSpec.getDataTemplateClass(), jsonAspect); - RecordTemplateValidator.validate( - aspectRecord, - validationFailure -> { - log.warn(String.format("Failed to validate record %s against its schema.", aspectRecord)); + @Nonnull + public static List<SystemAspect> toSystemAspects( + @Nonnull Collection<EntityAspect> rawAspects, @Nonnull AspectRetriever aspectRetriever) { + EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); + + // Build + List<SystemAspect> systemAspects = + rawAspects.stream() + .map( + raw -> { + Urn urn = UrnUtils.getUrn(raw.getUrn()); + AspectSpec aspectSpec = + entityRegistry + .getEntitySpec(urn.getEntityType()) + .getAspectSpec(raw.getAspect()); + + // TODO: 
aspectSpec can be null here + Preconditions.checkState( + aspectSpec != null, + String.format("Aspect %s could not be found", raw.getAspect())); + + return EntityAspect.EntitySystemAspect.builder() + .build(entityRegistry.getEntitySpec(urn.getEntityType()), aspectSpec, raw); + }) + .collect(Collectors.toList()); + + // Read Mutate + Map<Pair<EntitySpec, AspectSpec>, List<ReadItem>> grouped = + systemAspects.stream() + .collect( + Collectors.groupingBy(item -> Pair.of(item.getEntitySpec(), item.getAspectSpec()))); + + grouped.forEach( + (key, value) -> { + AspectsBatch.applyReadMutationHooks(value, aspectRetriever); }); - return aspectRecord; + + // Read Validate + systemAspects.forEach( + systemAspect -> + RecordTemplateValidator.validate( + systemAspect.getRecordTemplate(), + validationFailure -> + log.warn( + String.format( + "Failed to validate record %s against its schema.", + systemAspect.getRecordTemplate())))); + + // TODO consider applying write validation plugins + + return systemAspects; + } + + public static <T extends RecordTemplate> MetadataChangeProposal buildMCP( + Urn entityUrn, String aspectName, ChangeType changeType, @Nullable T aspect) { + MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(entityUrn); + proposal.setChangeType(changeType); + proposal.setEntityType(entityUrn.getEntityType()); + proposal.setAspectName(aspectName); + if (aspect != null) { + proposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); + } + return proposal; } - public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { + static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { if (jsonSystemMetadata == null || jsonSystemMetadata.equals("")) { SystemMetadata response = new SystemMetadata(); response.setRunId(DEFAULT_RUN_ID); @@ -148,43 +322,4 @@ public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { } return RecordUtils.toRecordTemplate(SystemMetadata.class, 
jsonSystemMetadata); } - - public static RecordTemplate buildKeyAspect( - @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - final EntitySpec spec = entityRegistry.getEntitySpec(urnToEntityName(urn)); - final AspectSpec keySpec = spec.getKeyAspectSpec(); - return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); - } - - public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); - validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); - RecordTemplateValidator.validate( - EntityUtils.buildKeyAspect(entityRegistry, urn), - validationResult -> { - throw new IllegalArgumentException( - "Invalid urn: " + urn + "\n Cause: " + validationResult.getMessages()); - }, - validator); - - if (urn.toString().trim().length() != urn.toString().length()) { - throw new IllegalArgumentException( - "Error: cannot provide an URN with leading or trailing whitespace"); - } - if (URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { - throw new IllegalArgumentException( - "Error: cannot provide an URN longer than " - + Integer.toString(URN_NUM_BYTES_LIMIT) - + " bytes (when URL encoded)"); - } - if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { - throw new IllegalArgumentException( - "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); - } - try { - Urn.createFromString(urn.toString()); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index f37f63913abe45..c1e76e7c678363 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -201,7 +201,7 @@ public Map<EntityAspectIdentifier, EntityAspect> batchGet( return keys.stream() .map(this::getAspect) .filter(Objects::nonNull) - .collect(Collectors.toMap(EntityAspect::toAspectIdentifier, aspect -> aspect)); + .collect(Collectors.toMap(EntityAspect::getAspectIdentifier, aspect -> aspect)); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index 4d9d2b3c416b7b..91e31975298771 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -17,7 +17,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.EntityService; @@ -45,7 +45,7 @@ @Slf4j @RequiredArgsConstructor -public class CassandraRetentionService<U extends UpsertItem> extends RetentionService<U> { +public class CassandraRetentionService<U extends ChangeMCP> extends RetentionService<U> { private final EntityService<U> _entityService; private final CqlSession _cqlSession; private final int _batchSize; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 3342d4632f642e..23d443c10b71fc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -10,8 +10,9 @@ import com.google.common.cache.LoadingCache; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.AspectMigrationsDao; @@ -637,13 +638,14 @@ public <T> List<T> runInTransactionWithRetry( Set<Urn> urnsWithKeyAspects = batch.getMCPItems().stream() .filter(i -> i.getEntitySpec().getKeyAspectSpec().equals(i.getAspectSpec())) - .map(MCPBatchItem::getUrn) + .map(MCPItem::getUrn) .collect(Collectors.toSet()); if (!urnsWithKeyAspects.isEmpty()) { // Split into batches by urn with key aspect, remaining aspects in the pair's second - Pair<List<AspectsBatch>, AspectsBatch> splitBatches = splitByUrn(batch, urnsWithKeyAspects); + Pair<List<AspectsBatch>, AspectsBatch> splitBatches = + splitByUrn(batch, urnsWithKeyAspects, batch.getAspectRetriever()); // Run non-key aspect `other` batch per normal if (!splitBatches.getSecond().getItems().isEmpty()) { @@ -902,12 +904,13 @@ private static String buildMetricName( * @return separated batches */ private static Pair<List<AspectsBatch>, AspectsBatch> splitByUrn( - AspectsBatch batch, Set<Urn> urns) { - Map<Urn, List<MCPBatchItem>> itemsByUrn = - batch.getMCPItems().stream().collect(Collectors.groupingBy(MCPBatchItem::getUrn)); + AspectsBatch batch, Set<Urn> urns, AspectRetriever aspectRetriever) { + Map<Urn, List<MCPItem>> itemsByUrn = + batch.getMCPItems().stream().collect(Collectors.groupingBy(MCPItem::getUrn)); AspectsBatch other = AspectsBatchImpl.builder() + .aspectRetriever(aspectRetriever) .items( itemsByUrn.entrySet().stream() .filter(entry -> 
!urns.contains(entry.getKey())) @@ -917,7 +920,12 @@ private static Pair<List<AspectsBatch>, AspectsBatch> splitByUrn( List<AspectsBatch> nonEmptyBatches = urns.stream() - .map(urn -> AspectsBatchImpl.builder().items(itemsByUrn.get(urn)).build()) + .map( + urn -> + AspectsBatchImpl.builder() + .aspectRetriever(aspectRetriever) + .items(itemsByUrn.get(urn)) + .build()) .filter(b -> !b.getItems().isEmpty()) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index eba550714766b8..250a81d9c8edcf 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -5,7 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -40,7 +40,7 @@ @Slf4j @RequiredArgsConstructor -public class EbeanRetentionService<U extends UpsertItem> extends RetentionService<U> { +public class EbeanRetentionService<U extends ChangeMCP> extends RetentionService<U> { private final EntityService<U> _entityService; private final Database _server; private final int _batchSize; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 1718bd835dc31f..3edb55f265dc13 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -3,13 +3,13 @@ import com.linkedin.common.AuditStamp; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; -import com.linkedin.metadata.aspect.batch.SystemAspect; -import com.linkedin.metadata.aspect.batch.UpsertItem; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import java.util.Collection; import java.util.LinkedList; @@ -18,6 +18,7 @@ import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -27,7 +28,8 @@ @Builder(toBuilder = true) public class AspectsBatchImpl implements AspectsBatch { - private final Collection<? extends BatchItem> items; + @Nonnull private final Collection<? extends BatchItem> items; + @Nonnull private final AspectRetriever aspectRetriever; /** * Convert patches to upserts, apply hooks at the aspect and batch level. 
@@ -37,10 +39,11 @@ public class AspectsBatchImpl implements AspectsBatch { * various hooks */ @Override - public Pair<Map<String, Set<String>>, List<UpsertItem>> toUpsertBatchItems( - final Map<String, Map<String, SystemAspect>> latestAspects, AspectRetriever aspectRetriever) { + public Pair<Map<String, Set<String>>, List<ChangeMCP>> toUpsertBatchItems( + final Map<String, Map<String, SystemAspect>> latestAspects) { - LinkedList<UpsertItem> upsertBatchItems = + // Convert patches to upserts if needed + LinkedList<ChangeMCP> upsertBatchItems = items.stream() .map( item -> { @@ -49,35 +52,26 @@ public Pair<Map<String, Set<String>>, List<UpsertItem>> toUpsertBatchItems( final SystemAspect latest = latestAspects.getOrDefault(urnStr, Map.of()).get(item.getAspectName()); - final MCPUpsertBatchItem upsertItem; - if (item instanceof MCPUpsertBatchItem) { - upsertItem = (MCPUpsertBatchItem) item; + final ChangeItemImpl upsertItem; + if (item instanceof ChangeItemImpl) { + upsertItem = (ChangeItemImpl) item; } else { // patch to upsert - MCPPatchBatchItem patchBatchItem = (MCPPatchBatchItem) item; + PatchItemImpl patchBatchItem = (PatchItemImpl) item; final RecordTemplate currentValue = - latest != null - ? latest.getRecordTemplate(aspectRetriever.getEntityRegistry()) - : null; + latest != null ? latest.getRecordTemplate() : null; upsertItem = patchBatchItem.applyPatch(currentValue, aspectRetriever); } - // Apply hooks - final SystemMetadata oldSystemMetadata = - latest != null ? latest.getSystemMetadata() : null; - final RecordTemplate oldAspectValue = - latest != null - ? 
latest.getRecordTemplate(aspectRetriever.getEntityRegistry()) - : null; - upsertItem.applyMutationHooks(oldAspectValue, oldSystemMetadata, aspectRetriever); - return upsertItem; }) .collect(Collectors.toCollection(LinkedList::new)); - LinkedList<UpsertItem> newItems = - applyMCPSideEffects(upsertBatchItems, aspectRetriever) - .collect(Collectors.toCollection(LinkedList::new)); + // Apply write hooks before side effects + applyWriteMutationHooks(upsertBatchItems); + + LinkedList<ChangeMCP> newItems = + applyMCPSideEffects(upsertBatchItems).collect(Collectors.toCollection(LinkedList::new)); Map<String, Set<String>> newUrnAspectNames = getNewUrnAspectsMap(getUrnAspectsMap(), newItems); upsertBatchItems.addAll(newItems); @@ -91,28 +85,41 @@ public static class AspectsBatchImplBuilder { * @param data aspect data * @return builder */ - public AspectsBatchImplBuilder one(BatchItem data) { - this.items = List.of(data); + public AspectsBatchImplBuilder one(BatchItem data, AspectRetriever aspectRetriever) { + aspectRetriever(aspectRetriever); + items(List.of(data)); return this; } public AspectsBatchImplBuilder mcps( List<MetadataChangeProposal> mcps, AuditStamp auditStamp, AspectRetriever aspectRetriever) { - this.items = + + aspectRetriever(aspectRetriever); + items( mcps.stream() .map( mcp -> { if (mcp.getChangeType().equals(ChangeType.PATCH)) { - return MCPPatchBatchItem.MCPPatchBatchItemBuilder.build( + return PatchItemImpl.PatchItemImplBuilder.build( mcp, auditStamp, aspectRetriever.getEntityRegistry()); } else { - return MCPUpsertBatchItem.MCPUpsertBatchItemBuilder.build( + return ChangeItemImpl.ChangeItemImplBuilder.build( mcp, auditStamp, aspectRetriever); } }) - .collect(Collectors.toList()); + .collect(Collectors.toList())); return this; } + + public AspectsBatchImpl build() { + ValidationExceptionCollection exceptions = + AspectsBatch.validateProposed(this.items, this.aspectRetriever); + if (!exceptions.isEmpty()) { + throw new IllegalArgumentException("Failed to 
validate MCP due to: " + exceptions); + } + + return new AspectsBatchImpl(this.items, this.aspectRetriever); + } } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java similarity index 63% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java rename to metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index b9d5f24e7ce084..b2e3363547dd02 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.entity.ebean.batch; -import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; import static com.linkedin.metadata.entity.AspectUtils.validateAspect; import com.datahub.util.exception.ModelConversionException; @@ -9,13 +8,11 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.batch.SystemAspect; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; -import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; -import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityUtils; 
import com.linkedin.metadata.entity.validation.ValidationUtils; @@ -33,26 +30,24 @@ import javax.annotation.Nullable; import lombok.Builder; import lombok.Getter; +import lombok.Setter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @Slf4j @Getter @Builder(toBuilder = true) -public class MCPUpsertBatchItem extends UpsertItem { +public class ChangeItemImpl implements ChangeMCP { - public static MCPUpsertBatchItem fromPatch( + public static ChangeItemImpl fromPatch( @Nonnull Urn urn, @Nonnull AspectSpec aspectSpec, @Nullable RecordTemplate recordTemplate, GenericPatchTemplate<? extends RecordTemplate> genericPatchTemplate, @Nonnull AuditStamp auditStamp, AspectRetriever aspectRetriever) { - MCPUpsertBatchItem.MCPUpsertBatchItemBuilder builder = - MCPUpsertBatchItem.builder() - .urn(urn) - .auditStamp(auditStamp) - .aspectName(aspectSpec.getName()); + ChangeItemImplBuilder builder = + ChangeItemImpl.builder().urn(urn).auditStamp(auditStamp).aspectName(aspectSpec.getName()); RecordTemplate currentValue = recordTemplate != null ? 
recordTemplate : genericPatchTemplate.getDefault(); @@ -84,76 +79,45 @@ public static MCPUpsertBatchItem fromPatch( @Nonnull private final EntitySpec entitySpec; @Nonnull private final AspectSpec aspectSpec; + @Setter @Nullable private SystemAspect previousSystemAspect; + @Setter private long nextAspectVersion; + @Nonnull @Override public ChangeType getChangeType() { return ChangeType.UPSERT; } - public void applyMutationHooks( - @Nullable RecordTemplate oldAspectValue, - @Nullable SystemMetadata oldSystemMetadata, - @Nonnull AspectRetriever aspectRetriever) { - // add audit stamp/system meta if needed - for (MutationHook mutationHook : - aspectRetriever - .getEntityRegistry() - .getMutationHooks(getChangeType(), entitySpec.getName(), aspectSpec.getName())) { - mutationHook.applyMutation( - getChangeType(), - entitySpec, - aspectSpec, - oldAspectValue, - recordTemplate, - oldSystemMetadata, - systemMetadata, - auditStamp, - aspectRetriever); - } - } - - @Override - public SystemAspect toLatestEntityAspect() { - EntityAspect latest = new EntityAspect(); - latest.setAspect(getAspectName()); - latest.setMetadata(EntityUtils.toJsonAspect(getRecordTemplate())); - latest.setUrn(getUrn().toString()); - latest.setVersion(ASPECT_LATEST_VERSION); - latest.setCreatedOn(new Timestamp(auditStamp.getTime())); - latest.setCreatedBy(auditStamp.getActor().toString()); - return latest.asSystemAspect(); - } - + @Nonnull @Override - public void validatePreCommit( - @Nullable RecordTemplate previous, @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - - for (AspectPayloadValidator validator : - aspectRetriever - .getEntityRegistry() - .getAspectPayloadValidators( - getChangeType(), entitySpec.getName(), aspectSpec.getName())) { - validator.validatePreCommit( - getChangeType(), urn, getAspectSpec(), previous, this.recordTemplate, aspectRetriever); - } + public SystemAspect getSystemAspect(@Nullable Long version) { + EntityAspect entityAspect = new 
EntityAspect(); + entityAspect.setAspect(getAspectName()); + entityAspect.setMetadata(EntityUtils.toJsonAspect(getRecordTemplate())); + entityAspect.setUrn(getUrn().toString()); + entityAspect.setVersion(version == null ? getNextAspectVersion() : version); + entityAspect.setCreatedOn(new Timestamp(getAuditStamp().getTime())); + entityAspect.setCreatedBy(getAuditStamp().getActor().toString()); + entityAspect.setSystemMetadata(EntityUtils.toJsonAspect(getSystemMetadata())); + return EntityAspect.EntitySystemAspect.builder() + .build(getEntitySpec(), getAspectSpec(), entityAspect); } - public static class MCPUpsertBatchItemBuilder { + public static class ChangeItemImplBuilder { // Ensure use of other builders - private MCPUpsertBatchItem build() { + private ChangeItemImpl build() { return null; } - public MCPUpsertBatchItemBuilder systemMetadata(SystemMetadata systemMetadata) { + public ChangeItemImplBuilder systemMetadata(SystemMetadata systemMetadata) { this.systemMetadata = SystemMetadataUtils.generateSystemMetadataIfEmpty(systemMetadata); return this; } @SneakyThrows - public MCPUpsertBatchItem build(AspectRetriever aspectRetriever) { - EntityUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); + public ChangeItemImpl build(AspectRetriever aspectRetriever) { + ValidationUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(aspectRetriever.getEntityRegistry().getEntitySpec(this.urn.getEntityType())); @@ -163,14 +127,9 @@ public MCPUpsertBatchItem build(AspectRetriever aspectRetriever) { log.debug("aspect spec = {}", this.aspectSpec); ValidationUtils.validateRecordTemplate( - ChangeType.UPSERT, - this.entitySpec, - this.aspectSpec, - this.urn, - this.recordTemplate, - aspectRetriever); + this.entitySpec, this.urn, this.recordTemplate, aspectRetriever); - return new MCPUpsertBatchItem( + return new ChangeItemImpl( this.urn, this.aspectName, this.recordTemplate, @@ -178,10 
+137,12 @@ public MCPUpsertBatchItem build(AspectRetriever aspectRetriever) { this.auditStamp, this.metadataChangeProposal, this.entitySpec, - this.aspectSpec); + this.aspectSpec, + this.previousSystemAspect, + this.nextAspectVersion); } - public static MCPUpsertBatchItem build( + public static ChangeItemImpl build( MetadataChangeProposal mcp, AuditStamp auditStamp, AspectRetriever aspectRetriever) { if (!mcp.getChangeType().equals(ChangeType.UPSERT)) { throw new IllegalArgumentException( @@ -193,7 +154,7 @@ public static MCPUpsertBatchItem build( aspectRetriever.getEntityRegistry().getEntitySpec(mcp.getEntityType()); AspectSpec aspectSpec = validateAspect(mcp, entitySpec); - if (!isValidChangeType(ChangeType.UPSERT, aspectSpec)) { + if (!MCPItem.isValidChangeType(ChangeType.UPSERT, aspectSpec)) { throw new UnsupportedOperationException( "ChangeType not supported: " + mcp.getChangeType() @@ -206,7 +167,7 @@ public static MCPUpsertBatchItem build( urn = EntityKeyUtils.getUrnFromProposal(mcp, entitySpec.getKeyAspectSpec()); } - return MCPUpsertBatchItem.builder() + return ChangeItemImpl.builder() .urn(urn) .aspectName(mcp.getAspectName()) .systemMetadata( @@ -217,16 +178,6 @@ public static MCPUpsertBatchItem build( .build(aspectRetriever); } - private MCPUpsertBatchItemBuilder entitySpec(EntitySpec entitySpec) { - this.entitySpec = entitySpec; - return this; - } - - private MCPUpsertBatchItemBuilder aspectSpec(AspectSpec aspectSpec) { - this.aspectSpec = aspectSpec; - return this; - } - private static RecordTemplate convertToRecordTemplate( MetadataChangeProposal mcp, AspectSpec aspectSpec) { RecordTemplate aspect; @@ -241,7 +192,6 @@ private static RecordTemplate convertToRecordTemplate( "Could not deserialize %s for aspect %s", mcp.getAspect().getValue(), mcp.getAspectName())); } - log.debug("aspect = {}", aspect); return aspect; } } @@ -254,7 +204,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - 
MCPUpsertBatchItem that = (MCPUpsertBatchItem) o; + ChangeItemImpl that = (ChangeItemImpl) o; return urn.equals(that.urn) && aspectName.equals(that.aspectName) && Objects.equals(systemMetadata, that.systemMetadata) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java new file mode 100644 index 00000000000000..0ab854198a2828 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java @@ -0,0 +1,139 @@ +package com.linkedin.metadata.entity.ebean.batch; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.entity.EntityAspect; +import com.linkedin.metadata.entity.EntityUtils; +import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import java.util.Objects; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Getter +@Builder(toBuilder = true) +public class DeleteItemImpl implements ChangeMCP { + + // urn an urn associated with the new aspect + @Nonnull private final Urn urn; + + // aspectName name of the aspect being inserted + @Nonnull private final String aspectName; + + @Nonnull private final AuditStamp auditStamp; + + // derived + @Nonnull private final EntitySpec entitySpec; + @Nonnull private final AspectSpec aspectSpec; 
+ + @Setter @Nullable private SystemAspect previousSystemAspect; + + @Nonnull + @Override + public ChangeType getChangeType() { + return ChangeType.DELETE; + } + + @Nullable + @Override + public RecordTemplate getRecordTemplate() { + return null; + } + + @Nullable + @Override + public SystemMetadata getSystemMetadata() { + return null; + } + + @Nullable + @Override + public MetadataChangeProposal getMetadataChangeProposal() { + return EntityUtils.buildMCP(getUrn(), aspectName, getChangeType(), null); + } + + @Nonnull + @Override + public SystemAspect getSystemAspect(@Nullable Long nextAspectVersion) { + EntityAspect entityAspect = new EntityAspect(); + entityAspect.setAspect(getAspectName()); + entityAspect.setUrn(getUrn().toString()); + entityAspect.setVersion(0); + return EntityAspect.EntitySystemAspect.builder() + .build(getEntitySpec(), getAspectSpec(), entityAspect); + } + + @Override + public long getNextAspectVersion() { + return 0; + } + + @Override + public void setNextAspectVersion(long nextAspectVersion) { + throw new IllegalStateException("Next aspect version is always zero"); + } + + public static class DeleteItemImplBuilder { + + // Ensure use of other builders + private DeleteItemImpl build() { + return null; + } + + @SneakyThrows + public DeleteItemImpl build(AspectRetriever aspectRetriever) { + ValidationUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); + log.debug("entity type = {}", this.urn.getEntityType()); + + entitySpec(aspectRetriever.getEntityRegistry().getEntitySpec(this.urn.getEntityType())); + log.debug("entity spec = {}", this.entitySpec); + + aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + log.debug("aspect spec = {}", this.aspectSpec); + + return new DeleteItemImpl( + this.urn, + this.aspectName, + this.auditStamp, + this.entitySpec, + this.aspectSpec, + this.previousSystemAspect); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null 
|| getClass() != o.getClass()) { + return false; + } + DeleteItemImpl that = (DeleteItemImpl) o; + return urn.equals(that.urn) && aspectName.equals(that.aspectName); + } + + @Override + public int hashCode() { + return Objects.hash(urn, aspectName); + } + + @Override + public String toString() { + return "UpsertBatchItem{" + "urn=" + urn + ", aspectName='" + aspectName + '\'' + '}'; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java similarity index 77% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java rename to metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java index a2ed2eb18fe6a3..6efc1e78b543c1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java @@ -1,14 +1,12 @@ package com.linkedin.metadata.entity.ebean.batch; -import static com.linkedin.metadata.entity.AspectUtils.validateAspect; - import com.datahub.util.exception.ModelConversionException; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.batch.MCLBatchItem; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.entity.EntityUtils; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -26,7 +24,7 @@ @Slf4j @Getter @Builder(toBuilder = true) -public class MCLBatchItemImpl implements MCLBatchItem { +public class 
MCLItemImpl implements MCLItem { @Nonnull private final MetadataChangeLog metadataChangeLog; @@ -38,19 +36,18 @@ public class MCLBatchItemImpl implements MCLBatchItem { private final EntitySpec entitySpec; private final AspectSpec aspectSpec; - public static class MCLBatchItemImplBuilder { + public static class MCLItemImplBuilder { // Ensure use of other builders - private MCLBatchItemImpl build() { + private MCLItemImpl build() { return null; } - public MCLBatchItemImpl build( - MetadataChangeLog metadataChangeLog, AspectRetriever aspectRetriever) { - return MCLBatchItemImpl.builder().metadataChangeLog(metadataChangeLog).build(aspectRetriever); + public MCLItemImpl build(MetadataChangeLog metadataChangeLog, AspectRetriever aspectRetriever) { + return MCLItemImpl.builder().metadataChangeLog(metadataChangeLog).build(aspectRetriever); } - public MCLBatchItemImpl build(AspectRetriever aspectRetriever) { + public MCLItemImpl build(AspectRetriever aspectRetriever) { EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); log.debug("entity type = {}", this.metadataChangeLog.getEntityType()); @@ -58,7 +55,7 @@ public MCLBatchItemImpl build(AspectRetriever aspectRetriever) { aspectRetriever .getEntityRegistry() .getEntitySpec(this.metadataChangeLog.getEntityType())); - aspectSpec(validateAspect(this.metadataChangeLog, this.entitySpec)); + aspectSpec(AspectUtils.validateAspect(this.metadataChangeLog, this.entitySpec)); Urn urn = this.metadataChangeLog.getEntityUrn(); if (urn == null) { @@ -66,7 +63,7 @@ public MCLBatchItemImpl build(AspectRetriever aspectRetriever) { EntityKeyUtils.getUrnFromLog( this.metadataChangeLog, this.entitySpec.getKeyAspectSpec()); } - EntityUtils.validateUrn(entityRegistry, urn); + ValidationUtils.validateUrn(entityRegistry, urn); log.debug("entity type = {}", urn.getEntityType()); entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); @@ -80,14 +77,9 @@ public MCLBatchItemImpl build(AspectRetriever aspectRetriever) { // 
validate new ValidationUtils.validateRecordTemplate( - this.metadataChangeLog.getChangeType(), - this.entitySpec, - this.aspectSpec, - urn, - aspects.getFirst(), - aspectRetriever); + this.entitySpec, urn, aspects.getFirst(), aspectRetriever); - return new MCLBatchItemImpl( + return new MCLItemImpl( this.metadataChangeLog, aspects.getFirst(), aspects.getSecond(), @@ -95,12 +87,12 @@ public MCLBatchItemImpl build(AspectRetriever aspectRetriever) { this.aspectSpec); } - private MCLBatchItemImplBuilder entitySpec(EntitySpec entitySpec) { + private MCLItemImplBuilder entitySpec(EntitySpec entitySpec) { this.entitySpec = entitySpec; return this; } - private MCLBatchItemImplBuilder aspectSpec(AspectSpec aspectSpec) { + private MCLItemImplBuilder aspectSpec(AspectSpec aspectSpec) { this.aspectSpec = aspectSpec; return this; } @@ -150,7 +142,7 @@ public boolean equals(Object o) { return false; } - MCLBatchItemImpl that = (MCLBatchItemImpl) o; + MCLItemImpl that = (MCLItemImpl) o; return metadataChangeLog.equals(that.metadataChangeLog); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java similarity index 84% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java rename to metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index d0cb2a4cc59b8a..cf9c3978e3a374 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -15,10 +15,10 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.batch.PatchItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import 
com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.batch.PatchMCP; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -39,7 +39,7 @@ @Slf4j @Getter @Builder(toBuilder = true) -public class MCPPatchBatchItem extends PatchItem { +public class PatchItemImpl implements PatchMCP { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); static { @@ -79,10 +79,9 @@ public RecordTemplate getRecordTemplate() { return null; } - public MCPUpsertBatchItem applyPatch( - RecordTemplate recordTemplate, AspectRetriever aspectRetriever) { - MCPUpsertBatchItem.MCPUpsertBatchItemBuilder builder = - MCPUpsertBatchItem.builder() + public ChangeItemImpl applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever) { + ChangeItemImpl.ChangeItemImplBuilder builder = + ChangeItemImpl.builder() .urn(getUrn()) .aspectName(getAspectName()) .metadataChangeProposal(getMetadataChangeProposal()) @@ -116,16 +115,15 @@ public MCPUpsertBatchItem applyPatch( return builder.build(aspectRetriever); } - public static class MCPPatchBatchItemBuilder { + public static class PatchItemImplBuilder { - public MCPPatchBatchItem.MCPPatchBatchItemBuilder systemMetadata( - SystemMetadata systemMetadata) { + public PatchItemImpl.PatchItemImplBuilder systemMetadata(SystemMetadata systemMetadata) { this.systemMetadata = SystemMetadataUtils.generateSystemMetadataIfEmpty(systemMetadata); return this; } - public MCPPatchBatchItem build(EntityRegistry entityRegistry) { - EntityUtils.validateUrn(entityRegistry, this.urn); + public PatchItemImpl build(EntityRegistry entityRegistry) { + ValidationUtils.validateUrn(entityRegistry, this.urn); log.debug("entity type = 
{}", this.urn.getEntityType()); entitySpec(entityRegistry.getEntitySpec(this.urn.getEntityType())); @@ -139,7 +137,7 @@ public MCPPatchBatchItem build(EntityRegistry entityRegistry) { String.format("Missing patch to apply. Aspect: %s", this.aspectSpec.getName())); } - return new MCPPatchBatchItem( + return new PatchItemImpl( this.urn, this.aspectName, SystemMetadataUtils.generateSystemMetadataIfEmpty(this.systemMetadata), @@ -150,13 +148,13 @@ public MCPPatchBatchItem build(EntityRegistry entityRegistry) { this.aspectSpec); } - public static MCPPatchBatchItem build( + public static PatchItemImpl build( MetadataChangeProposal mcp, AuditStamp auditStamp, EntityRegistry entityRegistry) { log.debug("entity type = {}", mcp.getEntityType()); EntitySpec entitySpec = entityRegistry.getEntitySpec(mcp.getEntityType()); AspectSpec aspectSpec = validateAspect(mcp, entitySpec); - if (!PatchItem.isValidChangeType(ChangeType.PATCH, aspectSpec)) { + if (!MCPItem.isValidChangeType(ChangeType.PATCH, aspectSpec)) { throw new UnsupportedOperationException( "ChangeType not supported: " + mcp.getChangeType() @@ -169,7 +167,7 @@ public static MCPPatchBatchItem build( urn = EntityKeyUtils.getUrnFromProposal(mcp, entitySpec.getKeyAspectSpec()); } - return MCPPatchBatchItem.builder() + return PatchItemImpl.builder() .urn(urn) .aspectName(mcp.getAspectName()) .systemMetadata( @@ -180,16 +178,6 @@ public static MCPPatchBatchItem build( .build(entityRegistry); } - private MCPPatchBatchItemBuilder entitySpec(EntitySpec entitySpec) { - this.entitySpec = entitySpec; - return this; - } - - private MCPPatchBatchItemBuilder aspectSpec(AspectSpec aspectSpec) { - this.aspectSpec = aspectSpec; - return this; - } - private static Patch convertToJsonPatch(MetadataChangeProposal mcp) { JsonNode json; try { @@ -209,7 +197,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - MCPPatchBatchItem that = (MCPPatchBatchItem) o; + PatchItemImpl that = 
(PatchItemImpl) o; return urn.equals(that.urn) && aspectName.equals(that.aspectName) && Objects.equals(systemMetadata, that.systemMetadata) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java index 947f0116b587c6..16942a02b0e4a3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java @@ -3,14 +3,13 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import java.net.URISyntaxException; +import java.net.URLEncoder; import java.util.function.Consumer; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -18,6 +17,8 @@ @Slf4j public class ValidationUtils { + public static final int URN_NUM_BYTES_LIMIT = 512; + public static final String URN_DELIMITER_SEPARATOR = "␟"; /** * Validates a {@link RecordTemplate} and throws {@link @@ -66,9 +67,7 @@ public static AspectSpec validate(EntitySpec entitySpec, String aspectName) { } public static void validateRecordTemplate( - ChangeType changeType, EntitySpec entitySpec, - AspectSpec aspectSpec, Urn urn, @Nullable RecordTemplate aspect, @Nonnull AspectRetriever aspectRetriever) { @@ -89,17 +88,38 @@ public static 
void validateRecordTemplate( if (aspect != null) { RecordTemplateValidator.validate(aspect, resultFunction, validator); + } + } - for (AspectPayloadValidator aspectValidator : - entityRegistry.getAspectPayloadValidators( - changeType, entitySpec.getName(), aspectSpec.getName())) { - try { - aspectValidator.validateProposed(changeType, urn, aspectSpec, aspect, aspectRetriever); - } catch (AspectValidationException e) { + public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); + validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); + RecordTemplateValidator.validate( + EntityUtils.buildKeyAspect(entityRegistry, urn), + validationResult -> { throw new IllegalArgumentException( - "Failed to validate aspect due to: " + e.getMessage(), e); - } - } + "Invalid urn: " + urn + "\n Cause: " + validationResult.getMessages()); + }, + validator); + + if (urn.toString().trim().length() != urn.toString().length()) { + throw new IllegalArgumentException( + "Error: cannot provide an URN with leading or trailing whitespace"); + } + if (URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { + throw new IllegalArgumentException( + "Error: cannot provide an URN longer than " + + Integer.toString(URN_NUM_BYTES_LIMIT) + + " bytes (when URL encoded)"); + } + if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { + throw new IllegalArgumentException( + "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); + } + try { + Urn.createFromString(urn.toString()); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 270615aa0e3568..5fc3dfc779fa46 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -302,6 +302,7 @@ public LineageResponse getLineage( exploreMultiplePaths); for (LineageRelationship oneHopRelnship : oneHopRelationships) { if (result.containsKey(oneHopRelnship.getEntity())) { + log.debug("Urn encountered again during graph walk {}", oneHopRelnship.getEntity()); result.put( oneHopRelnship.getEntity(), mergeLineageRelationships(result.get(oneHopRelnship.getEntity()), oneHopRelnship)); @@ -553,26 +554,6 @@ public static void addEdgeToPaths( addEdgeToPaths(existingPaths, parentUrn, null, childUrn); } - /** - * Utility method to log paths to the debug log. - * - * @param paths - * @param message - */ - private static void logPaths(UrnArrayArray paths, String message) { - if (log.isDebugEnabled()) { - log.debug("xxxxxxxxxx"); - log.debug(message); - log.debug("---------"); - if (paths != null) { - paths.forEach(path -> log.debug("{}", path)); - } else { - log.debug("EMPTY"); - } - log.debug("xxxxxxxxxx"); - } - } - private static boolean containsCycle(final UrnArray path) { Set<Urn> urnSet = path.stream().collect(Collectors.toUnmodifiableSet()); // path contains a cycle if any urn is repeated twice @@ -587,8 +568,6 @@ public static boolean addEdgeToPaths( boolean edgeAdded = false; // Collect all full-paths to this child node. This is what will be returned. UrnArrayArray pathsToParent = existingPaths.get(parentUrn); - logPaths(pathsToParent, String.format("Paths to Parent: %s, Child: %s", parentUrn, childUrn)); - logPaths(existingPaths.get(childUrn), String.format("Existing Paths to Child: %s", childUrn)); if (pathsToParent != null && !pathsToParent.isEmpty()) { // If there are existing paths to this parent node, then we attempt // to append the child to each of the existing paths (lengthen it). 
@@ -630,7 +609,6 @@ public static boolean addEdgeToPaths( existingPaths.get(childUrn).add(pathToChild); edgeAdded = true; } - logPaths(existingPaths.get(childUrn), String.format("New paths to Child: %s", childUrn)); return edgeAdded; } @@ -655,7 +633,6 @@ private static List<LineageRelationship> extractRelationships( for (SearchHit hit : hits) { index++; final Map<String, Object> document = hit.getSourceAsMap(); - log.debug("{}: hit: {}", index, document); final Urn sourceUrn = UrnUtils.getUrn(((Map<String, Object>) document.get(SOURCE)).get("urn").toString()); final Urn destinationUrn = @@ -808,7 +785,6 @@ private static List<LineageRelationship> extractRelationships( } List<LineageRelationship> result = new ArrayList<>(lineageRelationshipMap.values()); log.debug("Number of lineage relationships in list: {}", result.size()); - log.debug("Result: {}", result); return result; } catch (Exception e) { // This exception handler merely exists to log the exception at an appropriate point and diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 61b18eb884d4df..936ecb6a8ead1c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.elasticsearch; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.query.AutoCompleteResult; @@ -41,6 +42,13 @@ public class ElasticSearchService implements EntitySearchService, ElasticSearchI private final ESBrowseDAO esBrowseDAO; private final ESWriteDAO esWriteDAO; + @Override + public ElasticSearchService 
postConstruct(AspectRetriever aspectRetriever) { + esSearchDAO.setAspectRetriever(aspectRetriever); + esBrowseDAO.setAspectRetriever(aspectRetriever); + return this; + } + @Override public void configure() { indexBuilders.reindexAll(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index b8085885200892..dd1c09853114d1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -7,6 +7,7 @@ import com.google.common.annotations.VisibleForTesting; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultEntity; import com.linkedin.metadata.browse.BrowseResultEntityArray; @@ -20,7 +21,6 @@ import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; @@ -41,7 +41,9 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; +import lombok.Setter; import lombok.Value; +import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.StringUtils; import org.opensearch.action.search.SearchRequest; @@ -62,9 +64,10 @@ @Slf4j @RequiredArgsConstructor +@Accessors(chain = true) public class ESBrowseDAO { - private final EntityRegistry entityRegistry; + @Setter 
private AspectRetriever aspectRetriever; private final RestHighLevelClient client; private final IndexConvention indexConvention; @Nonnull private final SearchConfiguration searchConfiguration; @@ -118,7 +121,8 @@ public BrowseResult browse( try { final String indexName = - indexConvention.getIndexName(entityRegistry.getEntitySpec(entityName)); + indexConvention.getIndexName( + aspectRetriever.getEntityRegistry().getEntitySpec(entityName)); final SearchResponse groupsResponse; try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esGroupSearch").time()) { @@ -373,7 +377,8 @@ private static int getPathDepth(@Nonnull String path) { */ @Nonnull public List<String> getBrowsePaths(@Nonnull String entityName, @Nonnull Urn urn) { - final String indexName = indexConvention.getIndexName(entityRegistry.getEntitySpec(entityName)); + final String indexName = + indexConvention.getIndexName(aspectRetriever.getEntityRegistry().getEntitySpec(entityName)); final SearchRequest searchRequest = new SearchRequest(indexName); searchRequest.source( new SearchSourceBuilder().query(QueryBuilders.termQuery(URN, urn.toString()))); @@ -478,7 +483,8 @@ private SearchRequest constructGroupsSearchRequestV2( @Nullable Filter filter, @Nonnull String input, @Nullable SearchFlags searchFlags) { - final String indexName = indexConvention.getIndexName(entityRegistry.getEntitySpec(entityName)); + final String indexName = + indexConvention.getIndexName(aspectRetriever.getEntityRegistry().getEntitySpec(entityName)); final SearchRequest searchRequest = new SearchRequest(indexName); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(0); @@ -503,7 +509,9 @@ private SearchRequest constructGroupsSearchRequestBrowseAcrossEntities( @Nullable SearchFlags searchFlags) { List<EntitySpec> entitySpecs = - entities.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); + entities.stream() + .map(name -> 
aspectRetriever.getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); String[] indexArray = entities.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new); @@ -553,9 +561,10 @@ private QueryBuilder buildQueryStringV2( final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(entityName); QueryBuilder query = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, customSearchConfiguration, aspectRetriever) .getQuery(input, Boolean.TRUE.equals(finalSearchFlags.isFulltext())); queryBuilder.must(query); @@ -568,7 +577,8 @@ private QueryBuilder buildQueryStringV2( queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal)); queryBuilder.filter( - SearchRequestHandler.getFilterQuery(filter, entitySpec.getSearchableFieldTypes())); + SearchRequestHandler.getFilterQuery( + filter, entitySpec.getSearchableFieldTypes(), aspectRetriever)); return queryBuilder; } @@ -587,7 +597,8 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); QueryBuilder query = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, searchConfiguration, customSearchConfiguration, aspectRetriever) .getQuery(input, Boolean.TRUE.equals(finalSearchFlags.isFulltext())); queryBuilder.must(query); @@ -608,7 +619,8 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( set1.addAll(set2); return set1; })); - queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter, searchableFields)); + queryBuilder.filter( + SearchRequestHandler.getFilterQuery(filter, searchableFields, aspectRetriever)); return queryBuilder; } diff 
--git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index bfe9a8bd9e10a3..4cfb54dacb5f03 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -9,10 +9,10 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.annotations.VisibleForTesting; import com.linkedin.data.template.LongMap; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; @@ -41,6 +41,8 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.explain.ExplainRequest; import org.opensearch.action.explain.ExplainResponse; @@ -62,6 +64,7 @@ /** A search DAO for Elasticsearch backend. 
*/ @Slf4j @RequiredArgsConstructor +@Accessors(chain = true) public class ESSearchDAO { private static final NamedXContentRegistry X_CONTENT_REGISTRY; @@ -70,7 +73,7 @@ public class ESSearchDAO { X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); } - private final EntityRegistry entityRegistry; + @Setter private AspectRetriever aspectRetriever; private final RestHighLevelClient client; private final IndexConvention indexConvention; private final boolean pointInTimeCreationEnabled; @@ -79,10 +82,12 @@ public class ESSearchDAO { @Nullable private final CustomSearchConfiguration customSearchConfiguration; public long docCount(@Nonnull String entityName) { - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(entityName); CountRequest countRequest = new CountRequest(indexConvention.getIndexName(entitySpec)) - .query(SearchRequestHandler.getFilterQuery(null, entitySpec.getSearchableFieldTypes())); + .query( + SearchRequestHandler.getFilterQuery( + null, entitySpec.getSearchableFieldTypes(), aspectRetriever)); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "docCount").time()) { return client.count(countRequest, RequestOptions.DEFAULT).getCount(); } catch (IOException e) { @@ -107,7 +112,7 @@ private SearchResult executeAndExtract( // extract results, validated against document model as well return transformIndexIntoEntityName( SearchRequestHandler.getBuilder( - entitySpec, searchConfiguration, customSearchConfiguration) + entitySpec, searchConfiguration, customSearchConfiguration, aspectRetriever) .extractResult(searchResponse, filter, from, size)); } catch (Exception e) { log.error("Search query failed", e); @@ -191,7 +196,7 @@ private ScrollResult executeAndExtract( // extract results, validated against document model as well return transformIndexIntoEntityName( SearchRequestHandler.getBuilder( - entitySpecs, searchConfiguration, 
customSearchConfiguration) + entitySpecs, searchConfiguration, customSearchConfiguration, aspectRetriever) .extractScrollResult( searchResponse, filter, scrollId, keepAlive, size, supportsPointInTime())); } catch (Exception e) { @@ -228,11 +233,14 @@ public SearchResult search( final String finalInput = input.isEmpty() ? "*" : input; Timer.Context searchRequestTimer = MetricUtils.timer(this.getClass(), "searchRequest").time(); List<EntitySpec> entitySpecs = - entityNames.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); + entityNames.stream() + .map(name -> aspectRetriever.getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // Step 1: construct the query final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, searchConfiguration, customSearchConfiguration, aspectRetriever) .getSearchRequest( finalInput, transformedFilters, sortCriterion, from, size, searchFlags, facets); searchRequest.indices( @@ -260,10 +268,11 @@ public SearchResult filter( @Nullable SortCriterion sortCriterion, int from, int size) { - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(filters, indexConvention); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, customSearchConfiguration, aspectRetriever) .getFilterRequest(transformedFilters, sortCriterion, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); @@ -290,8 +299,9 @@ public AutoCompleteResult autoComplete( @Nullable Filter requestParams, int limit) { try { 
- EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - AutocompleteRequestHandler builder = AutocompleteRequestHandler.getBuilder(entitySpec); + EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(entityName); + AutocompleteRequestHandler builder = + AutocompleteRequestHandler.getBuilder(entitySpec, aspectRetriever); SearchRequest req = builder.getSearchRequest( query, field, transformFilterForEntities(requestParams, indexConvention), limit); @@ -321,13 +331,16 @@ public Map<String, Long> aggregateByValue( int limit) { List<EntitySpec> entitySpecs; if (entityNames == null || entityNames.isEmpty()) { - entitySpecs = QueryUtils.getQueryByDefaultEntitySpecs(entityRegistry); + entitySpecs = QueryUtils.getQueryByDefaultEntitySpecs(aspectRetriever.getEntityRegistry()); } else { entitySpecs = - entityNames.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); + entityNames.stream() + .map(name -> aspectRetriever.getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); } final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, searchConfiguration, customSearchConfiguration, aspectRetriever) .getAggregationRequest( field, transformFilterForEntities(requestParams, indexConvention), limit); if (entityNames == null) { @@ -336,7 +349,7 @@ public Map<String, Long> aggregateByValue( } else { Stream<String> stream = entityNames.stream() - .map(entityRegistry::getEntitySpec) + .map(name -> aspectRetriever.getEntityRegistry().getEntitySpec(name)) .map(indexConvention::getIndexName); searchRequest.indices(stream.toArray(String[]::new)); } @@ -381,7 +394,9 @@ public ScrollResult scroll( entities.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new); Timer.Context scrollRequestTimer = MetricUtils.timer(this.getClass(), "scrollRequest").time(); List<EntitySpec> entitySpecs 
= - entities.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); + entities.stream() + .map(name -> aspectRetriever.getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // TODO: Align scroll and search using facets final SearchRequest searchRequest = @@ -436,7 +451,7 @@ private SearchRequest getScrollRequest( } return SearchRequestHandler.getBuilder( - entitySpecs, searchConfiguration, customSearchConfiguration) + entitySpecs, searchConfiguration, customSearchConfiguration, aspectRetriever) .getSearchRequest( finalInput, postFilters, @@ -502,7 +517,7 @@ public ExplainResponse explain( @Nullable String keepAlive, int size, @Nullable List<String> facets) { - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); final String[] indexArray = new String[] {indexConvention.getEntityIndexName(entityName)}; final String finalQuery = query.isEmpty() ? 
"*" : query; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index 887d4b22f37e24..fb3b51930370c4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -5,6 +5,7 @@ import static com.linkedin.metadata.utils.SearchUtil.*; import com.linkedin.data.template.LongMap; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.StructuredPropertyUtils; @@ -47,6 +48,7 @@ public class AggregationQueryBuilder { private static final String URN_FILTER = "urn"; + private final AspectRetriever aspectRetriever; private final SearchConfiguration configs; private final Set<String> defaultFacetFields; private final Set<String> allFacetFields; @@ -56,7 +58,8 @@ public class AggregationQueryBuilder { public AggregationQueryBuilder( @Nonnull final SearchConfiguration configs, - @Nonnull Map<EntitySpec, List<SearchableAnnotation>> entitySearchAnnotations) { + @Nonnull Map<EntitySpec, List<SearchableAnnotation>> entitySearchAnnotations, + @Nonnull AspectRetriever aspectRetriever) { this.configs = Objects.requireNonNull(configs, "configs must not be null"); this.entitySearchAnnotations = entitySearchAnnotations; @@ -66,6 +69,7 @@ public AggregationQueryBuilder( .collect(Collectors.toList()); this.defaultFacetFields = getDefaultFacetFields(annotations); this.allFacetFields = getAllFacetFields(annotations); + this.aspectRetriever = aspectRetriever; } /** Get the set of default aggregations, across all facets. 
*/ @@ -130,11 +134,12 @@ private AggregationBuilder facetToAggregationBuilder(final String inputFacet) { AggregationBuilder lastAggBuilder = null; for (int i = facets.size() - 1; i >= 0; i--) { String facet = facets.get(i); - if (facet.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) { + if (facet.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { String structPropFqn = facet.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1); + StructuredPropertyUtils.validateStructuredPropertyFQN( + Set.of(structPropFqn), aspectRetriever); facet = - STRUCTURED_PROPERTY_MAPPING_FIELD - + "." + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + StructuredPropertyUtils.sanitizeStructuredPropertyFQN(structPropFqn); } AggregationBuilder aggBuilder; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 38350322478741..de35d53bcde49b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; @@ -46,7 +47,10 @@ public class AutocompleteRequestHandler { private static final Map<EntitySpec, AutocompleteRequestHandler> AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); - public AutocompleteRequestHandler(@Nonnull EntitySpec entitySpec) { + private final AspectRetriever aspectRetriever; + + public 
AutocompleteRequestHandler( + @Nonnull EntitySpec entitySpec, @Nonnull AspectRetriever aspectRetriever) { List<SearchableFieldSpec> fieldSpecs = entitySpec.getSearchableFieldSpecs(); _defaultAutocompleteFields = Stream.concat( @@ -70,11 +74,13 @@ public AutocompleteRequestHandler(@Nonnull EntitySpec entitySpec) { set1.addAll(set2); return set1; })); + this.aspectRetriever = aspectRetriever; } - public static AutocompleteRequestHandler getBuilder(@Nonnull EntitySpec entitySpec) { + public static AutocompleteRequestHandler getBuilder( + @Nonnull EntitySpec entitySpec, @Nonnull AspectRetriever aspectRetriever) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( - entitySpec, k -> new AutocompleteRequestHandler(entitySpec)); + entitySpec, k -> new AutocompleteRequestHandler(entitySpec, aspectRetriever)); } public SearchRequest getSearchRequest( @@ -83,7 +89,8 @@ public SearchRequest getSearchRequest( SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(limit); searchSourceBuilder.query(getQuery(input, field)); - searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter, false, searchableFieldTypes)); + searchSourceBuilder.postFilter( + ESUtils.buildFilterQuery(filter, false, searchableFieldTypes, aspectRetriever)); searchSourceBuilder.highlighter(getHighlights(field)); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 88e8af52f01ace..db09c52d2099c8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -9,6 +9,7 @@ import com.google.common.collect.ImmutableMap; import 
com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; @@ -72,40 +73,45 @@ public class SearchRequestHandler { .setSkipHighlighting(false); private static final Map<List<EntitySpec>, SearchRequestHandler> REQUEST_HANDLER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); - private final List<EntitySpec> _entitySpecs; - private final Set<String> _defaultQueryFieldNames; - private final HighlightBuilder _highlights; + private final List<EntitySpec> entitySpecs; + private final Set<String> defaultQueryFieldNames; + private final HighlightBuilder highlights; - private final SearchConfiguration _configs; - private final SearchQueryBuilder _searchQueryBuilder; - private final AggregationQueryBuilder _aggregationQueryBuilder; + private final SearchConfiguration configs; + private final SearchQueryBuilder searchQueryBuilder; + private final AggregationQueryBuilder aggregationQueryBuilder; private final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes; + private final AspectRetriever aspectRetriever; + private SearchRequestHandler( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { - this(ImmutableList.of(entitySpec), configs, customSearchConfiguration); + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull AspectRetriever aspectRetriever) { + this(ImmutableList.of(entitySpec), configs, customSearchConfiguration, aspectRetriever); } private SearchRequestHandler( @Nonnull List<EntitySpec> entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { - _entitySpecs = entitySpecs; + @Nullable CustomSearchConfiguration customSearchConfiguration, 
+ @Nonnull AspectRetriever aspectRetriever) { + this.entitySpecs = entitySpecs; Map<EntitySpec, List<SearchableAnnotation>> entitySearchAnnotations = getSearchableAnnotations(); List<SearchableAnnotation> annotations = entitySearchAnnotations.values().stream() .flatMap(List::stream) .collect(Collectors.toList()); - _defaultQueryFieldNames = getDefaultQueryFieldNames(annotations); - _highlights = getHighlights(); - _searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration); - _aggregationQueryBuilder = new AggregationQueryBuilder(configs, entitySearchAnnotations); - _configs = configs; + defaultQueryFieldNames = getDefaultQueryFieldNames(annotations); + highlights = getHighlights(); + searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration); + aggregationQueryBuilder = + new AggregationQueryBuilder(configs, entitySearchAnnotations, aspectRetriever); + this.configs = configs; searchableFieldTypes = - _entitySpecs.stream() + this.entitySpecs.stream() .flatMap(entitySpec -> entitySpec.getSearchableFieldTypes().entrySet().stream()) .collect( Collectors.toMap( @@ -115,28 +121,35 @@ private SearchRequestHandler( set1.addAll(set2); return set1; })); + this.aspectRetriever = aspectRetriever; } public static SearchRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull AspectRetriever aspectRetriever) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.of(entitySpec), - k -> new SearchRequestHandler(entitySpec, configs, customSearchConfiguration)); + k -> + new SearchRequestHandler( + entitySpec, configs, customSearchConfiguration, aspectRetriever)); } public static SearchRequestHandler getBuilder( @Nonnull List<EntitySpec> entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration 
customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull AspectRetriever aspectRetriever) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.copyOf(entitySpecs), - k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); + k -> + new SearchRequestHandler( + entitySpecs, configs, customSearchConfiguration, aspectRetriever)); } private Map<EntitySpec, List<SearchableAnnotation>> getSearchableAnnotations() { - return _entitySpecs.stream() + return entitySpecs.stream() .map( spec -> Pair.of( @@ -158,13 +171,15 @@ private Set<String> getDefaultQueryFieldNames(List<SearchableAnnotation> annotat } public BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { - return getFilterQuery(filter, searchableFieldTypes); + return getFilterQuery(filter, searchableFieldTypes, aspectRetriever); } public static BoolQueryBuilder getFilterQuery( @Nullable Filter filter, - Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes) { - BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery(filter, false, searchableFieldTypes); + Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { + BoolQueryBuilder filterQuery = + ESUtils.buildFilterQuery(filter, false, searchableFieldTypes, aspectRetriever); return filterSoftDeletedByDefault(filter, filterQuery); } @@ -209,12 +224,12 @@ public SearchRequest getSearchRequest( .must(getQuery(input, Boolean.TRUE.equals(finalSearchFlags.isFulltext()))) .filter(filterQuery)); if (Boolean.FALSE.equals(finalSearchFlags.isSkipAggregates())) { - _aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation); + aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation); } if (Boolean.FALSE.equals(finalSearchFlags.isSkipHighlighting())) { - searchSourceBuilder.highlighter(_highlights); + searchSourceBuilder.highlighter(highlights); } - 
ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, entitySpecs); if (Boolean.TRUE.equals(finalSearchFlags.isGetSuggestions())) { ESUtils.buildNameSuggestions(searchSourceBuilder, input); @@ -266,12 +281,12 @@ public SearchRequest getSearchRequest( .must(getQuery(input, Boolean.TRUE.equals(finalSearchFlags.isFulltext()))) .filter(filterQuery)); if (Boolean.FALSE.equals(finalSearchFlags.isSkipAggregates())) { - _aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation); + aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation); } if (Boolean.FALSE.equals(finalSearchFlags.isSkipHighlighting())) { - searchSourceBuilder.highlighter(_highlights); + searchSourceBuilder.highlighter(highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); searchRequest.indicesOptions(null); @@ -298,7 +313,7 @@ public SearchRequest getFilterRequest( final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQuery); searchSourceBuilder.from(from).size(size); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -329,7 +344,7 @@ public SearchRequest getAggregationRequest( } public QueryBuilder getQuery(@Nonnull String query, boolean fulltext) { - return _searchQueryBuilder.buildQuery(_entitySpecs, query, fulltext); + return searchQueryBuilder.buildQuery(entitySpecs, query, fulltext); } @VisibleForTesting @@ -341,7 +356,7 @@ public HighlightBuilder getHighlights() { highlightBuilder.postTags(""); // Check for each field name and any 
subfields - _defaultQueryFieldNames.stream() + defaultQueryFieldNames.stream() .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")) .distinct() .forEach(highlightBuilder::field); @@ -446,7 +461,7 @@ private List<MatchedField> extractMatchedFields(@Nonnull SearchHit hit) { @Nonnull private Optional<String> getFieldName(String matchedField) { - return _defaultQueryFieldNames.stream().filter(matchedField::startsWith).findFirst(); + return defaultQueryFieldNames.stream().filter(matchedField::startsWith).findFirst(); } private Map<String, Double> extractFeatures(@Nonnull SearchHit searchHit) { @@ -499,7 +514,7 @@ private SearchResultMetadata extractSearchResultMetadata( new SearchResultMetadata().setAggregations(new AggregationMetadataArray()); final List<AggregationMetadata> aggregationMetadataList = - _aggregationQueryBuilder.extractAggregationMetadata(searchResponse, filter); + aggregationQueryBuilder.extractAggregationMetadata(searchResponse, filter); searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList)); final List<SearchSuggestion> searchSuggestions = extractSearchSuggestions(searchResponse); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index d52a80d685fd5b..75c3d23d26c667 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -11,7 +11,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import 
com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 9d4b9e6a378a09..5ca5087d5ac355 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -6,6 +6,7 @@ import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.StructuredPropertyUtils; @@ -131,11 +132,15 @@ private ESUtils() {} public static BoolQueryBuilder buildFilterQuery( @Nullable Filter filter, boolean isTimeseries, - final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes) { + final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; } + + StructuredPropertyUtils.validateFilter(filter, aspectRetriever); + if (filter.getOr() != null) { // If caller is using the new Filters API, build boolean query from that. filter @@ -386,11 +391,11 @@ public static String escapeReservedCharacters(@Nonnull String input) { public static String toFacetField(@Nonnull final String filterField) { String fieldName = filterField; if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) { + String fqn = fieldName.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1); fieldName = STRUCTURED_PROPERTY_MAPPING_FIELD + "." 
- + StructuredPropertyUtils.sanitizeStructuredPropertyFQN( - fieldName.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + + StructuredPropertyUtils.sanitizeStructuredPropertyFQN(fqn); } return fieldName.replace(ESUtils.KEYWORD_SUFFIX, ""); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 3c73d1acab5c25..52f0d680ff4ba1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -20,9 +20,11 @@ import com.linkedin.dataset.UpstreamLineage; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.aspect.batch.MCLBatchItem; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.entity.ebean.batch.MCLBatchItemImpl; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.entity.SearchIndicesService; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.GraphIndexUtils; import com.linkedin.metadata.graph.GraphService; @@ -67,7 +69,7 @@ import org.springframework.beans.factory.annotation.Value; @Slf4j -public class UpdateIndicesService { +public class UpdateIndicesService implements SearchIndicesService { private static final String DOWNSTREAM_OF = "DownstreamOf"; private final GraphService _graphService; @@ -120,24 +122,21 @@ public UpdateIndicesService( _entityIndexBuilders = entityIndexBuilders; } + @Override public void handleChangeEvent(@Nonnull final MetadataChangeLog event) { try { - MCLBatchItemImpl batch = MCLBatchItemImpl.builder().build(event, aspectRetriever); + 
MCLItemImpl batch = MCLItemImpl.builder().build(event, aspectRetriever); - Stream<MCLBatchItem> sideEffects = - _entityRegistry - .getMCLSideEffects( - event.getChangeType(), event.getEntityType(), event.getAspectName()) - .stream() - .flatMap(mclSideEffect -> mclSideEffect.apply(List.of(batch), aspectRetriever)); + Stream<MCLItem> sideEffects = + AspectsBatch.applyMCLSideEffects(List.of(batch), aspectRetriever); - for (MCLBatchItem mclBatchItem : + for (MCLItem mclItem : Stream.concat(Stream.of(batch), sideEffects).collect(Collectors.toList())) { - MetadataChangeLog hookEvent = mclBatchItem.getMetadataChangeLog(); + MetadataChangeLog hookEvent = mclItem.getMetadataChangeLog(); if (UPDATE_CHANGE_TYPES.contains(hookEvent.getChangeType())) { - handleUpdateChangeEvent(mclBatchItem); + handleUpdateChangeEvent(mclItem); } else if (hookEvent.getChangeType() == ChangeType.DELETE) { - handleDeleteChangeEvent(mclBatchItem); + handleDeleteChangeEvent(mclItem); } } } catch (IOException e) { @@ -154,7 +153,7 @@ public void handleChangeEvent(@Nonnull final MetadataChangeLog event) { * * @param event the change event to be processed. */ - private void handleUpdateChangeEvent(@Nonnull final MCLBatchItem event) throws IOException { + private void handleUpdateChangeEvent(@Nonnull final MCLItem event) throws IOException { final EntitySpec entitySpec = event.getEntitySpec(); final AspectSpec aspectSpec = event.getAspectSpec(); @@ -251,7 +250,7 @@ public void updateIndexMappings( * * @param event the change event to be processed. 
*/ - private void handleDeleteChangeEvent(@Nonnull final MCLBatchItem event) { + private void handleDeleteChangeEvent(@Nonnull final MCLItem event) { final EntitySpec entitySpec = event.getEntitySpec(); final Urn urn = event.getUrn(); @@ -696,7 +695,8 @@ private EntitySpec getEventEntitySpec(@Nonnull final MetadataChangeLog event) { * * @param aspectRetriever aspect Retriever */ - public void initializeAspectRetriever(AspectRetriever aspectRetriever) { + @Override + public void initializeAspectRetriever(@Nonnull AspectRetriever aspectRetriever) { this.aspectRetriever = aspectRetriever; this._entityRegistry = aspectRetriever.getEntityRegistry(); this._searchDocumentTransformer.setAspectRetriever(aspectRetriever); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index cb06dc75c70bc9..e9ace7bf449ef7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -11,6 +11,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.Urn; import com.linkedin.data.ByteString; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -95,13 +96,14 @@ public class ElasticSearchTimeseriesAspectService private static final Integer DEFAULT_LIMIT = 10000; - private final IndexConvention _indexConvention; - private final ESBulkProcessor _bulkProcessor; - private final int _numRetries; - private final TimeseriesAspectIndexBuilders _indexBuilders; - private final RestHighLevelClient _searchClient; - private final ESAggregatedStatsDAO 
_esAggregatedStatsDAO; - private final EntityRegistry _entityRegistry; + private final IndexConvention indexConvention; + private final ESBulkProcessor bulkProcessor; + private final int numRetries; + private final TimeseriesAspectIndexBuilders indexBuilders; + private final RestHighLevelClient searchClient; + private final ESAggregatedStatsDAO esAggregatedStatsDAO; + private final EntityRegistry entityRegistry; + private AspectRetriever aspectRetriever; public ElasticSearchTimeseriesAspectService( @Nonnull RestHighLevelClient searchClient, @@ -110,14 +112,21 @@ public ElasticSearchTimeseriesAspectService( @Nonnull EntityRegistry entityRegistry, @Nonnull ESBulkProcessor bulkProcessor, int numRetries) { - _indexConvention = indexConvention; - _indexBuilders = indexBuilders; - _searchClient = searchClient; - _bulkProcessor = bulkProcessor; - _entityRegistry = entityRegistry; - _numRetries = numRetries; - - _esAggregatedStatsDAO = new ESAggregatedStatsDAO(indexConvention, searchClient, entityRegistry); + this.indexConvention = indexConvention; + this.indexBuilders = indexBuilders; + this.searchClient = searchClient; + this.bulkProcessor = bulkProcessor; + this.entityRegistry = entityRegistry; + this.numRetries = numRetries; + + esAggregatedStatsDAO = new ESAggregatedStatsDAO(indexConvention, searchClient, entityRegistry); + } + + @Override + public ElasticSearchTimeseriesAspectService postConstruct(AspectRetriever aspectRetriever) { + this.aspectRetriever = aspectRetriever; + esAggregatedStatsDAO.setAspectRetriever(aspectRetriever); + return this; } private static EnvelopedAspect parseDocument(@Nonnull SearchHit doc) { @@ -209,24 +218,24 @@ private static Pair<EnvelopedAspect, GenericTimeseriesDocument> toEnvAspectGener @Override public void configure() { - _indexBuilders.reindexAll(); + indexBuilders.reindexAll(); } @Override public List<ReindexConfig> buildReindexConfigs() { - return _indexBuilders.buildReindexConfigs(); + return indexBuilders.buildReindexConfigs(); 
} @Override public List<ReindexConfig> buildReindexConfigsWithAllStructProps( Collection<StructuredPropertyDefinition> properties) throws IOException { - return _indexBuilders.buildReindexConfigsWithAllStructProps(properties); + return indexBuilders.buildReindexConfigsWithAllStructProps(properties); } public String reindexAsync( String index, @Nullable QueryBuilder filterQuery, BatchWriteOperationsOptions options) throws Exception { - return _indexBuilders.reindexAsync(index, filterQuery, options); + return indexBuilders.reindexAsync(index, filterQuery, options); } @Override @@ -240,23 +249,23 @@ public void upsertDocument( @Nonnull String aspectName, @Nonnull String docId, @Nonnull JsonNode document) { - String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final UpdateRequest updateRequest = new UpdateRequest(indexName, docId) .detectNoop(false) .docAsUpsert(true) .doc(document.toString(), XContentType.JSON) - .retryOnConflict(_numRetries); - _bulkProcessor.add(updateRequest); + .retryOnConflict(numRetries); + bulkProcessor.add(updateRequest); } @Override public List<TimeseriesIndexSizeResult> getIndexSizes() { List<TimeseriesIndexSizeResult> res = new ArrayList<>(); try { - String indicesPattern = _indexConvention.getAllTimeseriesAspectIndicesPattern(); + String indicesPattern = indexConvention.getAllTimeseriesAspectIndicesPattern(); Response r = - _searchClient + searchClient .getLowLevelClient() .performRequest(new Request("GET", "/" + indicesPattern + "/_stats")); JsonNode body = new ObjectMapper().readTree(r.getEntity().getContent()); @@ -267,7 +276,7 @@ public List<TimeseriesIndexSizeResult> getIndexSizes() { TimeseriesIndexSizeResult elemResult = new TimeseriesIndexSizeResult(); elemResult.setIndexName(entry.getKey()); Optional<Pair<String, String>> indexEntityAndAspect = - _indexConvention.getEntityAndAspectName(entry.getKey()); + 
indexConvention.getEntityAndAspectName(entry.getKey()); if (indexEntityAndAspect.isPresent()) { elemResult.setEntityName(indexEntityAndAspect.get().getFirst()); elemResult.setAspectName(indexEntityAndAspect.get().getSecond()); @@ -289,19 +298,20 @@ public long countByFilter( @Nonnull final String entityName, @Nonnull final String aspectName, @Nullable final Filter filter) { - final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + final String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final BoolQueryBuilder filterQueryBuilder = QueryBuilders.boolQuery() .must( ESUtils.buildFilterQuery( filter, true, - _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes())); + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(), + aspectRetriever)); CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); try { - CountResponse resp = _searchClient.count(countRequest, RequestOptions.DEFAULT); + CountResponse resp = searchClient.count(countRequest, RequestOptions.DEFAULT); return resp.getCount(); } catch (IOException e) { log.error("Count query failed:", e); @@ -320,10 +330,10 @@ public List<EnvelopedAspect> getAspectValues( @Nullable final Filter filter, @Nullable final SortCriterion sort) { Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes = - _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(); + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(); final BoolQueryBuilder filterQueryBuilder = QueryBuilders.boolQuery() - .must(ESUtils.buildFilterQuery(filter, true, searchableFieldTypes)); + .must(ESUtils.buildFilterQuery(filter, true, searchableFieldTypes, aspectRetriever)); filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. 
filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -363,7 +373,7 @@ public List<EnvelopedAspect> getAspectValues( final SearchRequest searchRequest = new SearchRequest(); searchRequest.source(searchSourceBuilder); - String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); searchRequest.indices(indexName); log.debug("Search request is: " + searchRequest); @@ -371,7 +381,7 @@ public List<EnvelopedAspect> getAspectValues( try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "searchAspectValues_search").time()) { final SearchResponse searchResponse = - _searchClient.search(searchRequest, RequestOptions.DEFAULT); + searchClient.search(searchRequest, RequestOptions.DEFAULT); hits = searchResponse.getHits(); } catch (Exception e) { log.error("Search query failed:", e); @@ -390,7 +400,7 @@ public GenericTable getAggregatedStats( @Nonnull AggregationSpec[] aggregationSpecs, @Nullable Filter filter, @Nullable GroupingBucket[] groupingBuckets) { - return _esAggregatedStatsDAO.getAggregatedStats( + return esAggregatedStatsDAO.getAggregatedStats( entityName, aspectName, aggregationSpecs, filter, groupingBuckets); } @@ -410,13 +420,16 @@ public GenericTable getAggregatedStats( @Override public DeleteAspectValuesResult deleteAspectValues( @Nonnull String entityName, @Nonnull String aspectName, @Nonnull Filter filter) { - final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + final String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery( - filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); + filter, + true, + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(), + aspectRetriever); final 
Optional<DeleteAspectValuesResult> result = - _bulkProcessor + bulkProcessor .deleteByQuery( filterQueryBuilder, false, DEFAULT_LIMIT, TimeValue.timeValueMinutes(10), indexName) .map( @@ -438,17 +451,20 @@ public String deleteAspectValuesAsync( @Nonnull String aspectName, @Nonnull Filter filter, @Nonnull BatchWriteOperationsOptions options) { - final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + final String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery( - filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); + filter, + true, + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(), + aspectRetriever); final int batchSize = options.getBatchSize() > 0 ? options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 ? TimeValue.timeValueSeconds(options.getTimeoutSeconds()) : null; final Optional<TaskSubmissionResponse> result = - _bulkProcessor.deleteByQueryAsync(filterQueryBuilder, false, batchSize, timeout, indexName); + bulkProcessor.deleteByQueryAsync(filterQueryBuilder, false, batchSize, timeout, indexName); if (result.isPresent()) { return result.get().getTask(); @@ -464,10 +480,13 @@ public String reindexAsync( @Nonnull String aspectName, @Nonnull Filter filter, @Nonnull BatchWriteOperationsOptions options) { - final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + final String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery( - filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); + filter, + true, + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(), + aspectRetriever); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch 
(Exception e) { @@ -484,7 +503,7 @@ public DeleteAspectValuesResult rollbackTimeseriesAspects(@Nonnull String runId) Filter filter = QueryUtils.newFilter("runId", runId); // Delete the timeseries aspects across all entities with the runId. - for (Map.Entry<String, EntitySpec> entry : _entityRegistry.getEntitySpecs().entrySet()) { + for (Map.Entry<String, EntitySpec> entry : entityRegistry.getEntitySpecs().entrySet()) { for (AspectSpec aspectSpec : entry.getValue().getAspectSpecs()) { if (aspectSpec.isTimeseries()) { DeleteAspectValuesResult result = @@ -517,10 +536,10 @@ public TimeseriesScrollResult scrollAspects( @Nullable Long endTimeMillis) { Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes = - _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(); + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(); final BoolQueryBuilder filterQueryBuilder = QueryBuilders.boolQuery() - .filter(ESUtils.buildFilterQuery(filter, true, searchableFieldTypes)); + .filter(ESUtils.buildFilterQuery(filter, true, searchableFieldTypes, aspectRetriever)); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -583,11 +602,11 @@ private SearchResponse executeScrollSearchQuery( searchRequest.source(searchSourceBuilder); ESUtils.setSearchAfter(searchSourceBuilder, sort, null, null); - searchRequest.indices(_indexConvention.getTimeseriesAspectIndexName(entityName, aspectName)); + searchRequest.indices(indexConvention.getTimeseriesAspectIndexName(entityName, aspectName)); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "scrollAspects_search").time()) { - return _searchClient.search(searchRequest, RequestOptions.DEFAULT); + return searchClient.search(searchRequest, RequestOptions.DEFAULT); } catch (Exception e) { log.error("Search query failed", e); throw new ESQueryException("Search query failed:", e); diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java index 6437bbc390d829..b59cd3a647d71c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java @@ -15,6 +15,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -23,15 +24,15 @@ @Slf4j @RequiredArgsConstructor public class TimeseriesAspectIndexBuilders implements ElasticSearchIndexed { - private final ESIndexBuilder _indexBuilder; - private final EntityRegistry _entityRegistry; - private final IndexConvention _indexConvention; + @Nonnull private final ESIndexBuilder indexBuilder; + @Nonnull private final EntityRegistry entityRegistry; + @Nonnull private final IndexConvention indexConvention; @Override public void reindexAll() { for (ReindexConfig config : buildReindexConfigs()) { try { - _indexBuilder.buildIndex(config); + indexBuilder.buildIndex(config); } catch (IOException e) { throw new RuntimeException(e); } @@ -41,13 +42,13 @@ public void reindexAll() { public String reindexAsync( String index, @Nullable QueryBuilder filterQuery, BatchWriteOperationsOptions options) throws Exception { - Optional<Pair<String, String>> entityAndAspect = _indexConvention.getEntityAndAspectName(index); + Optional<Pair<String, String>> entityAndAspect = indexConvention.getEntityAndAspectName(index); if (entityAndAspect.isEmpty()) { throw new IllegalArgumentException("Could not extract entity and aspect from index " + index); } String entityName = entityAndAspect.get().getFirst(); 
String aspectName = entityAndAspect.get().getSecond(); - EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName); + EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); for (String aspect : entitySpec.getAspectSpecMap().keySet()) { if (aspect.toLowerCase().equals(aspectName)) { aspectName = aspect; @@ -59,17 +60,17 @@ public String reindexAsync( String.format("Could not find aspect %s of entity %s", aspectName, entityName)); } ReindexConfig config = - _indexBuilder.buildReindexState( + indexBuilder.buildReindexState( index, MappingsBuilder.getMappings( - _entityRegistry.getEntitySpec(entityName).getAspectSpec(aspectName)), + entityRegistry.getEntitySpec(entityName).getAspectSpec(aspectName)), Collections.emptyMap()); - return _indexBuilder.reindexInPlaceAsync(index, filterQuery, options, config); + return indexBuilder.reindexInPlaceAsync(index, filterQuery, options, config); } @Override public List<ReindexConfig> buildReindexConfigs() { - return _entityRegistry.getEntitySpecs().values().stream() + return entityRegistry.getEntitySpecs().values().stream() .flatMap( entitySpec -> entitySpec.getAspectSpecs().stream() @@ -78,8 +79,8 @@ public List<ReindexConfig> buildReindexConfigs() { .map( pair -> { try { - return _indexBuilder.buildReindexState( - _indexConvention.getTimeseriesAspectIndexName( + return indexBuilder.buildReindexState( + indexConvention.getTimeseriesAspectIndexName( pair.getFirst().getName(), pair.getSecond().getName()), MappingsBuilder.getMappings(pair.getSecond()), Collections.emptyMap()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 580888e54b7007..1324aebb80006d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -5,6 +5,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringArrayArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; @@ -29,6 +30,7 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Setter; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; @@ -61,17 +63,18 @@ public class ESAggregatedStatsDAO { ES_AGGREGATION_PREFIX + ES_MAX_AGGREGATION_PREFIX + ES_FIELD_TIMESTAMP; private static final int MAX_TERM_BUCKETS = 24 * 60; // minutes in a day. - private final IndexConvention _indexConvention; - private final RestHighLevelClient _searchClient; - private final EntityRegistry _entityRegistry; + private final IndexConvention indexConvention; + private final RestHighLevelClient searchClient; + private final EntityRegistry entityRegistry; + @Setter private AspectRetriever aspectRetriever; public ESAggregatedStatsDAO( @Nonnull IndexConvention indexConvention, @Nonnull RestHighLevelClient searchClient, @Nonnull EntityRegistry entityRegistry) { - _indexConvention = indexConvention; - _searchClient = searchClient; - _entityRegistry = entityRegistry; + this.indexConvention = indexConvention; + this.searchClient = searchClient; + this.entityRegistry = entityRegistry; } private static String toEsAggName(final String aggName) { @@ -353,7 +356,7 @@ private static String extractAggregationValue( private AspectSpec getTimeseriesAspectSpec( @Nonnull String entityName, @Nonnull String aspectName) { - EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName); + EntitySpec 
entitySpec = entityRegistry.getEntitySpec(entityName); AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); if (aspectSpec == null) { new IllegalArgumentException( @@ -379,7 +382,10 @@ public GenericTable getAggregatedStats( // Setup the filter query builder using the input filter provided. final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery( - filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); + filter, + true, + entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(), + aspectRetriever); AspectSpec aspectSpec = getTimeseriesAspectSpec(entityName, aspectName); // Build and attach the grouping aggregations @@ -402,14 +408,14 @@ public GenericTable getAggregatedStats( final SearchRequest searchRequest = new SearchRequest(); searchRequest.source(searchSourceBuilder); - final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); + final String indexName = indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); searchRequest.indices(indexName); log.debug("Search request is: " + searchRequest); try { final SearchResponse searchResponse = - _searchClient.search(searchRequest, RequestOptions.DEFAULT); + searchClient.search(searchRequest, RequestOptions.DEFAULT); return generateResponseFromElastic( searchResponse, groupingBuckets, aggregationSpecs, aspectSpec); } catch (Exception e) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 72bbc794171ff9..84d084e14d54d0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -6,7 +6,7 @@ import com.linkedin.identity.CorpUserInfo; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import 
com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.key.CorpUserKey; import java.util.HashMap; import java.util.LinkedList; @@ -26,16 +26,16 @@ public static Map<Urn, CorpUserKey> ingestCorpUserKeyAspects( @Nonnull public static Map<Urn, CorpUserKey> ingestCorpUserKeyAspects( - EntityService<MCPUpsertBatchItem> entityService, int aspectCount, int startIndex) { + EntityService<ChangeItemImpl> entityService, int aspectCount, int startIndex) { String aspectName = AspectGenerationUtils.getAspectName(new CorpUserKey()); Map<Urn, CorpUserKey> aspects = new HashMap<>(); - List<MCPUpsertBatchItem> items = new LinkedList<>(); + List<ChangeItemImpl> items = new LinkedList<>(); for (int i = startIndex; i < startIndex + aspectCount; i++) { Urn urn = UrnUtils.getUrn(String.format("urn:li:corpuser:tester%d", i)); CorpUserKey aspect = AspectGenerationUtils.createCorpUserKey(urn); aspects.put(urn, aspect); items.add( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(urn) .aspectName(aspectName) .recordTemplate(aspect) @@ -43,7 +43,8 @@ public static Map<Urn, CorpUserKey> ingestCorpUserKeyAspects( .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .build(entityService)); } - entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + entityService.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(entityService).items(items).build(), true, true); return aspects; } @@ -58,14 +59,14 @@ public static Map<Urn, CorpUserInfo> ingestCorpUserInfoAspects( @Nonnull final EntityService entityService, int aspectCount, int startIndex) { String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); Map<Urn, CorpUserInfo> aspects = new HashMap<>(); - List<MCPUpsertBatchItem> items = new LinkedList<>(); + List<ChangeItemImpl> items = new LinkedList<>(); for (int i = startIndex; i < startIndex + aspectCount; i++) { Urn 
urn = UrnUtils.getUrn(String.format("urn:li:corpuser:tester%d", i)); String email = String.format("email%d@test.com", i); CorpUserInfo aspect = AspectGenerationUtils.createCorpUserInfo(email); aspects.put(urn, aspect); items.add( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(urn) .aspectName(aspectName) .recordTemplate(aspect) @@ -73,7 +74,8 @@ public static Map<Urn, CorpUserInfo> ingestCorpUserInfoAspects( .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .build(entityService)); } - entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + entityService.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(entityService).items(items).build(), true, true); return aspects; } @@ -88,7 +90,7 @@ public static Map<Urn, ChartInfo> ingestChartInfoAspects( @Nonnull final EntityService entityService, int aspectCount, int startIndex) { String aspectName = AspectGenerationUtils.getAspectName(new ChartInfo()); Map<Urn, ChartInfo> aspects = new HashMap<>(); - List<MCPUpsertBatchItem> items = new LinkedList<>(); + List<ChangeItemImpl> items = new LinkedList<>(); for (int i = startIndex; i < startIndex + aspectCount; i++) { Urn urn = UrnUtils.getUrn(String.format("urn:li:chart:(looker,test%d)", i)); String title = String.format("Test Title %d", i); @@ -96,7 +98,7 @@ public static Map<Urn, ChartInfo> ingestChartInfoAspects( ChartInfo aspect = AspectGenerationUtils.createChartInfo(title, description); aspects.put(urn, aspect); items.add( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(urn) .aspectName(aspectName) .recordTemplate(aspect) @@ -104,7 +106,8 @@ public static Map<Urn, ChartInfo> ingestChartInfoAspects( .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .build(entityService)); } - entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + entityService.ingestAspects( + 
AspectsBatchImpl.builder().aspectRetriever(entityService).items(items).build(), true, true); return aspects; } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java index 308832a9c63ef0..c38e14711fe966 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java @@ -8,7 +8,7 @@ import com.linkedin.common.urn.DatasetUrn; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.EbeanTestUtils; -import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.builder.DatasetPropertiesPatchBuilder; import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; @@ -50,7 +50,7 @@ public void testAdditionalChanges() { preProcessHooks.setUiEnabled(true); EntityServiceImpl entityServiceImpl = new EntityServiceImpl( - aspectDao, mockProducer, _testEntityRegistry, true, null, preProcessHooks, false); + aspectDao, mockProducer, _testEntityRegistry, true, preProcessHooks, false); MetadataChangeProposal proposal1 = new DatasetPropertiesPatchBuilder() @@ -71,7 +71,7 @@ public void testAdditionalChanges() { entityServiceImpl, false) .stream() - .map(MCPBatchItem::getMetadataChangeProposal) + .map(MCPItem::getMetadataChangeProposal) .collect(Collectors.toList()); // proposals for key aspect, browsePath, browsePathV2, dataPlatformInstance Assert.assertEquals(proposalList.size(), 4); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java index d191ea2b9fa971..660fb1af47be4e 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java @@ -48,14 +48,8 @@ private void configureComponents() { PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); _entityServiceImpl = - new EntityServiceImpl( - dao, - _mockProducer, - _testEntityRegistry, - true, - _mockUpdateIndicesService, - preProcessHooks, - true); + new EntityServiceImpl(dao, _mockProducer, _testEntityRegistry, true, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); _entityServiceImpl.setRetentionService(_retentionService); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java index 8d30fb02915c70..e1dd9eb21e78be 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java @@ -72,13 +72,8 @@ private void configureComponents() { preProcessHooks.setUiEnabled(true); _entityServiceImpl = new EntityServiceImpl( - _aspectDao, - _mockProducer, - _testEntityRegistry, - false, - _mockUpdateIndicesService, - preProcessHooks, - true); + _aspectDao, _mockProducer, _testEntityRegistry, false, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); _entityServiceImpl.setRetentionService(_retentionService); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index 
42fa2acb542375..390cbc7392b2ea 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -54,13 +54,8 @@ public DeleteEntityServiceTest() { preProcessHooks.setUiEnabled(true); _entityServiceImpl = new EntityServiceImpl( - _aspectDao, - mock(EventProducer.class), - _entityRegistry, - true, - _mockUpdateIndicesService, - preProcessHooks, - true); + _aspectDao, mock(EventProducer.class), _entityRegistry, true, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); _deleteEntityService = new DeleteEntityService(_entityServiceImpl, _graphService); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java index d241fb3b9581b4..75a4a76192a207 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java @@ -39,14 +39,8 @@ public void setupTest() { PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); _entityServiceImpl = - new EntityServiceImpl( - dao, - _mockProducer, - _testEntityRegistry, - true, - _mockUpdateIndicesService, - preProcessHooks, - true); + new EntityServiceImpl(dao, _mockProducer, _testEntityRegistry, true, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); _entityServiceImpl.setRetentionService(_retentionService); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 1e2cf4d4255d2e..586f6d3b79a8f4 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -18,7 +18,7 @@ import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.registry.EntityRegistryException; @@ -71,13 +71,8 @@ public void setupTest() { preProcessHooks.setUiEnabled(true); _entityServiceImpl = new EntityServiceImpl( - _aspectDao, - _mockProducer, - _testEntityRegistry, - false, - _mockUpdateIndicesService, - preProcessHooks, - true); + _aspectDao, _mockProducer, _testEntityRegistry, false, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); _entityServiceImpl.setRetentionService(_retentionService); } @@ -118,30 +113,33 @@ public void testIngestListLatestAspects() throws AssertionError { // Ingest CorpUserInfo Aspect #3 CorpUserInfo writeAspect3 = AspectGenerationUtils.createCorpUserInfo("email3@test.com"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() 
.urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // List aspects ListResult<RecordTemplate> batch1 = @@ -187,30 +185,33 @@ public void testIngestListUrns() throws AssertionError { // Ingest CorpUserInfo Aspect #3 RecordTemplate writeAspect3 = AspectGenerationUtils.createCorpUserKey(entityUrn3); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // List aspects urns ListUrnsResult batch1 = _entityServiceImpl.listUrns(entityUrn1.getEntityType(), 0, 2); @@ -284,7 +285,7 @@ public void dataGeneratorThreadingTest() { @Test // ensure same thread as h2 public void multiThreadingTest() { DataGenerator dataGenerator = new DataGenerator(_entityServiceImpl); - Database server = ((EbeanAspectDao) _entityServiceImpl._aspectDao).getServer(); + Database 
server = ((EbeanAspectDao) _entityServiceImpl.aspectDao).getServer(); // Add data List<String> aspects = List.of("status", "globalTags", "glossaryTerms"); @@ -340,7 +341,7 @@ public void multiThreadingTest() { @Test public void singleThreadingTest() { DataGenerator dataGenerator = new DataGenerator(_entityServiceImpl); - Database server = ((EbeanAspectDao) _entityServiceImpl._aspectDao).getServer(); + Database server = ((EbeanAspectDao) _entityServiceImpl.aspectDao).getServer(); // Add data List<String> aspects = List.of("status", "globalTags", "glossaryTerms"); @@ -393,7 +394,7 @@ private static void executeThreadingTest( EntityServiceImpl entityService, List<List<MetadataChangeProposal>> testData, int threadCount) { - Database server = ((EbeanAspectDao) entityService._aspectDao).getServer(); + Database server = ((EbeanAspectDao) entityService.aspectDao).getServer(); server.sqlUpdate("truncate metadata_aspect_v2"); int count = diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 384b54c7a1c8d3..e325e23ef86070 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -39,8 +39,9 @@ import com.linkedin.metadata.aspect.CorpUserAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.AspectSpec; @@ -864,37 +865,40 @@ public void testRollbackAspect() throws 
AssertionError { CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // this should no-op since this run has been overwritten AspectRowSummary rollbackOverwrittenAspect = new AspectRowSummary(); @@ -943,30 +947,33 @@ public void testRollbackKey() throws AssertionError { CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) 
.aspectName(keyAspectName) .recordTemplate(writeKey1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // this should no-op since the key should have been written in the furst run AspectRowSummary rollbackKeyWithWrongRunId = new AspectRowSummary(); @@ -1023,44 +1030,47 @@ public void testRollbackUrn() throws AssertionError { CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) 
.recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // this should no-op since the key should have been written in the furst run AspectRowSummary rollbackKeyWithWrongRunId = new AspectRowSummary(); @@ -1090,16 +1100,19 @@ public void testIngestGetLatestAspect() throws AssertionError { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // Validate retrieval of CorpUserInfo Aspect #1 RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1132,14 +1145,17 @@ public void testIngestGetLatestAspect() throws AssertionError { items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata2) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + 
true); // Validate retrieval of CorpUserInfo Aspect #2 RecordTemplate readAspect2 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1176,16 +1192,19 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // Validate retrieval of CorpUserInfo Aspect #1 EnvelopedAspect readAspect1 = @@ -1198,14 +1217,17 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // Validate retrieval of CorpUserInfo Aspect #2 EnvelopedAspect readAspect2 = @@ -1250,16 +1272,19 @@ public void testIngestSameAspect() throws AssertionError { SystemMetadata metadata3 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-123", "run-456"); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() 
.urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // Validate retrieval of CorpUserInfo Aspect #1 RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1292,14 +1317,17 @@ public void testIngestSameAspect() throws AssertionError { items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); // Validate retrieval of CorpUserInfo Aspect #2 RecordTemplate readAspect2 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1343,51 +1371,54 @@ public void testRetention() throws AssertionError { Status writeAspect2a = new Status().setRemoved(false); Status writeAspect2b = new Status().setRemoved(true); - List<MCPUpsertBatchItem> items = + List<ChangeItemImpl> items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1a) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) 
.aspectName(aspectName) .recordTemplate(writeAspect1b) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2a) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2b) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName, 1), writeAspect1); assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1), writeAspect2); @@ -1412,21 +1443,24 @@ public void testRetention() throws AssertionError { items = List.of( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1c) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2c) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); - _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityServiceImpl.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityServiceImpl).items(items).build(), + true, + true); 
assertNull(_entityServiceImpl.getAspect(entityUrn, aspectName, 1)); assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1), writeAspect2); @@ -1564,12 +1598,12 @@ public void testRestoreIndices() throws Exception { public void testValidateUrn() throws Exception { // Valid URN Urn validTestUrn = new Urn("li", "corpuser", new TupleKey("testKey")); - EntityUtils.validateUrn(_testEntityRegistry, validTestUrn); + ValidationUtils.validateUrn(_testEntityRegistry, validTestUrn); // URN with trailing whitespace Urn testUrnWithTrailingWhitespace = new Urn("li", "corpuser", new TupleKey("testKey ")); try { - EntityUtils.validateUrn(_testEntityRegistry, testUrnWithTrailingWhitespace); + ValidationUtils.validateUrn(_testEntityRegistry, testUrnWithTrailingWhitespace); Assert.fail("Should have raised IllegalArgumentException for URN with trailing whitespace"); } catch (IllegalArgumentException e) { assertEquals( @@ -1581,7 +1615,7 @@ public void testValidateUrn() throws Exception { Urn testUrnTooLong = new Urn("li", "corpuser", new TupleKey(stringTooLong)); try { - EntityUtils.validateUrn(_testEntityRegistry, testUrnTooLong); + ValidationUtils.validateUrn(_testEntityRegistry, testUrnTooLong); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals( @@ -1600,9 +1634,9 @@ public void testValidateUrn() throws Exception { Urn testUrnSameLengthWhenEncoded = new Urn("li", "corpUser", new TupleKey(buildStringSameLengthWhenEncoded.toString())); // Same length when encoded should be allowed, the encoded one should not be - EntityUtils.validateUrn(_testEntityRegistry, testUrnSameLengthWhenEncoded); + ValidationUtils.validateUrn(_testEntityRegistry, testUrnSameLengthWhenEncoded); try { - EntityUtils.validateUrn(_testEntityRegistry, testUrnTooLongWhenEncoded); + ValidationUtils.validateUrn(_testEntityRegistry, testUrnTooLongWhenEncoded); Assert.fail("Should have raised IllegalArgumentException for URN too 
long"); } catch (IllegalArgumentException e) { assertEquals( @@ -1612,9 +1646,9 @@ public void testValidateUrn() throws Exception { // Urn containing disallowed character Urn testUrnSpecialCharValid = new Urn("li", "corpUser", new TupleKey("bob␇")); Urn testUrnSpecialCharInvalid = new Urn("li", "corpUser", new TupleKey("bob␟")); - EntityUtils.validateUrn(_testEntityRegistry, testUrnSpecialCharValid); + ValidationUtils.validateUrn(_testEntityRegistry, testUrnSpecialCharValid); try { - EntityUtils.validateUrn(_testEntityRegistry, testUrnSpecialCharInvalid); + ValidationUtils.validateUrn(_testEntityRegistry, testUrnSpecialCharInvalid); Assert.fail( "Should have raised IllegalArgumentException for URN containing the illegal char"); } catch (IllegalArgumentException e) { @@ -1623,7 +1657,7 @@ public void testValidateUrn() throws Exception { Urn urnWithMismatchedParens = new Urn("li", "corpuser", new TupleKey("test(Key")); try { - EntityUtils.validateUrn(_testEntityRegistry, urnWithMismatchedParens); + ValidationUtils.validateUrn(_testEntityRegistry, urnWithMismatchedParens); Assert.fail("Should have raised IllegalArgumentException for URN with mismatched parens"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("mismatched paren nesting")); @@ -1631,7 +1665,7 @@ public void testValidateUrn() throws Exception { Urn invalidType = new Urn("li", "fakeMadeUpType", new TupleKey("testKey")); try { - EntityUtils.validateUrn(_testEntityRegistry, invalidType); + ValidationUtils.validateUrn(_testEntityRegistry, invalidType); Assert.fail( "Should have raised IllegalArgumentException for URN with non-existent entity type"); } catch (IllegalArgumentException e) { @@ -1640,12 +1674,12 @@ public void testValidateUrn() throws Exception { Urn validFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD")); - EntityUtils.validateUrn(_testEntityRegistry, validFabricType); + ValidationUtils.validateUrn(_testEntityRegistry, 
validFabricType); Urn invalidFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "prod")); try { - EntityUtils.validateUrn(_testEntityRegistry, invalidFabricType); + ValidationUtils.validateUrn(_testEntityRegistry, invalidFabricType); Assert.fail("Should have raised IllegalArgumentException for URN with invalid fabric type"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(invalidFabricType.toString())); @@ -1654,7 +1688,7 @@ public void testValidateUrn() throws Exception { Urn urnEndingInComma = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD", "")); try { - EntityUtils.validateUrn(_testEntityRegistry, urnEndingInComma); + ValidationUtils.validateUrn(_testEntityRegistry, urnEndingInComma); Assert.fail("Should have raised IllegalArgumentException for URN ending in comma"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(urnEndingInComma.toString())); @@ -1750,12 +1784,9 @@ public void testStructuredPropertyIngestProposal() throws Exception { STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) .map( entityAspect -> - EntityUtils.toAspectRecord( - STRUCTURED_PROPERTY_ENTITY_NAME, - STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, - entityAspect.getMetadata(), - _testEntityRegistry)) - .map(recordTemplate -> (StructuredPropertyDefinition) recordTemplate) + EntityUtils.toSystemAspect(entityAspect, _entityServiceImpl) + .get() + .getAspect(StructuredPropertyDefinition.class)) .collect(Collectors.toSet()); assertEquals(defs.size(), 1); assertEquals(defs, Set.of(structuredPropertyDefinition)); @@ -1826,12 +1857,9 @@ public void testStructuredPropertyIngestProposal() throws Exception { STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) .map( entityAspect -> - EntityUtils.toAspectRecord( - STRUCTURED_PROPERTY_ENTITY_NAME, - STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, - entityAspect.getMetadata(), - 
_testEntityRegistry)) - .map(recordTemplate -> (StructuredPropertyDefinition) recordTemplate) + EntityUtils.toSystemAspect(entityAspect, _entityServiceImpl) + .get() + .getAspect(StructuredPropertyDefinition.class)) .collect(Collectors.toSet()); assertEquals(defs.size(), 2); assertEquals(defs, Set.of(secondDefinition, structuredPropertyDefinition)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index 079ec084625150..9588140bebd65f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -31,6 +31,7 @@ import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.data.template.LongMap; import com.linkedin.metadata.TestEntityUtil; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; @@ -40,7 +41,6 @@ import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.LineageRelationship; import com.linkedin.metadata.graph.LineageRelationshipArray; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -63,6 +63,7 @@ import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import com.linkedin.r2.RemoteInvocationException; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; @@ -102,14 
+103,14 @@ public abstract class LineageServiceTestBase extends AbstractTestNGSpringContext @Nonnull protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); - private EntityRegistry _entityRegistry; - private IndexConvention _indexConvention; - private SettingsBuilder _settingsBuilder; - private ElasticSearchService _elasticSearchService; - private GraphService _graphService; - private CacheManager _cacheManager; - private LineageSearchService _lineageSearchService; - private RestHighLevelClient _searchClientSpy; + private AspectRetriever aspectRetriever; + private IndexConvention indexConvention; + private SettingsBuilder settingsBuilder; + private ElasticSearchService elasticSearchService; + private GraphService graphService; + private CacheManager cacheManager; + private LineageSearchService lineageSearchService; + private RestHighLevelClient searchClientSpy; private static final String ENTITY_NAME = "testEntity"; private static final Urn TEST_URN = TestEntityUtil.getTestEntityUrn(); @@ -126,20 +127,23 @@ public void disableAssert() { } @BeforeClass - public void setup() { - _entityRegistry = new SnapshotEntityRegistry(new Snapshot()); - _indexConvention = new IndexConventionImpl("lineage_search_service_test"); - _settingsBuilder = new SettingsBuilder(null); - _elasticSearchService = buildEntitySearchService(); - _elasticSearchService.configure(); - _cacheManager = new ConcurrentMapCacheManager(); - _graphService = mock(GraphService.class); + public void setup() throws RemoteInvocationException, URISyntaxException { + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(new SnapshotEntityRegistry(new Snapshot())); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + indexConvention = new IndexConventionImpl("lineage_search_service_test"); + settingsBuilder = new SettingsBuilder(null); + elasticSearchService = buildEntitySearchService(); + 
elasticSearchService.configure(); + cacheManager = new ConcurrentMapCacheManager(); + graphService = mock(GraphService.class); resetService(true, false); } private void resetService(boolean withCache, boolean withLightingCache) { CachingEntitySearchService cachingEntitySearchService = - new CachingEntitySearchService(_cacheManager, _elasticSearchService, 100, true); + new CachingEntitySearchService(cacheManager, elasticSearchService, 100, true); EntityDocCountCacheConfiguration entityDocCountCacheConfiguration = new EntityDocCountCacheConfiguration(); entityDocCountCacheConfiguration.setTtlSeconds(600L); @@ -149,23 +153,25 @@ private void resetService(boolean withCache, boolean withLightingCache) { searchLineageCacheConfiguration.setTtlSeconds(600L); searchLineageCacheConfiguration.setLightningThreshold(withLightingCache ? -1 : 300); - _lineageSearchService = + lineageSearchService = spy( new LineageSearchService( new SearchService( new EntityDocCountCache( - _entityRegistry, _elasticSearchService, entityDocCountCacheConfiguration), + aspectRetriever.getEntityRegistry(), + elasticSearchService, + entityDocCountCacheConfiguration), cachingEntitySearchService, new SimpleRanker()), - _graphService, - _cacheManager.getCache("test"), + graphService, + cacheManager.getCache("test"), withCache, searchLineageCacheConfiguration)); } @BeforeMethod public void wipe() throws Exception { - _elasticSearchService.clear(); + elasticSearchService.clear(); clearCache(false); syncAfterWrite(getBulkProcessor()); } @@ -174,31 +180,38 @@ public void wipe() throws Exception { private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = new EntityIndexBuilders( - getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); - _searchClientSpy = spy(getSearchClient()); + getIndexBuilder(), + aspectRetriever.getEntityRegistry(), + indexConvention, + settingsBuilder); + searchClientSpy = spy(getSearchClient()); ESSearchDAO searchDAO = new 
ESSearchDAO( - _entityRegistry, - _searchClientSpy, - _indexConvention, + searchClientSpy, + indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); ESBrowseDAO browseDAO = new ESBrowseDAO( - _entityRegistry, - _searchClientSpy, - _indexConvention, + searchClientSpy, + indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); ESWriteDAO writeDAO = - new ESWriteDAO(_entityRegistry, _searchClientSpy, _indexConvention, getBulkProcessor(), 1); - return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); + new ESWriteDAO( + aspectRetriever.getEntityRegistry(), + searchClientSpy, + indexConvention, + getBulkProcessor(), + 1); + return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO) + .postConstruct(aspectRetriever); } private void clearCache(boolean withLightingCache) { - _cacheManager.getCacheNames().forEach(cache -> _cacheManager.getCache(cache).clear()); + cacheManager.getCacheNames().forEach(cache -> cacheManager.getCache(cache).clear()); resetService(true, withLightingCache); } @@ -212,7 +225,7 @@ private EntityLineageResult mockResult(List<LineageRelationship> lineageRelation @Test public void testSearchService() throws Exception { - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -227,7 +240,7 @@ public void testSearchService() throws Exception { assertEquals(searchResult.getNumEntities().intValue(), 0); clearCache(false); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -254,10 +267,10 @@ public void testSearchService() throws Exception { document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); - 
_elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(getBulkProcessor()); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -271,7 +284,7 @@ public void testSearchService() throws Exception { assertEquals(searchResult.getEntities().size(), 0); clearCache(false); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -304,22 +317,22 @@ public void testSearchService() throws Exception { document2.set("keyPart1", JsonNodeFactory.instance.textNode("random")); document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); - Mockito.reset(_searchClientSpy); + Mockito.reset(searchClientSpy); searchResult = searchAcrossLineage(null, TEST1); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); // Verify that highlighting was turned off in the query ArgumentCaptor<SearchRequest> searchRequestCaptor = ArgumentCaptor.forClass(SearchRequest.class); - Mockito.verify(_searchClientSpy, times(1)).search(searchRequestCaptor.capture(), any()); + Mockito.verify(searchClientSpy, times(1)).search(searchRequestCaptor.capture(), any()); SearchRequest capturedRequest = searchRequestCaptor.getValue(); assertNull(capturedRequest.source().highlighter()); clearCache(false); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -337,10 +350,10 @@ public void 
testSearchService() throws Exception { clearCache(false); // Test Cache Behavior - Mockito.reset(_graphService); + Mockito.reset(graphService); // Case 1: Use the maxHops in the cache. - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -354,7 +367,7 @@ public void testSearchService() throws Exception { new LineageRelationship().setDegree(3).setType("type").setEntity(urn)))); searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -369,7 +382,7 @@ public void testSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - Mockito.verify(_graphService, times(1)) + Mockito.verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -381,7 +394,7 @@ public void testSearchService() throws Exception { // Hit the cache on second attempt searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -395,7 +408,7 @@ public void testSearchService() throws Exception { null, new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - Mockito.verify(_graphService, times(1)) + Mockito.verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -406,7 +419,7 @@ public void testSearchService() throws Exception { eq(null)); // Case 2: Use the start and end time in the cache. 
- when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -420,7 +433,7 @@ public void testSearchService() throws Exception { new LineageRelationship().setDegree(3).setType("type").setEntity(urn)))); searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(), @@ -435,7 +448,7 @@ public void testSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - Mockito.verify(_graphService, times(1)) + Mockito.verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -447,7 +460,7 @@ public void testSearchService() throws Exception { // Hit the cache on second attempt searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -461,7 +474,7 @@ public void testSearchService() throws Exception { 1L, new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - Mockito.verify(_graphService, times(1)) + Mockito.verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -474,11 +487,11 @@ public void testSearchService() throws Exception { clearCache(false); // Cleanup - _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); syncAfterWrite(getBulkProcessor()); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt())) .thenReturn( mockResult( @@ -491,7 +504,7 @@ public void testSearchService() throws Exception { @Test 
public void testScrollAcrossLineage() throws Exception { - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -508,7 +521,7 @@ public void testScrollAcrossLineage() throws Exception { assertNull(scrollResult.getScrollId()); clearCache(false); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -537,10 +550,10 @@ public void testScrollAcrossLineage() throws Exception { document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(getBulkProcessor()); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -555,7 +568,7 @@ public void testScrollAcrossLineage() throws Exception { assertNull(scrollResult.getScrollId()); clearCache(false); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -586,10 +599,10 @@ public void testScrollAcrossLineage() throws Exception { clearCache(false); // Cleanup - _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); syncAfterWrite(getBulkProcessor()); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt())) .thenReturn( mockResult( @@ -611,7 +624,7 @@ public void testLightningSearchService() throws Exception { // Enable lightning resetService(true, true); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), 
eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -624,7 +637,7 @@ public void testLightningSearchService() throws Exception { assertEquals(searchResult.getNumEntities().intValue(), 0); clearCache(true); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -646,10 +659,10 @@ public void testLightningSearchService() throws Exception { document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(getBulkProcessor()); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -663,7 +676,7 @@ public void testLightningSearchService() throws Exception { assertEquals(searchResult.getEntities().size(), 0); clearCache(true); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -679,20 +692,20 @@ public void testLightningSearchService() throws Exception { assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); assertEquals(searchResult.getEntities().get(0).getDegree().intValue(), 1); - verify(_lineageSearchService, times(1)) + verify(lineageSearchService, times(1)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); searchResult = searchAcrossLineage(QueryUtils.newFilter("degree.keyword", "1"), testStar); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); assertEquals(searchResult.getEntities().get(0).getDegree().intValue(), 1); - 
verify(_lineageSearchService, times(2)) + verify(lineageSearchService, times(2)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); searchResult = searchAcrossLineage(QueryUtils.newFilter("degree.keyword", "2"), testStar); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); - verify(_lineageSearchService, times(3)) + verify(lineageSearchService, times(3)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); clearCache(true); // resets spy @@ -702,17 +715,17 @@ public void testLightningSearchService() throws Exception { document2.set("keyPart1", JsonNodeFactory.instance.textNode("random")); document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = searchAcrossLineage(null, testStar); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); - verify(_lineageSearchService, times(1)) + verify(lineageSearchService, times(1)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); clearCache(true); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -727,16 +740,16 @@ public void testLightningSearchService() throws Exception { searchResult = searchAcrossLineage(null, testStar); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().size(), 1); - verify(_lineageSearchService, times(1)) + verify(lineageSearchService, times(1)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); clearCache(true); // Test Cache Behavior - 
reset(_graphService); - reset(_lineageSearchService); + reset(graphService); + reset(lineageSearchService); // Case 1: Use the maxHops in the cache. - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -750,7 +763,7 @@ public void testLightningSearchService() throws Exception { new LineageRelationship().setDegree(3).setType("type").setEntity(urn)))); searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -765,7 +778,7 @@ public void testLightningSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - verify(_graphService, times(1)) + verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -774,12 +787,12 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(null), eq(null)); - verify(_lineageSearchService, times(1)) + verify(lineageSearchService, times(1)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); // Hit the cache on second attempt searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -793,7 +806,7 @@ public void testLightningSearchService() throws Exception { null, new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - verify(_graphService, times(1)) + verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -802,11 +815,11 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(null), eq(null)); - verify(_lineageSearchService, times(2)) + verify(lineageSearchService, times(2)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); // Case 2: Use the start and end time 
in the cache. - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), @@ -820,7 +833,7 @@ public void testLightningSearchService() throws Exception { new LineageRelationship().setDegree(3).setType("type").setEntity(urn)))); searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(), @@ -835,7 +848,7 @@ public void testLightningSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - verify(_graphService, times(1)) + verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -844,12 +857,12 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(0L), eq(1L)); - verify(_lineageSearchService, times(3)) + verify(lineageSearchService, times(3)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); // Hit the cache on second attempt searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -863,7 +876,7 @@ public void testLightningSearchService() throws Exception { 1L, new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); - verify(_graphService, times(1)) + verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -872,17 +885,17 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(0L), eq(1L)); - verify(_lineageSearchService, times(4)) + verify(lineageSearchService, times(4)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); /* * Test filtering */ - reset(_lineageSearchService); + reset(lineageSearchService); // Entity searchResult = - _lineageSearchService.searchAcrossLineage( + 
lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(DATASET_ENTITY_NAME), @@ -897,12 +910,12 @@ public void testLightningSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); - verify(_lineageSearchService, times(1)) + verify(lineageSearchService, times(1)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); // Cached searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(DATASET_ENTITY_NAME), @@ -915,7 +928,7 @@ public void testLightningSearchService() throws Exception { null, null, new SearchFlags().setSkipCache(false)); - Mockito.verify(_graphService, times(1)) + Mockito.verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -924,7 +937,7 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(0L), eq(1L)); - verify(_lineageSearchService, times(2)) + verify(lineageSearchService, times(2)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); @@ -945,7 +958,7 @@ public void testLightningSearchService() throws Exception { Filter filter = new Filter().setOr(conCritArr); searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -960,12 +973,12 @@ public void testLightningSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); - verify(_lineageSearchService, times(3)) + verify(lineageSearchService, times(3)) .getLightningSearchResult(any(), 
any(), anyInt(), anyInt(), anySet()); // Cached searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -978,7 +991,7 @@ public void testLightningSearchService() throws Exception { null, null, new SearchFlags().setSkipCache(false)); - verify(_graphService, times(1)) + verify(graphService, times(1)) .getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), @@ -987,7 +1000,7 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(0L), eq(1L)); - verify(_lineageSearchService, times(4)) + verify(lineageSearchService, times(4)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); @@ -995,7 +1008,7 @@ public void testLightningSearchService() throws Exception { // Environment Filter originFilter = QueryUtils.newFilter("origin", "PROD"); searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -1010,12 +1023,12 @@ public void testLightningSearchService() throws Exception { new SearchFlags().setSkipCache(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); - verify(_lineageSearchService, times(5)) + verify(lineageSearchService, times(5)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); // Cached searchResult = - _lineageSearchService.searchAcrossLineage( + lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(ENTITY_NAME), @@ -1028,7 +1041,7 @@ public void testLightningSearchService() throws Exception { null, null, new SearchFlags().setSkipCache(false)); - verify(_graphService, times(1)) + verify(graphService, times(1)) .getLineage( eq(TEST_URN), 
eq(LineageDirection.DOWNSTREAM), @@ -1037,7 +1050,7 @@ public void testLightningSearchService() throws Exception { eq(1000), eq(0L), eq(1L)); - verify(_lineageSearchService, times(6)) + verify(lineageSearchService, times(6)) .getLightningSearchResult(any(), any(), anyInt(), anyInt(), anySet()); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(searchResult.getEntities().size(), 0); @@ -1045,11 +1058,11 @@ public void testLightningSearchService() throws Exception { clearCache(true); // Cleanup - _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); syncAfterWrite(getBulkProcessor()); - when(_graphService.getLineage( + when(graphService.getLineage( eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt())) .thenReturn( mockResult( @@ -1089,7 +1102,7 @@ public void testLightningEnvFiltering() throws Exception { Set<String> entityNames = Collections.emptySet(); LineageSearchResult lineageSearchResult = - _lineageSearchService.getLightningSearchResult( + lineageSearchService.getLightningSearchResult( lineageRelationships, filter, from, size, entityNames); assertEquals(lineageSearchResult.getNumEntities(), Integer.valueOf(500)); @@ -1131,7 +1144,7 @@ public void testLightningEnvFiltering() throws Exception { filter = new Filter().setOr(conCritArr); lineageSearchResult = - _lineageSearchService.getLightningSearchResult( + lineageSearchService.getLightningSearchResult( lineageRelationships, filter, from, size, entityNames); // assert that if the query has an env filter, it is applied correctly @@ -1168,7 +1181,7 @@ public void testLightningPagination() throws Exception { Set<String> entityNames = Collections.emptySet(); LineageSearchResult lineageSearchResult = - _lineageSearchService.getLightningSearchResult( + 
lineageSearchService.getLightningSearchResult( lineageRelationships, filter, from, size, entityNames); assertEquals(lineageSearchResult.getNumEntities(), Integer.valueOf(500)); @@ -1181,7 +1194,7 @@ public void testLightningPagination() throws Exception { from = 50; size = 20; lineageSearchResult = - _lineageSearchService.getLightningSearchResult( + lineageSearchService.getLightningSearchResult( lineageRelationships, filter, from, size, entityNames); assertEquals(lineageSearchResult.getNumEntities(), Integer.valueOf(500)); @@ -1206,7 +1219,7 @@ public void testLightningPagination() throws Exception { size = 10; filter = new Filter().setOr(conCritArr); lineageSearchResult = - _lineageSearchService.getLightningSearchResult( + lineageSearchService.getLightningSearchResult( lineageRelationships, filter, from, size, entityNames); assertEquals(lineageSearchResult.getNumEntities(), Integer.valueOf(600)); @@ -1220,7 +1233,7 @@ public void testLightningPagination() throws Exception { from = 0; size = 10; lineageSearchResult = - _lineageSearchService.getLightningSearchResult( + lineageSearchService.getLightningSearchResult( lineageRelationships, null, from, size, entityNames); // Static Degree agg is the first element @@ -1270,7 +1283,7 @@ private LineageRelationship constructLineageRelationship(Urn urn) { // Convenience method to reduce spots where we're sending the same params private LineageSearchResult searchAcrossLineage(@Nullable Filter filter, @Nullable String input) { - return _lineageSearchService.searchAcrossLineage( + return lineageSearchService.searchAcrossLineage( TEST_URN, LineageDirection.DOWNSTREAM, ImmutableList.of(), @@ -1287,7 +1300,7 @@ private LineageSearchResult searchAcrossLineage(@Nullable Filter filter, @Nullab private LineageScrollResult scrollAcrossLineage( @Nullable Filter filter, @Nullable String input, String scrollId, int size) { - return _lineageSearchService.scrollAcrossLineage( + return lineageSearchService.scrollAcrossLineage( TEST_URN, 
LineageDirection.DOWNSTREAM, ImmutableList.of(), @@ -1326,8 +1339,7 @@ public void testCanDoLightning() throws Exception { int size = 10; Set<String> entityNames = Collections.emptySet(); - Assert.assertTrue( - _lineageSearchService.canDoLightning(lineageRelationships, "*", filter, null)); + Assert.assertTrue(lineageSearchService.canDoLightning(lineageRelationships, "*", filter, null)); // Set up filters ConjunctiveCriterionArray conCritArr = new ConjunctiveCriterionArray(); @@ -1350,7 +1362,6 @@ public void testCanDoLightning() throws Exception { from = 500; size = 10; filter = new Filter().setOr(conCritArr); - Assert.assertTrue( - _lineageSearchService.canDoLightning(lineageRelationships, "*", filter, null)); + Assert.assertTrue(lineageSearchService.canDoLightning(lineageRelationships, "*", filter, null)); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index 71f35adabce368..d860776a316815 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -2,6 +2,9 @@ import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH; import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import com.datahub.test.Snapshot; @@ -11,10 +14,10 @@ import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import 
com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -36,6 +39,9 @@ import com.linkedin.metadata.search.ranker.SimpleRanker; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; +import java.util.Map; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; import org.springframework.cache.CacheManager; @@ -62,44 +68,49 @@ public abstract class SearchServiceTestBase extends AbstractTestNGSpringContextT @Nonnull protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); - private EntityRegistry _entityRegistry; - private IndexConvention _indexConvention; - private SettingsBuilder _settingsBuilder; - private ElasticSearchService _elasticSearchService; - private CacheManager _cacheManager; - private SearchService _searchService; + private AspectRetriever aspectRetriever; + private IndexConvention indexConvention; + private SettingsBuilder settingsBuilder; + private ElasticSearchService elasticSearchService; + private CacheManager cacheManager; + private SearchService searchService; private static final String ENTITY_NAME = "testEntity"; @BeforeClass - public void setup() { - _entityRegistry = new SnapshotEntityRegistry(new Snapshot()); - _indexConvention = new IndexConventionImpl("search_service_test"); - _settingsBuilder = new SettingsBuilder(null); - _elasticSearchService = buildEntitySearchService(); - _elasticSearchService.configure(); - _cacheManager = new ConcurrentMapCacheManager(); + public void setup() throws RemoteInvocationException, URISyntaxException { + aspectRetriever = 
mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(new SnapshotEntityRegistry(new Snapshot())); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + indexConvention = new IndexConventionImpl("search_service_test"); + settingsBuilder = new SettingsBuilder(null); + elasticSearchService = buildEntitySearchService(); + elasticSearchService.configure(); + cacheManager = new ConcurrentMapCacheManager(); resetSearchService(); } private void resetSearchService() { CachingEntitySearchService cachingEntitySearchService = - new CachingEntitySearchService(_cacheManager, _elasticSearchService, 100, true); + new CachingEntitySearchService(cacheManager, elasticSearchService, 100, true); EntityDocCountCacheConfiguration entityDocCountCacheConfiguration = new EntityDocCountCacheConfiguration(); entityDocCountCacheConfiguration.setTtlSeconds(600L); - _searchService = + searchService = new SearchService( new EntityDocCountCache( - _entityRegistry, _elasticSearchService, entityDocCountCacheConfiguration), + aspectRetriever.getEntityRegistry(), + elasticSearchService, + entityDocCountCacheConfiguration), cachingEntitySearchService, new SimpleRanker()); } @BeforeMethod public void wipe() throws Exception { - _elasticSearchService.clear(); + elasticSearchService.clear(); syncAfterWrite(getBulkProcessor()); } @@ -107,37 +118,44 @@ public void wipe() throws Exception { private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = new EntityIndexBuilders( - getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); + getIndexBuilder(), + aspectRetriever.getEntityRegistry(), + indexConvention, + settingsBuilder); ESSearchDAO searchDAO = new ESSearchDAO( - _entityRegistry, getSearchClient(), - _indexConvention, + indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); ESBrowseDAO browseDAO = new ESBrowseDAO( - _entityRegistry, 
getSearchClient(), - _indexConvention, + indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); ESWriteDAO writeDAO = - new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); - return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); + new ESWriteDAO( + aspectRetriever.getEntityRegistry(), + getSearchClient(), + indexConvention, + getBulkProcessor(), + 1); + return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO) + .postConstruct(aspectRetriever); } private void clearCache() { - _cacheManager.getCacheNames().forEach(cache -> _cacheManager.getCache(cache).clear()); + cacheManager.getCacheNames().forEach(cache -> cacheManager.getCache(cache).clear()); resetSearchService(); } @Test public void testSearchService() throws Exception { SearchResult searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(ENTITY_NAME), "test", null, @@ -147,7 +165,7 @@ public void testSearchService() throws Exception { new SearchFlags().setFulltext(true).setSkipCache(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); clearCache(); @@ -158,11 +176,11 @@ public void testSearchService() throws Exception { document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - 
_searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); @@ -174,24 +192,24 @@ public void testSearchService() throws Exception { document2.set("keyPart1", JsonNodeFactory.instance.textNode("random")); document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "'test2'", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn2); clearCache(); - long docCount = _elasticSearchService.docCount(ENTITY_NAME); + long docCount = elasticSearchService.docCount(ENTITY_NAME); assertEquals(docCount, 2L); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "'test2'", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); } @@ -222,7 +240,7 @@ public void testAdvancedSearchOr() throws Exception { .setAnd(new CriterionArray(ImmutableList.of(subtypeCriterion))))); SearchResult 
searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(ENTITY_NAME), "test", filterWithCondition, @@ -242,7 +260,7 @@ public void testAdvancedSearchOr() throws Exception { document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document.set("subtypes", JsonNodeFactory.instance.textNode("view")); document.set("platform", JsonNodeFactory.instance.textNode("snowflake")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); Urn urn2 = new TestEntityUrn("test", "testUrn", "VALUE_2"); ObjectNode document2 = JsonNodeFactory.instance.objectNode(); @@ -252,7 +270,7 @@ public void testAdvancedSearchOr() throws Exception { document2.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document2.set("subtypes", JsonNodeFactory.instance.textNode("table")); document2.set("platform", JsonNodeFactory.instance.textNode("hive")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); Urn urn3 = new TestEntityUrn("test", "testUrn", "VALUE_3"); ObjectNode document3 = JsonNodeFactory.instance.objectNode(); @@ -262,12 +280,12 @@ public void testAdvancedSearchOr() throws Exception { document3.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document3.set("subtypes", JsonNodeFactory.instance.textNode("table")); document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "test", filterWithCondition, @@ 
-307,7 +325,7 @@ public void testAdvancedSearchSoftDelete() throws Exception { ImmutableList.of(filterCriterion, removedCriterion))))); SearchResult searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(ENTITY_NAME), "test", filterWithCondition, @@ -328,7 +346,7 @@ public void testAdvancedSearchSoftDelete() throws Exception { document.set("subtypes", JsonNodeFactory.instance.textNode("view")); document.set("platform", JsonNodeFactory.instance.textNode("hive")); document.set("removed", JsonNodeFactory.instance.booleanNode(true)); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); Urn urn2 = new TestEntityUrn("test", "testUrn", "VALUE_2"); ObjectNode document2 = JsonNodeFactory.instance.objectNode(); @@ -339,7 +357,7 @@ public void testAdvancedSearchSoftDelete() throws Exception { document2.set("subtypes", JsonNodeFactory.instance.textNode("table")); document2.set("platform", JsonNodeFactory.instance.textNode("hive")); document.set("removed", JsonNodeFactory.instance.booleanNode(false)); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); Urn urn3 = new TestEntityUrn("test", "testUrn", "VALUE_3"); ObjectNode document3 = JsonNodeFactory.instance.objectNode(); @@ -350,12 +368,12 @@ public void testAdvancedSearchSoftDelete() throws Exception { document3.set("subtypes", JsonNodeFactory.instance.textNode("table")); document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); document.set("removed", JsonNodeFactory.instance.booleanNode(false)); - _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); 
syncAfterWrite(getBulkProcessor()); searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "test", filterWithCondition, @@ -386,7 +404,7 @@ public void testAdvancedSearchNegated() throws Exception { .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); SearchResult searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(ENTITY_NAME), "test", filterWithCondition, @@ -407,7 +425,7 @@ public void testAdvancedSearchNegated() throws Exception { document.set("subtypes", JsonNodeFactory.instance.textNode("view")); document.set("platform", JsonNodeFactory.instance.textNode("hive")); document.set("removed", JsonNodeFactory.instance.booleanNode(true)); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); Urn urn2 = new TestEntityUrn("test", "testUrn", "VALUE_2"); ObjectNode document2 = JsonNodeFactory.instance.objectNode(); @@ -418,7 +436,7 @@ public void testAdvancedSearchNegated() throws Exception { document2.set("subtypes", JsonNodeFactory.instance.textNode("table")); document2.set("platform", JsonNodeFactory.instance.textNode("hive")); document.set("removed", JsonNodeFactory.instance.booleanNode(false)); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); Urn urn3 = new TestEntityUrn("test", "testUrn", "VALUE_3"); ObjectNode document3 = JsonNodeFactory.instance.objectNode(); @@ -429,12 +447,12 @@ public void testAdvancedSearchNegated() throws Exception { document3.set("subtypes", JsonNodeFactory.instance.textNode("table")); document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); document.set("removed", JsonNodeFactory.instance.booleanNode(false)); - 
_elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _searchService.searchAcrossEntities( + searchService.searchAcrossEntities( ImmutableList.of(), "test", filterWithCondition, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index b544faa061f0ed..40ccc8dfb5047e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -4,18 +4,16 @@ import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; import static org.testng.Assert.assertEquals; -import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -30,6 +28,8 @@ import java.util.List; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; +import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.annotations.BeforeClass; @@ -53,20 +53,22 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTest @Nonnull protected abstract CustomSearchConfiguration getCustomSearchConfiguration(); - private EntityRegistry _entityRegistry; - private IndexConvention _indexConvention; - private SettingsBuilder _settingsBuilder; - private ElasticSearchService _elasticSearchService; + @Autowired + @Qualifier("snapshotRegistryAspectRetriever") + AspectRetriever aspectRetriever; + + private IndexConvention indexConvention; + private SettingsBuilder settingsBuilder; + private ElasticSearchService elasticSearchService; private static final String ENTITY_NAME = "testEntity"; @BeforeClass public void setup() { - _entityRegistry = new SnapshotEntityRegistry(new Snapshot()); - _indexConvention = new IndexConventionImpl("es_service_test"); - _settingsBuilder = new SettingsBuilder(null); - _elasticSearchService = buildService(); - _elasticSearchService.configure(); + indexConvention = new IndexConventionImpl("es_service_test"); + settingsBuilder = new SettingsBuilder(null); + elasticSearchService = buildService(); + elasticSearchService.configure(); } @BeforeClass @@ -78,46 +80,55 @@ public void disableAssert() { @BeforeMethod public void wipe() throws Exception { - _elasticSearchService.clear(); + elasticSearchService.clear(); } @Nonnull private ElasticSearchService buildService() { EntityIndexBuilders indexBuilders = new EntityIndexBuilders( - getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); + getIndexBuilder(), + aspectRetriever.getEntityRegistry(), + indexConvention, + settingsBuilder); ESSearchDAO searchDAO = new ESSearchDAO( - _entityRegistry, 
getSearchClient(), - _indexConvention, + indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); ESBrowseDAO browseDAO = new ESBrowseDAO( - _entityRegistry, getSearchClient(), - _indexConvention, + indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); ESWriteDAO writeDAO = - new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); - return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); + new ESWriteDAO( + aspectRetriever.getEntityRegistry(), + getSearchClient(), + indexConvention, + getBulkProcessor(), + 1); + ElasticSearchService searchService = + new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); + searchService.postConstruct(aspectRetriever); + return searchService; } @Test public void testElasticSearchServiceStructuredQuery() throws Exception { SearchResult searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); - BrowseResult browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); + BrowseResult browseResult = elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 0); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 0); assertEquals( - _elasticSearchService + elasticSearchService .aggregateByValue(ImmutableList.of(ENTITY_NAME), "textField", null, 10) .size(), 0); @@ -129,16 +140,16 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document.set("foreignKey", 
JsonNodeFactory.instance.textNode("urn:li:tag:Node.Value")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "foreignKey:Node", null, @@ -148,15 +159,15 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); - browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); + browseResult = elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); assertEquals(browseResult.getGroups().get(0).getName(), "a"); - browseResult = _elasticSearchService.browse(ENTITY_NAME, "/a", null, 0, 10); + browseResult = elasticSearchService.browse(ENTITY_NAME, "/a", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); assertEquals(browseResult.getGroups().get(0).getName(), "b"); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 1); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 1); assertEquals( - _elasticSearchService.aggregateByValue( + elasticSearchService.aggregateByValue( ImmutableList.of(ENTITY_NAME), "textFieldOverride", null, 10), ImmutableMap.of("textFieldOverride", 1L)); @@ -166,39 +177,39 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { document2.set("keyPart1", 
JsonNodeFactory.instance.textNode("random")); document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn2); - browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); + browseResult = elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 2); assertEquals(browseResult.getGroups().get(0).getName(), "a"); assertEquals(browseResult.getGroups().get(1).getName(), "b"); - browseResult = _elasticSearchService.browse(ENTITY_NAME, "/a", null, 0, 10); + browseResult = elasticSearchService.browse(ENTITY_NAME, "/a", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); assertEquals(browseResult.getGroups().get(0).getName(), "b"); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 2); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 2); assertEquals( - _elasticSearchService.aggregateByValue( + elasticSearchService.aggregateByValue( ImmutableList.of(ENTITY_NAME), "textFieldOverride", null, 10), ImmutableMap.of("textFieldOverride", 1L, "textFieldOverride2", 1L)); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, 
urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); - browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); + browseResult = elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 0); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 0); assertEquals( - _elasticSearchService + elasticSearchService .aggregateByValue(ImmutableList.of(ENTITY_NAME), "textField", null, 10) .size(), 0); @@ -207,7 +218,7 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { @Test public void testElasticSearchServiceFulltext() throws Exception { SearchResult searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); @@ -218,18 +229,18 @@ public void testElasticSearchServiceFulltext() throws Exception { document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document.set("foreignKey", JsonNodeFactory.instance.textNode("urn:li:tag:Node.Value")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); 
assertEquals(searchResult.getEntities().get(0).getEntity(), urn); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 1); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 1); assertEquals( - _elasticSearchService.aggregateByValue( + elasticSearchService.aggregateByValue( ImmutableList.of(ENTITY_NAME), "textFieldOverride", null, 10), ImmutableMap.of("textFieldOverride", 1L)); @@ -239,32 +250,32 @@ public void testElasticSearchServiceFulltext() throws Exception { document2.set("keyPart1", JsonNodeFactory.instance.textNode("random")); document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); - _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _elasticSearchService.search( + elasticSearchService.search( List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn2); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 2); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 2); assertEquals( - _elasticSearchService.aggregateByValue( + elasticSearchService.aggregateByValue( ImmutableList.of(ENTITY_NAME), "textFieldOverride", null, 10), ImmutableMap.of("textFieldOverride", 1L, "textFieldOverride2", 1L)); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); - _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); syncAfterWrite(getBulkProcessor()); searchResult = - _elasticSearchService.search( + elasticSearchService.search( 
List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); - assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 0); + assertEquals(elasticSearchService.docCount(ENTITY_NAME), 0); assertEquals( - _elasticSearchService + elasticSearchService .aggregateByValue(ImmutableList.of(ENTITY_NAME), "textField", null, 10) .size(), 0); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java index 0f70e07a6b2fc7..2c195e79bcedfe 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java @@ -31,7 +31,7 @@ public class SearchDAOElasticSearchTest extends SearchDAOTestBase { IndexConvention indexConvention; @Override - protected EntityRegistry getInjectedRegistry() { + protected EntityRegistry getEntityRegistry() { return entityRegistry; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java index 843da17fbd1321..5ad7b1218a5bf4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java @@ -21,44 +21,44 @@ }) public class TestEntityElasticSearchTest extends TestEntityTestBase { - @Autowired private RestHighLevelClient _searchClient; - @Autowired private ESBulkProcessor _bulkProcessor; - @Autowired private ESIndexBuilder _esIndexBuilder; - @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration 
_customSearchConfiguration; + @Autowired private RestHighLevelClient searchClient; + @Autowired private ESBulkProcessor bulkProcessor; + @Autowired private ESIndexBuilder esIndexBuilder; + @Autowired private SearchConfiguration searchConfiguration; + @Autowired private CustomSearchConfiguration customSearchConfiguration; @NotNull @Override protected RestHighLevelClient getSearchClient() { - return _searchClient; + return searchClient; } @NotNull @Override protected ESBulkProcessor getBulkProcessor() { - return _bulkProcessor; + return bulkProcessor; } @NotNull @Override protected ESIndexBuilder getIndexBuilder() { - return _esIndexBuilder; + return esIndexBuilder; } @NotNull @Override protected SearchConfiguration getSearchConfiguration() { - return _searchConfiguration; + return searchConfiguration; } @NotNull @Override protected CustomSearchConfiguration getCustomSearchConfiguration() { - return _customSearchConfiguration; + return customSearchConfiguration; } @Test public void initTest() { - AssertJUnit.assertNotNull(_searchClient); + AssertJUnit.assertNotNull(searchClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java index 6ebe42d0181e44..1f51d463a2963a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java @@ -7,6 +7,7 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; import org.testng.AssertJUnit; import org.testng.annotations.Test; @@ -16,7 
+17,10 @@ public class TimeseriesAspectServiceElasticSearchTest extends TimeseriesAspectSe @Autowired private RestHighLevelClient _searchClient; @Autowired private ESBulkProcessor _bulkProcessor; - @Autowired private ESIndexBuilder _esIndexBuilder; + + @Autowired + @Qualifier("searchIndexBuilder") + private ESIndexBuilder _esIndexBuilder; @NotNull @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java index 1fc4e624bbfea1..a0c9b8118d9de9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java @@ -31,7 +31,7 @@ public class SearchDAOOpenSearchTest extends SearchDAOTestBase { IndexConvention indexConvention; @Override - protected EntityRegistry getInjectedRegistry() { + protected EntityRegistry getEntityRegistry() { return entityRegistry; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java index 63dffa9c210045..16ac03415ee5c2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java @@ -7,6 +7,7 @@ import org.jetbrains.annotations.NotNull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Import; import org.testng.AssertJUnit; import org.testng.annotations.Test; @@ -16,7 +17,10 @@ public class TimeseriesAspectServiceOpenSearchTest 
extends TimeseriesAspectServi @Autowired private RestHighLevelClient _searchClient; @Autowired private ESBulkProcessor _bulkProcessor; - @Autowired private ESIndexBuilder _esIndexBuilder; + + @Autowired + @Qualifier("searchIndexBuilder") + private ESIndexBuilder _esIndexBuilder; @NotNull @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index a261b53f25c605..53eeb9dc8314c3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -7,11 +7,13 @@ import static org.testng.Assert.assertEquals; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.entity.TestEntityRegistry; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import java.net.URISyntaxException; import java.util.Collections; @@ -31,22 +33,25 @@ @Import(SearchCommonTestConfiguration.class) public class BrowseDAOTest extends AbstractTestNGSpringContextTests { - private RestHighLevelClient _mockClient; - private ESBrowseDAO _browseDAO; + private RestHighLevelClient mockClient; + private ESBrowseDAO browseDAO; - @Autowired private SearchConfiguration _searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + @Autowired private SearchConfiguration searchConfiguration; + @Autowired private CustomSearchConfiguration customSearchConfiguration; @BeforeMethod - public void setup() { - _mockClient = 
mock(RestHighLevelClient.class); - _browseDAO = + public void setup() throws RemoteInvocationException, URISyntaxException { + mockClient = mock(RestHighLevelClient.class); + AspectRetriever aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()).thenReturn(new TestEntityRegistry()); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + browseDAO = new ESBrowseDAO( - new TestEntityRegistry(), - _mockClient, - new IndexConventionImpl("es_browse_dao_test"), - _searchConfiguration, - _customSearchConfiguration); + mockClient, + new IndexConventionImpl("es_browse_dao_test"), + searchConfiguration, + customSearchConfiguration) + .setAspectRetriever(aspectRetriever); } public static Urn makeUrn(Object id) { @@ -68,24 +73,24 @@ public void testGetBrowsePath() throws Exception { // Test when there is no search hit for getBrowsePaths when(mockSearchHits.getHits()).thenReturn(new SearchHit[0]); when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); - when(_mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); - assertEquals(_browseDAO.getBrowsePaths("dataset", dummyUrn).size(), 0); + when(mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + assertEquals(browseDAO.getBrowsePaths("dataset", dummyUrn).size(), 0); // Test the case of single search hit & browsePaths field doesn't exist sourceMap.remove("browse_paths"); when(mockSearchHit.getSourceAsMap()).thenReturn(sourceMap); when(mockSearchHits.getHits()).thenReturn(new SearchHit[] {mockSearchHit}); when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); - when(_mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); - assertEquals(_browseDAO.getBrowsePaths("dataset", dummyUrn).size(), 0); + when(mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + assertEquals(browseDAO.getBrowsePaths("dataset", dummyUrn).size(), 0); 
// Test the case of single search hit & browsePaths field exists sourceMap.put("browsePaths", Collections.singletonList("foo")); when(mockSearchHit.getSourceAsMap()).thenReturn(sourceMap); when(mockSearchHits.getHits()).thenReturn(new SearchHit[] {mockSearchHit}); when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); - when(_mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); - List<String> browsePaths = _browseDAO.getBrowsePaths("dataset", dummyUrn); + when(mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + List<String> browsePaths = browseDAO.getBrowsePaths("dataset", dummyUrn); assertEquals(browsePaths.size(), 1); assertEquals(browsePaths.get(0), "foo"); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java index 5ece280cd68908..b43d1556a68829 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -2,19 +2,21 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; -import static org.junit.Assert.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; -import com.datahub.test.Snapshot; import com.google.common.collect.ImmutableList; import com.linkedin.data.template.LongMap; import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import 
com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -28,8 +30,11 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchResultMetadata; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.opensearch.SearchDAOOpenSearchTest; import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -37,6 +42,7 @@ import org.opensearch.action.explain.ExplainResponse; import org.opensearch.client.RestHighLevelClient; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public abstract class SearchDAOTestBase extends AbstractTestNGSpringContextTests { @@ -47,9 +53,16 @@ public abstract class SearchDAOTestBase extends AbstractTestNGSpringContextTests protected abstract IndexConvention getIndexConvention(); - protected abstract EntityRegistry getInjectedRegistry(); + protected abstract EntityRegistry getEntityRegistry(); - EntityRegistry entityRegistry = new SnapshotEntityRegistry(new Snapshot()); + protected AspectRetriever aspectRetriever; + + @BeforeClass + public void setup() throws RemoteInvocationException, URISyntaxException { + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()).thenReturn(getEntityRegistry()); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + } @Test public void testTransformFilterForEntitiesNoChange() { @@ 
-223,13 +236,13 @@ public void testTransformFilterForEntitiesWithSomeChanges() { public void testTransformIndexIntoEntityNameSingle() { ESSearchDAO searchDAO = new ESSearchDAO( - entityRegistry, - getSearchClient(), - getIndexConvention(), - false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, - getSearchConfiguration(), - null); + getSearchClient(), + getIndexConvention(), + false, + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, + getSearchConfiguration(), + null) + .setAspectRetriever(aspectRetriever); // Empty aggregations final SearchResultMetadata searchResultMetadata = new SearchResultMetadata().setAggregations(new AggregationMetadataArray()); @@ -306,13 +319,13 @@ public void testTransformIndexIntoEntityNameSingle() { public void testTransformIndexIntoEntityNameNested() { ESSearchDAO searchDAO = new ESSearchDAO( - entityRegistry, - getSearchClient(), - getIndexConvention(), - false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, - getSearchConfiguration(), - null); + getSearchClient(), + getIndexConvention(), + false, + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, + getSearchConfiguration(), + null) + .setAspectRetriever(aspectRetriever); // One nested facet Map<String, Long> entityTypeMap = Map.of( @@ -438,13 +451,15 @@ public void testTransformIndexIntoEntityNameNested() { public void testExplain() { ESSearchDAO searchDAO = new ESSearchDAO( - getInjectedRegistry(), - getSearchClient(), - getIndexConvention(), - false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, - getSearchConfiguration(), - null); + getSearchClient(), + getIndexConvention(), + false, + this instanceof SearchDAOOpenSearchTest + ? 
ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH + : ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, + getSearchConfiguration(), + null) + .setAspectRetriever(aspectRetriever); ExplainResponse explainResponse = searchDAO.explain( "*", diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index ed4c9db5db6430..4dd53775bbef7f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -1,27 +1,44 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; +import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.testng.Assert; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class 
AggregationQueryBuilderTest { + private static AspectRetriever aspectRetriever; + + @BeforeClass + public static void setup() throws RemoteInvocationException, URISyntaxException { + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()).thenReturn(mock(EntityRegistry.class)); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + } + @Test public void testGetDefaultAggregationsHasFields() { SearchableAnnotation annotation = @@ -46,7 +63,9 @@ public void testGetDefaultAggregationsHasFields() { AggregationQueryBuilder builder = new AggregationQueryBuilder( - config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation))); + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation)), + aspectRetriever); List<AggregationBuilder> aggs = builder.getAggregations(); @@ -78,7 +97,9 @@ public void testGetDefaultAggregationsFields() { AggregationQueryBuilder builder = new AggregationQueryBuilder( - config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation))); + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation)), + aspectRetriever); List<AggregationBuilder> aggs = builder.getAggregations(); @@ -127,7 +148,8 @@ public void testGetSpecificAggregationsHasFields() { AggregationQueryBuilder builder = new AggregationQueryBuilder( config, - ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2))); + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2)), + aspectRetriever); // Case 1: Ask for fields that should exist. 
List<AggregationBuilder> aggs = @@ -148,7 +170,7 @@ public void testAggregateOverStructuredProperty() { AggregationQueryBuilder builder = new AggregationQueryBuilder( - config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of())); + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()), aspectRetriever); List<AggregationBuilder> aggs = builder.getAggregations(List.of("structuredProperties.ab.fgh.ten")); @@ -213,7 +235,8 @@ public void testAggregateOverFieldsAndStructProp() { AggregationQueryBuilder builder = new AggregationQueryBuilder( config, - ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2))); + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2)), + aspectRetriever); // Aggregate over fields and structured properties List<AggregationBuilder> aggs = @@ -264,7 +287,9 @@ public void testMissingAggregation() { AggregationQueryBuilder builder = new AggregationQueryBuilder( - config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation))); + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation)), + aspectRetriever); List<AggregationBuilder> aggs = builder.getAggregations(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index ab832eb1ac24fa..bb37fb3f3b206a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -1,9 +1,11 @@ package com.linkedin.metadata.search.query.request; +import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; +import 
com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import java.util.List; import java.util.Map; @@ -18,7 +20,8 @@ public class AutocompleteRequestHandlerTest { private AutocompleteRequestHandler handler = - AutocompleteRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec()); + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), mock(AspectRetriever.class)); @Test public void testDefaultAutocompleteRequest() { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index c1662db6186e03..14cc9e47913c1e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -6,12 +6,12 @@ import com.google.common.collect.ImmutableList; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.TestEntitySpecBuilder; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.WordGramConfiguration; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; @@ -49,7 +49,7 @@ @Import(SearchCommonTestConfiguration.class) public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { - @Autowired private EntityRegistry entityRegistry; + @Autowired private AspectRetriever aspectRetriever; 
public static SearchConfiguration testQueryConfig; @@ -81,9 +81,9 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { @Test public void testDatasetFieldsAndHighlights() { - EntitySpec entitySpec = entityRegistry.getEntitySpec("dataset"); + EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler datasetHandler = - SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null); + SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null, aspectRetriever); /* Ensure efficient query performance, we do not expect upstream/downstream/fineGrained lineage @@ -102,7 +102,8 @@ public void testDatasetFieldsAndHighlights() { @Test public void testSearchRequestHandlerHighlightingTurnedOff() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, aspectRetriever); SearchRequest searchRequest = requestHandler.getSearchRequest( "testQuery", @@ -141,7 +142,8 @@ public void testSearchRequestHandlerHighlightingTurnedOff() { @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, aspectRetriever); SearchRequest searchRequest = requestHandler.getSearchRequest( "testQuery", null, null, 0, 10, new SearchFlags().setFulltext(false), null); @@ -196,7 +198,8 @@ public void testSearchRequestHandler() { @Test public void testAggregationsInSearch() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, aspectRetriever); final String nestedAggString = 
String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR); SearchRequest searchRequest = @@ -264,7 +267,8 @@ public void testAggregationsInSearch() { public void testFilteredSearch() { final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, aspectRetriever); final BoolQueryBuilder testQuery = constructFilterQuery(requestHandler, false); @@ -616,7 +620,8 @@ public void testBrowsePathQueryFilter() { Filter filter = new Filter(); filter.setOr(conjunctiveCriterionArray); - BoolQueryBuilder test = SearchRequestHandler.getFilterQuery(filter, new HashMap<>()); + BoolQueryBuilder test = + SearchRequestHandler.getFilterQuery(filter, new HashMap<>(), aspectRetriever); assertEquals(test.should().size(), 1); @@ -639,7 +644,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, aspectRetriever); return (BoolQueryBuilder) requestHandler diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java index 552cb0b52994f9..00168aefab1ef1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java @@ -56,13 +56,8 @@ private void configureComponents() { preProcessHooks.setUiEnabled(true); _entityServiceImpl = new EntityServiceImpl( - _aspectDao, - _mockProducer, - _testEntityRegistry, - true, - _mockUpdateIndicesService, 
- preProcessHooks, - true); + _aspectDao, _mockProducer, _testEntityRegistry, true, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); } /** diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java index 5d7137a52eb21e..e9c79f06f37c6a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java @@ -38,13 +38,8 @@ public void setupTest() { preProcessHooks.setUiEnabled(true); _entityServiceImpl = new EntityServiceImpl( - _aspectDao, - _mockProducer, - _testEntityRegistry, - true, - _mockUpdateIndicesService, - preProcessHooks, - true); + _aspectDao, _mockProducer, _testEntityRegistry, true, preProcessHooks, true); + _entityServiceImpl.setUpdateIndicesService(_mockUpdateIndicesService); } /** diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 23ca4a4a4247e1..f6141a1d8803ff 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -3,6 +3,9 @@ import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import static 
org.testng.Assert.assertTrue; @@ -24,6 +27,7 @@ import com.linkedin.data.template.StringArrayArray; import com.linkedin.data.template.StringMap; import com.linkedin.data.template.StringMapArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; @@ -45,6 +49,7 @@ import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import com.linkedin.r2.RemoteInvocationException; import com.linkedin.timeseries.AggregationSpec; import com.linkedin.timeseries.AggregationType; import com.linkedin.timeseries.CalendarInterval; @@ -54,6 +59,7 @@ import com.linkedin.timeseries.GroupingBucketType; import com.linkedin.timeseries.TimeWindowSize; import com.linkedin.timeseries.TimeseriesIndexSizeResult; +import java.net.URISyntaxException; import java.util.Calendar; import java.util.List; import java.util.Map; @@ -99,43 +105,49 @@ public abstract class TimeseriesAspectServiceTestBase extends AbstractTestNGSpri @Nonnull protected abstract ESIndexBuilder getIndexBuilder(); - private EntityRegistry _entityRegistry; - private IndexConvention _indexConvention; - private ElasticSearchTimeseriesAspectService _elasticSearchTimeseriesAspectService; - private AspectSpec _aspectSpec; + private AspectRetriever aspectRetriever; + private IndexConvention indexConvention; + private ElasticSearchTimeseriesAspectService elasticSearchTimeseriesAspectService; + private AspectSpec aspectSpec; - private Map<Long, TestEntityProfile> _testEntityProfiles; - private Long _startTime; + private Map<Long, TestEntityProfile> testEntityProfiles; + private Long startTime; /* * Basic setup and teardown */ @BeforeClass - public void setup() { - _entityRegistry = + public void setup() throws RemoteInvocationException, 
URISyntaxException { + EntityRegistry entityRegistry = new ConfigEntityRegistry( new DataSchemaFactory("com.datahub.test"), List.of(), TestEntityProfile.class .getClassLoader() .getResourceAsStream("test-entity-registry.yml")); - _indexConvention = new IndexConventionImpl("es_timeseries_aspect_service_test"); - _elasticSearchTimeseriesAspectService = buildService(); - _elasticSearchTimeseriesAspectService.configure(); - EntitySpec entitySpec = _entityRegistry.getEntitySpec(ENTITY_NAME); - _aspectSpec = entitySpec.getAspectSpec(ASPECT_NAME); + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + + indexConvention = new IndexConventionImpl("es_timeseries_aspect_service_test"); + elasticSearchTimeseriesAspectService = buildService(); + elasticSearchTimeseriesAspectService.configure(); + EntitySpec entitySpec = entityRegistry.getEntitySpec(ENTITY_NAME); + aspectSpec = entitySpec.getAspectSpec(ASPECT_NAME); } @Nonnull private ElasticSearchTimeseriesAspectService buildService() { return new ElasticSearchTimeseriesAspectService( - getSearchClient(), - _indexConvention, - new TimeseriesAspectIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention), - _entityRegistry, - getBulkProcessor(), - 1); + getSearchClient(), + indexConvention, + new TimeseriesAspectIndexBuilders( + getIndexBuilder(), aspectRetriever.getEntityRegistry(), indexConvention), + aspectRetriever.getEntityRegistry(), + getBulkProcessor(), + 1) + .postConstruct(aspectRetriever); } /* @@ -144,11 +156,11 @@ private ElasticSearchTimeseriesAspectService buildService() { private void upsertDocument(TestEntityProfile dp, Urn urn) throws JsonProcessingException { Map<String, JsonNode> documents = - TimeseriesAspectTransformer.transform(urn, dp, _aspectSpec, null); + TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null); 
assertEquals(documents.size(), 3); documents.forEach( (key, value) -> - _elasticSearchTimeseriesAspectService.upsertDocument( + elasticSearchTimeseriesAspectService.upsertDocument( ENTITY_NAME, ASPECT_NAME, key, value)); } @@ -190,10 +202,10 @@ private TestEntityProfile makeTestProfile(long eventTime, long stat, String mess @Test(groups = "upsert") public void testUpsertProfiles() throws Exception { // Create the testEntity profiles that we would like to use for testing. - _startTime = Calendar.getInstance().getTimeInMillis(); - _startTime = _startTime - _startTime % 86400000; + startTime = Calendar.getInstance().getTimeInMillis(); + startTime = startTime - startTime % 86400000; // Create the testEntity profiles that we would like to use for testing. - TestEntityProfile firstProfile = makeTestProfile(_startTime, 20, null); + TestEntityProfile firstProfile = makeTestProfile(startTime, 20, null); Stream<TestEntityProfile> testEntityProfileStream = Stream.iterate( firstProfile, @@ -201,17 +213,17 @@ public void testUpsertProfiles() throws Exception { makeTestProfile( prev.getTimestampMillis() + TIME_INCREMENT, prev.getStat() + 10, null)); - _testEntityProfiles = + testEntityProfiles = testEntityProfileStream .limit(NUM_PROFILES) .collect(Collectors.toMap(TestEntityProfile::getTimestampMillis, Function.identity())); - Long endTime = _startTime + (NUM_PROFILES - 1) * TIME_INCREMENT; + Long endTime = startTime + (NUM_PROFILES - 1) * TIME_INCREMENT; - assertNotNull(_testEntityProfiles.get(_startTime)); - assertNotNull(_testEntityProfiles.get(endTime)); + assertNotNull(testEntityProfiles.get(startTime)); + assertNotNull(testEntityProfiles.get(endTime)); // Upsert the documents into the index. 
- _testEntityProfiles + testEntityProfiles .values() .forEach( x -> { @@ -260,7 +272,7 @@ public void testUpsertProfilesWithUniqueMessageIds() throws Exception { syncAfterWrite(getBulkProcessor()); List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( urn, ENTITY_NAME, ASPECT_NAME, null, null, testEntityProfiles.size(), null); assertEquals(resultAspects.size(), testEntityProfiles.size()); } @@ -273,8 +285,8 @@ private void validateAspectValue(EnvelopedAspect envelopedAspectResult) { TestEntityProfile actualProfile = (TestEntityProfile) GenericRecordUtils.deserializeAspect( - envelopedAspectResult.getAspect().getValue(), CONTENT_TYPE, _aspectSpec); - TestEntityProfile expectedProfile = _testEntityProfiles.get(actualProfile.getTimestampMillis()); + envelopedAspectResult.getAspect().getValue(), CONTENT_TYPE, aspectSpec); + TestEntityProfile expectedProfile = testEntityProfiles.get(actualProfile.getTimestampMillis()); assertNotNull(expectedProfile); assertEquals(actualProfile.getStat(), expectedProfile.getStat()); assertEquals(actualProfile.getTimestampMillis(), expectedProfile.getTimestampMillis()); @@ -288,20 +300,20 @@ private void validateAspectValues(List<EnvelopedAspect> aspects, long numResults @Test(groups = "getAspectValues", dependsOnGroups = "upsert") public void testGetAspectTimeseriesValuesAll() { List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, null, null, NUM_PROFILES, null); validateAspectValues(resultAspects, NUM_PROFILES); TestEntityProfile firstProfile = (TestEntityProfile) GenericRecordUtils.deserializeAspect( - resultAspects.get(0).getAspect().getValue(), CONTENT_TYPE, _aspectSpec); + resultAspects.get(0).getAspect().getValue(), CONTENT_TYPE, aspectSpec); TestEntityProfile lastProfile = (TestEntityProfile) 
GenericRecordUtils.deserializeAspect( resultAspects.get(resultAspects.size() - 1).getAspect().getValue(), CONTENT_TYPE, - _aspectSpec); + aspectSpec); // Now verify that the first index is the one with the highest stat value, and the last the one // with the lower. @@ -312,7 +324,7 @@ public void testGetAspectTimeseriesValuesAll() { @Test(groups = "getAspectValues", dependsOnGroups = "upsert") public void testGetAspectTimeseriesValuesAllSorted() { List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, @@ -326,13 +338,13 @@ public void testGetAspectTimeseriesValuesAllSorted() { TestEntityProfile firstProfile = (TestEntityProfile) GenericRecordUtils.deserializeAspect( - resultAspects.get(0).getAspect().getValue(), CONTENT_TYPE, _aspectSpec); + resultAspects.get(0).getAspect().getValue(), CONTENT_TYPE, aspectSpec); TestEntityProfile lastProfile = (TestEntityProfile) GenericRecordUtils.deserializeAspect( resultAspects.get(resultAspects.size() - 1).getAspect().getValue(), CONTENT_TYPE, - _aspectSpec); + aspectSpec); // Now verify that the first index is the one with the highest stat value, and the last the one // with the lower. 
@@ -347,7 +359,7 @@ public void testGetAspectTimeseriesValuesWithFilter() { new Criterion().setField("stat").setCondition(Condition.EQUAL).setValue("20"); filter.setCriteria(new CriterionArray(hasStatEqualsTwenty)); List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, null, null, NUM_PROFILES, filter); validateAspectValues(resultAspects, 1); } @@ -356,12 +368,12 @@ public void testGetAspectTimeseriesValuesWithFilter() { public void testGetAspectTimeseriesValuesSubRangeInclusiveOverlap() { int expectedNumRows = 10; List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, - _startTime, - _startTime + TIME_INCREMENT * (expectedNumRows - 1), + startTime, + startTime + TIME_INCREMENT * (expectedNumRows - 1), expectedNumRows, null); validateAspectValues(resultAspects, expectedNumRows); @@ -371,12 +383,12 @@ public void testGetAspectTimeseriesValuesSubRangeInclusiveOverlap() { public void testGetAspectTimeseriesValuesSubRangeExclusiveOverlap() { int expectedNumRows = 10; List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, - _startTime + TIME_INCREMENT / 2, - _startTime + TIME_INCREMENT * expectedNumRows + TIME_INCREMENT / 2, + startTime + TIME_INCREMENT / 2, + startTime + TIME_INCREMENT * expectedNumRows + TIME_INCREMENT / 2, expectedNumRows, null); validateAspectValues(resultAspects, expectedNumRows); @@ -386,12 +398,12 @@ public void testGetAspectTimeseriesValuesSubRangeExclusiveOverlap() { public void testGetAspectTimeseriesValuesSubRangeExclusiveOverlapLatestValueOnly() { int expectedNumRows = 1; List<EnvelopedAspect> resultAspects = - 
_elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, - _startTime + TIME_INCREMENT / 2, - _startTime + TIME_INCREMENT * expectedNumRows + TIME_INCREMENT / 2, + startTime + TIME_INCREMENT / 2, + startTime + TIME_INCREMENT * expectedNumRows + TIME_INCREMENT / 2, expectedNumRows, null); validateAspectValues(resultAspects, expectedNumRows); @@ -401,12 +413,12 @@ public void testGetAspectTimeseriesValuesSubRangeExclusiveOverlapLatestValueOnly public void testGetAspectTimeseriesValuesExactlyOneResponse() { int expectedNumRows = 1; List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( TEST_URN, ENTITY_NAME, ASPECT_NAME, - _startTime + TIME_INCREMENT / 2, - _startTime + TIME_INCREMENT * 3 / 2, + startTime + TIME_INCREMENT / 2, + startTime + TIME_INCREMENT * 3 / 2, expectedNumRows, null); validateAspectValues(resultAspects, expectedNumRows); @@ -418,7 +430,7 @@ public void testGetAspectTimeseriesValuesExactlyOneResponse() { public void testGetAspectTimeseriesValueMissingUrn() { Urn nonExistingUrn = new TestEntityUrn("missing", "missing", "missing"); List<EnvelopedAspect> resultAspects = - _elasticSearchTimeseriesAspectService.getAspectValues( + elasticSearchTimeseriesAspectService.getAspectValues( nonExistingUrn, ENTITY_NAME, ASPECT_NAME, null, null, NUM_PROFILES, null); validateAspectValues(resultAspects, 0); } @@ -439,12 +451,12 @@ public void testGetAggregatedStatsLatestStatForDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * 
TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -462,7 +474,7 @@ public void testGetAggregatedStatsLatestStatForDay1() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -481,8 +493,8 @@ public void testGetAggregatedStatsLatestStatForDay1() { resultTable.getRows(), new StringArrayArray( new StringArray( - _startTime.toString(), - _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString()))); + startTime.toString(), + testEntityProfiles.get(startTime + 23 * TIME_INCREMENT).getStat().toString()))); } @Test( @@ -496,13 +508,13 @@ public void testGetAggregatedStatsLatestStatForDay1WithValues() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValues(new StringArray(_startTime.toString())) + .setValues(new StringArray(startTime.toString())) .setValue(""); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValues(new StringArray(String.valueOf(_startTime + 23 * TIME_INCREMENT))) + .setValues(new StringArray(String.valueOf(startTime + 23 * TIME_INCREMENT))) .setValue(""); Filter filter = @@ -521,7 +533,7 @@ public void testGetAggregatedStatsLatestStatForDay1WithValues() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -540,8 +552,8 @@ public void testGetAggregatedStatsLatestStatForDay1WithValues() { resultTable.getRows(), new StringArrayArray( new StringArray( - 
_startTime.toString(), - _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString()))); + startTime.toString(), + testEntityProfiles.get(startTime + 23 * TIME_INCREMENT).getStat().toString()))); } @Test( @@ -555,12 +567,12 @@ public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -580,7 +592,7 @@ public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -595,13 +607,13 @@ public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() { // Validate rows assertNotNull(resultTable.getRows()); assertEquals(resultTable.getRows().size(), 1); - assertEquals(resultTable.getRows().get(0).get(0), _startTime.toString()); + assertEquals(resultTable.getRows().get(0).get(0), startTime.toString()); try { ComplexNestedRecord latestAComplexNestedRecord = OBJECT_MAPPER.readValue(resultTable.getRows().get(0).get(1), ComplexNestedRecord.class); assertEquals( latestAComplexNestedRecord, - _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getAComplexNestedRecord()); + testEntityProfiles.get(startTime + 23 * TIME_INCREMENT).getAComplexNestedRecord()); } catch (JsonProcessingException e) { fail("Unexpected exception thrown" + e); } @@ -618,12 +630,12 @@ 
public void testGetAggregatedStatsLatestStrArrayDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -641,7 +653,7 @@ public void testGetAggregatedStatsLatestStrArrayDay1() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -656,7 +668,7 @@ public void testGetAggregatedStatsLatestStrArrayDay1() { assertNotNull(resultTable.getRows()); assertEquals(resultTable.getRows().size(), 1); StringArray expectedStrArray = - _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStrArray(); + testEntityProfiles.get(startTime + 23 * TIME_INCREMENT).getStrArray(); // assertEquals(resultTable.getRows(), new StringArrayArray(new // StringArray(_startTime.toString(), // expectedStrArray.toString()))); @@ -681,12 +693,12 @@ public void testGetAggregatedStatsLatestStatForTwoDays() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 47 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 47 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -704,7 +716,7 @@ public void 
testGetAggregatedStatsLatestStatForTwoDays() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -719,16 +731,16 @@ public void testGetAggregatedStatsLatestStatForTwoDays() { // Validate rows assertNotNull(resultTable.getRows()); assertEquals(resultTable.getRows().size(), 2); - Long latestDay1Ts = _startTime + 23 * TIME_INCREMENT; - Long latestDay2Ts = _startTime + 47 * TIME_INCREMENT; + Long latestDay1Ts = startTime + 23 * TIME_INCREMENT; + Long latestDay2Ts = startTime + 47 * TIME_INCREMENT; assertEquals( resultTable.getRows(), new StringArrayArray( new StringArray( - _startTime.toString(), _testEntityProfiles.get(latestDay1Ts).getStat().toString()), + startTime.toString(), testEntityProfiles.get(latestDay1Ts).getStat().toString()), new StringArray( - String.valueOf(_startTime + 24 * TIME_INCREMENT), - _testEntityProfiles.get(latestDay2Ts).getStat().toString()))); + String.valueOf(startTime + 24 * TIME_INCREMENT), + testEntityProfiles.get(latestDay2Ts).getStat().toString()))); } @Test( @@ -741,12 +753,12 @@ public void testGetAggregatedStatsLatestStatForFirst10HoursOfDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 9 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 9 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -764,7 +776,7 @@ public void testGetAggregatedStatsLatestStatForFirst10HoursOfDay1() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); 
GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -783,22 +795,22 @@ public void testGetAggregatedStatsLatestStatForFirst10HoursOfDay1() { resultTable.getRows(), new StringArrayArray( new StringArray( - _startTime.toString(), - _testEntityProfiles.get(_startTime + 9 * TIME_INCREMENT).getStat().toString()))); + startTime.toString(), + testEntityProfiles.get(startTime + 9 * TIME_INCREMENT).getStat().toString()))); } @Test( groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForCol1Day1() { - Long lastEntryTimeStamp = _startTime + 23 * TIME_INCREMENT; + Long lastEntryTimeStamp = startTime + 23 * TIME_INCREMENT; Criterion hasUrnCriterion = new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); Criterion startTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) @@ -833,7 +845,7 @@ public void testGetAggregatedStatsLatestStatForCol1Day1() { .setType(GroupingBucketType.STRING_GROUPING_BUCKET); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -853,9 +865,9 @@ public void testGetAggregatedStatsLatestStatForCol1Day1() { resultTable.getRows(), new StringArrayArray( new StringArray( - _startTime.toString(), + startTime.toString(), "col1", - _testEntityProfiles + testEntityProfiles .get(lastEntryTimeStamp) .getComponentProfiles() .get(0) @@ -867,14 +879,14 @@ public void testGetAggregatedStatsLatestStatForCol1Day1() { groups = 
{"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForAllColumnsDay1() { - Long lastEntryTimeStamp = _startTime + 23 * TIME_INCREMENT; + Long lastEntryTimeStamp = startTime + 23 * TIME_INCREMENT; Criterion hasUrnCriterion = new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); Criterion startTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) @@ -904,7 +916,7 @@ public void testGetAggregatedStatsLatestStatForAllColumnsDay1() { .setType(GroupingBucketType.STRING_GROUPING_BUCKET); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {latestStatAggregationSpec}, @@ -920,9 +932,9 @@ public void testGetAggregatedStatsLatestStatForAllColumnsDay1() { // Validate rows StringArray expectedRow1 = new StringArray( - _startTime.toString(), + startTime.toString(), "col1", - _testEntityProfiles + testEntityProfiles .get(lastEntryTimeStamp) .getComponentProfiles() .get(0) @@ -930,9 +942,9 @@ public void testGetAggregatedStatsLatestStatForAllColumnsDay1() { .toString()); StringArray expectedRow2 = new StringArray( - _startTime.toString(), + startTime.toString(), "col2", - _testEntityProfiles + testEntityProfiles .get(lastEntryTimeStamp) .getComponentProfiles() .get(1) @@ -955,12 +967,12 @@ public void testGetAggregatedStatsSumStatForFirst10HoursOfDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - 
.setValue(String.valueOf(_startTime + 9 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 9 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -978,7 +990,7 @@ public void testGetAggregatedStatsSumStatForFirst10HoursOfDay1() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {sumAggregationSpec}, @@ -996,21 +1008,21 @@ public void testGetAggregatedStatsSumStatForFirst10HoursOfDay1() { // TODO: Compute this caching the documents. assertEquals( resultTable.getRows(), - new StringArrayArray(new StringArray(_startTime.toString(), String.valueOf(650)))); + new StringArrayArray(new StringArray(startTime.toString(), String.valueOf(650)))); } @Test( groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsSumStatForCol2Day1() { - Long lastEntryTimeStamp = _startTime + 23 * TIME_INCREMENT; + Long lastEntryTimeStamp = startTime + 23 * TIME_INCREMENT; Criterion hasUrnCriterion = new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); Criterion startTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) @@ -1045,7 +1057,7 @@ public void testGetAggregatedStatsSumStatForCol2Day1() { .setType(GroupingBucketType.STRING_GROUPING_BUCKET); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {sumStatAggregationSpec}, @@ -1065,7 +1077,7 @@ public void testGetAggregatedStatsSumStatForCol2Day1() { // TODO: Compute 
this caching the documents. assertEquals( resultTable.getRows(), - new StringArrayArray(new StringArray(_startTime.toString(), "col2", String.valueOf(3288)))); + new StringArrayArray(new StringArray(startTime.toString(), "col2", String.valueOf(3288)))); } @Test( @@ -1079,12 +1091,12 @@ public void testGetAggregatedStatsCardinalityAggStrStatDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1104,7 +1116,7 @@ public void testGetAggregatedStatsCardinalityAggStrStatDay1() { .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {cardinalityStatAggregationSpec}, @@ -1120,7 +1132,7 @@ public void testGetAggregatedStatsCardinalityAggStrStatDay1() { assertNotNull(resultTable.getRows()); assertEquals(resultTable.getRows().size(), 1); assertEquals( - resultTable.getRows(), new StringArrayArray(new StringArray(_startTime.toString(), "24"))); + resultTable.getRows(), new StringArrayArray(new StringArray(startTime.toString(), "24"))); } @Test( @@ -1134,12 +1146,12 @@ public void testGetAggregatedStatsSumStatsCollectionDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - 
.setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1158,7 +1170,7 @@ public void testGetAggregatedStatsSumStatsCollectionDay1() { .setType(GroupingBucketType.STRING_GROUPING_BUCKET); GenericTable resultTable = - _elasticSearchTimeseriesAspectService.getAggregatedStats( + elasticSearchTimeseriesAspectService.getAggregatedStats( ENTITY_NAME, ASPECT_NAME, new AggregationSpec[] {cardinalityStatAggregationSpec}, @@ -1188,18 +1200,18 @@ public void testDeleteAspectValuesByUrnAndTimeRangeDay1() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion)); DeleteAspectValuesResult result = - _elasticSearchTimeseriesAspectService.deleteAspectValues(ENTITY_NAME, ASPECT_NAME, filter); + elasticSearchTimeseriesAspectService.deleteAspectValues(ENTITY_NAME, ASPECT_NAME, filter); // For day1, we expect 24 (number of hours) * 3 (each testEntityProfile aspect expands 3 elastic // docs: // 1 original + 2 for componentProfiles) = 72 total. 
@@ -1214,7 +1226,7 @@ public void testDeleteAspectValuesByUrn() { new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion)); DeleteAspectValuesResult result = - _elasticSearchTimeseriesAspectService.deleteAspectValues(ENTITY_NAME, ASPECT_NAME, filter); + elasticSearchTimeseriesAspectService.deleteAspectValues(ENTITY_NAME, ASPECT_NAME, filter); // Of the 300 elastic docs upserted for TEST_URN, 72 got deleted by deleteAspectValues1 test // group leaving 228. assertEquals(result.getNumDocsDeleted(), Long.valueOf(228L)); @@ -1229,7 +1241,7 @@ public void testCountByFilter() { new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion)); long count = - _elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, filter); + elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, filter); assertEquals(count, 300L); // Test with filter with multiple criteria @@ -1237,24 +1249,24 @@ public void testCountByFilter() { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter urnAndTimeFilter = QueryUtils.getFilterFromCriteria( ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion)); count = - _elasticSearchTimeseriesAspectService.countByFilter( + elasticSearchTimeseriesAspectService.countByFilter( ENTITY_NAME, ASPECT_NAME, urnAndTimeFilter); assertEquals(count, 72L); // test without filter count = - 
_elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, new Filter()); + elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, new Filter()); // There may be other entities in there from other tests assertTrue(count >= 300L); } @@ -1269,7 +1281,7 @@ public void testCountByFilterAfterDelete() throws InterruptedException { new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion)); long count = - _elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, filter); + elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, filter); assertEquals(count, 228L); // Test with filter with multiple criteria @@ -1277,18 +1289,18 @@ public void testCountByFilterAfterDelete() throws InterruptedException { new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(_startTime.toString()); + .setValue(startTime.toString()); Criterion endTimeCriterion = new Criterion() .setField(ES_FIELD_TIMESTAMP) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(_startTime + 23 * TIME_INCREMENT)); + .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter urnAndTimeFilter = QueryUtils.getFilterFromCriteria( ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion)); count = - _elasticSearchTimeseriesAspectService.countByFilter( + elasticSearchTimeseriesAspectService.countByFilter( ENTITY_NAME, ASPECT_NAME, urnAndTimeFilter); assertEquals(count, 0L); } @@ -1297,7 +1309,7 @@ public void testCountByFilterAfterDelete() throws InterruptedException { groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetIndexSizes() { - List<TimeseriesIndexSizeResult> result = _elasticSearchTimeseriesAspectService.getIndexSizes(); + List<TimeseriesIndexSizeResult> result = 
elasticSearchTimeseriesAspectService.getIndexSizes(); // CHECKSTYLE:OFF /* Example result: diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java index a23267dcf6f55e..d56ddb2cc808ea 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java @@ -5,7 +5,7 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; @@ -34,17 +34,18 @@ public class TimeseriesAspectServiceUnitTest { private final IndexConvention _indexConvention = mock(IndexConvention.class); private final TimeseriesAspectIndexBuilders _timeseriesAspectIndexBuilders = mock(TimeseriesAspectIndexBuilders.class); - private final EntityRegistry _entityRegistry = mock(EntityRegistry.class); + private final AspectRetriever aspectRetriever = mock(AspectRetriever.class); private final ESBulkProcessor _bulkProcessor = mock(ESBulkProcessor.class); private final RestClient _restClient = mock(RestClient.class); private final TimeseriesAspectService _timeseriesAspectService = new ElasticSearchTimeseriesAspectService( - _searchClient, - _indexConvention, - _timeseriesAspectIndexBuilders, - _entityRegistry, - _bulkProcessor, - 0); + _searchClient, + _indexConvention, + _timeseriesAspectIndexBuilders, + aspectRetriever.getEntityRegistry(), + 
_bulkProcessor, + 0) + .postConstruct(aspectRetriever); private static final String INDEX_PATTERN = "indexPattern"; diff --git a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java index eb4c85209ce422..c27a1c337ed5c5 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java +++ b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java @@ -16,7 +16,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.glossary.GlossaryTermInfo; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; @@ -27,7 +27,6 @@ import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; @@ -75,7 +74,6 @@ public static DataGenerator build(EntityRegistry entityRegistry) { mock(EventProducer.class), entityRegistry, false, - mock(UpdateIndicesService.class), mock(PreProcessHooks.class), anyBoolean()); return new DataGenerator(mockEntityServiceImpl); @@ -172,7 +170,7 @@ public Stream<List<MetadataChangeProposal>> generateMCPs( entityService, true) .stream() - .map(MCPBatchItem::getMetadataChangeProposal)) + .map(MCPItem::getMetadataChangeProposal)) .collect(Collectors.toList()); } else { return List.of(mcp); diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java 
b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index b42cd89131f51f..24acb7bbcb4a70 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -8,6 +8,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; @@ -138,25 +139,29 @@ protected EntityIndexBuilders entityIndexBuildersHelper( protected ElasticSearchService entitySearchService( @Qualifier("entityRegistry") EntityRegistry entityRegistry, @Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders, - @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention) + @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention, + @Qualifier("aspectRetriever") final AspectRetriever aspectRetriever) throws IOException { - return entitySearchServiceHelper(entityRegistry, indexBuilders, indexConvention); + return entitySearchServiceHelper( + entityRegistry, indexBuilders, indexConvention, aspectRetriever); } @Bean(name = "longTailEntitySearchService") protected ElasticSearchService longTailEntitySearchService( @Qualifier("entityRegistry") EntityRegistry longTailEntityRegistry, @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailEndexBuilders, - @Qualifier("longTailIndexConvention") IndexConvention longTailIndexConvention) + @Qualifier("longTailIndexConvention") IndexConvention longTailIndexConvention, + @Qualifier("aspectRetriever") final AspectRetriever aspectRetriever) throws IOException { return entitySearchServiceHelper( - longTailEntityRegistry, 
longTailEndexBuilders, longTailIndexConvention); + longTailEntityRegistry, longTailEndexBuilders, longTailIndexConvention, aspectRetriever); } protected ElasticSearchService entitySearchServiceHelper( EntityRegistry entityRegistry, EntityIndexBuilders indexBuilders, - IndexConvention indexConvention) + IndexConvention indexConvention, + AspectRetriever aspectRetriever) throws IOException { CustomConfiguration customConfiguration = new CustomConfiguration(); customConfiguration.setEnabled(true); @@ -166,7 +171,6 @@ protected ElasticSearchService entitySearchServiceHelper( ESSearchDAO searchDAO = new ESSearchDAO( - entityRegistry, _searchClient, indexConvention, false, @@ -175,14 +179,11 @@ protected ElasticSearchService entitySearchServiceHelper( customSearchConfiguration); ESBrowseDAO browseDAO = new ESBrowseDAO( - entityRegistry, - _searchClient, - indexConvention, - _searchConfiguration, - _customSearchConfiguration); + _searchClient, indexConvention, _searchConfiguration, _customSearchConfiguration); ESWriteDAO writeDAO = new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1); - return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); + return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO) + .postConstruct(aspectRetriever); } @Bean(name = "sampleDataSearchService") @@ -296,8 +297,7 @@ private EntityClient entityClientHelper( PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); return new JavaEntityClient( - new EntityServiceImpl( - mockAspectDao, null, entityRegistry, true, null, preProcessHooks, true), + new EntityServiceImpl(mockAspectDao, null, entityRegistry, true, preProcessHooks, true), null, entitySearchService, cachingEntitySearchService, diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java 
b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 07d27245222b9e..1c43e623443c1e 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -3,6 +3,7 @@ import static com.linkedin.metadata.Constants.*; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; @@ -53,13 +54,13 @@ @Import(SearchCommonTestConfiguration.class) public class SearchLineageFixtureConfiguration { - @Autowired private ESBulkProcessor _bulkProcessor; + @Autowired private ESBulkProcessor bulkProcessor; - @Autowired private RestHighLevelClient _searchClient; + @Autowired private RestHighLevelClient searchClient; - @Autowired private SearchConfiguration _searchConfiguration; + @Autowired private SearchConfiguration searchConfiguration; - @Autowired private CustomSearchConfiguration _customSearchConfiguration; + @Autowired private CustomSearchConfiguration customSearchConfiguration; @Bean(name = "searchLineagePrefix") protected String indexPrefix() { @@ -91,7 +92,7 @@ protected EntityIndexBuilders entityIndexBuilders( GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); ESIndexBuilder indexBuilder = new ESIndexBuilder( - _searchClient, + searchClient, 1, 0, 1, @@ -107,28 +108,25 @@ protected EntityIndexBuilders entityIndexBuilders( @Bean(name = "searchLineageEntitySearchService") protected ElasticSearchService entitySearchService( - @Qualifier("entityRegistry") EntityRegistry entityRegistry, + @Qualifier("aspectRetriever") AspectRetriever aspectRetriever, @Qualifier("searchLineageEntityIndexBuilders") 
EntityIndexBuilders indexBuilders, @Qualifier("searchLineageIndexConvention") IndexConvention indexConvention) { ESSearchDAO searchDAO = new ESSearchDAO( - entityRegistry, - _searchClient, + searchClient, indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, - _searchConfiguration, + searchConfiguration, null); ESBrowseDAO browseDAO = new ESBrowseDAO( - entityRegistry, - _searchClient, - indexConvention, - _searchConfiguration, - _customSearchConfiguration); + searchClient, indexConvention, searchConfiguration, customSearchConfiguration); ESWriteDAO writeDAO = - new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1); - return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); + new ESWriteDAO( + aspectRetriever.getEntityRegistry(), searchClient, indexConvention, bulkProcessor, 1); + return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO) + .postConstruct(aspectRetriever); } @Bean(name = "searchLineageESIndexBuilder") @@ -136,7 +134,7 @@ protected ElasticSearchService entitySearchService( protected ESIndexBuilder esIndexBuilder() { GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); return new ESIndexBuilder( - _searchClient, + searchClient, 1, 1, 1, @@ -158,11 +156,11 @@ protected ElasticSearchGraphService graphService( ElasticSearchGraphService graphService = new ElasticSearchGraphService( lineageRegistry, - _bulkProcessor, + bulkProcessor, indexConvention, - new ESGraphWriteDAO(indexConvention, _bulkProcessor, 1), + new ESGraphWriteDAO(indexConvention, bulkProcessor, 1), new ESGraphQueryDAO( - _searchClient, + searchClient, lineageRegistry, indexConvention, GraphQueryConfiguration.testDefaults), @@ -183,7 +181,7 @@ protected LineageSearchService lineageSearchService( // Load fixture data (after graphService mappings applied) FixtureReader.builder() - .bulkProcessor(_bulkProcessor) + .bulkProcessor(bulkProcessor) .fixtureName(fixtureName) 
.targetIndexPrefix(prefix) .refreshIntervalSeconds(SearchTestContainerConfiguration.REFRESH_INTERVAL_SECONDS) @@ -234,7 +232,7 @@ protected EntityClient entityClient( PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); return new JavaEntityClient( - new EntityServiceImpl(null, null, entityRegistry, true, null, preProcessHooks, true), + new EntityServiceImpl(null, null, entityRegistry, true, preProcessHooks, true), null, entitySearchService, cachingEntitySearchService, diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java index 17747d9ba1cc9e..ae81eaf1ef3884 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java @@ -1,6 +1,12 @@ package io.datahubproject.test.search.config; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.datahub.test.Snapshot; import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -10,6 +16,10 @@ import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; +import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; +import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; +import java.util.Map; import org.springframework.boot.test.context.TestConfiguration; import 
org.springframework.context.annotation.Bean; @@ -59,4 +69,23 @@ public EntityRegistry entityRegistry() throws EntityRegistryException { .getClassLoader() .getResourceAsStream("entity-registry.yml")); } + + @Bean(name = "aspectRetriever") + protected AspectRetriever aspectRetriever(final EntityRegistry entityRegistry) + throws RemoteInvocationException, URISyntaxException { + AspectRetriever aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + return aspectRetriever; + } + + @Bean(name = "snapshotRegistryAspectRetriever") + protected AspectRetriever snapshotRegistryAspectRetriever() + throws RemoteInvocationException, URISyntaxException { + AspectRetriever aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(new SnapshotEntityRegistry(new Snapshot())); + when(aspectRetriever.getLatestAspectObjects(any(), any())).thenReturn(Map.of()); + return aspectRetriever; + } } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java index 8496c06db86a7d..a416e6f2e79086 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java @@ -56,6 +56,11 @@ public MetadataChangeLogProcessor(List<MetadataChangeLogHook> metadataChangeLogH .filter(MetadataChangeLogHook::isEnabled) .sorted(Comparator.comparing(MetadataChangeLogHook::executionOrder)) .collect(Collectors.toList()); + log.info( + "Enabled hooks: {}", + this.hooks.stream() + .map(hook -> hook.getClass().getSimpleName()) + .collect(Collectors.toList())); this.hooks.forEach(MetadataChangeLogHook::init); } diff --git 
a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java index fc47679bebd395..a80017a0956b22 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java @@ -7,13 +7,16 @@ import com.datahub.metadata.ingestion.IngestionScheduler; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.registry.SchemaRegistryService; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; +import com.linkedin.metadata.service.FormService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import org.apache.avro.generic.GenericRecord; @@ -45,7 +48,7 @@ public class MCLSpringTestConfiguration { @MockBean public IngestionScheduler ingestionScheduler; - @Bean + @Bean(name = "systemEntityClient") public SystemEntityClient systemEntityClient( @Qualifier("systemAuthentication") Authentication systemAuthentication) { SystemEntityClient systemEntityClient = mock(SystemEntityClient.class); @@ -55,6 +58,13 @@ public SystemEntityClient systemEntityClient( @MockBean public 
ElasticSearchService searchService; + @MockBean public EntityService<?> entityService; + + @MockBean public FormService formService; + + @MockBean(name = "cachingAspectRetriever") + CachingAspectRetriever cachingAspectRetriever; + @MockBean(name = "systemAuthentication") public Authentication systemAuthentication; diff --git a/metadata-models-custom/README.md b/metadata-models-custom/README.md index a8982c2bd11164..d0274f2bc4e0e2 100644 --- a/metadata-models-custom/README.md +++ b/metadata-models-custom/README.md @@ -167,7 +167,7 @@ As you evolve the metadata model, you can publish new versions of the repository ### Custom Plugins -Adding custom aspects to DataHub's existing data model is a powerful way to extend DataHub without forking the entire repo. Often however extending +Adding custom aspects to DataHub's existing data model is a powerful way to extend DataHub without forking the entire repo. Often extending just the data model is not enough and additional custom code might be required. For a few of these use cases a plugin framework was developed to control how instances of custom aspects can be validated, mutated, and generate side effects (additional aspects). @@ -210,7 +210,7 @@ Custom aspects might require that instances of those aspects adhere to specific as a null or range check for one or more fields within the custom aspect. Additionally, a lookup can be done on other aspects in order to validate the current aspect using the `AspectRetriever`. There are two integration points for validation. The first integration point is `on request` via the `validateProposedAspect` method where the aspect is validated independent of the previous value. This validation is performed -outside of any kind of database transaction and can perform more intensive checks without introducing added latency within a transaction. +outside of a database transaction and can perform more intensive checks without introducing added latency within a transaction. 
Note that added latency from the validation check is still introduced into the request itself. The second integration point for validation occurs within the database transaction using the `validatePreCommitAspect` and has access to the new aspect as well as the old aspect. See the included example in [`CustomDataQualityRulesValidator.java`](src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java). @@ -220,26 +220,13 @@ Shown below is the interface to be implemented for a custom validator. ```java public class CustomDataQualityRulesValidator extends AspectPayloadValidator { @Override - protected void validateProposedAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - + protected Stream<AspectValidationException> validateProposedAspects( + @Nonnull Collection<? extends BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever) { } @Override - protected void validatePreCommitAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - + protected Stream<AspectValidationException> validatePreCommitAspects( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { } } ``` @@ -265,33 +252,42 @@ plugins: **Warning: This hook is for advanced users only. It is possible to corrupt data and render your system inoperable.** +Mutation hooks have two possible mutation points. The first is the `write` mutation which can change the data +being written to persistent storage. The second mutation hook is a `read` hook which can modify the data when +read from persistent storage. 
+ +Write Mutation: + In this example, we want to make sure that the field type is always lowercase regardless of the string being provided by ingestion. The full example can be found in [`CustomDataQualityMutator.java`](src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java). ```java public class CustomDataQualityRulesMutator extends MutationHook { @Override - protected void mutate( - @Nonnull ChangeType changeType, - @Nonnull EntitySpec entitySpec, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate oldAspectValue, - @Nullable RecordTemplate newAspectValue, - @Nullable SystemMetadata oldSystemMetadata, - @Nullable SystemMetadata newSystemMetadata, - @Nonnull AuditStamp auditStamp, - @Nonnull AspectRetriever aspectRetriever) { - - if (newAspectValue != null) { - DataQualityRules newDataQualityRules = new DataQualityRules(newAspectValue.data()); - - for (DataQualityRule rule : newDataQualityRules.getRules()) { - // Ensure uniform lowercase - if (!rule.getType().toLowerCase().equals(rule.getType())) { - rule.setType(rule.getType().toLowerCase()); - } - } - } + protected Stream<Pair<ChangeMCP, Boolean>> writeMutation( + @Nonnull Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { + return changeMCPS.stream() + .map( + changeMCP -> { + boolean mutated = false; + + if (changeMCP.getRecordTemplate() != null) { + DataQualityRules newDataQualityRules = + new DataQualityRules(changeMCP.getRecordTemplate().data()); + + for (DataQualityRule rule : newDataQualityRules.getRules()) { + // Ensure uniform lowercase + if (!rule.getType().toLowerCase().equals(rule.getType())) { + mutated = true; + rule.setType(rule.getType().toLowerCase()); + } + } + } + + return mutated ? 
changeMCP : null; + }) + .filter(Objects::nonNull) + .map(changeMCP -> Pair.of(changeMCP, true)); } } ``` @@ -308,6 +304,44 @@ plugins: aspectName: customDataQualityRules ``` +Read Mutation: + +A read mutator would implement the following interface. The following example is a read mutation which hides soft +deleted structured properties from being returned on entities. + +```java +public class StructuredPropertiesSoftDelete extends MutationHook { + @Override + protected Stream<Pair<ReadItem, Boolean>> readMutation( + @Nonnull Collection<ReadItem> items, @Nonnull AspectRetriever aspectRetriever) { + Map<Urn, StructuredProperties> entityStructuredPropertiesMap = + items.stream() + .filter(i -> i.getRecordTemplate() != null) + .map(i -> Pair.of(i.getUrn(), i.getAspect(StructuredProperties.class))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + + // Apply filter + Map<Urn, Boolean> mutatedEntityStructuredPropertiesMap = + StructuredPropertyUtils.filterSoftDelete(entityStructuredPropertiesMap, aspectRetriever); + + return items.stream() + .map(i -> Pair.of(i, mutatedEntityStructuredPropertiesMap.getOrDefault(i.getUrn(), false))); + } +} +``` + +Note that `supportedOperations` is left empty, since those operation types only include change types like `UPSERT` or `DELETE`. + +```yaml +plugins: + mutationHooks: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMutator' + enabled: true + supportedEntityAspectNames: + - entityName: '*' + aspectName: customDataQualityRules +``` + #### MetadataChangeProposal (MCP) Side Effects **Warning: This hook is for advanced users only.
It is possible to corrupt data and render your system inoperable.** @@ -323,18 +357,22 @@ The full example can be found in [`CustomDataQualityRulesMCPSideEffect.java`](sr ```java public class CustomDataQualityRulesMCPSideEffect extends MCPSideEffect { @Override - protected Stream<UpsertItem> applyMCPSideEffect( - UpsertItem input, EntityRegistry entityRegistry, @Nonnull AspectRetriever aspectRetriever) { + protected Stream<ChangeMCP> applyMCPSideEffect( + Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { // Mirror aspects to another URN in SQL & Search - Urn mirror = UrnUtils.getUrn(input.getUrn().toString().replace(",PROD)", ",DEV)")); - return Stream.of( - MCPUpsertBatchItem.builder() - .urn(mirror) - .aspectName(input.getAspectName()) - .aspect(input.getAspect()) - .auditStamp(input.getAuditStamp()) - .systemMetadata(input.getSystemMetadata()) - .build(entityRegistry, aspectRetriever)); + return changeMCPS.stream() + .map( + changeMCP -> { + Urn mirror = + UrnUtils.getUrn(changeMCP.getUrn().toString().replace(",PROD)", ",DEV)")); + return ChangeItemImpl.builder() + .urn(mirror) + .aspectName(changeMCP.getAspectName()) + .recordTemplate(changeMCP.getRecordTemplate()) + .auditStamp(changeMCP.getAuditStamp()) + .systemMetadata(changeMCP.getSystemMetadata()) + .build(aspectRetriever); + }); } } ``` @@ -367,34 +405,32 @@ The full example can be found in [`CustomDataQualityRulesMCLSideEffect.java`](sr ```java public class CustomDataQualityRulesMCLSideEffect extends MCLSideEffect { @Override - protected Stream<MCLBatchItem> applyMCLSideEffect( - @Nonnull MCLBatchItem input, - @Nonnull EntityRegistry entityRegistry, - @Nonnull AspectRetriever aspectRetriever) { - - // Generate Timeseries event aspect based on non-Timeseries aspect - MetadataChangeLog originMCP = input.getMetadataChangeLog(); - - Optional<MCLBatchItem> timeseriesOptional = - buildEvent(originMCP) - .map( - event -> { - try { - MetadataChangeLog eventMCP = originMCP.clone(); - 
eventMCP.setAspect(GenericRecordUtils.serializeAspect(event)); - eventMCP.setAspectName("customDataQualityRuleEvent"); - return eventMCP; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - }) - .map( - eventMCP -> - MCLBatchItemImpl.builder() - .metadataChangeLog(eventMCP) - .build(entityRegistry, aspectRetriever)); - - return timeseriesOptional.stream(); + protected Stream<MCLItem> applyMCLSideEffect( + @Nonnull Collection<MCLItem> mclItems, @Nonnull AspectRetriever aspectRetriever) { + return mclItems.stream() + .map( + item -> { + // Generate Timeseries event aspect based on non-Timeseries aspect + MetadataChangeLog originMCP = item.getMetadataChangeLog(); + + return buildEvent(originMCP) + .map( + event -> { + try { + MetadataChangeLog eventMCP = originMCP.clone(); + eventMCP.setAspect(GenericRecordUtils.serializeAspect(event)); + eventMCP.setAspectName("customDataQualityRuleEvent"); + return eventMCP; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + }) + .map( + eventMCP -> + MCLItemImpl.builder().metadataChangeLog(eventMCP).build(aspectRetriever)); + }) + .filter(Optional::isPresent) + .map(Optional::get); } private Optional<DataQualityRuleEvent> buildEvent(MetadataChangeLog originMCP) { diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java index ba72a979088462..9ebcfb0ba0c6bb 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java @@ -1,12 +1,13 @@ package com.linkedin.metadata.aspect.plugins.hooks; -import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import 
com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.entity.ebean.batch.MCLBatchItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeLog; import com.mycompany.dq.DataQualityRuleEvent; +import java.util.Collection; import java.util.Optional; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -18,30 +19,32 @@ public CustomDataQualityRulesMCLSideEffect(AspectPluginConfig config) { } @Override - protected Stream<MCLBatchItem> applyMCLSideEffect( - @Nonnull MCLBatchItem input, @Nonnull AspectRetriever aspectRetriever) { + protected Stream<MCLItem> applyMCLSideEffect( + @Nonnull Collection<MCLItem> mclItems, @Nonnull AspectRetriever aspectRetriever) { + return mclItems.stream() + .map( + item -> { + // Generate Timeseries event aspect based on non-Timeseries aspect + MetadataChangeLog originMCP = item.getMetadataChangeLog(); - // Generate Timeseries event aspect based on non-Timeseries aspect - MetadataChangeLog originMCP = input.getMetadataChangeLog(); - - Optional<MCLBatchItem> timeseriesOptional = - buildEvent(originMCP) - .map( - event -> { - try { - MetadataChangeLog eventMCP = originMCP.clone(); - eventMCP.setAspect(GenericRecordUtils.serializeAspect(event)); - eventMCP.setAspectName("customDataQualityRuleEvent"); - return eventMCP; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - }) - .map( - eventMCP -> - MCLBatchItemImpl.builder().metadataChangeLog(eventMCP).build(aspectRetriever)); - - return timeseriesOptional.stream(); + return buildEvent(originMCP) + .map( + event -> { + try { + MetadataChangeLog eventMCP = originMCP.clone(); + 
eventMCP.setAspect(GenericRecordUtils.serializeAspect(event)); + eventMCP.setAspectName("customDataQualityRuleEvent"); + return eventMCP; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + }) + .map( + eventMCP -> + MCLItemImpl.builder().metadataChangeLog(eventMCP).build(aspectRetriever)); + }) + .filter(Optional::isPresent) + .map(Optional::get); } private Optional<DataQualityRuleEvent> buildEvent(MetadataChangeLog originMCP) { diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java index c21b64c8a4fc00..103584f7a01401 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -2,10 +2,11 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import java.util.Collection; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -16,17 +17,21 @@ public CustomDataQualityRulesMCPSideEffect(AspectPluginConfig aspectPluginConfig } @Override - protected Stream<UpsertItem> applyMCPSideEffect( - UpsertItem input, @Nonnull AspectRetriever aspectRetriever) { + protected Stream<ChangeMCP> applyMCPSideEffect( + Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever 
aspectRetriever) { // Mirror aspects to another URN in SQL & Search - Urn mirror = UrnUtils.getUrn(input.getUrn().toString().replace(",PROD)", ",DEV)")); - return Stream.of( - MCPUpsertBatchItem.builder() - .urn(mirror) - .aspectName(input.getAspectName()) - .recordTemplate(input.getRecordTemplate()) - .auditStamp(input.getAuditStamp()) - .systemMetadata(input.getSystemMetadata()) - .build(aspectRetriever)); + return changeMCPS.stream() + .map( + changeMCP -> { + Urn mirror = + UrnUtils.getUrn(changeMCP.getUrn().toString().replace(",PROD)", ",DEV)")); + return ChangeItemImpl.builder() + .urn(mirror) + .aspectName(changeMCP.getAspectName()) + .recordTemplate(changeMCP.getRecordTemplate()) + .auditStamp(changeMCP.getAuditStamp()) + .systemMetadata(changeMCP.getSystemMetadata()) + .build(aspectRetriever); + }); } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java index 576ba3bf305f53..35b99d6c02abd7 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java @@ -1,17 +1,15 @@ package com.linkedin.metadata.aspect.plugins.hooks; -import com.linkedin.common.AuditStamp; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; import 
com.mycompany.dq.DataQualityRule; import com.mycompany.dq.DataQualityRules; +import java.util.Collection; +import java.util.Objects; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; public class CustomDataQualityRulesMutator extends MutationHook { @@ -20,26 +18,29 @@ public CustomDataQualityRulesMutator(AspectPluginConfig config) { } @Override - protected void mutate( - @Nonnull ChangeType changeType, - @Nonnull EntitySpec entitySpec, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate oldAspectValue, - @Nullable RecordTemplate newAspectValue, - @Nullable SystemMetadata oldSystemMetadata, - @Nullable SystemMetadata newSystemMetadata, - @Nonnull AuditStamp auditStamp, - @Nonnull AspectRetriever aspectRetriever) { + protected Stream<Pair<ChangeMCP, Boolean>> writeMutation( + @Nonnull Collection<ChangeMCP> changeMCPS, @Nonnull AspectRetriever aspectRetriever) { + return changeMCPS.stream() + .map( + changeMCP -> { + boolean mutated = false; - if (newAspectValue != null) { - DataQualityRules newDataQualityRules = new DataQualityRules(newAspectValue.data()); + if (changeMCP.getRecordTemplate() != null) { + DataQualityRules newDataQualityRules = + new DataQualityRules(changeMCP.getRecordTemplate().data()); - for (DataQualityRule rule : newDataQualityRules.getRules()) { - // Ensure uniform lowercase - if (!rule.getType().toLowerCase().equals(rule.getType())) { - rule.setType(rule.getType().toLowerCase()); - } - } - } + for (DataQualityRule rule : newDataQualityRules.getRules()) { + // Ensure uniform lowercase + if (!rule.getType().toLowerCase().equals(rule.getType())) { + mutated = true; + rule.setType(rule.getType().toLowerCase()); + } + } + } + + return mutated ? 
changeMCP : null; + }) + .filter(Objects::nonNull) + .map(changeMCP -> Pair.of(changeMCP, true)); } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java index 667d7ad614a791..ca291c46971230 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java @@ -1,16 +1,16 @@ package com.linkedin.metadata.aspect.plugins.validation; -import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; -import com.linkedin.metadata.models.AspectSpec; -import com.mycompany.dq.DataQualityRule; import com.mycompany.dq.DataQualityRules; +import java.util.Collection; import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; public class CustomDataQualityRulesValidator extends AspectPayloadValidator { @@ -19,52 +19,59 @@ public CustomDataQualityRulesValidator(AspectPluginConfig config) { } @Override - protected void validateProposedAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nonnull RecordTemplate aspectPayload, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { - DataQualityRules rules = new DataQualityRules(aspectPayload.data()); - - // Enforce at least 1 rule - if 
(rules.getRules().isEmpty()) { - throw new AspectValidationException("At least one rule is required."); - } + protected Stream<AspectValidationException> validateProposedAspects( + @Nonnull Collection<? extends BatchItem> mcpItems, @Nonnull AspectRetriever aspectRetriever) { + return mcpItems.stream() + .map( + item -> { + DataQualityRules rules = new DataQualityRules(item.getRecordTemplate().data()); + // Enforce at least 1 rule + return rules.getRules().isEmpty() + ? new AspectValidationException( + item.getUrn(), item.getAspectName(), "At least one rule is required.") + : null; + }) + .filter(Objects::nonNull); } @Override - protected void validatePreCommitAspect( - @Nonnull ChangeType changeType, - @Nonnull Urn entityUrn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate previousAspect, - @Nonnull RecordTemplate proposedAspect, - @Nonnull AspectRetriever aspectRetriever) - throws AspectValidationException { + protected Stream<AspectValidationException> validatePreCommitAspects( + @Nonnull Collection<ChangeMCP> changeMCPs, AspectRetriever aspectRetriever) { + return changeMCPs.stream() + .flatMap( + changeMCP -> { + if (changeMCP.getPreviousSystemAspect() != null) { + DataQualityRules oldRules = changeMCP.getPreviousAspect(DataQualityRules.class); + DataQualityRules newRules = changeMCP.getAspect(DataQualityRules.class); - if (previousAspect != null) { - DataQualityRules oldRules = new DataQualityRules(previousAspect.data()); - DataQualityRules newRules = new DataQualityRules(proposedAspect.data()); + Map<String, String> newFieldTypeMap = + newRules.getRules().stream() + .filter(rule -> rule.getField() != null) + .map(rule -> Map.entry(rule.getField(), rule.getType())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - Map<String, String> newFieldTypeMap = - newRules.getRules().stream() - .filter(rule -> rule.getField() != null) - .map(rule -> Map.entry(rule.getField(), rule.getType())) - 
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + // Ensure the old and new field type is the same + return oldRules.getRules().stream() + .map( + oldRule -> { + if (!newFieldTypeMap + .getOrDefault(oldRule.getField(), oldRule.getType()) + .equals(oldRule.getType())) { + return new AspectValidationException( + changeMCP.getUrn(), + changeMCP.getAspectName(), + String.format( + "Field type mismatch. Field: %s Old: %s New: %s", + oldRule.getField(), + oldRule.getType(), + newFieldTypeMap.get(oldRule.getField()))); + } + return null; + }) + .filter(Objects::nonNull); + } - // Ensure the old and new field type is the same - for (DataQualityRule oldRule : oldRules.getRules()) { - if (!newFieldTypeMap - .getOrDefault(oldRule.getField(), oldRule.getType()) - .equals(oldRule.getType())) { - throw new AspectValidationException( - String.format( - "Field type mismatch. Field: %s Old: %s New: %s", - oldRule.getField(), oldRule.getType(), newFieldTypeMap.get(oldRule.getField()))); - } - } - } + return Stream.empty(); + }); } } diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index fd6eab2d10d1a3..cf8eb738e2443f 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -94,6 +94,7 @@ entities: - structuredProperties - incidentsSummary - forms + - subTypes - name: dataProcess keyAspect: dataProcessKey aspects: @@ -555,9 +556,12 @@ plugins: enabled: true supportedOperations: - UPSERT + - DELETE supportedEntityAspectNames: - entityName: structuredProperty aspectName: propertyDefinition + - entityName: structuredProperty + aspectName: structuredPropertyKey - className: 'com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator' enabled: true supportedOperations: @@ -565,3 +569,9 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: structuredProperties + mutationHooks: + - className: 
'com.linkedin.metadata.aspect.hooks.StructuredPropertiesSoftDelete' + enabled: true + supportedEntityAspectNames: + - entityName: '*' + aspectName: structuredProperties \ No newline at end of file diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/user/NativeUserServiceTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/user/NativeUserServiceTest.java index 2b584c3461452e..60b10e3c53ef47 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/user/NativeUserServiceTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/user/NativeUserServiceTest.java @@ -109,7 +109,7 @@ public void testCreateNativeUserUserSystemUser() throws Exception { @Test public void testCreateNativeUserPasses() throws Exception { - when(_entityService.exists(any(), any())).thenReturn(false); + when(_entityService.exists(any(Urn.class), anyBoolean())).thenReturn(false); when(_secretService.generateSalt(anyInt())).thenReturn(SALT); when(_secretService.encrypt(any())).thenReturn(ENCRYPTED_SALT); when(_secretService.getHashedPassword(any(), any())).thenReturn(HASHED_PASSWORD); diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 617982e53e4d66..29dd32a375c5e8 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -230,7 +230,7 @@ elasticsearch: timeoutSeconds: ${ELASTICSEARCH_SEARCH_GRAPH_TIMEOUT_SECONDS:50} # graph dao timeout seconds batchSize: ${ELASTICSEARCH_SEARCH_GRAPH_BATCH_SIZE:1000} # graph dao batch size maxResult: ${ELASTICSEARCH_SEARCH_GRAPH_MAX_RESULT:10000} # graph dao max result size - enableMultiPathSearch: ${ELASTICSEARCH_SEARCH_GRAPH_MULTI_PATH_SEARCH:true} + enableMultiPathSearch: ${ELASTICSEARCH_SEARCH_GRAPH_MULTI_PATH_SEARCH:false} # TODO: Kafka topic convention kafka: @@ 
-315,7 +315,7 @@ systemUpdate: backOffFactor: ${BOOTSTRAP_SYSTEM_UPDATE_BACK_OFF_FACTOR:2} # Multiplicative factor for back off, default values will result in waiting 5min 15s waitForSystemUpdate: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:true} dataJobNodeCLL: - enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED:true} + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED:false} batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:200} browsePathsV2: enabled: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_ENABLED:true} @@ -400,7 +400,8 @@ cache: corpUserCredentials: 20 corpUserSettings: 20 structuredProperty: - propertyDefinition: 86400 # 1 day + status: 300 # 5 min + propertyDefinition: 300 # 5 min structuredPropertyKey: 86400 # 1 day chart: usageFeatures: 21600 # 6hrs diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index 2ccdee5fb1dbf5..0c7808abe538b1 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -6,9 +6,8 @@ import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.service.UpdateIndicesService; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; @@ -25,12 +24,11 @@ public class EntityServiceFactory { @Bean(name = "entityService") @DependsOn({"entityAspectDao", "kafkaEventProducer", 
"entityRegistry"}) @Nonnull - protected EntityService<MCPUpsertBatchItem> createInstance( + protected EntityService<ChangeItemImpl> createInstance( @Qualifier("kafkaEventProducer") final KafkaEventProducer eventProducer, @Qualifier("entityAspectDao") AspectDao aspectDao, EntityRegistry entityRegistry, ConfigurationProvider configurationProvider, - UpdateIndicesService updateIndicesService, @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2) { FeatureFlags featureFlags = configurationProvider.getFeatureFlags(); @@ -40,7 +38,6 @@ protected EntityService<MCPUpsertBatchItem> createInstance( eventProducer, entityRegistry, featureFlags.isAlwaysEmitChangeLog(), - updateIndicesService, featureFlags.getPreProcessHooks(), _ebeanMaxTransactionRetry, enableBrowsePathV2); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java index 31ad933b9579d1..db9d9c8e657f84 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java @@ -5,7 +5,7 @@ import com.linkedin.metadata.entity.RetentionService; import com.linkedin.metadata.entity.cassandra.CassandraRetentionService; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.spring.YamlPropertySourceFactory; import io.ebean.Database; import javax.annotation.Nonnull; @@ -24,7 +24,7 @@ public class RetentionServiceFactory { @Autowired @Qualifier("entityService") - private EntityService<MCPUpsertBatchItem> _entityService; + private EntityService<ChangeItemImpl> _entityService; @Value("${RETENTION_APPLICATION_BATCH_SIZE:1000}") 
private Integer _batchSize; @@ -33,8 +33,8 @@ public class RetentionServiceFactory { @DependsOn({"cassandraSession", "entityService"}) @ConditionalOnProperty(name = "entityService.impl", havingValue = "cassandra") @Nonnull - protected RetentionService<MCPUpsertBatchItem> createCassandraInstance(CqlSession session) { - RetentionService<MCPUpsertBatchItem> retentionService = + protected RetentionService<ChangeItemImpl> createCassandraInstance(CqlSession session) { + RetentionService<ChangeItemImpl> retentionService = new CassandraRetentionService<>(_entityService, session, _batchSize); _entityService.setRetentionService(retentionService); return retentionService; @@ -44,8 +44,8 @@ protected RetentionService<MCPUpsertBatchItem> createCassandraInstance(CqlSessio @DependsOn({"ebeanServer", "entityService"}) @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) @Nonnull - protected RetentionService<MCPUpsertBatchItem> createEbeanInstance(Database server) { - RetentionService<MCPUpsertBatchItem> retentionService = + protected RetentionService<ChangeItemImpl> createEbeanInstance(Database server) { + RetentionService<ChangeItemImpl> retentionService = new EbeanRetentionService<>(_entityService, server, _batchSize); _entityService.setRetentionService(retentionService); return retentionService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java index 34c1887d67c56f..d6b033e9268fc5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java @@ -1,19 +1,17 @@ package com.linkedin.gms.factory.entity.update.indices; -import 
com.linkedin.entity.client.SystemEntityClient; import com.linkedin.gms.factory.search.EntityIndexBuildersFactory; -import com.linkedin.metadata.client.EntityClientAspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.ApplicationContext; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -21,20 +19,45 @@ @Configuration @Import(EntityIndexBuildersFactory.class) public class UpdateIndicesServiceFactory { - @Autowired private ApplicationContext context; - - @Value("${entityClient.impl:java}") - private String entityClientImpl; + /* + When restli mode the EntityService is not available. 
Wire in an AspectRetriever here instead + based on the entity client + */ @Bean - public UpdateIndicesService updateIndicesService( + @ConditionalOnProperty(name = "entityClient.impl", havingValue = "restli") + public UpdateIndicesService searchIndicesServiceNonGMS( GraphService graphService, EntitySearchService entitySearchService, TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, - EntityRegistry entityRegistry, SearchDocumentTransformer searchDocumentTransformer, - EntityIndexBuilders entityIndexBuilders) { + EntityIndexBuilders entityIndexBuilders, + @Qualifier("cachingAspectRetriever") final CachingAspectRetriever aspectRetriever) { + + UpdateIndicesService updateIndicesService = + new UpdateIndicesService( + graphService, + entitySearchService, + timeseriesAspectService, + systemMetadataService, + searchDocumentTransformer, + entityIndexBuilders); + updateIndicesService.initializeAspectRetriever(aspectRetriever); + + return updateIndicesService; + } + + @Bean + @ConditionalOnProperty(name = "entityClient.impl", havingValue = "java", matchIfMissing = true) + public UpdateIndicesService searchIndicesServiceGMS( + final GraphService graphService, + final EntitySearchService entitySearchService, + final TimeseriesAspectService timeseriesAspectService, + final SystemMetadataService systemMetadataService, + final SearchDocumentTransformer searchDocumentTransformer, + final EntityIndexBuilders entityIndexBuilders, + final EntityService<?> entityService) { UpdateIndicesService updateIndicesService = new UpdateIndicesService( @@ -45,18 +68,8 @@ public UpdateIndicesService updateIndicesService( searchDocumentTransformer, entityIndexBuilders); - if ("restli".equals(entityClientImpl)) { - /* - When restli mode the EntityService is not available. 
Wire in an AspectRetriever here instead - based on the entity client - */ - SystemEntityClient systemEntityClient = context.getBean(SystemEntityClient.class); - updateIndicesService.initializeAspectRetriever( - EntityClientAspectRetriever.builder() - .entityRegistry(entityRegistry) - .entityClient(systemEntityClient) - .build()); - } + updateIndicesService.initializeAspectRetriever(entityService); + entityService.setUpdateIndicesService(updateIndicesService); return updateIndicesService; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/AspectRetrieverFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/AspectRetrieverFactory.java new file mode 100644 index 00000000000000..5892e1ec4e4204 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/AspectRetrieverFactory.java @@ -0,0 +1,27 @@ +package com.linkedin.gms.factory.entityclient; + +import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.client.EntityClientAspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class AspectRetrieverFactory { + + @Bean(name = "cachingAspectRetriever") + @Nonnull + protected CachingAspectRetriever cachingAspectRetriever( + final EntityRegistry entityRegistry, + @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient) { + return EntityClientAspectRetriever.builder() + .entityRegistry(entityRegistry) + .entityClient(systemEntityClient) + .build(); + } +} diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index bc82df9f8cdadc..15bf674581b6a8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -172,7 +172,7 @@ public class GraphQLEngineFactory { @Bean(name = "graphQLEngine") @Nonnull - protected GraphQLEngine getInstance( + protected GraphQLEngine graphQLEngine( @Qualifier("entityClient") final EntityClient entityClient, @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient) { GmsGraphQLEngineArgs args = new GmsGraphQLEngineArgs(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index 7b5f4e18d4d539..7649e3a1ada425 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -63,7 +63,7 @@ public class ElasticSearchServiceFactory { @Bean(name = "elasticSearchService") @Nonnull - protected ElasticSearchService getInstance(ConfigurationProvider configurationProvider) + protected ElasticSearchService getInstance(final ConfigurationProvider configurationProvider) throws IOException { log.info("Search configuration: {}", configurationProvider.getElasticSearch().getSearch()); @@ -77,7 +77,6 @@ protected ElasticSearchService getInstance(ConfigurationProvider configurationPr ESSearchDAO esSearchDAO = new ESSearchDAO( - entityRegistry, components.getSearchClient(), components.getIndexConvention(), 
configurationProvider.getFeatureFlags().isPointInTimeCreationEnabled(), @@ -88,7 +87,6 @@ protected ElasticSearchService getInstance(ConfigurationProvider configurationPr entityIndexBuilders, esSearchDAO, new ESBrowseDAO( - entityRegistry, components.getSearchClient(), components.getIndexConvention(), searchConfiguration, diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index 19efa5e9c4de20..9434928e1bfa03 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -11,7 +11,7 @@ import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.EntityKeyUtils; @@ -65,7 +65,7 @@ public void execute() throws Exception { start, start + BATCH_SIZE); - List<MCPUpsertBatchItem> items = new LinkedList<>(); + List<ChangeItemImpl> items = new LinkedList<>(); final AuditStamp aspectAuditStamp = new AuditStamp() .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) @@ -76,7 +76,7 @@ public void execute() throws Exception { Optional<DataPlatformInstance> dataPlatformInstance = getDataPlatformInstance(urn); if (dataPlatformInstance.isPresent()) { items.add( - MCPUpsertBatchItem.builder() + ChangeItemImpl.builder() .urn(urn) .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) .recordTemplate(dataPlatformInstance.get()) @@ -85,7 +85,10 
@@ public void execute() throws Exception { } } - _entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); + _entityService.ingestAspects( + AspectsBatchImpl.builder().aspectRetriever(_entityService).items(items).build(), + true, + true); log.info( "Finished ingesting DataPlatformInstance for urn {} to {}", start, start + BATCH_SIZE); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java index d2bb61ad7ade5d..11e86241e216a1 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java @@ -13,7 +13,7 @@ import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import java.io.IOException; import java.net.URISyntaxException; import java.util.List; @@ -62,7 +62,7 @@ public void execute() throws IOException, URISyntaxException { } // 2. For each JSON object, cast into a DataPlatformSnapshot object. 
- List<MCPUpsertBatchItem> dataPlatformAspects = + List<ChangeItemImpl> dataPlatformAspects = StreamSupport.stream( Spliterators.spliteratorUnknownSize(dataPlatforms.iterator(), Spliterator.ORDERED), false) @@ -83,7 +83,7 @@ public void execute() throws IOException, URISyntaxException { DataPlatformInfo.class, dataPlatform.get("aspect").toString()); try { - return MCPUpsertBatchItem.builder() + return ChangeItemImpl.builder() .urn(urn) .aspectName(PLATFORM_ASPECT_NAME) .recordTemplate(info) @@ -99,6 +99,11 @@ public void execute() throws IOException, URISyntaxException { .collect(Collectors.toList()); _entityService.ingestAspects( - AspectsBatchImpl.builder().items(dataPlatformAspects).build(), true, false); + AspectsBatchImpl.builder() + .aspectRetriever(_entityService) + .items(dataPlatformAspects) + .build(), + true, + false); } } diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java index 5617d7e9714b08..b9cbf2abe06730 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java @@ -8,7 +8,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -122,7 +122,7 @@ public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throw item.getUrn().getEntityType().equals("chart") && 
item.getAspectName() .equals(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - && ((MCPUpsertBatchItem) item).getRecordTemplate() + && ((ChangeItemImpl) item).getRecordTemplate() instanceof DataPlatformInstance)), anyBoolean(), anyBoolean()); @@ -136,7 +136,7 @@ public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throw item.getUrn().getEntityType().equals("chart") && item.getAspectName() .equals(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - && ((MCPUpsertBatchItem) item).getRecordTemplate() + && ((ChangeItemImpl) item).getRecordTemplate() instanceof DataPlatformInstance)), anyBoolean(), anyBoolean()); diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java index 39a7e4722988e1..1e375f90fc38a9 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java @@ -179,7 +179,7 @@ public <AQ, AR> ResponseEntity<AR> createAspect( public ResponseEntity<Void> headAspect(String urn, String aspect) { try { Urn entityUrn = Urn.createFromString(urn); - if (_entityService.exists(entityUrn, aspect)) { + if (_entityService.exists(entityUrn, aspect, true)) { return new ResponseEntity<>(HttpStatus.NO_CONTENT); } else { return new ResponseEntity<>(HttpStatus.NOT_FOUND); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java index ff65db09c2682f..63e78c30383af3 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java +++ 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java @@ -17,7 +17,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; import io.datahubproject.openapi.dto.RollbackRunResultDto; @@ -65,7 +65,7 @@ description = "APIs for ingesting and accessing entities and their constituent aspects") public class EntitiesController { - private final EntityService<MCPUpsertBatchItem> _entityService; + private final EntityService<ChangeItemImpl> _entityService; private final ObjectMapper _objectMapper; private final AuthorizerChain _authorizerChain; diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java index 3cc67e77ec27e1..7193da3bf85878 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java @@ -9,7 +9,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.search.client.CachingEntitySearchService; import com.linkedin.util.Pair; import io.datahubproject.openapi.exception.UnauthorizedException; @@ -45,7 +45,7 @@ description = "Platform 
level APIs intended for lower level access to entities") public class PlatformEntitiesController { - private final EntityService<MCPUpsertBatchItem> _entityService; + private final EntityService<ChangeItemImpl> _entityService; private final CachingEntitySearchService _cachingEntitySearchService; private final ObjectMapper _objectMapper; private final AuthorizerChain _authorizerChain; diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index 13d2e501abf09f..6b31159a206652 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -30,7 +30,7 @@ import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.RollbackRunResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -441,7 +441,7 @@ public static boolean authorizeProposals( public static Pair<String, Boolean> ingestProposal( com.linkedin.mxe.MetadataChangeProposal serviceProposal, String actorUrn, - EntityService<MCPUpsertBatchItem> entityService, + EntityService<ChangeItemImpl> entityService, boolean async) { // TODO: Use the actor present in the IC. 
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 44202c20ca6db7..656d6542483cf3 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -14,14 +14,14 @@ import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -306,12 +306,17 @@ public ResponseEntity<GenericEntity> createAspect( } AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); - UpsertItem upsert = + ChangeMCP upsert = toUpsertItem(UrnUtils.getUrn(entityUrn), aspectSpec, jsonAspect, authentication.getActor()); List<UpdateAspectResult> results = entityService.ingestAspects( - AspectsBatchImpl.builder().items(List.of(upsert)).build(), true, true); + AspectsBatchImpl.builder() + .aspectRetriever(entityService) + .items(List.of(upsert)) + .build(), + true, + true); return ResponseEntity.of( 
results.stream() @@ -371,7 +376,7 @@ public ResponseEntity<GenericEntity> patchAspect( .templateDefault( aspectSpec.getDataTemplateClass().getDeclaredConstructor().newInstance()) .build(); - UpsertItem upsert = + ChangeMCP upsert = toUpsertItem( UrnUtils.getUrn(entityUrn), aspectSpec, @@ -381,7 +386,12 @@ public ResponseEntity<GenericEntity> patchAspect( List<UpdateAspectResult> results = entityService.ingestAspects( - AspectsBatchImpl.builder().items(List.of(upsert)).build(), true, true); + AspectsBatchImpl.builder() + .aspectRetriever(entityService) + .items(List.of(upsert)) + .build(), + true, + true); return ResponseEntity.of( results.stream() @@ -409,7 +419,9 @@ private List<GenericEntity> toRecordTemplates( } private Boolean exists(Urn urn, @Nullable String aspect) { - return aspect == null ? entityService.exists(urn, true) : entityService.exists(urn, aspect); + return aspect == null + ? entityService.exists(urn, true) + : entityService.exists(urn, aspect, true); } private List<GenericEntity> toRecordTemplates( @@ -474,10 +486,10 @@ private RecordTemplate toRecordTemplate(AspectSpec aspectSpec, EnvelopedAspect e aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data()); } - private UpsertItem toUpsertItem( + private ChangeMCP toUpsertItem( Urn entityUrn, AspectSpec aspectSpec, String jsonAspect, Actor actor) throws URISyntaxException { - return MCPUpsertBatchItem.builder() + return ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectSpec.getName()) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) @@ -489,14 +501,14 @@ private UpsertItem toUpsertItem( .build(entityService); } - private UpsertItem toUpsertItem( + private ChangeMCP toUpsertItem( @Nonnull Urn urn, @Nonnull AspectSpec aspectSpec, @Nullable RecordTemplate currentValue, @Nonnull GenericPatchTemplate<? 
extends RecordTemplate> genericPatchTemplate, @Nonnull Actor actor) throws URISyntaxException { - return MCPUpsertBatchItem.fromPatch( + return ChangeItemImpl.fromPatch( urn, aspectSpec, currentValue, diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 20862bbc7f000d..4cecfe21281995 100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -17,7 +17,6 @@ import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.service.UpdateIndicesService; import io.datahubproject.openapi.dto.UpsertAspectRequest; import io.datahubproject.openapi.entities.EntitiesController; import io.datahubproject.openapi.generated.AuditStamp; @@ -79,16 +78,11 @@ public void setup() .apply(Mockito.mock(Transaction.class)))); EventProducer mockEntityEventProducer = Mockito.mock(EventProducer.class); - UpdateIndicesService mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); MockEntityService mockEntityService = new MockEntityService( - aspectDao, - mockEntityEventProducer, - mockEntityRegistry, - mockUpdateIndicesService, - preProcessHooks); + aspectDao, mockEntityEventProducer, mockEntityRegistry, preProcessHooks); AuthorizerChain authorizerChain = Mockito.mock(AuthorizerChain.class); _entitiesController = new EntitiesController(mockEntityService, new ObjectMapper(), authorizerChain); diff --git a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java index be5f99bed8e630..8ed7c397c5ba4b 100644 --- 
a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java +++ b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java @@ -1,6 +1,7 @@ package mock; import static entities.EntitiesControllerTest.*; +import static org.mockito.Mockito.mock; import com.linkedin.common.AuditStamp; import com.linkedin.common.GlobalTags; @@ -57,9 +58,9 @@ public MockEntityService( @Nonnull AspectDao aspectDao, @Nonnull EventProducer producer, @Nonnull EntityRegistry entityRegistry, - @Nonnull UpdateIndicesService updateIndicesService, PreProcessHooks preProcessHooks) { - super(aspectDao, producer, entityRegistry, true, updateIndicesService, preProcessHooks, true); + super(aspectDao, producer, entityRegistry, true, preProcessHooks, true); + setUpdateIndicesService(mock(UpdateIndicesService.class)); } @Override @@ -69,7 +70,7 @@ public Map<Urn, List<RecordTemplate>> getLatestAspects( } @Override - public Map<String, RecordTemplate> getLatestAspectsForUrn( + public @NotNull Map<String, RecordTemplate> getLatestAspectsForUrn( @Nonnull Urn urn, @Nonnull Set<String> aspectNames) { return Collections.emptyMap(); } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index 676b80c8bea32f..65169344776b7b 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -12,6 +12,7 @@ import com.linkedin.entity.Aspect; import com.linkedin.entity.Entity; import com.linkedin.entity.EntityResponse; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.browse.BrowseResult; @@ -44,6 +45,9 @@ // Consider renaming this to datahub client. 
public interface EntityClient { + /** Perform post construction asks if needed. Can be used to break circular dependencies */ + default void postConstruct(AspectRetriever aspectRetriever) {} + @Nullable public EntityResponse getV2( @Nonnull String entityName, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 21a9f47a13f738..8658096b174378 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -18,14 +18,10 @@ import com.linkedin.metadata.aspect.EnvelopedAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.authorization.PoliciesConfig; -import com.linkedin.metadata.entity.AspectUtils; -import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.entity.ebean.batch.MCLBatchItemImpl; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -52,8 +48,6 @@ import java.time.Clock; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.inject.Inject; @@ -83,10 +77,10 @@ public class AspectResource extends CollectionResourceTaskTemplate<String, Versi @Inject @Named("entityService") - private 
EntityService<MCPUpsertBatchItem> _entityService; + private EntityService<?> _entityService; @VisibleForTesting - void setEntityService(EntityService<MCPUpsertBatchItem> entityService) { + void setEntityService(EntityService<?> entityService) { _entityService = entityService; } diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index 17c51604947223..62edb9fdfa6281 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -20,7 +20,7 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.UpdateAspectResult; -import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -35,28 +35,29 @@ import org.testng.annotations.Test; public class AspectResourceTest { - private AspectResource _aspectResource; - private EntityService _entityService; - private AspectDao _aspectDao; - private EventProducer _producer; - private EntityRegistry _entityRegistry; - private UpdateIndicesService _updateIndicesService; - private PreProcessHooks _preProcessHooks; - private Authorizer _authorizer; + private AspectResource aspectResource; + private EntityService<?> entityService; + private AspectDao aspectDao; + private EventProducer producer; + private EntityRegistry entityRegistry; + private UpdateIndicesService updateIndicesService; + private PreProcessHooks preProcessHooks; + private Authorizer 
authorizer; @BeforeTest public void setup() { - _aspectResource = new AspectResource(); - _aspectDao = mock(AspectDao.class); - _producer = mock(EventProducer.class); - _entityRegistry = new MockEntityRegistry(); - _updateIndicesService = mock(UpdateIndicesService.class); - _preProcessHooks = mock(PreProcessHooks.class); - _entityService = new EntityServiceImpl(_aspectDao, _producer, _entityRegistry, false, - _updateIndicesService, _preProcessHooks, true); - _authorizer = mock(Authorizer.class); - _aspectResource.setAuthorizer(_authorizer); - _aspectResource.setEntityService(_entityService); + aspectResource = new AspectResource(); + aspectDao = mock(AspectDao.class); + producer = mock(EventProducer.class); + entityRegistry = new MockEntityRegistry(); + updateIndicesService = mock(UpdateIndicesService.class); + preProcessHooks = mock(PreProcessHooks.class); + entityService = new EntityServiceImpl(aspectDao, producer, entityRegistry, false, + preProcessHooks, true); + entityService.setUpdateIndicesService(updateIndicesService); + authorizer = mock(Authorizer.class); + aspectResource.setAuthorizer(authorizer); + aspectResource.setEntityService(entityService); } @Test @@ -74,21 +75,21 @@ public void testAsyncDefaultAspects() throws URISyntaxException { AuthenticationContext.setAuthentication(mockAuthentication); Actor actor = new Actor(ActorType.USER, "user"); when(mockAuthentication.getActor()).thenReturn(actor); - _aspectResource.ingestProposal(mcp, "true"); - verify(_producer, times(1)).produceMetadataChangeProposal(urn, mcp); - verifyNoMoreInteractions(_producer); - verifyNoMoreInteractions(_aspectDao); + aspectResource.ingestProposal(mcp, "true"); + verify(producer, times(1)).produceMetadataChangeProposal(urn, mcp); + verifyNoMoreInteractions(producer); + verifyNoMoreInteractions(aspectDao); - reset(_producer, _aspectDao); + reset(producer, aspectDao); - MCPUpsertBatchItem req = MCPUpsertBatchItem.builder() + ChangeItemImpl req = ChangeItemImpl.builder() 
.urn(urn) .aspectName(mcp.getAspectName()) .recordTemplate(mcp.getAspect()) .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) - .build(_entityService); - when(_aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) + .build(entityService); + when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( List.of(List.of( UpdateAspectResult.builder() @@ -121,9 +122,9 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .auditStamp(new AuditStamp()) .request(req) .build()))); - _aspectResource.ingestProposal(mcp, "false"); - verify(_producer, times(10)) + aspectResource.ingestProposal(mcp, "false"); + verify(producer, times(10)) .produceMetadataChangeLog(eq(urn), any(AspectSpec.class), any(MetadataChangeLog.class)); - verifyNoMoreInteractions(_producer); + verifyNoMoreInteractions(producer); } } diff --git a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java index 5187cba0b91510..9d33551fa2af0b 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java +++ b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -39,6 +40,11 @@ public MockTimeseriesAspectService(long count, long filteredCount, String taskId this._taskId = taskId; } + @Override + public TimeseriesAspectService postConstruct(AspectRetriever aspectRetriever) { + return this; + } + @Override public void configure() {} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index d9b0f4b73d5805..1b4b65baeecd62 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -8,10 +8,10 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.aspect.batch.UpsertItem; -import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.metadata.models.AspectSpec; @@ -33,16 +33,60 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; -public interface EntityService<U extends UpsertItem> extends AspectRetriever { +public interface EntityService<U extends ChangeMCP> extends AspectRetriever { /** * Just whether the entity/aspect exists * + * @param urns urns for the entities + * @param aspectName aspect for the entity, if null, assumes key aspect + * @param includeSoftDelete including soft deleted entities + * @return set of urns with the specified aspect existing + */ + Set<Urn> exists( + @Nonnull final Collection<Urn> urns, @Nullable String aspectName, boolean includeSoftDelete); + + /** + * Just whether the entity/aspect exists, prefer batched method. 
+ * * @param urn urn for the entity - * @param aspectName aspect for the entity - * @return exists or not + * @param aspectName aspect for the entity, if null use the key aspect + * @param includeSoftDelete including soft deleted entities + * @return boolean if the entity/aspect exists + */ + default boolean exists(@Nonnull Urn urn, @Nullable String aspectName, boolean includeSoftDelete) { + return exists(Set.of(urn), aspectName, includeSoftDelete).contains(urn); + } + + /** + * Returns a set of urns of entities that exist (has materialized aspects). + * + * @param urns the list of urns of the entities to check + * @return a set of urns of entities that exist. */ - Boolean exists(Urn urn, String aspectName); + default Set<Urn> exists(@Nonnull final Collection<Urn> urns, boolean includeSoftDelete) { + return exists(urns, null, includeSoftDelete); + } + + /** + * Returns a set of urns of entities that exist (has materialized aspects). + * + * @param urns the list of urns of the entities to check + * @return a set of urns of entities that exist. + */ + default Set<Urn> exists(@Nonnull final Collection<Urn> urns) { + return exists(urns, true); + } + + /** + * Returns whether the urn of the entity exists (has materialized aspects). + * + * @param urn the urn of the entity to check + * @return entities exists. + */ + default boolean exists(@Nonnull Urn urn, boolean includeSoftDelete) { + return exists(List.of(urn), includeSoftDelete).contains(urn); + } /** * Retrieves the latest aspects corresponding to a batch of {@link Urn}s based on a provided set @@ -285,29 +329,11 @@ RollbackRunResult rollbackWithConditions( IngestResult ingestProposal( MetadataChangeProposal proposal, AuditStamp auditStamp, final boolean async); - /** - * Returns a set of urns of entities that exist (has materialized aspects). - * - * @param urns the list of urns of the entities to check - * @return a set of urns of entities that exist. 
- */ - Set<Urn> exists(@Nonnull final Collection<Urn> urns, boolean includeSoftDelete); - - /** - * Returns a set of urns of entities that exist (has materialized aspects). - * - * @param urns the list of urns of the entities to check - * @return a set of urns of entities that exist. - */ - default Set<Urn> exists(@Nonnull final Collection<Urn> urns) { - return exists(urns, true); - } - - default boolean exists(@Nonnull Urn urn, boolean includeSoftDelete) { - return exists(List.of(urn), includeSoftDelete).contains(urn); - } - void setWritable(boolean canWrite); RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName); + + SearchIndicesService getUpdateIndicesService(); + + void setUpdateIndicesService(@Nullable SearchIndicesService updateIndicesService); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java index ae33b72010ce2a..ef30e4c82046eb 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java @@ -8,7 +8,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; import com.linkedin.metadata.key.DataHubRetentionKey; @@ -37,7 +37,7 @@ * storage and retention concerns apart, let AspectDaos deal with storage, and merge all retention * concerns into a single class. 
*/ -public abstract class RetentionService<U extends UpsertItem> { +public abstract class RetentionService<U extends ChangeMCP> { protected static final String ALL = "*"; protected abstract EntityService<U> getEntityService(); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java new file mode 100644 index 00000000000000..def5bb2730ba84 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java @@ -0,0 +1,11 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.mxe.MetadataChangeLog; +import javax.annotation.Nonnull; + +public interface SearchIndicesService { + void handleChangeEvent(@Nonnull MetadataChangeLog metadataChangeLog); + + void initializeAspectRetriever(@Nonnull AspectRetriever aspectRetriever); +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java index 515e08646f9ed3..e85e0567f963ba 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java @@ -3,7 +3,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.mxe.MetadataAuditOperation; import com.linkedin.mxe.SystemMetadata; import java.util.concurrent.Future; @@ -14,7 +14,7 @@ @Value public class UpdateAspectResult { Urn urn; - UpsertItem request; + ChangeMCP request; RecordTemplate oldValue; RecordTemplate newValue; SystemMetadata 
oldSystemMetadata; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index 5f047886b56fa9..03b5c7f5547e7b 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.query.AutoCompleteResult; @@ -15,6 +16,13 @@ public interface EntitySearchService { + /** + * Set aspect retriever after construction to prevent circular dependencies + * + * @param aspectRetriever + */ + EntitySearchService postConstruct(AspectRetriever aspectRetriever); + void configure(); /** Clear all data within the service */ diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java index 529e8e00ecf570..77fa2720a68be1 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -16,6 +17,13 @@ public interface TimeseriesAspectService { + /** + * Set aspect retriever after construction to prevent circular 
dependencies + * + * @param aspectRetriever + */ + TimeseriesAspectService postConstruct(AspectRetriever aspectRetriever); + /** Configure the Time-Series aspect service one time at boot-up. */ void configure(); diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index 27aa9ee04cc756..afaeb9c81039bf 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -6,6 +6,7 @@ import com.datahub.gms.util.CSVWriter; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -38,13 +39,14 @@ private ConfigurationProvider getConfigProvider(WebApplicationContext ctx) { return (ConfigurationProvider) ctx.getBean("configurationProvider"); } - private EntityRegistry getEntityRegistry(WebApplicationContext ctx) { - return (EntityRegistry) ctx.getBean("entityRegistry"); + private AspectRetriever getAspectRetriever(WebApplicationContext ctx) { + return (AspectRetriever) ctx.getBean("aspectRetriever"); } private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { SearchConfiguration searchConfiguration = getConfigProvider(ctx).getElasticSearch().getSearch(); - EntityRegistry entityRegistry = getEntityRegistry(ctx); + AspectRetriever aspectRetriever = getAspectRetriever(ctx); + EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); CSVWriter writer = CSVWriter.builder().printWriter(pw).build(); @@ -79,7 +81,8 @@ private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { 
entitySpecOpt -> { EntitySpec entitySpec = entitySpecOpt.get(); SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, null) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, null, aspectRetriever) .getSearchRequest( "*", null, diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index a6f3cd793ddd63..3cba93c452a101 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -72,3 +72,29 @@ task lintFix(type: Exec, dependsOn: installDev) { "ruff --fix tests/ && " + "mypy tests/" } + +/** + * The following tasks assume an already running quickstart. + * ./gradlew quickstart (or another variation) + */ +task quickstartNoCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { + environment 'RUN_QUICKSTART', 'false' + environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' + environment 'TEST_STRATEGY', 'no_cypress_suite0' + + workingDir = project.projectDir + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "./smoke.sh" +} + +task quickstartNoCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { + environment 'RUN_QUICKSTART', 'false' + environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' + environment 'TEST_STRATEGY', 'no_cypress_suite1' + + workingDir = project.projectDir + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "./smoke.sh" +} diff --git a/smoke-test/tests/lineage/test_lineage.py b/smoke-test/tests/lineage/test_lineage.py index 9cd98d1245bbbb..a24a700593378f 100644 --- a/smoke-test/tests/lineage/test_lineage.py +++ b/smoke-test/tests/lineage/test_lineage.py @@ -8,11 +8,8 @@ import pytest from datahub.cli.cli_utils import get_url_and_token from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import ( - DatahubClientConfig, - 
DataHubGraph, - get_default_graph, -) +from datahub.ingestion.graph.client import DataHubGraph # get_default_graph, +from datahub.ingestion.graph.client import DatahubClientConfig from datahub.metadata.schema_classes import ( AuditStampClass, ChangeAuditStampsClass, @@ -959,32 +956,33 @@ def ingest_multipath_metadata( wait_for_writes_to_sync() -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_simple_lineage_multiple_paths( - ingest_multipath_metadata, - chart_urn_fixture, - intermediates_fixture, - destination_urn_fixture, -): - chart_urn = chart_urn_fixture - intermediates = intermediates_fixture - destination_urn = destination_urn_fixture - results = search_across_lineage( - get_default_graph(), - chart_urn, - direction="UPSTREAM", - convert_schema_fields_to_datasets=True, - ) - assert destination_urn in [ - x["entity"]["urn"] for x in results["searchAcrossLineage"]["searchResults"] - ] - for search_result in results["searchAcrossLineage"]["searchResults"]: - if search_result["entity"]["urn"] == destination_urn: - assert ( - len(search_result["paths"]) == 2 - ) # 2 paths from the chart to the dataset - for path in search_result["paths"]: - assert len(path["path"]) == 3 - assert path["path"][-1]["urn"] == destination_urn - assert path["path"][0]["urn"] == chart_urn - assert path["path"][1]["urn"] in intermediates +# TODO: Reenable once fixed +# @pytest.mark.dependency(depends=["test_healthchecks"]) +# def test_simple_lineage_multiple_paths( +# ingest_multipath_metadata, +# chart_urn_fixture, +# intermediates_fixture, +# destination_urn_fixture, +# ): +# chart_urn = chart_urn_fixture +# intermediates = intermediates_fixture +# destination_urn = destination_urn_fixture +# results = search_across_lineage( +# get_default_graph(), +# chart_urn, +# direction="UPSTREAM", +# convert_schema_fields_to_datasets=True, +# ) +# assert destination_urn in [ +# x["entity"]["urn"] for x in results["searchAcrossLineage"]["searchResults"] +# ] +# for search_result in 
results["searchAcrossLineage"]["searchResults"]: +# if search_result["entity"]["urn"] == destination_urn: +# assert ( +# len(search_result["paths"]) == 2 +# ) # 2 paths from the chart to the dataset +# for path in search_result["paths"]: +# assert len(path["path"]) == 3 +# assert path["path"][-1]["urn"] == destination_urn +# assert path["path"][0]["urn"] == chart_urn +# assert path["path"][1]["urn"] in intermediates diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index de85d2af95e034..49472056a280a8 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -182,12 +182,9 @@ def get_property_from_entity( # ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_string(ingest_cleanup_data, graph): - property_name = "retentionPolicy" + property_name = f"retention{randint(10, 10000)}Policy" create_property_definition(property_name, graph) - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.retentionPolicy" - ) attach_property_to_entity(dataset_urns[0], property_name, ["30d"], graph=graph) @@ -209,10 +206,8 @@ def test_structured_property_string(ingest_cleanup_data, graph): # ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_double(ingest_cleanup_data, graph): - property_name = "expiryTime" - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.{property_name}" - ) + property_name = f"expiryTime{randint(10, 10000)}" + create_property_definition(property_name, graph, value_type="number") attach_property_to_entity(dataset_urns[0], property_name, 2000034, graph=graph) @@ -248,10 +243,7 @@ def test_structured_property_double(ingest_cleanup_data, graph): # ) @pytest.mark.dependency(depends=["test_healthchecks"]) def 
test_structured_property_double_multiple(ingest_cleanup_data, graph): - property_name = "versions" - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.{property_name}" - ) + property_name = f"versions{randint(10, 10000)}" create_property_definition( property_name, graph, value_type="number", cardinality="MULTIPLE" @@ -266,10 +258,7 @@ def test_structured_property_double_multiple(ingest_cleanup_data, graph): # ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): - property_name = "enumProperty" - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.{property_name}" - ) + property_name = f"enumProperty{randint(10, 10000)}" create_property_definition( property_name, @@ -302,7 +291,7 @@ def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_definition_evolution(ingest_cleanup_data, graph): - property_name = "enumProperty1234" + property_name = f"enumProperty{randint(10, 10000)}" create_property_definition( property_name, @@ -314,9 +303,6 @@ def test_structured_property_definition_evolution(ingest_cleanup_data, graph): PropertyValueClass(value="bar"), ], ) - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.{property_name}" - ) try: create_property_definition( @@ -354,9 +340,6 @@ def test_structured_property_schema_field(ingest_cleanup_data, graph): value_type="date", entity_types=["schemaField"], ) - generated_urns.append( - f"urn:li:structuredProperty:io.datahubproject.test.{property_name}" - ) attach_property_to_entity( schema_field_urns[0], @@ -425,16 +408,13 @@ def test_dataset_yaml_loader(ingest_cleanup_data, graph): def test_dataset_structured_property_validation(ingest_cleanup_data, graph, caplog): from datahub.api.entities.dataset.dataset import Dataset - property_name = "replicationSLA" + property_name = 
f"replicationSLA{randint(10, 10000)}" property_value = 30 value_type = "number" create_property_definition( property_name=property_name, graph=graph, value_type=value_type ) - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.replicationSLA" - ) attach_property_to_entity( dataset_urns[0], property_name, [property_value], graph=graph @@ -470,9 +450,6 @@ def to_es_name(property_name, namespace=default_namespace): value_type="date", entity_types=["schemaField"], ) - generated_urns.append( - f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}" - ) attach_property_to_entity( schema_field_urns[0], @@ -481,16 +458,13 @@ def to_es_name(property_name, namespace=default_namespace): graph=graph, namespace="io.datahubproject.test", ) - dataset_property_name = "replicationSLA" + dataset_property_name = f"replicationSLA{randint(10, 10000)}" property_value = 30 value_type = "number" create_property_definition( property_name=dataset_property_name, graph=graph, value_type=value_type ) - generated_urns.append( - f"urn:li:structuredProperty:{default_namespace}.{dataset_property_name}" - ) attach_property_to_entity( dataset_urns[0], dataset_property_name, [property_value], graph=graph @@ -558,37 +532,245 @@ def to_es_name(property_name, namespace=default_namespace): assert dataset_urns[0] in field_urns -@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness") def test_dataset_structured_property_patch(ingest_cleanup_data, graph, caplog): - property_name = "replicationSLA" - property_value = 30 + # Create 1st Property + property_name = f"replicationSLA{randint(10, 10000)}" + property_value1 = 30.0 + property_value2 = 100.0 value_type = "number" + cardinality = "MULTIPLE" + + create_property_definition( + property_name=property_name, + graph=graph, + value_type=value_type, + cardinality=cardinality, + ) + + # Create 2nd Property + property_name_other = f"replicationSLAOther{randint(10, 10000)}" + 
property_value_other = 200.0 + create_property_definition( + property_name=property_name_other, + graph=graph, + value_type=value_type, + cardinality=cardinality, + ) + + def patch_one(prop_name, prop_value): + dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder(urn=dataset_urns[0]) + dataset_patcher.set_structured_property( + StructuredPropertyUrn.make_structured_property_urn( + f"{default_namespace}.{prop_name}" + ), + prop_value, + ) + + for mcp in dataset_patcher.build(): + graph.emit(mcp) + wait_for_writes_to_sync() + + # Add 1 value for property 1 + patch_one(property_name, property_value1) + + actual_property_values = get_property_from_entity( + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + ) + assert actual_property_values == [property_value1] + + # Add 1 value for property 2 + patch_one(property_name_other, property_value_other) + + actual_property_values = get_property_from_entity( + dataset_urns[0], f"{default_namespace}.{property_name_other}", graph=graph + ) + assert actual_property_values == [property_value_other] + + # Add 2 values to property 1 + patch_one(property_name, [property_value1, property_value2]) + + actual_property_values = set( + get_property_from_entity( + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + ) + ) + assert actual_property_values == {property_value1, property_value2} + + # Validate property 2 is the same + actual_property_values = get_property_from_entity( + dataset_urns[0], f"{default_namespace}.{property_name_other}", graph=graph + ) + assert actual_property_values == [property_value_other] + + +def test_dataset_structured_property_hard_delete(ingest_cleanup_data, graph, caplog): + property_name = f"hardDeleteTest{randint(10, 10000)}Property" + value_type = "string" + property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" create_property_definition( property_name=property_name, graph=graph, value_type=value_type ) - dataset_patcher: 
DatasetPatchBuilder = DatasetPatchBuilder(urn=dataset_urns[0]) + test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) + assert test_property is not None - dataset_patcher.set_structured_property( - StructuredPropertyUrn.make_structured_property_urn( - f"{default_namespace}.{property_name}" - ), - property_value, + try: + graph.hard_delete_entity(urn=property_urn) + raise AssertionError("Should not be able to HARD delete structured property") + except Exception as e: + if "Hard delete of Structured Property Definitions is not supported" in str(e): + pass + else: + raise e + + +def test_dataset_structured_property_soft_delete_validation( + ingest_cleanup_data, graph, caplog +): + property_name = f"softDeleteTest{randint(10, 10000)}Property" + value_type = "string" + property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" + + create_property_definition( + property_name=property_name, + graph=graph, + value_type=value_type, + cardinality="SINGLE", ) - for mcp in dataset_patcher.build(): - graph.emit(mcp) + test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) + assert test_property is not None + + graph.soft_delete_entity(urn=property_urn) + + # Attempt to modify soft deleted definition + try: + create_property_definition( + property_name=property_name, + graph=graph, + value_type=value_type, + cardinality="SINGLE", + ) + raise AssertionError( + "Should not be able to modify soft deleted structured property" + ) + except Exception as e: + if "Cannot mutate a soft deleted Structured Property Definition" in str(e): + pass + else: + raise e + + # Attempt to add soft deleted structured property to entity + try: + attach_property_to_entity( + dataset_urns[0], property_name, "test string", graph=graph + ) + raise AssertionError( + "Should not be able to apply a soft deleted structured property to another entity" + ) + except Exception as e: + if "Cannot apply a soft deleted Structured Property 
value" in str(e): + pass + else: + raise e + + +def test_dataset_structured_property_soft_delete_read_mutation( + ingest_cleanup_data, graph, caplog +): + property_name = f"softDeleteReadTest{randint(10, 10000)}Property" + value_type = "string" + property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" + property_value = "test string" + + # Create property on a dataset + create_property_definition( + property_name=property_name, + graph=graph, + value_type=value_type, + cardinality="SINGLE", + ) + attach_property_to_entity( + dataset_urns[0], property_name, property_value, graph=graph + ) + + # Make sure it exists on the dataset + actual_property_values = get_property_from_entity( + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + ) + assert actual_property_values == [property_value] + + # Soft delete the structured property + graph.soft_delete_entity(urn=property_urn) wait_for_writes_to_sync() - dataset = Dataset.from_datahub(graph=graph, urn=dataset_urns[0]) - assert dataset.structured_properties is not None - assert isinstance(dataset.structured_properties, list) - assert [ - int(float(k)) - for k in dataset.structured_properties[ - StructuredPropertyUrn.make_structured_property_urn( - f"{default_namespace}.{property_name}" + # Make sure it is no longer returned on the dataset + actual_property_values = get_property_from_entity( + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + ) + assert actual_property_values is None + + +def test_dataset_structured_property_soft_delete_search_filter_validation( + ingest_cleanup_data, graph, caplog +): + def to_es_name(property_name, namespace=default_namespace): + namespace_field = namespace.replace(".", "_") + return f"structuredProperties.{namespace_field}_{property_name}" + + # Create a test structured property + dataset_property_name = f"softDeleteSearchFilter{randint(10, 10000)}" + property_value = 30 + value_type = "number" + property_urn = ( + 
f"urn:li:structuredProperty:{default_namespace}.{dataset_property_name}" + ) + + create_property_definition( + property_name=dataset_property_name, graph=graph, value_type=value_type + ) + attach_property_to_entity( + dataset_urns[0], dataset_property_name, [property_value], graph=graph + ) + + # Perform search, make sure it works + entity_urns = list( + graph.get_urns_by_filter( + extraFilters=[ + { + "field": to_es_name(dataset_property_name), + "negated": "false", + "condition": "EXISTS", + } + ] + ) + ) + assert len(entity_urns) == 1 + assert entity_urns[0] == dataset_urns[0] + + # Soft delete the structured property + graph.soft_delete_entity(urn=property_urn) + wait_for_writes_to_sync() + + # Perform search, make sure it validates filter and rejects as invalid request + try: + list( + graph.get_urns_by_filter( + extraFilters=[ + { + "field": to_es_name(dataset_property_name), + "negated": "false", + "condition": "EXISTS", + } + ] ) - ] - ] == [property_value] + ) + raise AssertionError( + "Should not be able to filter by soft deleted structured property" + ) + except Exception as e: + if "Cannot filter on deleted Structured Property" in str(e): + pass + else: + raise e diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index 6e8deb41f177ea..e2fabce8ac4e8d 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -10,6 +10,8 @@ wait_for_writes_to_sync, ) +pytestmark = pytest.mark.no_cypress_suite1 + # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false"